#!/usr/bin/awk -f
#****************************************************************************
#  ##   ##         #####   #####  ##     **       NoSQL RDBMS - ddjoin      *
#  ###  ##        ####### ####### ##     **        $Revision: 1.1.1.1 $       *
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **   Carlo Strozzi (c) 1998-2000   *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Written by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
#       to print, pipe through 'pr -t -e8'
#****************************************************************************
# NAME
#        ddjoin - print column positions, in a form suitable for join(1).
#
# SYNOPSIS
#        ddjoin < table2
#
#        Note: options must be passed through the environment 
#        variable _awk_args, i.e.:
#
#        _awk_args='[options] table1 [-j column] [-1 column_1] [-2 column_2]'
#
#
# DESCRIPTION
#
#     Takes two table headers, one as a command-line argument and the other
#     one from STDIN, plus a join column specification, and prints the
#     corresponding statements in a format suitable for the join(1) utility.
#     An error code is returned if the command-line table cannot be read.
#
#     If only one of '-1' or '-2' is specified, then the other one defaults
#     to the first (leftmost) field of the associated table.
#
#     Either '-j' or the '-1,-2' options should be specified. If they are
#     both present, 'join' will try to apply them in a sensible fashion,
#     i.e. if '-j' comes last it will override both '-1' and '-2', while if
#     '-j' comes first then '-1,-2' will override the '-j' specification
#     only for the associated table.
#
#     If neither '-j' nor '-1|-2' are specified, then the two tables
#     are joined on their respective first (leftmost) fields.
#
# OPTIONS
#     -J|--suppress-join-column
#          Exclude the join column (leftmost column) from the output table.
#
#     -l|--last
#          If the input table contains duplicated column names
#          pick the last occurrence of each. The default is to
#          pick the first one. This is sometimes useful after
#          the 'join' operator.
#
########################################################################

# Initialization: parse the options found in the '_awk_args' environment
# variable, load the header (first line) of the command-line table and
# resolve the position of its join column.
#
# Globals set here:
#   c_names[1], c_names[2]  requested join column names ("---" if not given)
#   no_jcol                 1 if -J|--suppress-join-column was given
#   pick_last               1 if -l|--last was given
#   h_file                  name of the command-line table
#   N1[position] = name     column names of the command-line table
#   P1[name] = position     column positions of the command-line table
#   j1                      position of the join column in that table
#
# The program exits with status 1 if no table name was given or if its
# header cannot be read.
BEGIN {
  NULL = "" ; FS = OFS = "\t"; split( ENVIRON["_awk_args"], args, " " )

  # Join columns should default to non-existent names.
  c_names[1] = c_names[2] = "---"

  i = 0
  while ( args[++i] != NULL )
  {
    if ( args[i] == "-j" ) c_names[1] = c_names[2] = args[++i]
    else if ( args[i] == "-J" || args[i] == "--suppress-join-column" )
      no_jcol = 1
    else if ( args[i] == "-1" ) c_names[1] = args[++i]
    else if ( args[i] == "-2" ) c_names[2] = args[++i]
    else if ( args[i] == "-l" || args[i] == "--last" ) pick_last = 1
    # Any other word is taken as the table name. The word itself is used
    # (not the one following it), so that 'table1 -j column' works as
    # documented. If several such words are present, the last one wins.
    else h_file = args[i]
  }

  # Exit with an error if the command-line header file is missing.
  if ( h_file == NULL ) exit(1)

  # Load the command-line header file; only its first line is needed.
  # getline returns 0 on end-of-file and -1 on error.
  if ( ( getline < h_file ) <= 0 ) exit(1)
  close( h_file )

  i = 0
  while ( ++i <= NF )
  {
    # With --last every occurrence overwrites the recorded position, so
    # the last one wins; otherwise only the first occurrence is recorded
    # and later duplicates leave no entry in N1.
    if ( pick_last ) { P1[$i] = i ; N1[i] = $i }
    else
    {
      if ( ! P1[$i] ) { P1[$i] = i ; N1[i] = $i }
    }
  }

  # Exit if the header is empty. The name is compared as a string, so
  # that a first column called "0" is not mistaken for a missing one.
  if ( N1[1] == NULL ) exit(1)

  j1 = P1[c_names[1]]

  # Default to 1st column on invalid column name.
  if ( j1 == NULL ) { c_names[1] = N1[1] ; j1 = P1[c_names[1]] }
}

########################################################################
# Main loop
########################################################################

# First record of STDIN: the header of the second table.
#
# Records the column names/positions of the second table in N2/P2 (same
# layout as N1/P1 for the command-line table), resolves its join column
# and prints the '-o', '-1' and '-2' arguments for join(1) on a single
# line, without a trailing newline. Exits with status 1 if the header is
# empty.
NR == 1 {
  # Load the second header file.
  i = 0
  while ( ++i <= NF )
  {
    # Same duplicate-name policy as for the first table: last occurrence
    # wins with --last, first occurrence otherwise.
    if ( pick_last ) { P2[$i] = i ; N2[i] = $i }
    else
    {
      if ( ! P2[$i] ) { P2[$i] = i ; N2[i] = $i }
    }
  }

  # Exit if unable to read the 2nd header file. The name is compared as
  # a string, so that a first column called "0" is accepted.
  if ( N2[1] == NULL ) exit(1)

  j2 = P2[c_names[2]]

  # Default to 1st column on invalid column name.
  if ( j2 == NULL ) { c_names[2] = N2[1] ; j2 = P2[c_names[2]] }

  # Highest position recorded for the first table. Ignored duplicate
  # names leave holes in N1, so the positions must be walked up to this
  # bound rather than up to the first empty slot, or every column that
  # follows a duplicate would be dropped.
  n1 = 0
  for ( k in N1 ) if ( k + 0 > n1 ) n1 = k + 0

  # Build output field list for join(1), join column first.
  if ( !no_jcol ) field_list = " 1." P1[N1[j1]]

  for ( c = 1; c <= n1; c++ )
  {
    if ( N1[c] == NULL ) continue	# Hole left by a duplicate name.

    # Columns named "." are left out, and the join column has already
    # been dealt with above.
    if ( N1[c] != "." && c != j1 )
    {
      field_list = field_list ",1." P1[N1[c]]
    }
  }

  # For the second table NF is the number of header fields.
  for ( c = 1; c <= NF; c++ )
  {
    if ( N2[c] == NULL ) continue	# Hole left by a duplicate name.

    if ( N2[c] != "." && c != j2 )
    {
      field_list = field_list ",2." P2[N2[c]]
    }
  }

  # Without the join column the list starts with a comma. It is turned
  # into the separating blank only after both tables have been added, so
  # that this also works when the first table contributes no columns.
  if ( no_jcol ) sub( /^,/, " ", field_list )

  printf("-o %s -1 %s -2 %s", field_list, j1, j2)
}

# Only the header line is of interest: stop at the second record.
NR >= 2 { exit }

########################################################################
# End of program.
########################################################################

