#!/usr/bin/mawk -We
# *********************************************************************
#  update: inserts/updates/deletes table rows based on the contents
#	   of an edit table.
#  Copyright (c) 2000,2001,2002,2003 Carlo Strozzi
# 
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
# 
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
# 
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
#  $Id: update,v 1.3 2003/03/29 20:45:16 carlo Exp $

# Initialisation: parse the command line, load the edit table into
# memory and make the table being updated the main input stream.
#
# Globals set here for the rules that follow:
#   d_string         column-2 value that marks an edit row as a deletion
#   no_hdr           non-zero when the output header must be suppressed
#   ep[name]         position of column 'name' in the edit table header
#   edits[key]       complete edit row whose first column is 'key'
#   keys[key]        set for every key that occurs in the edit table
#   edit_numfields   number of columns in the edit table header
BEGIN {
  NULL = "" ; FS = OFS = "\t"; d_string = "..DEL.."

  # Get local settings.
  nosql_install = ENVIRON["NOSQL_INSTALL"]
  stdout = ENVIRON["NOSQL_STDOUT"]
  stderr = ENVIRON["NOSQL_STDERR"]

  # Set default values if necessary.
  if (nosql_install == NULL) nosql_install = "/usr/local/nosql"
  if (stdout == NULL) stdout = "/dev/stdout"
  if (stderr == NULL) stderr = "/dev/stderr"

  # Scan the command line; the last non-option argument is the file name.
  while (ARGV[++i] != NULL) {
    # Appending "" turns the delete marker into a plain string, so it is
    # always compared textually even when it happens to look like a number.
    if (ARGV[i] == "-d" || ARGV[i] == "--delete") d_string = ARGV[++i] ""
    else if (ARGV[i] == "-N" || ARGV[i] == "--no-header") no_hdr = 1
    else if (ARGV[i] == "-l" || ARGV[i] == "--last") pick_last = 1
    else if (ARGV[i] == "-s" || ARGV[i] == "--stdin") swap = 1
    else if (ARGV[i] == "-w" || ARGV[i] == "--write-size") s_file = ARGV[++i]
    else if (ARGV[i] == "-h" || ARGV[i] == "--help") {
       system("grep -v '^#' " nosql_install "/help/update.txt")
       rc = 1
       exit(rc)
    }
    else if (ARGV[i] !~ /^-/) e_file = ARGV[i]
  }

  if (e_file == NULL) {
     print "Usage: update [options] table_1 < table_2" > stderr
     rc = 1				       # Make END bail out at once.
     exit(rc)
  }

  #
  # Load the associative arrays with the updates. With --stdin the
  # edit table is read from stdin, otherwise from the named file.
  #
  # A missing or unreadable edit table is treated like an empty one:
  # getline then never returns a positive value and no edits are loaded.
  #

  if (swap) e_src = "-"
  else e_src = e_file

  i = 0
  while ((getline < e_src) > 0) {
     if (++j == 1) {			       # Header of the edit table.
        gsub(/\001/, "")		       # Remove SOH markers.
        edit_numfields = NF		       # No. of fields in edit table.
        while (++i <= NF) {
           # With duplicate column names keep the first occurrence,
           # or the last one when --last was given.
           if (pick_last) ep[$i] = i
           else if (ep[$i] == NULL) ep[$i] = i
        }
     }  else {
        edits[$1] = $0			       # Edit array.
        keys[$1] = 1			       # Key array.
     }
  }

  if (swap) ARGV[1] = e_file		       # Table to update is the file.
  else {
     close(e_file)			       # Let's spare resources.
     ARGV[1] = "-"			       # Table to update is stdin.
  }

  # Optionally record the number of edit rows (header excluded).
  # An empty edit table counts as zero rows, never as -1.
  if (s_file != NULL) {
     n_edits = (j > 0) ? j - 1 : 0
     print n_edits > s_file
     close(s_file)
  }

  NR = 0				       # Reset record counter.
  NF = 0
  ARGC = 2				       # Fix argv[].
}

# At this point the main input stream (stdin by default, or table_1
# when --stdin was given) is always the table that is being updated.

# Header record of the table being updated: emit the output header
# (unless --no-header was given) and remember the column names.
NR == 1 {
  gsub(/\001/, "")			       # Remove SOH markers.
  old_numfields = split($0, n)		       # No. of fields in main table.
  if (!no_hdr) {
     out_hdr = $0			       # Do not add SOH's to $0 yet.
     printf("\001"); gsub(/\t/,"\t\001",out_hdr); print out_hdr
     fflush(stdout)
  }

  # c_names[i] is the name of column i. The data rules index this array
  # by field position, so it is filled position by position; building it
  # from a de-duplicated name list would shift every entry that follows
  # a repeated or empty column name.
  for (i = 1; i <= NF; i++) c_names[i] = $i
  next
}

#
# Apply the updates to already existing keys.
#

# Fires for every row whose key (column 1) has a non-empty edit row.
# The 'in' test comes first so that rows without an edit do not create
# an empty edits[] element each, as a bare edits[$1] reference would.
($1 in edits) && split(edits[$1], a) {	# Something to do ?
   status[$1] = 1			# Mark key as updated.
   if (a[2] == d_string) next		# Deletion: drop this record.
   printf("%s", a[1])
   for (i = 2; i <= NF; i++) {
      # Columns that also exist in the edit table take the edited value.
      if (ep[c_names[i]]) repl = a[ep[c_names[i]]]
      else {
	 #
	 # This is to cope with the case where 'table_2'
	 # itself contains delete strings in column 2,
	 # i.e. when we use 'update' against a Table Journal.
	 #
	 repl = ($i == d_string) ? NULL : $i
      }
      printf("\t%s", repl)
   }
   printf("\n")				# Print record separator.
   next
}

# Any record not consumed by one of the rules above is copied unchanged.
{ print $0 }

# After the whole table has been copied, append the edit rows whose key
# never occurred in it (inserts). Nothing is appended when an earlier
# exit left a non-zero status in 'rc'.
END {
   if (rc) exit(rc)

   for (key in keys) {
      split(edits[key], rec)
      if (status[key]) continue		# Already applied as an update.
      if (rec[2] == d_string) continue	# Deleting an absent key: no-op.

      # Lay the new row out in the column order of the updated table;
      # columns that the edit table does not have are left empty.
      row = rec[1]
      col = 1
      while (++col <= old_numfields) {
         pos = ep[c_names[col]]
         row = row "\t" (pos ? rec[pos] : NULL)
      }
      printf("%s\n", row)
   }
}

# End of program.
