#!/usr/bin/mawk -We
# *********************************************************************
# muxtosql: turns an unordered sequence of name/value pairs into the
# 	    SQL statements that are necessary to insert/update a
#	    database table.
# Copyright (c) 2004,2006 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
# $Id: muxtosql,v 1.2 2006/03/10 11:26:13 carlo Exp $

# Initialization: set defaults, read the NoSQL environment settings and
# parse the command line.
#
# Recognized arguments:
#   -u, --update             emit UPDATE statements instead of INSERTs
#   -c, --continuation STR   literal prefix that marks a continuation line
#                            (default '@')
#   -C, --commit             print a final "COMMIT;"
#   -K, --key-columns LIST   comma-separated list of key column names
#   -h, --help               show the help text and exit
#   --show-copying           show the license text and exit
#   --show-warranty          show the warranty text and exit
#   any other word not starting with '-' is taken as the target table
#   name (the last one wins); unknown options are ignored.
BEGIN {
  NULL = "" ; OFS = "\t"; cont = "^@"

  # Get local settings.
  nosql_install = ENVIRON["NOSQL_INSTALL"]
  stdout = ENVIRON["NOSQL_STDOUT"]
  stderr = ENVIRON["NOSQL_STDERR"]

  # Set default values if necessary.
  if (nosql_install == NULL) nosql_install = "/usr/local/nosql"
  if (stdout == NULL) stdout = "/dev/stdout"
  if (stderr == NULL) stderr = "/dev/stderr"

  # Walk the argument vector by index rather than stopping at the first
  # empty string, so that an empty argument cannot hide what follows it.
  for (i = 1; i < ARGC; i++) {
    arg = ARGV[i]
    if (arg == "-u" || arg == "--update") updt = 1
    else if (arg == "-c" || arg == "--continuation") {
       cont = ARGV[++i]
       # The marker is used as a dynamic regex, so every regex
       # metacharacter (including '+', '{' and '}') is backslash-escaped
       # to make it match literally.
       gsub(/[]\\\$()\[\|\^\*\?\.+{}]/,"\\\\&",cont)
       cont = "^" cont
    }
    else if (arg == "-C" || arg == "--commit") commit = 1
    else if (arg == "-K" || arg == "--key-columns")
       klist = "," ARGV[++i] ","	# commas on both ends ease matching
    else if (arg == "-h" || arg == "--help") {
       system("grep -v '^#' " nosql_install "/help/muxtosql.txt")
       exit(rc=1)
    }
    else if (arg == "--show-copying") {
       system("cat " nosql_install "/doc/COPYING")
       exit(rc=1)
    }
    else if (arg == "--show-warranty") {
       system("cat " nosql_install "/doc/WARRANTY")
       exit(rc=1)
    }
    # An empty argument is never a valid table name.
    else if (arg != NULL && arg !~ /^-/) table = arg
  }

  ARGC = 1				# Fix argv[]: read stdin only.

  # target table name is mandatory.
  if (table == NULL) {
     print "Usage: muxtosql [options] table" > stderr
     exit(rc=1)
  }
}

# The whole input stream must be stuffed into an array in memory
# for this program to work. This is normally not a problem though,
# as the amount of input is usually small in most practical cases.

# Empty lines carry no data; skipping them also lets NoSQL list
# format input be accepted.
length($0) == 0 { next }

# Continuation records: once a column name has been seen, a line that
# matches the continuation pattern is appended to the value of that
# column. The marker itself is replaced by a '\\n' sequence so that
# the original line structure of the data is preserved.

name != NULL && sub(cont,"\\\\n") {

  slot = rec SUBSEP p[name]		# same subscript as value[rec,p[name]]
  value[slot] = value[slot] $0
  next
}

# Regular name/value record: the first field is the column name and the
# remainder of the line is its value. Values are stored in
# value[record, column-position]; a key column also contributes a term
# to the WHERE clause of its record.
{
  # skip invalid names, verbosely.
  if ($1 !~ /^[A-Za-z][A-Za-z0-9_]*$/) {
     print "muxtosql: invalid column name '" $1 "'" > stderr
     next
  }

  name = $1
  # Strip the column name. $1 ignores leading blanks, so the pattern
  # must tolerate them too, or an indented line would keep its name
  # inside the value.
  sub(/^[ \t]*[A-Za-z0-9_]+[ \t]*/,NULL)

  # new column ?
  if (!p[name]) {
     p[name] = ++col			# column position, 1-based
     n[col] = name			# reverse map: position -> name
     if (!updt) {
        if (header == NULL) header = "INSERT INTO " table " (\n" name
        else header = header ",\n" name
     }
  }

  # With no explicit key list, the first column seen is the key field.
  if (klist == NULL) klist = "," name ","

  # When a new key value comes in:
  #
  # 1) if this is the first record then place the new value
  #    in the relevant column position.
  #
  # 2) if this is not the first record, then if the column
  #    corresponding to the new key value is null then set
  #    it to the new value, otherwise start a new record
  #    and then insert the new value in the relevant position.

  # Names are plain identifiers, so a substring search is enough.
  k = (index(klist, "," name ",") > 0)
  if (k && (!rec || value[rec,p[name]] != NULL)) rec++

  value[rec,p[name]] = $0

  if (k) {
     # Escape the key value exactly as the END rule escapes column
     # values (TABs first, then quotes and backslashes); otherwise a
     # quote in a key would break the generated WHERE clause.
     kval = $0
     gsub(/\t/,"\\\\t",kval)
     gsub(/['\\]/,"\\\\&",kval)
     if (where[rec] == NULL)
        where[rec] = "WHERE " name " = '" kval "'"
     else
        where[rec] = where[rec] " AND " name " = '" kval "'"
  }
}

# Output stage: turn the records collected in value[] into SQL.
#
# In update mode one UPDATE statement is printed per record, setting
# every non-key column and using the WHERE clause collected for that
# record. Otherwise one INSERT statement is printed per non-empty
# record. A trailing "COMMIT;" is added if it was requested.
END {

  # An exit code set earlier (help, usage error) ends the run here.
  if (rc) exit(rc)

  if (updt) {
     for (i=1; i<=rec; i++) {
         print "UPDATE " table " SET"
         # Reset the separator once per record. Resetting it whenever a
         # key column is met would drop the comma after a key column
         # that sits between two non-key columns.
         comma = NULL
         for (j=1; j<=col; j++) {
             # key columns belong to the WHERE clause, not to SET.
             if (index(klist, "," n[j] ",")) continue
             gsub(/\t/,"\\\\t",value[i,j])	# escape TABs in values
             gsub(/['\\]/,"\\\\&",value[i,j])	# escape SQL special chars.
             printf("%s%s = '%s'",comma,n[j],value[i,j])
             comma = ",\n"
         }
         print "\n" where[i] ";\n"
     }
     if (commit) print "COMMIT;"
     exit(rc=0)
  }

  # skip final printing if no valid input.
  if (header == "") exit(rc=0)

  for (i=1; i<=rec; i++) {
      out = "VALUES (\n'"
      filled = 0			# set once any column has data
      for (j=1; j<=col; j++) {
          gsub(/\t/,"\\\\t",value[i,j])		# escape TABs in values
          gsub(/['\\]/,"\\\\&",value[i,j])	# escape SQL special chars.
          if (length(value[i,j]) > 0) filled = 1
          if (j > 1) out = out "',\n'"
          out = out value[i,j]
      }
      # omit empty records. The test cannot be made on 'out', which
      # always starts with the "VALUES (" prefix.
      if (filled) {
         print header ")"
         print out "'\n);\n"
      }
  }
  if (commit) print "COMMIT;"
}

# End of program.
