#!/usr/bin/mawk -We
# *********************************************************************
#  Written by and copyright Carlo Strozzi <carlos@linux.it>.
#
#  tabletoxml: convert a NoSQL table to basic XML format.
#  Copyright (C) 1998-2001 Carlo Strozzi <carlos@linux.it>
# 
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
# 
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
# 
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  2001-02-08 Ported to NoSQL v3
#
# *********************************************************************
#
#  Usage: tabletoxml [-n|--no-unescape] [-d|--database name] \
#	    [-t|--table name] [-r|--row-number] [-f|--field-number] \
#	    [-P|--no-prolog] [-K|--with-key] [-i|--input file] \
#	    [-o|--output file]
#
# *********************************************************************

BEGIN {
  # Defaults: tab-separated input, unescape field values, emit the prolog.
  NULL = ""; FS = "\t"; unescape = 1; prolog = 1

  # Parse the command line.  The loop is bounded by ARGC (rather than
  # stopping at the first empty ARGV element) so that an empty argument
  # cannot cut option parsing short.  Options that take a value consume
  # the argument that follows them.
  for (i = 1; i < ARGC; i++) {
     if (ARGV[i] == "-n" || ARGV[i] == "--no-unescape") unescape = 0
     else if (ARGV[i] == "-r" || ARGV[i] == "--row-number") rnum = 1
     else if (ARGV[i] == "-f" || ARGV[i] == "--field-number") fnum = 1
     else if (ARGV[i] == "-d" || ARGV[i] == "--database") dbname = ARGV[++i]
     else if (ARGV[i] == "-t" || ARGV[i] == "--table") tblname = ARGV[++i]
     else if (ARGV[i] == "-P" || ARGV[i] == "--no-prolog") prolog = 0
     else if (ARGV[i] == "-K" || ARGV[i] == "--with-key") key = 1
     else if (ARGV[i] == "-i" || ARGV[i] == "--input") i_file = ARGV[++i]
     else if (ARGV[i] == "-o" || ARGV[i] == "--output") o_file = ARGV[++i]
  }

  # Hide the options from awk so that they are not taken as input files;
  # with ARGC == 1 the table is read from standard input.
  ARGC = 1					# Fix argv[]

  if (o_file == NULL) o_file = "/dev/stdout"
  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }

  # Print XML Prolog.

  if (prolog) {
     #print "<?xml version=\"1.0\" standalone=\"yes\"?>" > o_file
     print "<?xml version=\"1.0\"?>" > o_file

     # Print DTD in Internal Subset of the Prolog.
     print "<!DOCTYPE nosql-db [" > o_file

     # Locate the DTD: NOSQL_PATH from the environment wins, otherwise the
     # @NOSQLPATH@ placeholder is used (presumably substituted with the
     # real path when the package is installed -- confirm against the
     # build scripts).
     if (ENVIRON["NOSQL_PATH"] == NULL)
	sDtd = "@NOSQLPATH@/lib/nosqldb.dtd"
     else
	sDtd = ENVIRON["NOSQL_PATH"] "/lib/nosqldb.dtd"

     # Copy the DTD verbatim.  getline returns 1 per line read, 0 at end
     # of file and -1 on error; the result must be compared with "> 0"
     # because a bare -1 is true and would make the loop spin forever
     # when the DTD file is missing or unreadable.
     while ((dtd_rc = (getline dtd_line < sDtd)) > 0) print dtd_line > o_file
     if (dtd_rc < 0)
	print "tabletoxml: cannot read DTD file " sDtd > "/dev/stderr"
     print "]>" > o_file

     close(sDtd)

     # DB element.
     printf("<nosql-db") > o_file
     if (dbname != NULL) printf(" name=\"%s\"", dbname) > o_file
     print ">" > o_file
  }
}

# Header line: holds the tab-separated column names of the table.
NR == 1 {
   ncol = split($0, C)

   # An empty header yields no <table> element at all.
   if (!ncol) next

   # Opening tag of the table, with the optional name attribute.
   printf("  <table") > o_file
   if (tblname != NULL) {
      printf(" name=\"%s\"", tblname) > o_file
   }
   print ">" > o_file

   # One <column> element per column name; the first column is flagged
   # as the key when that was requested on the command line.
   i = 0
   while (++i <= ncol) {
      printf("    <column") > o_file
      if (fnum) {
         printf(" number=\"%d\"", i) > o_file
      }
      if (i == 1 && key) {
         printf(" key=\"yes\"") > o_file
      }
      printf(">%s</column>\n", C[i]) > o_file
   }

   next
}

# The second line of the table is the dashline; it carries no data, so
# it is discarded.
NR == 2 {
   next
}

# Every remaining line is a data row: emit one <row> element holding one
# <data> element per header column.
{
  printf("    <row") > o_file
  if (rnum) {
    # The header and the dashline are not rows, hence the offset of 2.
    printf(" number=\"%d\"", NR-2) > o_file
  }
  print ">" > o_file

  i = 0
  while (++i <= ncol) {
    cell = $i

    # Turn escaped tabs and newlines back into the real characters
    # before the XML escaping takes place.
    if (unescape) {
      cell = NoSQL_Unescape(cell)
    }

    # XML-escape the markup characters; the ampersand has to go first
    # so that the entities produced below are not escaped again.
    gsub(/&/, "\\&amp;", cell)
    gsub(/</, "\\&lt;", cell)
    gsub(/>/, "\\&gt;", cell)

    printf("      <data") > o_file
    if (fnum) {
      printf(" field=\"%d\"", i) > o_file
    }
    printf(">%s</data>\n", cell) > o_file
  }

  print "    </row>" > o_file
}

END {
   # Close the elements in the reverse order they were opened: the table
   # only if a non-empty header was seen, the database element only if
   # the prolog (which opens it) was printed.
   if (ncol) {
      print "  </table>" > o_file
   }
   if (prolog) {
      print "</nosql-db>" > o_file
   }
}

# *********************************************************************
# NoSQL_Unescape(string)
#
# Takes a string and translates any unescaped '\t' and '\n' strings into
# physical tabs and newlines respectively. Returns the converted string.
# *********************************************************************
function NoSQL_Unescape(s,      S,i,s_length,a,escaped) {
  # Only s is a real argument; the remaining names are local variables.
  # Explode s into one array element per character.
  s_length = split(s, a, "")

  # Visit one slot past the end (that element is empty) so that a
  # backslash pending at the very end of s is still flushed to S.
  for (i = 1; i <= s_length + 1; i++) {
    if (escaped) {
      # The previous character was a backslash: "\n" and "\t" become a
      # real newline and tab, any other pair is kept verbatim.
      escaped = 0
      if (a[i] == "n") {
        S = S "\n"
      } else if (a[i] == "t") {
        S = S "\t"
      } else {
        S = S "\\" a[i]
      }
    } else if (a[i] == "\\") {
      # Hold the backslash back until the next character is known.
      escaped = 1
    } else {
      S = S a[i]
    }
  }
  return S
}

#
# End of program.
#
