/*
  row: select table rows matching a given expression that refers to
  column names.

  Copyright (c) 2000,2001,2002,2003 Carlo Strozzi

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

  $Id: row.c,v 1.4 2003/09/23 10:20:36 carlo Exp $

*/

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/file.h>
#include <string.h>

/* Location of the help text displayed by -h/--help.  Either macro may be
   predefined on the compiler command line to override the default. */
#if !defined(HELPDIR)
#define HELPDIR "/usr/local/nosql/help"
#endif
#if !defined(HELPFILE)
#define HELPFILE (HELPDIR "/row.txt")
#endif

/* Growth increment for the dynamically allocated buffers: counted in bytes
   for the character buffers and in elements for the offset array.
   Do not set to less than sizeof(int) */
#define ALLOCSIZ 32

/* Size of the column-name work buffers: the longest accepted column name
   is MAXCOLNAME-1 (30) characters, plus the trailing zero */
#define MAXCOLNAME 31

/*
 * Table header plus the AWK program being assembled for the back-end.
 * The three "top" counters start at -1, meaning "nothing stored yet".
 */
typedef struct Header {
  char *names;		/* column names, back to back, each NUL-terminated */
  char *cmd;		/* buffer for the optional back-end command text */
  int top;		/* top used location in *names (0-n) */
  int end;		/* growth bound of *names, advanced by ALLOCSIZ */
  int *offset;		/* start offset of each column name in *names */
  int otop;		/* top used location in *offset */
  int oend;		/* growth bound of *offset, advanced by ALLOCSIZ */
  int ctop;		/* top used location in *cmd (0-n) */
  int cend;		/* growth bound of *cmd, advanced by ALLOCSIZ */
} Header;

/* Run-time switches, set from the command line in main() */
struct {
  unsigned header     : 1;	/* print the header line (cleared by -N and -t) */
  unsigned debug      : 1;	/* -x: trace the parser, dump the AWK program */
  unsigned notempty   : 1;	/* -e: add the rule skipping blank/TAB-only lines */
  unsigned testonly   : 1;	/* -t: only report whether any row matched */
  unsigned firstmatch : 1;	/* -f: stop after the first matching row */
  unsigned keyonly    : 1;	/* -K: print only the first column of matches */
} flags;

/* global variables */
static char *progname = NULL;	/* argv[0], set by main(); prefixes diagnostics */

/* Function definitions */

/*
 * Unbuffered input routine: fetch one byte from file descriptor 0 with
 * read(2), bypassing stdio.  Returns the byte as a value in 0-255, or EOF
 * when read() does not deliver exactly one byte (end of input or error).
 */
static int getch(void) {
  unsigned char byte;

  if (read(0, &byte, 1) != 1)
     return EOF;

  return byte;
}

/* Write the usage synopsis to stderr.  Always returns 1, so callers can
   pass the result straight to exit(). */
static int eusage(void) {
  const int status = 1;

  (void) fprintf(stderr, "Usage: %s [options] 'AWK statements'\n", progname);

  return status;
}

/* Report on stderr that a column name exceeds the supported width
   (MAXCOLNAME-1 characters).  Always returns 1, for use with exit(). */
static int etoolong(void) {
  const int widest = MAXCOLNAME - 1;

  (void) fprintf(stderr, "%s: max. column name width (%d) exceeded\n",
        progname, widest);

  return 1;
}

/*
 * Add a new column to the table header.
 *
 *   h     header to extend; its names/offset fields are set up by the very
 *         first call to this function (tracked by a function-local static,
 *         so only one Header per process is initialised here)
 *   name  NUL-terminated column name
 *
 * A name that is already present is silently ignored.  On allocation
 * failure a message is printed with perror() and the process exits with
 * status 1.
 *
 * Buffer invariants: h->end and h->oend are the LAST VALID INDEX of
 * h->names and h->offset, i.e. the allocations hold end+1 bytes and
 * oend+1 ints respectively.
 */
static void addcol(Header *h, char *name) {

  static int init = 1;
  size_t len = strlen(name);
  void *grown;
  int i;

  /* init header if first time */

  if (init) {
     h->top = -1;
     h->otop = -1;
     h->end = ALLOCSIZ - 1;
     h->oend = ALLOCSIZ - 1;

     h->names = malloc((size_t) h->end + 1);
     h->offset = malloc(((size_t) h->oend + 1) * sizeof *h->offset);

     if (h->names == NULL || h->offset == NULL) {
        perror(progname);
        exit(1);
     }
     h->names[0] = '\0';			/* init string */
     init = 0;
  }

  /* ignore duplicated column names, if any */
  for (i = 0; i <= h->otop; i++)
     if (!strcmp(name, h->names + h->offset[i])) return;

  /* The name is stored at top+1 and its terminator lands at top+1+len,
     which must not exceed the last valid index.  Loop so that a name
     longer than one ALLOCSIZ increment is also accommodated. */
  while ((size_t) (h->top + 1) + len > (size_t) h->end) {
     h->end += ALLOCSIZ;
     if ((grown = realloc(h->names, (size_t) h->end + 1)) == NULL) {
        perror(progname);
        exit(1);
     }
     h->names = grown;
  }

  /* make room for one more offset; the array must really grow by
     ALLOCSIZ elements, not be re-allocated at its initial size */
  if (h->otop >= h->oend) {
     h->oend += ALLOCSIZ;
     grown = realloc(h->offset, ((size_t) h->oend + 1) * sizeof *h->offset);
     if (grown == NULL) {
        perror(progname);
        exit(1);
     }
     h->offset = grown;
  }

  /* add offset for the new column, relative to h->names */
  h->offset[++h->otop] = h->top + 1;

  /* add new name (terminator included) to column names; afterwards
     h->top is the index of that terminator */
  memcpy(h->names + h->top + 1, name, len + 1);
  h->top += (int) len + 1;
}

/*
 * Append text to the command buffer h->cmd.
 *
 *   h    header whose cmd buffer is extended; the buffer is created by the
 *        very first call to this function (tracked by a function-local
 *        static, so only one Header per process is initialised here)
 *   cmd  NUL-terminated source string
 *   len  number of leading characters of cmd to append; a value <= 0 or
 *        larger than strlen(cmd) means "the whole string"
 *
 * Returns the number of characters actually appended.  On allocation
 * failure a message is printed with perror() and the process exits with
 * status 1.
 *
 * Buffer invariants: h->cmd is always NUL-terminated, h->ctop is the index
 * of its last character (-1 when empty) and h->cend is the LAST VALID
 * INDEX of the allocation, which therefore holds cend+1 bytes.
 */
static int addcmd(Header *h, const char *cmd, int len) {

  static int init = 1;
  int avail;
  void *grown;

  /* init h->cmd if first time */

  if (init) {
     h->ctop = -1;
     h->cend = ALLOCSIZ - 1;

     if ((h->cmd = malloc((size_t) h->cend + 1)) == NULL) {
        perror(progname);
        exit(1);
     }
     h->cmd[0] = '\0';			/* init string (mandatory!) */
     init = 0;
  }

  avail = (int) strlen(cmd);

  if (len <= 0 || len > avail) len = avail;	/* set default */

  /* After the append the terminator sits at ctop+1+len, which must not
     exceed the last valid index.  Loop so that any length fits, not just
     lengths below one ALLOCSIZ increment. */
  while (h->ctop + 1 + len > h->cend) {
     h->cend += ALLOCSIZ;
     if ((grown = realloc(h->cmd, (size_t) h->cend + 1)) == NULL) {
        perror(progname);
        exit(1);
     }
     h->cmd = grown;
  }

  /* the current end of the string is known, so copy straight to it
     instead of having strncat() rescan the buffer on every call */
  memcpy(h->cmd + h->ctop + 1, cmd, (size_t) len);
  h->ctop += len;
  h->cmd[h->ctop + 1] = '\0';

  return len;
}

/*
 * Look up a column by name.
 *
 *   h     header to search
 *   name  NUL-terminated column name
 *
 * Returns the zero-based position of the first column whose name equals
 * `name`, or -1 if there is none.  Entries with a negative offset are
 * skipped; the check is made BEFORE the offset is used to index h->names,
 * so such an entry can never cause an out-of-bounds read.
 */
static int colpos(Header *h, char *name) {

  int i;

  for (i = 0; i <= h->otop; i++) {
      if (h->offset[i] < 0) continue;

      if (strcmp(name, h->names + h->offset[i]) == 0)
	return i;
  }

  return -1;
}

/*
 * Write the table header to stdout: every column name is prefixed with a
 * \001 byte, names are separated by TABs and the line ends with a newline.
 * Entries with a negative offset are left out.  Nothing at all is printed
 * when flags.header is off, and no newline is printed when no column was
 * written.
 */
static void printhdr(Header *h) {

  int col;
  int shown = 0;			/* columns written so far */

  if (!flags.header) return;

  for (col = 0; col <= h->otop; col++) {

     if (h->offset[col] < 0) continue;

     /* prepend TAB if not first column */
     if (shown > 0) printf("\t\001");
     else printf("\001");
     shown++;

     printf("%s", h->names + h->offset[col]);
  }

  /* add NL if at least one column */
  if (shown > 0) printf("\n");
}

/*
 * Append one identifier collected by parser() to h->cmd: if it names a
 * table column it is replaced by the AWK field reference "$(N)" (N being
 * the 1-based column position), otherwise it is copied literally.
 */
static void putname(Header *h, char *word) {

  char field[32];			/* "$(" + decimal int + ")" */
  int pos = colpos(h, word);

  if (pos < 0) {
     addcmd(h, word, 0);		/* add literally to h->cmd */
     return;
  }

  snprintf(field, sizeof field, "$(%d)", pos + 1);
  addcmd(h, field, 0);
}

/*
 * Translate the user expression `s` into AWK text appended to h->cmd.
 *
 * Identifiers (a letter or '_' followed by letters, digits or '_') are
 * passed through putname(), which substitutes column names with field
 * references; every other character is copied unchanged.  The character
 * following a backslash is copied without any interpretation.
 *
 * An identifier longer than MAXCOLNAME-1 characters terminates the
 * program through etoolong().  The length is checked before each
 * character is stored, so the work buffer always has room for the
 * terminator, including when the identifier is the last thing in `s`.
 *
 * NOTE(review): the quoted/slashed/tilde state is maintained and shown in
 * the debug trace, but nothing in this function uses it to suppress
 * substitution inside strings or regular expressions -- confirm whether
 * that is intended.
 */
static void parser(Header *h, char *s) {

  int i=0, quoted=0, escaped=0, slashed=0, tilde=0;

  char tmpbuf[MAXCOLNAME] = "";	 	/* local work buffer */

  for (; *s; s++) {

    /* <ctype.h> functions require a value representable as unsigned char */
    unsigned char ch = (unsigned char) *s;

    if (flags.debug)
	fprintf(stderr, ">>> %c q=%d, e=%d, t=%d, s=%d\n",
				*s,quoted,escaped,tilde,slashed);

    if (escaped) {
       addcmd(h, s, 1);		/* append character to h->cmd */
       escaped = 0;
       continue;
    }

    if (*s == '~' && !quoted && !slashed) tilde = 1;

    else if (*s == '\\') escaped = 1;

    /* The "/" character triggers regexp matching only after an
       unquoted "~", otherwise it stands for the divide operator. */

    else if (tilde) {
       if (*s == '/') slashed = 1;
       else if (*s == '"') quoted = 1;
       tilde = 0;
    }

    else if (*s == '/') slashed = 0;

    else if (*s == '"') quoted = !quoted;

    /* identifier character: letters and '_' anywhere, digits only after
       the first character */
    if (isupper(ch) || islower(ch) || ch == '_' || (i > 0 && isdigit(ch))) {
       if (i >= MAXCOLNAME - 1) exit(etoolong());
       tmpbuf[i++] = *s;
       continue;
    }

    /* any other character ends the pending identifier, if there is one */
    if (i > 0) {
       tmpbuf[i] = '\0';
       putname(h, tmpbuf);
       i = 0;
    }

    addcmd(h, s, 1);			/* append character to h->cmd */
  }

  /* In case there are leftovers in *tmpbuf */

  if (i > 0) {
     tmpbuf[i] = '\0';
     putname(h, tmpbuf);
  }

  /* s now points at the terminator, so this appends nothing; it only
     makes sure addcmd() has run at least once, even for an empty string */
  addcmd(h, s, 1);
}

/*
 * Entry point.
 *
 * Parses the options, reads the table header (the first input line) one
 * byte at a time from file descriptor 0, builds an AWK program from the
 * last command-line argument with column names replaced by field
 * references, prints the header (unless disabled) and finally replaces
 * the process with "mawk" running that program.
 *
 * Exit status: 1 on usage errors, over-long column names, files that
 * cannot be opened or exec failure; 0 when the input ends before a
 * complete header line was read.  Otherwise the status is whatever the
 * exec'ed program returns.
 */
int main(int argc, char *argv[]) {

  int i = 0, min_args = 2;
  int c;			/* int, not char: must be able to hold EOF */

  char tmpbuf[MAXCOLNAME] = "";	 	/* local work buffer */

  Header h;			/* set up by the first addcol()/addcmd() call */

  /* building blocks of the generated AWK program */
  const char *awk[] = {
      "BEGIN{FS=OFS=\"\\t\";}",
      "$0~/^[ \\t]+$/{next}",
      "{_nosql_nr++;",
      "_nosql_ok=1;",
      "print $1;",
      "print;",
      "exit",
      "}END{",
      "print (_nosql_ok/1);",
      "}"
  };

  progname = argv[0];

  flags.header = 1;		/* default it to print the header */
  flags.debug = 0;
  flags.notempty = 0;
  flags.testonly = 0;
  flags.firstmatch = 0;
  flags.keyonly = 0;

  while (++i < argc && *argv[i] == '-') {

    min_args++;

    if (!strcmp(argv[i], "-x") ||
  	     !strcmp(argv[i], "--debug")) flags.debug = 1;

    else if (!strcmp(argv[i], "-N") ||
  	     !strcmp(argv[i], "--no-header")) flags.header = 0;

    else if (!strcmp(argv[i], "-e") ||
  	     !strcmp(argv[i], "--not-empty")) flags.notempty = 1;

    else if (!strcmp(argv[i], "-t") || !strcmp(argv[i], "--test")) {
	 flags.testonly = 1;
	 flags.firstmatch = 1;
	 flags.header = 0;
    }

    else if (!strcmp(argv[i], "-f") ||
  	     !strcmp(argv[i], "--first-match")) flags.firstmatch = 1;

    else if (!strcmp(argv[i], "-K") ||
  	     !strcmp(argv[i], "--key-only")) flags.keyonly = 1;

    else if (!strcmp(argv[i], "-i") ||
    	     !strcmp(argv[i], "--input")) {

      if (++i >= argc || *argv[i] == '-') exit(eusage());

      min_args++;

      /* freopen() reports failure with a NULL pointer */
      if (freopen(argv[i], "r", stdin) == NULL) {
         perror(argv[i]);
         exit(1);
      }
    }

    else if (!strcmp(argv[i], "-o") ||
    	     !strcmp(argv[i], "--output")) {

      if (++i >= argc || *argv[i] == '-') exit(eusage());

      min_args++;

      if (freopen(argv[i], "w", stdout) == NULL) {
    	 perror(argv[i]);
    	 exit(1);
      }
    }

    else if (!strcmp(argv[i], "-h") ||
    	     !strcmp(argv[i], "--help")) {

      execlp("grep","grep","-v","^#",HELPFILE,(char *) 0);
      perror("grep");
      exit(1);
    }
  }

  if (argc < min_args) exit(eusage());

  /* Read the header line byte by byte, so that everything after it is
     left unread on the descriptor for the program exec'ed below. */

  i = 0;					/* Re-use counter */
  while ((c = getch()) != EOF) {

     if (c == '\001') continue;			/* ignore SOH chars */

     if (i >= MAXCOLNAME) exit(etoolong());

     if (c != '\t' && c != '\n') {
	tmpbuf[i++] = (char) c;
	continue;
     }

     tmpbuf[i++] = '\0';			/* set terminator */
     addcol(&h, tmpbuf);			/* append to header */
     i = 0;

     if (c == '\n') {

	addcmd(&h, awk[0], 0);			/* init AWK program */

	if (flags.notempty) addcmd(&h, awk[1], 0);

	parser(&h, argv[argc-1]);		/* call the parser */

	addcmd(&h, awk[2], 0);

	if (flags.testonly) addcmd(&h, awk[3], 0);
	else if (flags.keyonly) addcmd(&h, awk[4], 0);
	else addcmd(&h, awk[5], 0);

	if (flags.firstmatch) addcmd(&h, awk[6], 0);

	addcmd(&h, awk[7], 0);

	if (flags.testonly) addcmd(&h, awk[8], 0);

	addcmd(&h, awk[9], 0);

	if (flags.debug) fprintf(stderr, "\n%s\n\n", h.cmd);

	/* print header to stdout */
	printhdr(&h);

        fflush(NULL);	/* Make sure the header is output */
        execlp("mawk", "mawk", h.cmd, (char *) 0);
        perror("mawk");
        exit(1);
     }
  }

  /* Reached only when the input ended before a newline-terminated header
     line was seen (e.g. empty input). */
  return 0;
}

/* EOF */
