/*
  keysearch: extract lines beginning with a given string (key) from
  a NoSQL table.

  Copyright (c) 2003,2006 Carlo Strozzi

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 dated June, 1991.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

  $Id: keysearch.c,v 1.5 2006/03/10 11:26:13 carlo Exp $

*/

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/file.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>

#ifndef HELPDIR
#define HELPDIR "/usr/local/nosql/help"
#endif
#ifndef HELPFILE
#define HELPFILE (HELPDIR "/keysearch.txt")
#endif

#ifndef DOCDIR 
#define DOCDIR "/usr/local/nosql/doc"
#endif
#ifndef COPYING
#define COPYING (DOCDIR "/COPYING")
#endif
#ifndef WARRANTY
#define WARRANTY (DOCDIR "/WARRANTY")
#endif

#define MAXKEYLEN 256

/* global variables */

struct {
  unsigned int header  : 1;
  unsigned int fold    : 1;
  unsigned int partial : 1;
  unsigned int index   : 1;
  unsigned int test    : 1;
  unsigned int sndx    : 1;
  unsigned int debug   : 1;
} flags;

static const char *progname = "keysearch";  /* global pointer to argv[0] */

/**************************************************************************** 
 * Original code by N. Dean Pentcheff, who says :
 *
 * "This implementation of the Soundex algorithm is released to the public
 *  domain: anyone may use it for any purpose.  See if I care."
 *
 * N. Dean Pentcheff
 * 1/13/89
 * Dept. of Zoology
 * University of California
 * Berkeley, CA  94720
 * E-mail: <dean@violet.berkeley.edu>
 *
 * char * soundex( char *, int length, int ct )
 *
 * where:
 *
 *   'char *' is the string to be soundexed.
 *   'length' is the desired soundex code length (default 4)
 *   'ct'     is the 'census type': 0=normal, 1=special.
 *	      This program always passes type '0', but the function
 *	      also handles type '1'.
 *
 * Given as argument: Pointer to a character string.
 * Returns: Pointer to a static string, 'sl' characters long, plus a terminal
 *    '\0'.  This string is the Soundex key for the argument string.
 * Side effects and limitations:
 *    Does not clobber the string passed in as the argument.
 *    No limit on argument string length.
 *    Assumes a character set with continuously ascending and contiguous
 *       letters within each case and within the digits (e.g. this works for
 *       ASCII and bombs in EBCDIC.  But then, most things do.).
 * Reference: Adapted from Knuth, D.E. (1973) The art of computer programming;
 *    Volume 3: Sorting and searching.  Addison-Wesley Publishing Company:
 *    Reading, Mass. Page 392.
 * Special cases:
 *    Leading or embedded spaces, numerals, or punctuation are squeezed out
 *       before encoding begins.
 *    Null strings or those with no encodable letters return the code 'Z000'.
 * Test data from Knuth (1973):
 *    Euler   Gauss   Hilbert Knuth   Lloyd   Lukasiewicz
 *    E460    G200    H416    K530    L300    L222
 */

/* soundex() function code */
/*
 * Map a character to its 0-based position in the alphabet ('a'/'A' -> 0,
 * ... 'z'/'Z' -> 25), or return -1 if it is not one of the 26 basic
 * letters.  The cast to unsigned char keeps the <ctype.h> calls defined
 * for bytes with the high bit set, and the range check keeps locale
 * specific letters from indexing outside the 26-entry code table.
 */
static int soundex_letter( char c )
{
    unsigned char uc = (unsigned char) c;
    int lower;

    if ( !isalpha( uc ) ) return -1;

    lower = tolower( uc );
    if ( lower < 'a' || lower > 'z' ) return -1;

    return lower - 'a';
}

/*
 * Compute the Soundex key of a string.
 *
 *   in   string to encode; it is not modified
 *   sl   desired key length, 4..10; any other value selects 4
 *   ct   census type: 0 = normal (H and W are skipped entirely),
 *        non-zero = special (H and W separate equal codes, like vowels)
 *
 * Returns a pointer to a static, NUL-terminated buffer holding exactly
 * 'sl' characters: the uppercased first letter followed by digits and
 * padded with '0'.  Input without any letter yields 'Z' followed by
 * zeroes.  The buffer is overwritten by the next call.
 */
static char *soundex( char *in, int sl, int ct )
{
    static const int code[] =
      {  0,1,2,3,0,1,2,0,0,2,2,4,5,5,0,1,2,6,2,3,0,1,0,2,0,2 };
      /* a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z */
    static char result[11];
    int idx;
    int last;
    int count;

    if (sl < 4 || sl > 10) sl = 4;		/* default code length */

    /* Start every call from the default key so that nothing from an
       earlier call can leak into this result. */
    memset( result, '0', (size_t) sl );
    result[0] = 'Z';
    result[sl] = '\0';

    /* Advance to the first letter.  If none present, return default key */
    while ( *in != '\0'  &&  soundex_letter( *in ) < 0 ) ++in;

    if ( *in == '\0' ) return result;

    /* Pull out the first letter, uppercase it, and set up for main loop */
    idx = soundex_letter( *in );
    result[0] = (char) ('A' + idx);
    last = code[idx];
    ++in;

    /* Scan rest of string, stop at end of string or when the key is full */
    for ( count = 1;  count < sl  &&  *in != '\0';  ++in )
    {
      /* If non-alpha, ignore the character altogether */
      idx = soundex_letter( *in );
      if ( idx < 0 ) continue;

      /* completely ignore H and W (either case) if normal census type */
      if ( !ct && (idx == 'h' - 'a' || idx == 'w' - 'a') ) continue;

      /* Fold together adjacent letters sharing the same code */
      if ( last != code[idx] )
      {
        last = code[idx];
        /* Ignore code==0 letters except as separators */
        if ( last != 0 ) result[count++] = (char) ('0' + last);
      }
    }

    return result;
}

/* Unbuffered input routine */

/*
 * Fetch a single byte from descriptor 'fd' with one read(2) call.
 * Returns the byte as a value in 0..255, or EOF when read() does not
 * deliver exactly one byte (end of input or error).
 */
static int getch(int fd) {
    char byte;

    if (read(fd, &byte, 1) != 1) {
        return EOF;
    }
    return (unsigned char) byte;
}

/* Write the command synopsis to stderr; the return value (2) is meant to
   be handed straight to exit(). */
static int eusage(void) {
    const int status = 2;

    fprintf(stderr,
            "Usage: %s [-df] [-p] [-S[n]] [-I index] string table\n",
            progname);

    return status;
}

/* Report on stderr that the search string exceeds MAXKEYLEN; the return
   value (2) is meant to be handed straight to exit(). */
static int etoolong(void) {
    const int status = 2;

    fprintf(stderr,
            "%s: max. search string width (%d) exceeded\n",
            progname, MAXKEYLEN);

    return status;
}

/* Report on stderr that a path exceeds _POSIX_PATH_MAX; the return value
   (2) is meant to be handed straight to exit(). */
static int etoolong2(void) {
    const int status = 2;

    fprintf(stderr,
            "%s: max. length of table path (%d) exceeded\n",
            progname, _POSIX_PATH_MAX);

    return status;
}

/*
 * keysearch [options] string table
 *
 * Copies the first line of 'table' to stdout (unless suppressed) and then
 * replaces this process with the program that does the actual search:
 *
 *   - without -I:  look(1) run directly on the table;
 *   - with -I idx: "sh -c" running look(1) on the index file, piped into
 *                  seektable on the table (the pipe is omitted with -t).
 *
 * Options handled here:
 *   -N, --no-header      do not output the header line
 *   -t, --test           send stdout to /dev/null; implies -N
 *   -p, --partial        do not append a TAB to the search string
 *   -f, --fold           pass -f to look
 *   -x, --debug          print the command to stderr before running it
 *   -S[n], --soundex[=n] soundex the search string (n = code length)
 *   -o, --output FILE    redirect stdout to FILE
 *   -I, --index FILE     use FILE as the index table
 *   -h, --help, --show-copying, --show-warranty
 *                        show the corresponding text file and exit
 * Arguments starting with '-' that match none of the above are skipped.
 *
 * Exit status: 2 on usage, length or open errors and when look/sh cannot
 * be executed; 1 when grep/cat cannot be executed; 0 if the table
 * contains no newline at all (nothing is searched in that case).
 */
int main(int argc, char *argv[]) {

    /* putenv() stores the pointer it is given, so this string must not
       live in a buffer that is reused afterwards. */
    static char lc_all[] = "LC_ALL=POSIX";
    static const char to_seektable[] = " |seektable --no-header ";

    int i = 0, ifd, min_args = 3, sl = 4, ct = 0;
    int c;			/* int, so that EOF differs from byte 0xFF */
    char *ifile = NULL;
    const char *dbg;

    char tmpbuf[_POSIX_PATH_MAX*3+MAXKEYLEN] = "";	/* work buffer */

    flags.header  = 1;
    flags.fold    = 0;
    flags.partial = 0;
    flags.index   = 0;
    flags.test    = 0;
    flags.sndx    = 0;
    flags.debug   = 0;

    dbg = getenv("NOSQL_DEBUG");
    if (dbg != NULL && !strncmp(dbg, "1", 1)) flags.debug = 1;

    /* min_args grows by one for every argv[] slot consumed as an option
       or option value, so that 'string' and 'table' must still follow. */
    while (++i < argc && *argv[i] == '-') {

        min_args++;

        if (!strcmp(argv[i], "-N") ||
            !strcmp(argv[i], "--no-header")) flags.header = 0;

        if (!strcmp(argv[i], "-t") || !strcmp(argv[i], "--test")) {
            flags.test = 1;
            flags.header = 0;
            /* freopen() reports failure with a null pointer */
            if (freopen("/dev/null", "w", stdout) == NULL) {
                perror("/dev/null");
                exit(2);
            }
        }

        if (!strcmp(argv[i], "-p") ||
            !strcmp(argv[i], "--partial")) flags.partial = 1;

        else if (!strcmp(argv[i], "-f") ||
                 !strcmp(argv[i], "--fold")) flags.fold = 1;

        else if (!strcmp(argv[i], "-x") ||
                 !strcmp(argv[i], "--debug")) flags.debug = 1;

        else if (!strcmp(argv[i], "-S") ||
                 !strcmp(argv[i], "--soundex")) flags.sndx = 1;

        else if (!strncmp(argv[i], "-S", 2)) {
            /* "-Sn": out-of-range or non-numeric n falls back to the
               default length inside soundex() */
            flags.sndx = 1;
            sl = atoi(argv[i]+2);
        }
        else if (!strncmp(argv[i], "--soundex=", 10)) {
            flags.sndx = 1;
            sl = atoi(argv[i]+10);
        }
        else if (!strcmp(argv[i], "-o") ||
                 !strcmp(argv[i], "--output")) {

            if (++i >= argc || *argv[i] == '-') exit(eusage());

            min_args++;

            if (freopen(argv[i], "w", stdout) == NULL) {
                perror(argv[i]);
                exit(2);
            }
        }

        else if (!strcmp(argv[i], "-I") ||
                 !strcmp(argv[i], "--index")) {

            if (++i >= argc || *argv[i] == '-') exit(eusage());

            min_args++;

            flags.index = 1;

            ifile = argv[i];
        }

        else if (!strcmp(argv[i], "-h") ||
                 !strcmp(argv[i], "--help")) {

            execlp("grep","grep","-v","^#",HELPFILE,(char *) 0);
            perror("grep");
            exit(1);
        }

        else if (!strcmp(argv[i], "--show-copying")) {
            execlp("cat","cat",COPYING,(char *) 0);
            perror("cat");
            exit(1);
        }

        else if (!strcmp(argv[i], "--show-warranty")) {
            execlp("cat","cat",WARRANTY,(char *) 0);
            perror("cat");
            exit(1);
        }
    }

    if (argc < min_args) exit(eusage());

    /* Soundex ignores '-p' and '-f' */

    if (flags.sndx) flags.fold = flags.partial = 0;

    /* account for the append of a TAB, depending on flags.partial */
    if (strlen(argv[i]) >= (MAXKEYLEN-1)) exit(etoolong());

    if ((ifd = open(argv[i+1], O_RDONLY, 0)) < 0) {
        fprintf(stderr, "%s: unable to open file %s\n",
                progname, argv[i+1]);
        exit(2);
    }

    /* Read the table one byte at a time up to the end of its first line,
       echoing it if requested, then hand over to look/sh. */
    while ((c = getch(ifd)) != EOF) {

        if (flags.header) putchar(c);

        if (c != '\n') continue;

        fflush(NULL);		/* Make sure the header is output */
        close(ifd);		/* close when no longer needed */

        if (!flags.index) {

            putenv(lc_all);

            strcpy(tmpbuf, argv[i]);

            if (!flags.partial) strcat(tmpbuf, "\t");

            if (flags.fold) {

                if (flags.debug)
                    fprintf(stderr,"look -f '-t' \t %s %s\n",
                            tmpbuf, argv[i+1]);

                execlp("look", "look", "-f", "-t",
                       "\t", tmpbuf, argv[i+1], (char *) 0);
            }
            else {
                if (flags.debug)
                    fprintf(stderr,"look -t '\t' %s %s\n",
                            tmpbuf, argv[i+1]);

                execlp("look", "look", "-t", "\t",
                       tmpbuf, argv[i+1], (char *) 0);
            }

            perror("look");
            exit(2);
        }
        else {
            /* NOTE(review): the search string, index name and table name
               are pasted into the shell command unquoted, so any shell
               metacharacters in them are interpreted by sh. */
            strcpy(tmpbuf, "export LC_ALL=POSIX;");

            /* make sure the shell cmd buffer will not overflow */
            if (strlen(ifile) > _POSIX_PATH_MAX ||
                strlen(argv[i]) > _POSIX_PATH_MAX) exit(etoolong2());

            if (flags.fold) strcat(tmpbuf, "look -f -t '\t' ");
            else strcat(tmpbuf, "look -t '\t' ");

            /* append (possibly soundexed) search string */

            if (flags.sndx) strcat(tmpbuf, soundex(argv[i],sl,ct));
            else strcat(tmpbuf, argv[i]);

            if (!flags.partial) strcat(tmpbuf, "'\t' ");
            else strcat(tmpbuf, " ");

            strcat(tmpbuf, ifile);	/* append index table name */

            if (!flags.test) {
                /* the table name is appended too: reject it if the
                   finished command would not fit in the buffer */
                if (strlen(tmpbuf) + strlen(to_seektable) +
                    strlen(argv[i+1]) >= sizeof tmpbuf) exit(etoolong2());

                strcat(tmpbuf, to_seektable);

                strcat(tmpbuf, argv[i+1]);	/* append table name */
            }

            if (flags.debug) fprintf(stderr,"sh -c \"%s\"\n", tmpbuf);

            execlp("sh", "sh", "-c", tmpbuf, (char *) 0);

            perror(tmpbuf);
            exit(2);
        }
    }

    exit(0);		/* reached only if the table has no newline */
}

/* EOF */
