#!/usr/bin/perl
# *********************************************************************
#  Original code: search,v 2.11 1994/03/14 15:10:02 hobbs
#
#  Adapted to NoSQL by Carlo Strozzi <carlos@linux.it>.
#
#  search: fast-search of an indexed NoSQL table.
#  Copyright (c) 1998,1999,2000,2001,2002,2003 Carlo Strozzi
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
#  $Id: search,v 1.4 2003/04/19 20:35:35 carlo Exp $

# Get local settings and set defaults.

$NOSQL_INSTALL = $ENV{'NOSQL_INSTALL'};
$NOSQL_INSTALL = "/usr/local/nosql" if not $NOSQL_INSTALL;

$0 =~ s-.*/-- ;     # keep only the base name of the program for messages
$ACLIM = 31 ;       # max nr of access attempts (safety valve).

# ---------------------------------------------------------------------
# Command-line options.  Each option just bumps a global flag that the
# subs below consult:
#   $INDX  the searched file is an index; print rows of the main table
#   $NHDR  do not print the header line
#   $PART  a key matches when it is a prefix of the column value
#   $SGL   stop at the first matching row
#   $REVO  the table is sorted in reverse order
#   $VOM   test mode: print nothing, only set the exit status
#   $XBUG  write debug tracing to STDERR
# $NHCM is incremented by some options but is not read anywhere in the
# active code of this file.
# NOTE: the short-option patterns are not anchored, so they match
# anywhere inside the argument; the order of the tests matters.
# ---------------------------------------------------------------------
while ( $ARGV[0] =~ /^-/ ){             # Get args
    $_ = shift ;
    if( /-i.*/ || /^--index$/ ){ $INDX++ ; $NHCM++ ; next ; }
    if( /-nc.*/ ){ $NHCM++ ; next ; }
    if( /-N.*/ || /^--no-header$/ ){ $NHDR++ ; $NHCM++ ; next ; }
    if( /-p.*/ || /^--partial$/ ){ $PART++ ; next ; }
    if( /-s.*/ || /^--first-match$/ ){ $SGL++ ; next ; }
    if( /-v.*/ || /^--reverse$/ ){ $REVO++ ; next ; }
    if( /-t.*/ || /^--test$/ ){ $VOM++ ; next ; }
    if( /-x.*/ || /^--debug$/ ){ $XBUG++ ; next ; }
    if( /-h.*/ || /^--help$/ ){
        # The help text is the installed help file minus its '#' lines.
        $HelpInfo = `grep -v '^#' $NOSQL_INSTALL/help/search.txt`;
        print $HelpInfo ;
        exit 1;
    }
    die "\n$0: unknown option: $_\n" ;
}
die "\n$0: no file name given.\n", "For help type \"$0 --help\".\n"
    unless @ARGV ;
$intbl = shift ;    # the (sorted) table or index file that is searched

# ---------------------------------------------------------------------
# Index mode: open the main table.  Its name is either the next
# argument or the index file name with the ".x.<anything>" suffix
# stripped.  The first line of the main table is echoed as the header.
# ---------------------------------------------------------------------
if( $INDX ){
    if( @ARGV ){
        $mtbl = shift ;
    }
    else{
        ($base = $intbl) =~ s/\.x\..*$// ;
        $mtbl = "$base" ;
    }
    # Three-argument open: the name is always taken as a plain file to
    # read, never as a mode prefix or a pipe.
    open( MT, '<', $mtbl ) || die "$0: can't open input: $mtbl\n" ;
    if( defined( $_ = <MT> ) ){
        print unless $NHDR || $VOM ;
    }
}

# ---------------------------------------------------------------------
# Read the header (first line) of the searched file: column names.
# ---------------------------------------------------------------------
open( RR, '<', $intbl ) || die "$0: can't open input: $intbl\n" ;
if( defined( $_ = <RR> ) ){
    print unless $NHDR || $VOM || $INDX ;   # echoed before SOH removal
    chomp ;             # chomp, not chop: only ever strip a newline
    $lln++ ;
    $_ =~ s/\001//g ;   # remove SOH markers.
    @CN = split( /\t/, $_ ) ;   # col names
}
$lowz = tell( RR ) ;            # first data byte: starting low position
$hiz = (stat( $intbl ))[7] ;    # end of file is starting hi position

# ---------------------------------------------------------------------
# Read the header of the key table on STDIN and map every key column
# name to its column number in the searched file (@KEY).
# ---------------------------------------------------------------------
if( defined( $_ = <STDIN> ) ){
    $kln++ ;
    chomp ;
    $_ =~ s/\001//g ;   # remove SOH markers.
    @K = split( /\t/, $_ ) ;
    for (@K){
        # $k counts matches for this name; $i walks the column names.
        for( $k=$i=0 ; $i < @CN ; $i++ ){
            if( $_ eq $CN[$i] ){
                $k++ ;
                push( @KEY, $i ) ;  # keys for tbl rows
                last ; }
        }
        die "$0: keytbl name no match: $_\n" unless $k ;
    }
}

# ---------------------------------------------------------------------
# One search per key row read from STDIN.
# ---------------------------------------------------------------------
while( <STDIN> ){                   # read keytbl data
    $arg = $_ ;     # raw key line, used in diagnostics
    chomp ;         # a final key line without newline keeps its last char
    @kt = split( /\t/, $_ ) ;
    @spos = () ;    # byte offsets into the main table (index mode)
    do_bin() ;
    if( $INDX ){
        $x = @spos ;
        warn "Nr Hits: $x\n" if ($x && $XBUG) ;
        # Each hit recorded from the index is a byte offset of a row in
        # the main table: seek there and print that row.
        for (@spos){
            seek( MT, $_, 0 ) ;
            $_ = <MT> ;
            print unless $VOM ; }
    }
}

# $errval counts the keys that were not found.  An exit status only has
# 8 bits, so clamp it: otherwise 256 misses would be reported as success.
exit( $errval > 255 ? 255 : $errval ) ;

sub do_bin {                    # do the binary search
    # Binary search of the sorted table open on RR for the key in @kt.
    #
    # Works on byte positions between $lowz (first data byte) and $hiz
    # (file size).  Each probe seeks to the midpoint, discards the rest
    # of the line it landed in and compares the next complete row with
    # cmp_key.  Matching rows are printed, or in index mode ($INDX) the
    # last column of the row is pushed onto @spos.  When the probes stop
    # making progress, or when several rows may match, do_close finishes
    # the job with a short sequential scan.
    #
    # Takes no arguments and returns nothing useful; everything is passed
    # through package globals.
    $low = $lowz ;
    $hi = $hiz ;
    $uplim = $cnt = $pmid = $ppmid = $close = $multimode = 0 ;
    # do_close falls back on $phi as its upper scan limit.  It must be
    # reset for every search: if the very first probe already runs into
    # the last row, do_close is called before $phi is assigned below and
    # would otherwise see an undefined or stale limit and miss the row.
    $phi = $hi ;
    while( 1 ){
        $mid = ($hi + $low) / 2.0 ;     # next search point
        $cnt++ ;
        seek( RR, $mid, 0 ) ;
        <RR> ;                  # get to end of line
        $amid = tell( RR ) ;    # actual read point of next row
        if( $amid == $pmid || $amid == $ppmid ){
            # Same row as one of the last two probes: no more progress.
            do_close() ;
            last ;
        }
        $ppmid = $pmid ;    # prior previous point
        $pmid  = $amid ;    # previous point
        if( $amid >= $hiz ){    # high end of tbl
            do_close() ;        # special case
            last ;
        }
        $_ = <RR> ;     # read complete row
        chomp ;         # chomp, not chop: a last row without a trailing
                        # newline must keep its final character
        @a = split( /\t/, $_ ) ;
        x_info() if $XBUG ;
        $phi = $hi ;        # previous hi
        if( $multimode ){   # in multi arg search process
            # A match was already seen: keep moving $hi down over
            # matching rows to home in on the first one.
            if( cmp_key() == 0 ){
                $hi = $mid ;
            }
            else{
                $low = $mid ;
            }
        }
        else{               # no match yet
            if( ($cv = cmp_key()) == 0 ){
                if( $SGL ){     # single row key match request
                    if( $INDX ){
                        push( @spos, $a[$#a] ) ;
                    }
                    else{
                        print $_, "\n" unless $VOM ;
                    }
                    last ;
                }
                else{           # multi row key match request
                    $multimode++ ;
                    $uplim = $amid ;    # position of the first match seen
                    $hi = $mid ;
                }
            }
            else{
                if( $cv < 0 ){
                    $hi = $mid ;
                }
                else{
                    $low = $mid ;
                }
            }
        }
        if( $cnt >= $ACLIM ){   # safety valve, if tbl not sorted, or ...
            warn "Access limit: $arg\n" ;
            return ;
        }
    }
}
sub do_close {      # find all match rows, in order, starting at $low
    # Sequential scan of RR that finishes a search started by do_bin.
    #
    # Starts at $low (or at the first data row when $low is close to
    # $lowz) and reads rows until the first one matching @kt is found or
    # the read position passes $uplim.  After the first match the
    # directly following matching rows are reported too, unless only one
    # match is wanted ($SGL) or test mode ($VOM) is on.  Matches are
    # printed, or in index mode ($INDX) the last column of the row is
    # pushed onto @spos.  Calls no_find when nothing matched.
    #
    # Takes no arguments; works on the package globals set by do_bin.
    my $hit = 0 ;       # number of first matches found (0 or 1)
    warn "CLOSE...\t($amid)\n" if $XBUG ;
    # $uplim is the position of the first match do_bin saw; when there
    # was none, do_bin's previous high bound is used instead.
    $uplim = $phi unless $uplim ;
    if( ( $low - $lowz ) < ( $hi - $low ) ){    # close to init low point
        $low = $amid = $lowz ;
    }
    seek( RR, $low, 0 ) ;
    # $low normally points into the middle of a row, so skip to the next
    # line start; at $lowz we are already at the start of the first row.
    <RR> unless $low == $lowz ; # special: the first row
    $amid = tell( RR ) ;
    while( <RR> ){
        last if $amid > $uplim ;    # upper limit of search (for 1st match)
        chomp ;     # chomp, not chop: a final row without a newline
                    # would otherwise lose its last character (in index
                    # mode that is a digit of the row offset)
        @a = split( /\t/, $_ ) ;
        x_info() if $XBUG ;
        unless( cmp_key() == 0 ){
            $amid = tell( RR ) ;
            next ;
        }
        $hit++ ;
        if( $INDX ){
            push( @spos, $a[$#a] ) ;
        }
        else{
            print $_, "\n" unless $VOM ;
        }
        $amid = tell( RR ) ;
        last if $SGL || $VOM ;  # stop if only single match wanted or VOM
        # Report the run of matching rows that follows the first match;
        # the first non-matching row ends the whole search.
        while( <RR> ){
            chomp ;
            @a = split( /\t/, $_ ) ;
            return unless cmp_key() == 0 ;
            x_info() if $XBUG ;
            if( $INDX ){
                push( @spos, $a[$#a] ) ;
            }
            else{
                print $_, "\n" ;
            }
            $amid = tell( RR ) ;
        }
    }
    no_find() unless $hit ;
}
sub cmp_key {   # cmp key cols in @kt, @a. Return -1, 0, 1 as appropriate.
    # Compares the search key fields (@kt) with the key columns of the
    # current row (@a), column by column in the order given by @KEY.
    # Returns 0 when all key columns match, otherwise the verdict for the
    # first column that differs: "key sorts before row" / "key sorts
    # after row", which is -1 / 1, or 1 / -1 for reverse order ($REVO).
    # With $PART a column also matches when the key field is a prefix of
    # the row value.
    # $i and $k are left as package globals on purpose: x_info prints
    # $a[$k].
    my( $before, $after ) = $REVO ? ( 1, -1 ) : ( -1, 1 ) ;
    for( $i = 0 ; $i < @KEY ; $i++ ){
        $k = $KEY[$i] ;
        print STDERR "\t($a[$k])  " if $XBUG ;
        if( $PART ){
            # prefix match counts as equal for this column
            next if substr( $a[$k], 0, length( $kt[$i] ) ) eq $kt[$i] ;
        }
        my $order = ( $kt[$i] cmp $a[$k] ) ;    # plain string comparison
        if( $order < 0 ){
            warn "<<<<<\n" if $XBUG ;
            return $before ;
        }
        if( $order > 0 ){
            warn ">>>>>\n" if $XBUG ;
            return $after ;
        }
    }
    warn "MATCH\n" if $XBUG ;
    return 0 ;
}
sub x_info {                    # print debug info to STDERR
    # One trace line per probed row: probe count, low bound, midpoint,
    # (actual row position), high bound and the value of column $k of
    # the current row.
    my $layout = "%2d %8.1f %8.1f (%8.1f) %8.1f %s\n" ;
    my @fields = ( $cnt, $low, $mid, $amid, $hi, $a[$k] ) ;
    printf STDERR $layout, @fields ;
}
sub no_find {
    # Record a key that was not found: bump the miss counter $errval
    # (used as the exit status) and, when debugging, report the key line.
    $errval += 1 ;
    return unless $XBUG ;
    warn "Not found: $arg" ;
}

# End of program.
