#!/usr/local/bin/perl -w 

# Copyright (c) 2001 Carnegie Mellon University
# All rights reserved.
# See Copyright section at end of file for use and distribution information.
#
# 
# usage: hostpairs [-h1 hosts][-p1 ports][-h2 hosts][-p2 ports][...] files
#
# hostpairs does data aggregation and summarization on Argus data files.  
# To work, it requires the following:
#    1) The module Net::Argus::HostPairs must be installed.  This module
#       requires the Net::Patricia module (available from www.cpan.org),
#       along with the Socket, POSIX, Getopt::Long and Text::Wrap modules 
#       (which should be part of the regular Perl distribution).
#
#    2) hostpairs needs to be able to find the ra program.  This can be
#       done either by setting the path explicitly or putting it in a
#       known path.  See below for instructions/details.
#
#    3) hostpairs needs an rarc file that:
#        1) Uses ',' (comma) for field separators in ra output
#        2) Prints transaction start and end times
#        3) Prints timestamps in Unix (absolute) time with usec precision
#        4) Prints packet and byte counts
#        5) Does not resolve hostnames.
#        6) Does not print indicators
#       So, ra output should be:
# stime,etime,proto,srcip,srcp,dir,dstip,dstp,srcpkt,dstpkt,srcbyte,dstbyte,sta
#       See instructions below for more details on setting the location of
#       this file.
#
# 'hostpairs -help' will print out help information.
# 'hostpairs -help < hosts|ports|modes|sortfields|outputfields|forms >'
# will print out detailed help.
# The POD at the end of this file has more information.
#
#########################################################################
use strict;
#########################################################################
#  Specifying locations of the ra program and rarc file:
#
# You can specify them directly on the command line each time you run
# hostpairs (yuck). This  will over-ride any other setting.
# OR
# Specify them directly by setting the following strings. If set, hostpairs
# will not use environment variables to look for these files.


my $ra   = '';    # location of the ra program; empty means "search for it"
my $rarc = '';    # location of the rarc file;  empty means "search for it"


# Alternatively, hostpairs can search for the basenames below under the
# listed directories.  Adjust both to suit your environment.  A path
# component that starts with '$' is treated as the name of an environment
# variable.

my $ra_basename   = 'ra';
my $rarc_basename = 'rarc.hostpairs';

my @ra_paths   = ('$ARGUSHOME/bin', '$HOME/bin');
my @rarc_paths = ('$ARGUSHOME', '$HOME');

# Formfile search, used only when no formfile is named on the command line
# or through $HOSTPAIRS_FORMFILE :

my $formfile_basename = 'hostpairs.forms';
my @formfile_paths    = ('$HOME', '$ARGUSHOME');

#########################################################################

#########################################################################
# Some program defaults are included below.  These can be changed if you
# don't like them.  All of the defaults below can be (and usually are) 
# specified on the command line.

# Fallback printf-style format for each output field.  These are joined
# into a format string whenever the user does not supply one.

my %default_formats = (
    h1_ip       => '%-15.15s',
    h2_ip       => '%-15.15s',
    h1_bytes    => '%10s',
    h2_bytes    => '%10s',
    h1_pkts     => '%8s',
    h2_pkts     => '%8s',
    records     => '%7s',
    h1_name     => '%-12.12s',
    h2_name     => '%-12.12s',
    h1_port     => '%5.5s',
    h2_port     => '%5.5s',
    h1_pc       => '%-5.5s',
    h2_pc       => '%-5.5s',
    proto       => '%-5.5s',
    h1_portlist => '%-s',
    h2_portlist => '%-s',
    total_bytes => '%11s',
    total_pkts  => '%9s',
    etime       => '%-8.8s',
    stime       => '%-8.8s',
    num_peers   => '%5s',
);

# Output fields printed in each mode when none are requested.

my %default_outputs = (
    summary  => join(',', qw(h1_ip h1_name h1_pkts h2_pkts
                             h1_bytes h2_bytes records)),
    hostpair => join(',', qw(h1_ip h2_ip h1_pkts h2_pkts
                             h1_bytes h2_bytes records)),
    portpair => join(',', qw(stime etime h1_port h2_port proto
                             h1_pkts h2_pkts h1_bytes h2_bytes)),
);

# Sort keys used in each mode when none are requested.

my %default_sortbys = (
    summary  => 'h1_bytes',
    hostpair => 'h1_bytes',
    portpair => 'stime,h1_port',
);


# Fields that sort in reverse (largest first) by default.  The author's
# preference is to list counters and totals biggest-first and to keep the
# usual smallest-to-largest order for everything else (hostnames, IP
# addresses, etc).  This may not be what is usually expected.

my %default_summary_sort_reverse = map { $_ => 1 } qw(
    h1_pkts h2_pkts h1_bytes h2_bytes total_bytes total_pkts
    num_peers records
);

my %default_hostpair_sort_reverse = map { $_ => 1 } qw(
    h1_pkts h2_pkts h1_bytes h2_bytes total_bytes total_pkts
    h1_pc h2_pc records
);

my %default_portpair_sort_reverse = map { $_ => 1 } qw(
    h1_pkts h2_pkts h1_bytes h2_bytes total_bytes total_pkts
    num_peers records
);

# Remaining defaults:

my $default_time_format     = '%T';
my $default_print_portnames = 0;
my $default_print_header    = 1;

#
# End of default settings. 
# Warranty is voided if changes are made past this point.
#########################################################################



use Getopt::Long;
use Net::Argus::HostPairs;

# When true, the HostPairs object is dumped with print_object() before any
# data is processed.
my $Debug = 0;

# Sort fields accepted in every mode.
my %common_sortfields = (
    h1_pkts     => 1,
    h2_pkts     => 1,
    h1_bytes    => 1,
    h2_bytes    => 1,
    total_bytes => 1,
    total_pkts  => 1,
    records     => 1,
);

# Valid sort fields per mode; consulted by verify_fields().
my %sortfields = (
    summary => {
        %common_sortfields,
        num_peers => 1,
        h1_name   => 1,
        h1_ip     => 1,
    },
    hostpair => {
        %common_sortfields,
        h1_name => 1,
        h2_name => 1,
        h1_ip   => 1,
        h2_ip   => 1,
        h1_pc   => 1,
        h2_pc   => 1,
    },
    portpair => {
        %common_sortfields,
        stime   => 1,
        etime   => 1,
        proto   => 1,
        h1_port => 1,
        h2_port => 1,
    },
);

# Valid output fields per mode: every sortable field, plus the two port
# lists that can be printed but not sorted on in hostpair mode.
my %outputfields = (
    summary  => { %{ $sortfields{'summary'} } },
    hostpair => {
        %{ $sortfields{'hostpair'} },
        h1_portlist => 1,
        h2_portlist => 1,
    },
    portpair => { %{ $sortfields{'portpair'} } },
);

# Option names with a 0/1 flag.  The flag appears to mark whether the
# option takes a value (1) or is a bare switch (0).
# NOTE(review): nothing in the visible code reads this table -- confirm
# whether it is still needed.  (The original listed 'h2file' twice.)
my %form_options = (
    h1              => 1,
    h2              => 1,
    p1              => 1,
    p2              => 1,
    filter          => 1,
    extra_ra_args   => 1,
    h1file          => 1,
    h2file          => 1,
    p1file          => 1,
    p2file          => 1,
    filterfile      => 1,
    A               => 0,
    ra              => 1,
    rarc            => 1,
    noheader        => 0,
    timeformat      => 1,
    summary         => 0,
    hostpair        => 0,
    portpair        => 0,
    summary_output  => 1,
    hostpair_output => 1,
    portpair_output => 1,
    summary_sortby  => 1,
    portpair_sortby => 1,
    hostpair_sortby => 1,
    pn              => 0,
);

# Getopt::Long specifications shared by the command line and by forms.
my @options = qw(h1=s h2=s p1=s p2=s filter|f|fi|fil|filt=s 
                 extra_ra_args=s  h1file=s  h2file=s  p1file=s 
                 p2file=s  filterfile=s  A  ra=s  rarc=s  noheader 
                 timeformat=s  summary|s  hostpair|hp|h  portpair|pp|p 
                 summary_output|s_output=s  hostpair_output|h_output=s 
                 portpair_output|p_output=s summary_sortby|s_sortby=s 
                 hostpair_sortby|h_sortby=s  portpair_sortby|p_sortby=s 
                 summary_format|s_format=s  hostpair_format|h_format=s 
                 ff=s  form=s  portpair_format|p_format=s w=s pn
                 help:s);


my %cmd_opts;

# Parse the command line.  GetOptions prints its own diagnostic for each
# bad option and returns false; remember the result so we can stop below
# instead of running with a half-parsed command line.
my $cmdline_ok = GetOptions(\%cmd_opts, @options);

# -help wins even when other options were bad.
help($cmd_opts{'help'}) if exists $cmd_opts{'help'};

unless ($cmdline_ok) {
    print STDERR "Try 'hostpairs -help' for usage information.\n";
    exit 1;
}

my %opts;
my %file_options;


if ($cmd_opts{'ff'} && !$cmd_opts{'form'}) {
    print STDERR "Warning: formfile option ignored since form not specified\n";
}

if ($cmd_opts{'form'}) {
    my $form = $cmd_opts{'form'};

    # Formfile precedence: -ff, then $HOSTPAIRS_FORMFILE, then a search of
    # @formfile_paths (find_file returns 0 when nothing is found).
    my $formfile = $cmd_opts{'ff'}
                || $ENV{HOSTPAIRS_FORMFILE}
                || find_file($formfile_basename, @formfile_paths);
    unless ($formfile) {
        print STDERR "Unable to find formfile\n";
        exit 1;
    }

    my @file_opts = read_formfile($formfile, $form);
    unless (@file_opts) {
        print STDERR "Form $form not present in $formfile\n";
        exit 1;
    }

    # Run the form's options through the same option table as the command
    # line by temporarily substituting them for @ARGV.
    {
        local @ARGV = @file_opts;
        unless (GetOptions(\%file_options, @options)) {
            print STDERR "Invalid option(s) in form $form of $formfile\n";
            exit 1;
        }
    }

    # make sure that command line mode option over-rides file mode option:
    if ($cmd_opts{'summary'} || $cmd_opts{'hostpair'} ||
        $cmd_opts{'portpair'}) {
        delete @file_options{qw(summary hostpair portpair)};
    }
}


# Later entries win, so a command-line value replaces the form's value for
# any option that appears in both.
%opts = (%file_options, %cmd_opts);

my $hp = Net::Argus::HostPairs->new();

# Exactly one of the three mode switches must be in effect.
my @chosen_modes = grep { $opts{$_} } qw(summary hostpair portpair);

if (@chosen_modes > 1) {
    print STDERR "Ambiguous mode: Can only specify one of -s -h or -p\n";
    exit 1;
}

unless (@chosen_modes) {
    print STDERR "Must specify a mode\n";
    exit 1;
}

$hp->mode($chosen_modes[0]);
my $mode = $hp->mode();

# Locate the ra program: command line first, then the hard-wired $ra
# setting, then a search of @ra_paths.
if ($opts{'ra'}) {
    $hp->ra_prog($opts{'ra'});
} elsif ($ra) {
    $hp->ra_prog($ra);
} else {
    my $raprog = find_file($ra_basename, @ra_paths);
    # find_file returns 0 when nothing was found.  Test for that first so
    # the file tests are never applied to a file literally named "0".
    if ($raprog && -x $raprog && -f _) {
        $hp->ra_prog($raprog);
    } else {
        print STDERR 
         "Need to specify location of ra program. See source for details.\n";
        exit 1;
    }
}

# Locate the rarc file the same way: command line, then $rarc, then a
# search of @rarc_paths.
if ($opts{'rarc'}) {
    $hp->rarc($opts{'rarc'});
} elsif ($rarc) {
    $hp->rarc($rarc);
} else {
    my $rarc_found = find_file($rarc_basename, @rarc_paths);
    if ($rarc_found && -r $rarc_found && -f _) {
        $hp->rarc($rarc_found);
    } else {
        print STDERR 
         "Need to specify location of rarc file. See source for details.\n";
        exit 1;
    }
} 



# Input files come from the remaining command-line arguments or, failing
# that, from the whitespace-separated list in $ARGUSFILES.
my $env_file_list = $ENV{'ARGUSFILES'};

unless (@ARGV || $env_file_list) {
    print "# No files to process. Exiting...\n";
    exit 0;
}

$hp->argus_files(@ARGV ? @ARGV : split(' ', $env_file_list));

if ($mode eq 'summary') {

    # Output fields: the user's choice, else the built-in default.
    my $s_out = $opts{'summary_output'} || $default_outputs{'summary'};
    $hp->summary_output(verify_fields('summary', 'output', $s_out));

    # Register the default reverse orderings before sort_orders() applies
    # any explicit +/- prefixes from the sortby specifier.
    $hp->sort_order('summary', $_, 'r')
        for keys %default_summary_sort_reverse;

    my $s_sortby = sort_orders('summary',
        $opts{'summary_sortby'} || $default_sortbys{'summary'});
    $hp->summary_sortby(verify_fields('summary', 'sort', $s_sortby));

    # Format string: the user's, else one built from the output fields.
    my $s_format = $opts{'summary_format'}
                || make_format_string($hp->summary_output());
    $hp->summary_format($s_format);

}

if (grep { $mode eq $_ } qw(hostpair portpair)) {

    # ---- hostpair settings (set up in both hostpair and portpair mode) ----

    my $h_out = $opts{'hostpair_output'} || $default_outputs{'hostpair'};
    $hp->hostpair_output(verify_fields('hostpair', 'output', $h_out));

    # Default reverse orderings go in first; sort_orders() then applies any
    # explicit +/- prefixes from the sortby specifier.
    $hp->sort_order('hostpair', $_, 'r')
        for keys %default_hostpair_sort_reverse;

    my $h_sortby = sort_orders('hostpair',
        $opts{'hostpair_sortby'} || $default_sortbys{'hostpair'});
    $hp->hostpair_sortby(verify_fields('hostpair', 'sort', $h_sortby));

    my $h_format = $opts{'hostpair_format'}
                || make_format_string($hp->hostpair_output());
    $hp->hostpair_format($h_format);

    # ---- portpair settings ----

    my $p_out = $opts{'portpair_output'} || $default_outputs{'portpair'};
    $hp->portpair_output(verify_fields('portpair', 'output', $p_out));

    $hp->sort_order('portpair', $_, 'r')
        for keys %default_portpair_sort_reverse;

    my $p_sortby = sort_orders('portpair',
        $opts{'portpair_sortby'} || $default_sortbys{'portpair'});
    $hp->portpair_sortby(verify_fields('portpair', 'sort', $p_sortby));

    # A generated portpair format gets one leading space; a user-supplied
    # format is used exactly as given.
    my $p_format = $opts{'portpair_format'}
                || (' ' . make_format_string($hp->portpair_output()));
    $hp->portpair_format($p_format);

}

# Switches override the corresponding defaults; otherwise the defaults from
# the settings section apply.
$hp->print_header($opts{'noheader'} ? 0 : $default_print_header);

$hp->print_port_names($opts{'pn'} ? 1 : $default_print_portnames);

$hp->time_format($opts{'timeformat'} || $default_time_format);


# Extra arguments for ra: -A first, then the whitespace-separated words of
# -extra_ra_args.
my @extra_ra_arglist = (
    ($opts{'A'}             ? ('-A')                             : ()),
    ($opts{'extra_ra_args'} ? split(' ', $opts{'extra_ra_args'}) : ()),
);

$hp->ra_extra_args(@extra_ra_arglist) if (@extra_ra_arglist);

# Each selector may come from a file or from the command line, never both.
if (grep { $opts{ $_->[0] } && $opts{ $_->[1] } }
        ([h1file => 'h1'], [h2file => 'h2'],
         [p1file => 'p1'], [p2file => 'p2'],
         [filterfile => 'filter'])) {
    print STDERR
    "Specify file or commandline hosts/ports/filter, not both at once\n";
    exit 1;
}

# Host sets are passed on only when they have a true value ...
foreach my $setter ([hostset1 => 'h1'], [hostset2 => 'h2']) {
    my ($method, $key) = @{$setter};
    $hp->$method($opts{$key}) if ($opts{$key});
}

# ... whereas port sets are passed on whenever the option was given at all,
# so a value of '0' still reaches the object.
foreach my $setter ([portset1 => 'p1'], [portset2 => 'p2']) {
    my ($method, $key) = @{$setter};
    $hp->$method($opts{$key}) if exists($opts{$key});
}

# File-based selectors and the filter, in the original call order.
foreach my $setter ([h1_file => 'h1file'], [h2_file => 'h2file'],
                    [p1_file => 'p1file'], [p2_file => 'p2file'],
                    [filter  => 'filter'], [filter_file => 'filterfile']) {
    my ($method, $key) = @{$setter};
    $hp->$method($opts{$key}) if ($opts{$key});
}

my $outfile = $opts{'w'};

# With -w, standard output is redirected to a new file.  An existing file
# is never overwritten.
if ($outfile) {
    if (-e $outfile) {
        print STDERR "Will not write to existing file: $outfile\n";
        exit 1;
    }
    # Three-argument open: the name is used literally, so characters such
    # as a leading '>' or surrounding whitespace cannot change the mode.
    open(STDOUT, '>', $outfile) or die "Cannot open $outfile: $!\n";
}

print_object($hp) if ($Debug);

$hp->accumulate();


$hp->printout();

# Buffered write errors only surface at close, so check it.
if ($outfile) {
    close(STDOUT) or die "Error writing $outfile: $!\n";
}


# Apply the +/- sort order prefixes found in a sortby specifier.
#
#   mode   = summary|hostpair|portpair
#   sortby = comma-separated field list; a field may carry a leading '+'
#            (normal order) or '-' (reverse order)
#
# For each prefixed field the requested order ('n' or 'r') is registered on
# the file-level $hp object through sort_order(mode, field, order).
# Returns the specifier with the prefixes stripped out, or "" when either
# argument is empty.

sub sort_orders {
    my $mode = shift;
    my $sortby = shift;
    return "" unless $sortby && $mode;
    my @fields = split /,/, $sortby;
    foreach my $f (@fields) {
        if ($f =~ m/^([+-])(\w+)$/) {
            # Copy the captures before the method call so that nothing the
            # callee does can disturb them.
            my ($sign, $name) = ($1, $2);
            $hp->sort_order($mode, $name, ($sign eq '+') ? 'n' : 'r');
        }
        # $f aliases the array element, so this strips the prefix in
        # @fields itself (even from fields the pattern above rejected).
        $f =~ s/^[+-]//;
    }
    return join(',', @fields);
}

# Validate a comma-separated sort/output field specifier for a mode.
#   mode   = summary|hostpair|portpair
#   type   = sort|output  (anything other than 'sort' is checked as output)
#   fields = field specifier after any sort order information is stripped out
# Returns the specifier unchanged when every field is valid, and "" when
# the specifier is empty.  Otherwise the offending fields are reported on
# STDERR and the program exits with status 1.


sub verify_fields {
    my ($mode, $type, $fields) = @_;
    return "" unless $fields;

    my $allowed  = ($type eq 'sort') ? \%sortfields : \%outputfields;
    my @rejected = grep { !$allowed->{$mode}->{$_} } split /,/, $fields;

    if (@rejected) {
       print STDERR "$type field(s): @rejected  not valid for $mode data.\n";
        exit 1;
    }
    return $fields;
}



# Build a default format string for a comma-separated field specifier by
# joining each field's entry in %default_formats with single spaces.
# Returns "" for an empty specifier.  The trailing '\n' is single-quoted,
# i.e. a literal backslash followed by 'n', not a newline character.

sub make_format_string {
    my ($fields) = @_;
    return "" unless $fields;
    my @formats = map { $default_formats{$_} } split /,/, $fields;
    return join(' ', @formats) . '\n';
}

# Search a list of directories for a file.
#
#   basename = file name to look for; if it starts with '/' it is treated
#              as a complete path and the directory list is ignored
#   paths    = directories such as ($FOO/bin $FOOB); any '/'-separated
#              component starting with '$' is replaced by the value of that
#              environment variable, and a directory whose variable is
#              unset or false is skipped
#
# Returns the full path of the first existing match, or 0 if none exists.

sub find_file {
    my ($basename, @paths) = @_;

    if ($basename =~ m{^/}) {
        return 0 unless -e $basename;
        return $basename;
    }

  DIR:
    for my $dir (@paths) {
        my @expanded;
        for my $part (split m{/}, $dir) {
            if (index($part, '$') == 0) {
                my $value = $ENV{ substr($part, 1) };
                next DIR unless $value;
                push @expanded, $value;
            }
            else {
                push @expanded, $part;
            }
        }
        my $candidate = join('/', @expanded, $basename);
        return $candidate if -e $candidate;
    }

    return 0;
}    

# Read the options stored under one form label in a formfile.
#
#   file = path of the formfile
#   form = label of the block whose options are wanted
#
# A formfile is a sequence of blocks of the shape
#     label {
#     -option [value]
#     }
# where the '{' may also stand alone on the line after the label.  Blank
# lines and everything from a '#' to the end of a line are ignored.  One
# leading and one trailing quote character are removed from a value.
#
# Returns a flat list of options and values (suitable for GetOptions) for
# every block labelled <form>; the list is empty if no such block has any
# options.  Exits with status 1 if the file is not a readable plain file or
# if a line cannot be parsed; dies if the file cannot be opened.

sub read_formfile {
    my $file = shift;
    my $form = shift;
    unless ((-f $file) && (-r $file)) {
        print STDERR "Cannot access form file: $file\n";
        exit 1;
    }

    # Parser states.
    my $OUT_OF_BLOCK  = 1;    # between blocks
    my $WAITING_BLOCK = 2;    # label seen, opening bracket not yet seen
    my $IN_BLOCK      = 3;    # between '{' and '}'

    my @args = ();
    my $linenumber = 0;
    my $state = $OUT_OF_BLOCK;
    my $blockname;

    # Three-argument open on a lexical handle: the name is used literally.
    open(my $formfh, '<', $file) or die "Cannot open $file: $!\n";

    while (my $line = <$formfh>) {
        chomp $line;
        $linenumber++;

        # skip comment-only and blank lines:
        next if ($line =~ /^\s*\#/) || ($line =~ /^\s*$/);

        # remove trailing comments from the remaining lines:
        $line =~ s/\#.*//;

        if (($state == $OUT_OF_BLOCK) && ($line =~ m/^\s*([\w\-]+)\s*$/)) {
            $state = $WAITING_BLOCK;    # seen blockname but not bracket.
            $blockname = $1;
            next;
        }

        if (($state == $OUT_OF_BLOCK)
            && ($line =~ m/^\s*([\w\-]+)\s*\{\s*$/)) {
            $state = $IN_BLOCK;
            $blockname = $1;
            next;
        }

        if (($state == $WAITING_BLOCK) && ($line =~ m/^\s*\{\s*$/)) {
            $state = $IN_BLOCK;
            next;
        }

        if (($state == $IN_BLOCK) && ($blockname eq $form)
            && ($line =~ m/^\s*(-\w+)\s*(.*?)\s*$/)) {

            my $opt = $1;
            my $val = $2;
            $val =~ s/^[\'\"]//;
            $val =~ s/[\'\"]$//;
            push @args, $opt;
            # Test the length, not the truth, so that a value of "0" is
            # kept rather than silently dropped.
            push(@args, $val) if length $val;
            next;
        }

        if (($state == $IN_BLOCK) && ($line =~ m/^\s*\}\s*$/)) {
            $state = $OUT_OF_BLOCK;
            next;
        }

        # Anything else inside a block (including option lines of forms we
        # were not asked for) is skipped.
        if ($state == $IN_BLOCK) {
            next;
        }

        # If we got here, something went wrong.

        close($formfh);
        print STDERR "At line $linenumber in $file, parse error\n";
        exit 1;

    }
    # out of loop. clean up and return the collected argument list
    close($formfh);
    return @args;

}

# Debugging aid: print every top-level "key = value" pair of a hash-based
# object to the currently selected output handle, one pair per line.
sub print_object {
    my ($ob) = @_;
    for my $key (keys %{$ob}) {
        print "$key = $ob->{$key}\n";
    }
}



# Print help and exit with status 0.  Never returns.
#
#   arg = optional topic.  Topics are matched in this order: /^host/,
#         /port/, /mod/, /^sort/, /^output/, /^form/.  An empty or
#         unrecognized topic prints the general usage text instead.
#
# All help text goes to standard output.

sub help {
    my $arg = shift;
    
    if (! $arg) {
        usage();
        exit 0;
    }
    
    if ($arg =~ m/^host/) {
        print <<'_END_';
        
    A "hostset" is a comma-separated list of IP addresses, CIDR-form
    netmasks, or hostnames.  Hostnames must be resolvable. For example:
        128.2.11.43,www.cmu.edu,128.2.0.0/16
        
    The 'h1file' and 'h2file' options will read a single hostset from a 
    file, combining multiple lines.  Each line in the file must be a valid 
    hostset specification.  Blank lines and any characters after a '#' will
    be ignored.
    
    To specify a particular protocol, use the -f option to specify a filter
    that will be passed to ra.
            
            



_END_
        exit 0;
    }
    if ($arg =~ m/port/) {
        print <<'_END_';
        
    A "portset" is a comma-separated list of port numbers, ranges of port 
    numbers, or TCP/UDP service names. For example:
        80,https,8000-8100
        
    The 'p1file' and 'p2file' options will read a single portset from a 
    file, combining multiple lines.  Each line in the file must be a valid
    portset specification.  Blank lines and any characters after a '#' will
    be ignored.
    
    To specify a particular protocol, use the -f option to specify a filter
    that will be passed to ra.

        
_END_
        exit 0;
    }
    if ($arg =~ m/mod/) {
        print <<'_END_';
        
    summary mode:  Prints one line of output per h1 host, aggregating h2 data.
    
    hostpair mode: Prints one line of output per each distinct pair of hosts 
                   that appear in a single Argus transaction record.
                   
    portpair mode: Prints the line of hostpair mode output, plus one line per
                   distinct pair of ports associated with that hostpair.
                   
    Each mode has an associated set of output, sortby and format specifiers
    
        s_output h_output p_output :  determines the fields that are output
                                      in summary, hostpair, and portpair
                                      modes, respectively.
        
        s_sortby h_sortby p_sortby :  determines how the output is sorted
        
        s_format h_format p_format :  printf-compatible strings describing
                                      how the output fields are printed
                                      
    Since portpair mode prints out hostpair mode data as well, both
    hostpair and portpair can be used to specify output in portpair mode.
                                      
_END_
        exit 0;
    }
    if ($arg =~ m/^sort/) {
        print <<'_END_';
        
    A sort fields specifier is a comma-separated list of fields. Valid sort
    fields depend on mode:
    
    Sort fields valid in all modes: 
       h1_pkts         - Packets transmitted by h1 host
       h2_pkts         - Packets transmitted by h2 host
       h1_bytes        - Bytes transmitted by h1 host
       h2_bytes        - Bytes transmitted by h2 host
       records         - Number of matching Argus records encountered
       total_pkts      - h1_pkts + h2_pkts
       total_bytes     - h1_bytes + h2_bytes

    Additional valid sort fields in summary mode:
       h1_ip           - IP address of h1 host
       h1_name         - Name of h1 host (will be '???' if not resolvable)
       num_peers       - Number of distinct h2 peers per h1 host
       
    Additional valid sort fields in hostpair mode:
       h1_ip           - IP address of h1 host
       h2_ip           - IP address of h2 host
       h1_name         - Name of h1 host (will be '???' if not resolvable)
       h2_name         - Name of h2 host (will be '???' if not resolvable)
       h1_pc           - Number of distinct h1 ports involved in transactions 
                         with the h2 host
       h2_pc           - Number of distinct h2 ports involved in transactions
                         with the h1 host
                         
    Additional valid sort fields in portpair mode (hostpair data will be
    sorted by hostpair sort fields):
       stime           - Starting time of first matching Argus record
       etime           - Ending time of last matching Argus record
       h1_port         - Port number of h1 host
       h2_port         - Port number of h2 host
       proto           - Protocol of transaction
       
    By default, all counters (packets, bytes, etc) are sorted in reverse
    order (largest to smallest). Other fields are sorted in "normal" order.
    Prefixing a field with a '+' or '-' will force normal or reverse sort
    orders, respectively.
_END_
        exit 0;
    }    
    
    if ($arg =~ m/^output/) {
        print <<'_END_';
        
    An output fields specifier is a comma-separated list of fields. Valid
    output fields depend on mode (a '*' before the field name means that it
    cannot be sorted on):
    
    Output fields valid in all modes: 
       h1_pkts         - Packets transmitted by h1 host
       h2_pkts         - Packets transmitted by h2 host
       h1_bytes        - Bytes transmitted by h1 host
       h2_bytes        - Bytes transmitted by h2 host
       records         - Number of matching Argus records encountered
       total_pkts      - h1_pkts + h2_pkts
       total_bytes     - h1_bytes + h2_bytes

    Additional valid output fields in summary mode:
       h1_ip           - IP address of h1 host
       h1_name         - Name of h1 host (will be '???' if not resolvable)
       num_peers       - Number of distinct h2 peers per h1 host
       
    Additional valid output fields in hostpair mode:
       h1_ip           - IP address of h1 host
       h2_ip           - IP address of h2 host
       h1_name         - Name of h1 host (will be '???' if not resolvable)
       h2_name         - Name of h2 host (will be '???' if not resolvable)
*      h1_portlist     - List of distinct h1 ports involved in transactions
                         with the h2 host (may be very long)
*      h2_portlist     - List of distinct h2 ports involved in transactions
                         with the h1 host (may be very long)
       h1_pc           - Number of distinct ports in h1_portlist.
       h2_pc           - Number of distinct ports in h2_portlist.
                         
    Additional valid output fields in portpair mode:
       stime           - Starting time of first matching Argus record
       etime           - Ending time of last matching Argus record
       h1_port         - Port number of h1 host
       h2_port         - Port number of h2 host
       proto           - Protocol of transaction
       
_END_
        exit 0;
    }
    
    # The example below was corrected so that it works as written: the
    # summary-mode form sets -s_output (not -h_output) and ends its format
    # with \n, the second invocation uses the real label 'peer_count', and
    # the searched file name matches the actual default 'hostpairs.forms'.
    if ($arg =~ m/^form/) {
        print <<'_END_';
    
    To save typing, arguments can be read from a "formfile".  A formfile 
    consists of labeled blocks, where the lines in each block consist of
    options to hostpairs.  Arguments specified on a command line will 
    over-ride, and be merged in with options in a formfile. For example, 
    one might have a file called 'hostpairs.forms' with entries:
    
    ftp_downloads {
    -h1 ftp.mydomain.com
    -f  'host ftp.mydomain.com'        
    -p1 20,21
    -h         
    -h_output h2_ip,h2_name,h1_pkts,h1_bytes
    -h_sortby h1_bytes,h2_ip
    -h_format '%-15.15s %-30.30s %12.12s %12.12s\n'
    }
    peer_count {
    -s
    -s_output h1_ip,h1_name,num_peers
    -s_sortby num_peers
    -s_format '%-15.15s %-30.30s %12.12s\n'
    }
    
    One could then invoke hostpairs as:
      hostpairs -ff hostpairs.forms  -fo ftp_downloads
    or
      hostpairs -h1 123.45.67.0/24 -ff hostpairs.forms -fo peer_count
    to use those forms.
    
    Form labels must consist of letters, digits, and the characters '-' and 
    '_'.  Blank lines and anything after a '#' are ignored. One may set the
    $HOSTPAIRS_FORMFILE environment variable to point to a formfile, and 
    $HOME and $ARGUSHOME will be searched for a 'hostpairs.forms' file if
    $HOSTPAIRS_FORMFILE is not set.

_END_
        exit 0;
    }
    
    # Unrecognized topic: fall back to the general usage text.
    usage();
    exit 0;

}

# Print the general usage summary to standard output.  Does not exit; the
# caller decides what happens next.
#
# The -h_format and -p_format entries used to be described as the
# "summary" format string; they now name their own modes.

sub usage {
    print <<'_END_';

Usage: hostpairs [options] [argus files]
       

Options (may be abbreviated to uniqueness):

  -h1 <hosts>               Hosts and/or netmasks for hostset1
  -h2 <hosts>               Hosts and/or netmasks for hostset2
  -p1 <ports>               Ports for portset1
  -p2 <ports>               Ports for portset2
  -filter 'filter'          Filter to pass to ra. Can be abbreviated as -f.
  -ff <formfile>            Read from formfile
  -form <label>             Set option vals using form <label> in formfile
  -s, -h, -p                Set mode: s(ummary) or  h(ostpair) or p(ortpair)
  -s_output <outputfields>  Set summary output fields
  -s_sortby <sortfields>    Set summary mode sort fields
  -s_format <format>        Set Perl printf-compatible summary format string  
  -h_output <outputfields>  Set hostpair output fields
  -h_sortby <sortfields>    Set hostpair mode sort fields
  -h_format <format>        Set Perl printf-compatible hostpair format string
  -p_output <outputfields>  Set portpair output fields
  -p_sortby <sortfields>    Set portpair mode sort fields
  -p_format <format>        Set Perl printf-compatible portpair format string
  -timeformat <format>      Set strftime-compatible time format string
  -h1file <hostfile>        Read hostset1 from <file>
  -h2file <hostfile>        Read hostset2 from <file>
  -p1file <portfile>        Read portset1 from <file>
  -p2file <portfile>        Read portset2 from <file>
  -filterfile <filterfile>  Read filter from <file>
  -noheader                 Turn off printing of header summary information
  -A                        Use application byte counts
  -pn                       Print port names instead of numbers
  -extra_ra_args 'args'     Pass extra arguments to ra
  -ra <ra_binary>           Path to ra binary 
  -rarc <rarc_file>         Path to rarc to use
  -w  <file>                Write output to file (file must not already exist)
  -help                     Print usage information
  
  -help < hosts | ports | modes | sortfields | outputfields | forms > 
        for help on specific topics.

  Also, see:
      - POD documentation for hostpairs for more details on usage.
      - The man page for ra for information on specifying filters and 
        additional ra arguments.
      - Perl documentation (perldoc -f sprintf) for specifying output format 
        strings for the -s_format, -h_format, and -p_format options.
      - Man page for strftime for specifying the -timeformat format string.
  
_END_

}

__END__

=head1 Name

hostpairs -- Aggregate host information from Argus data files

=head1 Synopsis

hostpairs [-h1 hosts] [-p1 ports] [-h2 hosts] [-p2 ports] [files]
          [...]

=head1 Description

Hostpairs is a program that aggregates and displays IP host information 
from Argus data files. It parses an ra(1) data stream and selects records 
in which specified sets of hosts and their associated sets of ports match 
a given set of IP address and port specifications. The goal is to provide 
a way to easily specify queries and extract data from Argus files in a 
human-readable form.

Pairs of hosts are specified by setting (via the -h1, -p1, -h2, -p2 
options) values (all optional) for 'hostset1' and an associated 
'portset1', 'hostset2' and an associated 'portset2', and by setting an
optional Argus filter to further narrow down the data that will be seen by
the hostpairs program. If a hostset or portset is not specified, any host 
or port in the ra datastream will match. Pairs are selected when they match 
all specified host and port sets, in any order (ie no attempt is made to 
use flow direction or src/dst information).  For example:

 hostpairs -h1 www1.mydom.com,www2.mydom.com -p1 80 -h2 128.2.0.0/16
 
would match all traffic going to/from port 80 on www1.mydom.com or
www2.mydom.com from/to any port on hosts matching the given network/mask.
If a given host would match both h1/p1 and h2/p2 specifications, 
h1 and h2 will be assigned based on the order of the first record seen.

=head2 Modes

Hostpairs has three different modes for gathering data and displaying
output.  Each mode has associated sets of fields for controlling which
data is output and how this data is sorted:

=over 4

=item Summary mode

In summary mode, hostpairs will aggregate h2 data.  In the above example 
invocation, hostpairs in summary mode would print out two lines, one for 
each of the h1 hosts, with data aggregated over all matching h2 hosts.

=item Hostpair mode

In hostpair mode, hostpairs will print out one line for each distinct pair 
of matching hosts, along with associated data for that pair. 

=item Portpair mode

In portpair mode, hostpairs will print out the hostpair mode data, along 
with one line for each distinct pair of ports by which the hosts connected.

=back

=head1 Options and Usage

The basic usage: Pick values for h1/p1 and h2/p2.  Choose a mode.
Choose output fields, sort fields, and a format for the output for that 
mode (portpair mode uses both hostpair and portpair output/sort/formats).

More information on usage can be found by using hostpairs with the 
-help option:

 hostpairs -help 

will print out a list of options and their meanings.

 hostpairs -help < hosts|ports|modes|sortfields|outputfields|forms >

will print out help on specific topics, including descriptions of the 
various output fields and sort fields.

=head1 Output and Sorting

Hostpairs selects which data to gather and output based on the given mode
and associated values for sort fields and output fields.  Before printing 
data specified by the output fields, the data is sorted according to the
given sort fields, in the order given (first by the first sort field, then
by the second, etc).  An optional header containing file meta-data is also
printed. In all modes, most (if not all) output fields are sortable.

Output data is formatted according to printf-style format strings (and
strftime-style format strings for the portpair mode time fields).  If
output fields are specified without also specifying a corresponding format
string, hostpairs will generate a plausible format string to use.

=head1 Environment

Hostpairs understands the following environment variables.  All environment
variable settings are over-ridden if a corresponding command-line argument
is given.

=over 4

=item ARGUSFILES

A white-space separated list of Argus data files that will be processed
if data files are not specified on the command line.

=item HOSTPAIRS_FORMFILE

Location of formfile to use.

=item ARGUSHOME HOME

Hostpairs (unless modified) will attempt to find an ra binary in
$ARGUSHOME/bin or $HOME/bin, and will search for a 'rarc.hostpairs' 
or given formfiles in $ARGUSHOME and $HOME.  See the beginning of the 
hostpairs source for details.

=back

=head1 Performance and Filters

Hostpairs isn't fast.  In order to get better performance, one should
specify an argus filter, to reduce the number of records seen.  The
speed improvement by doing so is often an order of magnitude or more. 
A filter is also necessary in order to specify a particular protocol or
exclude hosts and ports from the ra data stream.

Some of the data collection options, in particular gathering port and (to a
lesser extent) host information, may be fairly memory intensive.  Hostname
lookup may take a while.  Hostpairs will only do DNS lookups and gather
data if required by output or sort fields.

=head1 Author

Clauss Strauch (cbs@cs.cmu.edu)

=head1 Copyright

Copyright (c) 2001 Carnegie Mellon University
All Rights Reserved.

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby granted, 
provided that the above copyright notice appear in all copies and
that both that copyright notice and this permission notice appear
in supporting documentation, and that the name of CMU not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.  

CMU DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
SOFTWARE.

Carnegie Mellon requests users of this software to return to

Software_Distribution@CS.CMU.EDU or

Software Distribution Coordinator
School of Computer Science
Carnegie Mellon University
Pittsburgh PA 15213-3890

any improvements or extensions that they make and grant Carnegie Mellon
the rights to redistribute these changes.


=cut
