#!/usr/bin/perl

=head1 NAME

 apt-cacher2 - WWW proxy optimized for use with APT

 Copyright (C) 2005 Eduard Bloch <blade@debian.org>
 Distributed under the terms of the GNU Public Licence (GPL).

=head1 SYNOPSIS

 ./setup.pl /home/me/cache
 edit /etc/apt/sources.list (use sources like deb http://proxy:3142/archiveserver/debian ...)
 apt-get update
 apt-get -u upgrade

=head1 DESCRIPTION

If you have two or more Debian GNU/Linux machines on a fast local
network and you wish to upgrade packages from the Internet, you
don't want to download every package several times.

apt-cacher2 is a tiny HTTP proxy that keeps a cache on disk of Debian
binary/source packages and meta files which have been received from Debian
distribution servers on the Internet. When an apt-get client issues
a request for a file to apt-cacher2, if the file is already on disk
it is served to the client immediately, otherwise it is fetched from the
Internet and served to the client while a copy is beeing stored on the disk.
This means that several Debian machines can be upgraded but each package needs
to be downloaded only once.

apt-cacher2 is a rewrite of the original apt-cacher.pl CGI script, keeping
compatibility in mind. The cached data can be shared by the both
implementations, while apt-cacher2 providers better performance and less server
load.

=head1 INSTALLATION

Assuming your cache server is called B<www.myserver.com>
and your cache directory is called B</home/me/cache>, then:

1. Edit apt-cacher.conf to customize your settings

2. Run apt-cacher2

=cut
# ----------------------------------------------------------------------------

use strict;
#use warnings;

use Fcntl ':flock';
use POSIX;

use LWP::UserAgent;
use IO::Socket::INET;
use HTTP::Response;

use Time::HiRes qw( sleep gettimeofday tv_interval );


my @index_files = (
    'Index',
	'Packages.gz',
	'Packages.bz2',
	'Release',
	'Release.gpg',
	'Sources.gz',
	'Sources.bz2',
	'Contents-.+\.gz',
    'pkglist.*\.bz2',
    'release$',
    'release\..*',
    'srclist.*\.bz2'
);
my $index_files_regexp = '(' . join('|', @index_files) . ')$';


# Include the library for the config file parser
require '/usr/share/apt-cacher/apt-cacher-lib.pl';
require '/etc/apt-cacher/checksumming.conf';


# Set some defaults
my $version='0.1'; # this will be auto-replaced when the Debian package is beeing built
my $configfile_default = '/etc/apt-cacher/apt-cacher.conf';
my $daemon_port_default=3142;
my $client="local";

# Read in the config file and set the necessary variables
my $configfile = $configfile_default;

my $direct_mode; # defines using STDIN/STDOUT
my $inetd_mode; # no security checks
my $cgi_mode;
my $cgi_path;

my $cfg;

my $pidfile;
my @extraconfig;

my $chroot;
my $retnum;
my $do_fork_away;

# this script needs to be executed trough a CGI wrapper setting a flag variable
if($ENV{CGI_MODE})
{
    # yahoo, back to the roots, assume beeing in CGI mode
    $cgi_mode=1;
    $direct_mode=1;
    # pick up the URL
    $cgi_path=$ENV{PATH_INFO} if ! $cgi_path;
    $cgi_path=$ENV{QUERY_STRING} if ! $cgi_path;
    $cgi_path="/" if ! $cgi_path; # set an invalid path to display infos below
}
else {
    while(scalar @ARGV) {

        my $arg=shift(@ARGV);

        if($arg eq "-c") {
            $configfile=shift(@ARGV);
            die "$configfile unreadable" if ! -r $configfile;
        }
        elsif($arg eq "-r") {
            $chroot=shift(@ARGV);
            die "No such directory: $chroot\n" if ! -d $chroot;
        }
        elsif($arg eq "-R") {
            $retnum=shift(@ARGV);
        }
        elsif($arg eq "-i") {
            $inetd_mode=1;
            $direct_mode=1;
        }
        elsif($arg eq "-d") {
            $do_fork_away=1;
        }
        elsif($arg eq "-p") {
            $pidfile=shift(@ARGV);
        }
        elsif($arg=~/(\S+)=(\S+)/) {
            push(@extraconfig, $1, $2);
        }
        elsif($arg eq "-h" || $arg eq "--help") {
            print <<EOM;
USAGE: $0 <options> <override(s)>
Options:

-c configfile   Custom config file (default: $configfile_default)
-i              Inetd mode, STDIN and STDOUT are used for service
(default: standalone server mode)
-d              become a background daemon

Advanced options (root only):
-r directory    (experimental option) 
                path to chroot to after reading the config and opening the log
                files. cache directory setting must be relative to the new root.
                WARNING: log files should be created before and be owned by tne
                effective user/group if -g or -u are used
-p pidfile      write the server process ID into this file

Overrides:     override config variables (see config file), eg. daemon_port=9999

EOM
            exit(0);
        }
        else {
            die "Unknown parameter $arg\n";
        }
    }
}

eval {
        $cfg = read_config($configfile);
};

# not sure what to do if we can't read the config file...
die "Could not read config file: $@" if $@;

# Now set some things from the config file
# $logfile used to be set in the config file: now we derive it from $logdir
$$cfg{logfile} = "$$cfg{logdir}/access.log";

# $errorfile used to be set in the config file: now we derive it from $logdir
$$cfg{errorfile} = "$$cfg{logdir}/error.log";

$$cfg{fetch_timeout}=300; # five minutes from now

my $private_dir = "$$cfg{cache_dir}/private";
define_global_lockfile("$private_dir/exlock");

# override config values with the user-specified parameters
while(@extraconfig) { 
    my $k=shift(@extraconfig);
    my $v=shift(@extraconfig); 
    $$cfg{$k}=$v;
}


my ($aclog_fh, $erlog_fh);
#FIXME: genauer die Scopes betrachten
my ($path, $filename, $new_filename, $con, $source);

my %pathmap;

if($$cfg{path_map}) {
    for(split(/\s*;\s*/, $$cfg{path_map})) {
        my @tmp = split(/\s+/, $_);
        # must have at least one path and target
        next if ($#tmp < 1);
        my $key=shift(@tmp);
        $pathmap{$key}=[@tmp];
    }
}


# Output data as soon as we print it
$| = 1;

# Function prototypes
sub ipv4_addr_in_list ($$);
sub ipv6_addr_in_list ($$);
sub get_abort_time ();

# ----------------------------------------------------------------------------
# Die if we have not been configured correctly
die "$0: No cache_dir directory!\n" if (!-d $$cfg{cache_dir});
die "$0: No cache_dir/private directory!\n" if (!-d $private_dir);

# ----------------------------------------------------------------------------
# Data shared between functions

my $cached_file;
my $cached_head;
my $complete_file;
my $notify_file;

my $do_import=0;
my $concloseflag;
my $is_index_file;

my $ua;
my $daemon;
my $server_pid;
my $fetcher_pid;
my %childPids;
my $terminating;

sub term_handler {
    $terminating=1;

    # close all connections or shutdown the server if parent and kill 
    debug_message("received SIGTERM, terminating");
    $con->close if defined($con);

    
    if($server_pid && $server_pid == $$) {
        $daemon->shutdown(2);
    }

    # stop all children
    #{ doesn't work, signal comes delayed. Why?!
    #    local $SIG{"TERM"} = 'IGNORE';          
    #    kill("TERM", -$$);
    #}
    for(keys %childPids) { 
        &debug_message("killing subprocess: $_"); 
        kill 15, $_;
    };
    exit 0;
};

sub reload_config {
    debug_message("Got SIGHUP, reloading config");
    $cfg = read_config($configfile);
};

# broken, kills unrelated processes. Not using for now.
# perlipc(1)
# also remove them from the to-be-killed list
#sub reap_children {
#    my $child;
#    while (($child = waitpid(-1,WNOHANG)) > 0) {
#        delete $childPids{$child};
#    }
#    $SIG{CHLD} = \&reap_children;  # still loathe sysV
#
#}
#$SIG{CHLD} = \&reap_children;
$SIG{CHLD} = 'IGNORE';
$SIG{'TERM'} = \&term_handler;
$SIG{'HUP'} = \&reload_config;

my $getBufLen=10000;
my $maxspeed;

my ($chfd, $pkfd);

# for rate limit support
if($$cfg{limit}>0) {
    $maxspeed = $$cfg{limit}*1024;
    $getBufLen = $maxspeed/20; # 20 portions per second should be enough
}

sub setup_agent {

   return if(defined($ua));

   $ua=LWP::UserAgent->new('keep_alive' => 1);

   # Check whether a proxy is to be used, and set the appropriate environment variable
   my $proxystring;
   if ( $$cfg{use_proxy} eq 1 && $$cfg{http_proxy}) {
       $proxystring="http://";
       if ( $$cfg{use_proxy_auth} eq 1) {
           $proxystring.=$$cfg{http_proxy_auth}.'@';
       }
       $proxystring.=$$cfg{http_proxy};
   }
   $ua->proxy("http", $proxystring) if $proxystring;
}





# BEGINN MAIN PART

if($cgi_mode && defined($$cfg{cgi_advise_to_use}) && $$cfg{cgi_advise_to_use}) {
    print "Status: 410 $$cfg{cgi_advise_to_use}\r\n\r\n";
    exit 0;
}

if($direct_mode) {
    &setup_ownership;
    &open_log_files;
#optional checksumming support
    db_init("$$cfg{cache_dir}/md5sums.sl3");
    $client = "INETD" if $inetd_mode;

    # get the string if available even in inetd / direct mode so local calles can
    # identify themselves in the logs.
    $client=$ENV{REMOTE_ADDR} if exists $ENV{REMOTE_ADDR};

    &handle_connection;
    exit 0;
}

my %daemonopts = (LocalPort => $$cfg{daemon_port}, Proto => 'tcp', Listen => 1, ReuseAddr => 1);
$daemonopts{LocalAddr}=$$cfg{daemon_addr} if(defined($$cfg{daemon_addr}));

while(1) {
    $daemon = IO::Socket::INET->new(%daemonopts);
    last if $daemon;
    $retnum--;
    last if($retnum<=0);
    print STDERR "Unable to bind socket (port $$cfg{daemon_port}), trying again in 5 seconds.\n";
    sleep 5;
}
die "Unable to bind socket (port $$cfg{daemon_port}), $0 not started.\n" if ! $daemon;

$server_pid=$$;

if($do_fork_away) {
    my $pid = fork();
    if ($pid < 0) {
        barf("fork() failed");
    }
    if ($pid > 0) {
        # parent
        exit 0;
    }
}

# STATE: Port open, still beeing root. Create pidfiles, logfiles, then su
# 
if($pidfile) {
    open(my $fh, ">$pidfile");
    print $fh $$;
    close($fh);
}


&setup_ownership;
&open_log_files;
#optional checksumming support
db_init("$$cfg{cache_dir}/md5sums.sl3");

# State: READY
# That is the working condition (daemon mode)

debug_message("Apt-Cacher started with Debug output enabled, accepting connections...");

while (1)
{
    my $newcon = $daemon->accept;
    # we don't stop, only by term_handler since the accept method is unreliable
    next if(!$newcon);
    last if $terminating;

    $client = $newcon->peerhost;
    debug_message("Connection from $client");

    my $pid = fork();
    if ($pid < 0) {
        barf("fork() failed");
    }

    if ($pid > 0) {
        # parent
        debug_message("registred child process: $pid");
        $childPids{$pid}=1;
        next;
    }
    # child
    undef %childPids;

    &handle_connection($newcon);
    exit (0);

}
exit 0;
# exit from the daemon loop



sub handle_connection {
    # now begin connection's personal stuff
    debug_message("New HTTP connection open");
    
    if($direct_mode) {
        # beeing in forced mode, ie. manual call
        $source=*STDIN;
        $con = *STDOUT;
    }
    else {

        # serving a network client
        
        $con = shift;
        $source = $con;
    }
    

    if(!$inetd_mode) {
        # ----------------------------------------------------------------------------
        # Let's do some security checking. We only want to respond to clients within an
        # authorised address range (127.0.0.1 and ::1 are always allowed).

        my $ip_pass = 1;
        my $ip_fail = 0;
        my $clientaddr;

        # allowed_hosts == '*' means allow all ('' means deny all)
        # denied_hosts == '' means don't explicitly deny any
        # localhost is always accepted
        # otherwise host must be in allowed list and not in denied list to be accepted

        if ($client =~ /:/) # IPv6?
        {
            defined ($clientaddr = ipv6_normalise ($client)) or goto badaddr;
            if (substr ($clientaddr, 0, 12) eq "\0\0\0\0\0\0\0\0\0\0\xFF\xFF")
            {
                $clientaddr = substr ($clientaddr, 12);
                goto is_ipv4;
            }
            elsif ($clientaddr eq "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1")
            {
                debug_message("client is localhost");
            }
            else
            {
                $ip_pass = ($$cfg{allowed_hosts_6} =~ /^\*?$/) ||
                ipv6_addr_in_list ($clientaddr, 'allowed_hosts_6');
                $ip_fail = ipv6_addr_in_list ($clientaddr, 'denied_hosts_6');
            }
        }
        elsif (defined ($clientaddr = ipv4_normalise ($client))) # IPv4?
        {
            is_ipv4:
            if ($clientaddr eq "\x7F\0\0\1")
            {
                debug_message("client is localhost");
            }
            else
            {
                $ip_pass = ($$cfg{allowed_hosts} =~ /^\*?$/) ||
                ipv4_addr_in_list ($clientaddr, 'allowed_hosts');
                $ip_fail = ipv4_addr_in_list ($clientaddr, 'denied_hosts');
            }
        }
        else
        {
            goto badaddr;
        }

        # Now check if the client address falls within this range
        if ($ip_pass && !$ip_fail)
        {
            # Everything's cool, client is in allowed range
            debug_message("Client $client passed access control rules");
        }
#        elsif($client eq "local")
#        {
#            # Everything's cool, client is in allowed range
#            debug_message("Client $client passed access control rules");
#        }
        else
        {
            # Bzzzt, client is outside allowed range. Send 'em a 403 and bail.
            badaddr:
            debug_message("Alert: client $client disallowed by access control");

            &sendrsp(403, "Access to cache prohibited");
            exit(4);
        }

    }

    REQUEST:
    while(!$concloseflag) {

        my $testpath; # temporary, to be set by GET lines, undef on GO
        my $ifmosince;# to be undef by new GET lines
        my $send_head_only=0; # to be undef by new GET lines
        my $tolerated_empty_lines=20;
        my $rangereq;

        # reading input line by line, trough the secure input method
        CLIENTLINE: while(1) {

            debug_message("Processing a new request line");

            $_=&getRequestLine;
            debug_message("got: $_");

            exit if !defined($_);

            if(/^$/) {
                if(defined($testpath)) {
                    # done reading request
                    $path=$testpath;
                    last CLIENTLINE;
                }
                elsif(!$tolerated_empty_lines)   {
                    &sendrsp(403, "Go away");
                    exit(4);
                }
                else {
                    $tolerated_empty_lines--;
                }
            }
            else {
                if(/^(GET|HEAD)\s+(\S+)/) {
                    if(defined($testpath)) {
                        &sendrsp(403, "Confusing request");
                        exit(4);
                    }
                    $testpath=$2;
                    # also support pure HEAD calls
                    if($1 eq 'HEAD') {
                        $send_head_only=1;
                    }
                }
                elsif(/^Connection: close/i) {
                    $concloseflag=1;
                }
                elsif(/^Connection: .*TE/) {
                    $concloseflag=1;
                }
                elsif(/^Range/i) {
                    $rangereq=1;
                }
                elsif(/^If-Modified-Since:\s+(.*)/i) {
                    $ifmosince=$1;
                }
                elsif(/^\S+: [^:]*/) {
                    # whatever, but valid
                }
                else {
                    &sendrsp(403, "Could not understand $_");
                    exit(4);
                }
            }
        }

        # always resend the file if a part was requested since we don't support ranges
        $ifmosince=0 if !$rangereq;

        # tolerate CGI specific junk and two slashes in the beginning
        $path =~ s!^/apt-cacher\??/!/!;
        $path =~ s!^//!/!;
        $path =~ s!^http://!!; # allow proxy style

        # Now parse the path
        if ($path =~ /^\/?report/) {
            usage_report();
            exit(0);
        }

        if ($path !~ m(^/?.+/.+)) {
            usage_error();
        }

        REPARSE:
        
        my($host,$uri) = ($path =~ m#^/?([^/]+)(/.+)#);
        
        if ( !$host || !$uri ) {
            usage_error();
        }

        ($filename) = ($uri =~ /\/?([^\/]+)$/);

        if($$cfg{allowed_locations}) {
            #         debug_message("Doing location check for ".$$cfg{allowed_locations} );
            my $mess;
            my $cleanuri=$uri;
            $cleanuri=~s!/[^/]+/[\.]{2}/!/!g;
            if ($host eq ".." ) {
                $mess = "'..' contained in the hostname";
            }
            elsif ($cleanuri =~/\/\.\./) {
                $mess = "File outside of the allowed path";
            }
            else {
                for( split(/\s*,\s*/,$$cfg{allowed_locations}) ) {
                    debug_message("Testing URI: $host$cleanuri on $_");
                    goto location_allowed if ("$host$cleanuri" =~ /^$_/);
                }
                $mess = "Host '$host' is not configured in the allowed_locations directive";
            }
            badguy:
            debug_message("$mess; access denied");
            &sendrsp(403, "Access to cache prohibited, $mess");
            exit(4);
        }
        location_allowed:

        $do_import=0;
        $is_index_file=0;

        if ($filename =~ /(\.deb|\.rpm|\.dsc|\.tar\.gz|\.diff\.gz|\.udeb)$/) {
            # We must be fetching a .deb or a .rpm, so let's cache it.
            # Place the file in the cache with just its basename
            $new_filename = $filename;
            debug_message("new filename with just basename: $new_filename");
        } 
        elsif ($filename =~ /2\d\d\d-\d\d-\d\d.*\.gz$/) {
            # a patch file. Needs a unique filename but no freshness checks
            $new_filename = "$host$uri";
            $new_filename =~ s/\//_/g;
            debug_message("new long filename: $new_filename");
        }
        elsif ($filename =~ /$index_files_regexp/) {
            $is_index_file=1;
            # It's a Packages.gz or related file: make a long filename so we can cache these files without
            # the names colliding
            $new_filename = "$host$uri";
            $new_filename =~ s/\//_/g;
            debug_message("new long filename: $new_filename");
            # optional checksumming support
            if ($filename =~ /(Packages|Sources)/) {
                # warning, an attacker could poison the checksum cache easily
                $do_import=1;
            }
        } else {
            # Maybe someone's trying to use us as a general purpose proxy / relay.
            # Let's stomp on that now.
            debug_message("Sorry, not allowed to fetch that type of file: $filename");
            &sendrsp(403, "Sorry, not allowed to fetch that type of file: $filename");
            exit(4);
        }

        $cached_file = "$$cfg{cache_dir}/packages/$new_filename";
        $cached_head = "$$cfg{cache_dir}/headers/$new_filename";
        $complete_file = "$private_dir/$new_filename.complete";
        $notify_file = "$private_dir/$new_filename.notify";

        my $force_download=0;

        my $cache_status;

        debug_message("looking for $cached_file");

        if ($is_index_file) {
            debug_message("known as index file: $filename");
            # in offline mode, deliver it as-is, otherwise check freshness
            if (-f $cached_file && -f $cached_head && !$$cfg{offline_mode}) {
                if($$cfg{expire_hours} > 0) {
                    my $now = time();
                    my @stat = stat($cached_file);
                    if (@stat && int(($now - $stat[9])/3600) > $$cfg{expire_hours}) {
                        debug_message("unlinking $new_filename because it is too old");
                        # Set the status to EXPIRED so the log file can show it was downloaded again
                        $cache_status = "EXPIRED";
                        debug_message("$cache_status");
                        $force_download=1;
                    }
                }
                else {
                    # use HTTP timestamping
                    my ($oldhead, $testfile, $newhead);
                    my $response = &ua_act(1, $host, $uri);
                    #my $response = $ua->head("http://$host$uri");
                    $newhead = $response->header("Last-Modified");
                    if($newhead && open($testfile, $cached_head)) {
                        
                        $newhead =~ s/\n|\r//g;

                        for(<$testfile>){
                            if(/^.*Last-Modified:\s(.*)(\r|\n)/) {
                                $oldhead = $1;
                                last
                            }
                        }
                        close($testfile);
                    }
                    if($oldhead && ($oldhead eq $newhead) ) {
                        # that's ok
                        debug_message("remote file not changed, $oldhead vs. $newhead");
                    }
                    else {
                        debug_message("unlinking $new_filename because it differs from server's version");
                        $cache_status = "EXPIRED";
                        debug_message("$cache_status");
                        $force_download=1;
                    }
                }
            }
        }

        # handle if-modified-since in a better way (check the equality of
        # the time stamps). Do only if download not forced above.

        if($ifmosince && !$force_download) {
            $ifmosince=~s/\n|\r//g;

            my $oldhead;
            if(open(my $testfile, $cached_head)) {
                LINE: for(<$testfile>){
                    if(/^.*Last-Modified:\s(.*)(\r|\n)/) {
                        $oldhead = $1;
                        last LINE;
                    }
                }
                close($testfile);
            }

            if($oldhead && $ifmosince eq $oldhead) {
                &sendrsp(304, "Not Modified");
                debug_message("File not changed: $ifmosince");
                next REQUEST;
            }
        }

        &set_global_lock(": file download decission"); # file state decissions, lock that area

        my $fromfile; # handle for the reader

        # download or not decission. Also releases the global lock
        dl_check:
        if( !$force_download && -e $cached_head && -e $cached_file) {
            if (-f $complete_file) {
                # not much to do if complete
                $cache_status = "HIT";
                debug_message("$cache_status");
            }
            else {
                # a fetcher was either not successfull or is still running
                # look for activity...
                sysopen($fromfile, $cached_file, O_RDONLY) || undef $fromfile;
                if (flock($fromfile, LOCK_EX|LOCK_NB)) {
                    flock($fromfile, LOCK_UN);
                    # bad, no fetcher working on this package. Redownload it.
                    close($fromfile); undef $fromfile;
                    debug_message("no fetcher running, forcing download");
                    $force_download=1;
                    goto dl_check;
                }
            }

            &release_global_lock;
        }
        else {
            # bypass for offline mode, no forking, just report the "problem"
            if($$cfg{offline_mode})
            {
                &sendrsp(503, "Apt-Cacher in Offline Mode");
                next REQUEST;
            }

            # (re) download them
            unlink($cached_file, $cached_head, $complete_file, $notify_file);
            debug_message("file does not exist or so, creating it");
            # Set the status to MISS so the log file can show it had to be downloaded
            if(!defined($cache_status)) { # except on special presets from index file checks above
                $cache_status = "MISS"; 
                debug_message("$cache_status");
            }

            # the writer releases the global lock after opening the target file
            my $pid = fork();
            if ($pid < 0) {
                barf("fork() failed");
            }
            if ($pid == 0) {
                # child, the fetcher thread
                undef %childPids;
                sysopen($pkfd, $cached_file, O_RDWR|O_CREAT|O_EXCL, 0644) || barf("Unable to store files");
                open ( $chfd, ">$cached_head");

                if (flock($pkfd, LOCK_EX)) {
                    # jump from the global lock to a lock on the target file
                    &release_global_lock;

                    &fetch_store ($host, $uri); 

                    exit 0;
                }
                else {
                    barf("Problem locking the target file!");
                }
                # child exiting above, so or so
            }
            # parent continues
            $childPids{$pid}=1;
            debug_message("registred child process: $pid");
            # &release_global_lock; to be release by downloader thread, not here
        }

        debug_message("checks done, can return now");
        my $ret = &return_file (\$fromfile, $send_head_only);
        goto dl_check if $ret==2; # retry code
        debug_message("Package sent");

        # Write all the stuff to the log file
        writeaccesslog("$cache_status", "$new_filename");
        if(!$is_index_file && !check_sum($new_filename)) {
            writeerrorlog("   ALARM!    Faulty package in local cache detected! Removing, to be replaced with the next download.");
            unlink $cached_file;
            exit(5);
        }
        
    }

}


sub return_file {
    # At this point the file is open, and it's either complete or somebody
    # is fetching its contents

    our ($ffref, $send_head_only) =@_;
    my $fromfile=$$ffref;

    my $header_printed=0;

    data_init();
    
    my $abort_time = get_abort_time();

    my $buf;

    my $geslen=0;
    my $curlen=0;
    my $explen;

    my $complete_found;

    # needs to print the header first
    CHUNK: while (1) {

        #debug_message("Send loop iteration:");

        if (time() > $abort_time) {
            debug_message("abort (timeout)");
            exit(4);
        }

        if(! $header_printed) {

            # add this reader to the notification list before printing anything useful to the client
            if(! -f $complete_file) { # there is no point if the package is already downloaded
                open(my $nf, ">>$notify_file");
                flock($nf, LOCK_EX);
                print $nf "$$\n";
                flock($nf, LOCK_UN);
                close($nf);
            }

            my $headstring;
            if(-s $cached_head) {
                # header file seen, protect the reading 
                &set_global_lock(": reading the header file");
                if(! -f $cached_head) {
                    # file removed while waiting for lock - download failure?!
                    # start over, maybe spawning an own fetcher
                    &release_global_lock;
                    return 2;
                    #goto dl_check;
                }

                open(my $in, $cached_head);
                my $code=200;
                my $msg='';
                my $headstring='';

                $headstring=<$in>; # read exactly one status line

                ($code, $msg) = ($headstring=~/^HTTP\S+\s+(\d+)\s(.*)/);
                # alternative for critical errors
                if(!defined($code)) {
                    ($code, $msg) = ($headstring=~/^(5\d\d)\s(.*)/);
                }

                if(!defined($code)) {
                    writeerrorlog("Faulty header file detected: $cached_head, first line was: $headstring");
                    unlink $cached_head;
                    exit 3;
                }

                # in CGI mode, use alternative status line. Don't print one
                # for normal data output (apache does not like that) but on
                # anormal codes, and then exit immediately
                if($cgi_mode) {
                    # don't print the head line but a Status on errors instead
                    $headstring=~s/^HTTP\S+/Status:/;
                    if($code == 200) {
                        $headstring=''; # kick headline by default
                    }
                    else {
                        print $con $headstring."\n\n";
                        exit 1;
                    }
                }

                # keep alive or not? Just follow the client
                $headstring .= "Connection: ".($concloseflag?"Close":"Keep-Alive")."\r\n";

                # keep only parts interesting for apt
                if($code==200) {
                    for(<$in>) {
                        if(/^Last-Modified|Content|Accept/) {
                            $headstring.=$_;
                            if(/^Content-Length:\ *(\d+)/) {
                                $explen=$1;
                            }
                        }
                    }
                }
                close($in);
                &release_global_lock;

                print $con $headstring."\r\n";

                $header_printed=1;
                debug_message("Header sent: $headstring");

                # Stop after sending the header with errors
                return if($code != 200); 

            }
            else {
                sleep(0.5);
                next CHUNK;
            }

            # pure HEAD request, we are done
            return if $send_head_only;
            debug_message("ready to send contents of $cached_file");
        }

        if(! $fromfile) # is the data file open already? open in this iteration if needed
        {
            debug_message("opening file first: $cached_file");
            if( ! -f $cached_file) {
                sleep(1);
                next CHUNK;
            }

            sysopen($fromfile, $cached_file, O_RDONLY); #FIXME, checken
            next CHUNK;
        }
        else
        {
            my $n=0;
            $n = sysread($fromfile, $buf, 65536);
            debug_message("read $n bytes");

            if(!defined($n)) {
                debug_message("Error detected, closing connection");
                exit(4);
            }
            
            if($n==0) {
                
                if($complete_found) { # comlete file was found in the previous iteration
                    # this is the loop exit condition
                    # 
                    # some extra error cases
                    #if($explen && $curlen != $explen) {
                    #    writeerrorlog("  ALARM!   $cached_file file is smaller than expected ($explen). Renaming to $cached_file.corrupted for further investigation, check your filesystem!");
                    #    unlink "$cached_file.corrupted";
                    #    rename($cached_file, "$cached_file.corrupted");
                    #    exit(5);
                    #}
                    last CHUNK;
                }

                if (-f $complete_file) {
                    # do another iteration, may need to read remaining data
                    debug_message("complete file found");
                    $complete_found=1;
                    next CHUNK;
                }

                #debug_message("waiting for new data");
                # wait for fresh data
                sleep(0.5);
                next CHUNK;

            }
            else {
                $curlen+=$n;
                if($explen && $curlen > $explen) {
                    writeerrorlog("  ALARM!   $cached_file file is larger than expected ($explen). Renaming to $cached_file.corrupted for further investigation, check your filesystem!");
                    unlink "$cached_file.corrupted";
                    rename($cached_file, "$cached_file.corrupted");
                    exit(5);
                }
                #debug_message("write $n / $curlen bytes");
                # send data and update watchdog
                print $con $buf;
                debug_message("wrote $n (sum: $curlen) bytes");
                $abort_time = get_abort_time();
                data_feed(\$buf);
            }
        }
    }
}
















sub barf {
	my $errs = shift;

	die "--- $0: Fatal: $errs\n";
}

sub usage_error {
    &open_log_files;
	writeerrorlog("--- $0: Usage error");

    if(! defined($$cfg{example_sources_line})) {
        $$cfg{example_sources_line}="deb&nbsp;http://<b>yourcache.example.com:$$cfg{daemon_port}/</b>ftp.au.debian.org/debian&nbsp;unstable&nbsp;main&nbsp;contrib&nbsp;non-free";
    }

    &sendrsp(200, "OK", "Content-Type", "text/html", "Expires", 0);
	print $con <<EOF;

<html>
<title>Apt-cacher version $version
</title><style type="text/css"><!--
a { text-decoration: none; }
a:hover { text-decoration: underline; }
h1 { font-family: arial, helvetica, sans-serif; font-size: 18pt; font-weight: bold;}
h2 { font-family: arial, helvetica, sans-serif; font-size: 14pt; font-weight: bold;}
body, td { font-family: arial, helvetica, sans-serif; font-size: 10pt; }
th { font-family: arial, helvetica, sans-serif; font-size: 11pt; font-weight: bold; }
//--></style>
</head>
<body>
<p>
<table border=0 cellpadding=8 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#9999cc"><td> <h1>Apt-cacher version $version</h1> </td></tr>
<tr bgcolor="#cccccc"><td>
Usage: edit your /etc/apt/sources.list so all your HTTP sources are prepended 
with the address of your apt-cacher machine and the port, like this:
<blockquote>deb&nbsp;http://ftp.au.debian.org/debian&nbsp;unstable&nbsp;main&nbsp;contrib&nbsp;non-free</blockquote>
becomes
<blockquote>$$cfg{example_sources_line}</blockquote>
</td></tr>
</table>

<h2 align="center">config values</h2>
<table border=0 cellpadding=3 cellspacing=1 bgcolor="#000000" align="center">
<tr bgcolor="#9999cc"><th> Directive </th><th> Value </th></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> configfile </td><td> $configfile </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> admin_email </td><td> <a href="mailto:$$cfg{admin_email}">$$cfg{admin_email}</a> </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> generate_reports </td><td> $$cfg{generate_reports} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> cache_dir </td><td> $$cfg{cache_dir} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> logfile </td><td> $$cfg{logfile} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> errorfile </td><td> $$cfg{errorfile} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> expire_hours </td><td> $$cfg{expire_hours} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> http_proxy </td><td> $$cfg{http_proxy} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> use_proxy </td><td> $$cfg{use_proxy} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> use_proxy_auth </td><td> $$cfg{use_proxy_auth} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> debug </td><td> $$cfg{debug} </td></tr>
</table>

<p>
<h2 align="center">license</h2>
<table border=0 cellpadding=8 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#cccccc"><td>
<p>Apt-cacher is free software; you can redistribute it and/or modify it under the terms of the GNU General 
Public License as published by the Free Software Foundation; either version 2 of the License, or (at your 
option) any later version.

<p>Apt-cacher is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the 
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 
License for more details.

<p>A copy of the GNU General Public License is available as /usr/share/common-licenses/GPL in the Debian 
GNU/Linux distribution or on the World Wide Web at http://www.gnu.org/copyleft/gpl.html. You can also 
obtain it by writing to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 
02111-1307, USA.
</td></tr>
</table>
</body>
</html>
EOF

    exit 1;

}


# Jon's extra stuff to write the event to a log file.
sub writeaccesslog {
    my $cache_status = shift;
    my $new_filename = shift;

    # The format is 'time|cache status (HIT, MISS or EXPIRED)|client IP address|file size|name of requested file'
    my $time = localtime;
    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = stat($cached_file);
    my $file_length = 0;
    $file_length+=$size if defined($size);

    flock($aclog_fh, LOCK_EX);
    print $aclog_fh "$time|$client|$cache_status|$file_length|$new_filename\n";
    flock($aclog_fh, LOCK_UN);
}

# Jon's extra stuff to write errors to a log file.
sub writeerrorlog {
	my $message = shift;
	
	my $time = localtime;

    flock($erlog_fh, LOCK_EX);
    # files may need to be reopened sometimes - reason unknown yet, EBADF
    # results
	syswrite($erlog_fh,"$time|$client|$message\n") || &open_log_files;
    flock($erlog_fh, LOCK_UN);
}

# Stuff to append debug messages to the error log.
sub debug_message {
    if ($$cfg{debug} eq 1) {
        my $message = shift;
        &writeerrorlog("debug [$$]: $message");
    }
}

sub open_log_files {
	if(!$erlog_fh)
    {
        open($erlog_fh,">>$$cfg{errorfile}") or barf("Unable to open $$cfg{errorfile}");
    }
    if(!$aclog_fh) {
        open($aclog_fh,">>$$cfg{logfile}") or barf("Unable to open $$cfg{logfile}");
    }
}
 
sub get_abort_time () {
  return time () + $$cfg{fetch_timeout}; # five minutes from now
}

my $header_stored=0;

my $tstart;
my $geslen;

sub get_callback {
    my $errors=0;

    my ($data, $response, $proto) = @_;
#    debug_message("Callback got data\n");
    if(!$header_stored) {
        $header_stored=1;
        my $headstring = $response->as_string;

        # print $con $headstring;
        
        &set_global_lock(": Callback, storing the header"); # set the lock before writting the first byte to that file, and release it after the file is closed
        (scalar print $chfd $headstring ) || $errors++;
        close($chfd);
        &release_global_lock;

        if($maxspeed) {
            $geslen=-$getBufLen; # will be re-added below
            $tstart = [gettimeofday];
        }

    }
    (scalar print $pkfd $data ) || $errors++;
    #print $con $data;

    data_feed(\$data);

    # delay for rate limiting
    if($maxspeed) {
        $geslen+=$getBufLen;
        my $delta= $geslen/$maxspeed - ( scalar tv_interval ( $tstart ));
        sleep($delta) if ($delta > 0);
    }

    if($errors) {
        writeerrorlog("Write error. Disk full?");
        # don't just exit here, fetcher needs to handle that
        die();
    }
}

sub fetch_store {

    my ($host, $uri) = @_;

    my $url = "http://$host$uri";
    debug_message("fetcher: try to fetch $url");

    # for checksumming
    data_init();

    my $response = &ua_act(0, $host, $uri);
    #my $response = $ua->get($url, ':content_cb' => \&get_callback, ':read_size_hint' => $getBufLen);
    #$geslen=0;

    debug_message("Get is back");

    if ($response->is_success && !defined($response->header("X-Died")) )
    {

        close($pkfd) if $pkfd;
        undef $pkfd;

        debug_message("stored $url as $cached_file");

        # check missmatch or fetcher failure, could not connect the server
        if( !$is_index_file && !check_sum($new_filename)) {
            &set_global_lock(": file corruption report");
            writeerrorlog("Do00h, checksum mismatch on $new_filename");
            unlink $cached_file, $cached_head;
            open(MF, ">$cached_head");
            print MF "HTTP/1.1 502 Data corruption";
            close(MF);
            &kill_readers;
            &release_global_lock;
        }

        # assuming here that the filesystem really closes the file and writes
        # it out to disk before creating the complete flag file
        
        debug_message("setting complete flag for $new_filename");
        # Now create the file to show the pickup is complete, also store the original URL there
        open(MF, ">$private_dir/$new_filename.complete");
        print MF $path;
        close(MF); 

        # index file seen? Get checksums
        import_sums($cached_file) if $do_import;
        
        # store the sum, it may be not available yet but better this one than
        # nothing. 
        # disabled for now store_sum($new_filename);
        # The sum may change on index files but that case is handled
        # separately, the stored sum is allowed to differ from the data
        # contents.

    }
    else
    {
        if(defined($response->header("X-Died"))) {
            $response->code(502);
            $response->message("Apt-Cacher: Transfer terminated");
        }

        debug_message("Reporting error: ".$response->code);
        &set_global_lock(": HTTP error report");
        open(my $ch, $cached_head);
        my $headstring = $response->as_string;
        if($headstring=~/^5\d\d/) { 
            # work around LWP bug, incorrect status line with internal messages
            $headstring = "HTTP/1.1 $headstring";
        }
        print $chfd $headstring;
        close($chfd);
        &release_global_lock;
        if(defined($response->header("X-Died"))) { # was critical, most likely frozen now
            &kill_readers;
        }
    }

    debug_message("fetcher exiting");
    unlink $notify_file;

    # reset the shared vars
    $header_stored=0; # FIXME, really needed? fetcher thread runs only once

    _exit(0);
}

# FIXME: that sucks. Still needed?!
sub kill_readers {
    my $nf;
    if(open($nf, $notify_file)) {
        while(<$nf>) {
            chomp;
        debug_message("Stopping reader: $_");
               kill 9, $_; # hard, bypassing the handler
        }
        close($nf);
    }
    # should be okay to unlink the file after all readers are "notified"
    unlink $cached_file;
}


# Check if there has been a usage report generated and display it
sub usage_report {
	my $usage_file = "$$cfg{logdir}/report.html";
    &sendrsp(200, "OK", "Content-Type", "text/html", "Expires", 0);
	if (!-f $usage_file) {
		print $con <<EOF;

<html>
<title>Apt-cacher traffic report</title><style type="text/css"><!--
a { text-decoration: none; }
a:hover { text-decoration: underline; }
h1 { font-family: arial, helvetica, sans-serif; font-size: 18pt; font-weight: bold;}
h2 { font-family: arial, helvetica, sans-serif; font-size: 14pt; font-weight: bold;}
body, td { font-family: arial, helvetica, sans-serif; font-size: 10pt; }
th { font-family: arial, helvetica, sans-serif; font-size: 11pt; font-weight: bold; }
//--></style>
</head>
<body>
<table border=0 cellpadding=8 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#9999cc"><td> <h1>Apt-cacher traffic report</h1> </td></tr>
</td></tr>
</table>
		
<p><table border=0 cellpadding=3 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#9999cc"><th bgcolor="#9999cc"> An Apt-cacher usage report has not yet been generated </th></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> Reports are generated every 24 hours. If you want reports to be generated, make sure you set '<b>generate_reports=1</b>' in <b>$configfile</b>.</td></tr>
</table>
		</body>
		</html>
EOF

	}
	else
	{
        open(my $usefile, $usage_file);
        my @usedata = <$usefile>;
        close($usefile);
        print $con @usedata;
	}
}

# IP address filtering.
sub ipv4_addr_in_list ($$)
{
	return 0 if $_[0] eq '';
	debug_message ("testing $_[1]");
	return 0 unless $$cfg{$_[1]};

	my ($client, $cfitem) = @_;
	my @allowed_hosts = split(/,\s*/, $$cfg{$cfitem});
	for my $ahp (@allowed_hosts)
	{
		goto unknown if $ahp !~ /^[-\/,.[:digit:]]+$/;

		# single host
		if ($ahp =~ /^([^-\/]*)$/)
		{
			my $ip = $1;
			debug_message("checking against $ip");
			defined ($ip = ipv4_normalise($ip)) or goto unknown;
			return 1 if $ip eq $client;
		}
		# range of hosts (netmask)
		elsif ($ahp =~ /^([^-\/]*)\/([^-\/]*)$/)
		{
			my ($base, $mask) = ($1, $2);
			debug_message("checking against $ahp");
			defined ($base = ipv4_normalise($base)) or goto unknown;
			$mask = ($mask =~ /^\d+$/) ? make_mask ($mask, 32)
																 : ipv4_normalise ($mask);
			goto unknown unless defined $mask;
			return 1 if ($client & $mask) eq ($base & $mask);
		}
		# range of hosts (start & end)
		elsif ($ahp =~ /^([^-\/]*)-([^-\/]*)$/)
		{
			my ($start, $end) = ($1, $2);
			debug_message("checking against $start to $end");
			defined ($start = ipv4_normalise($start)) or goto unknown;
			defined ($end = ipv4_normalise($end)) or goto unknown;
			return 1 if $client ge $start && $client le $end;
		}
		# unknown
		else
		{
			unknown:
			debug_message("Alert: $cfitem ($ahp) is bad");
			&sendrsp(500, "Configuration error");
			exit(4);
		}
	}
	return 0; # failed
}

sub ipv6_addr_in_list ($$)
{
	return 0 if $_[0] eq '';
	debug_message ("testing $_[1]");
	return 0 unless $$cfg{$_[1]};

	my ($client, $cfitem) = @_;
	my @allowed_hosts = split(/,\s*/, $$cfg{$cfitem});
	for my $ahp (@allowed_hosts)
	{
		goto unknown if $ahp !~ /^[-\/,:[:xdigit:]]+$/;

		# single host
		if ($ahp =~ /^([^-\/]*)$/)
		{
			my $ip = $1;
			debug_message("checking against $ip");
			$ip = ipv6_normalise($ip);
			goto unknown if $ip eq '';
			return 1 if $ip eq $client;
		}
		# range of hosts (netmask)
		elsif ($ahp =~ /^([^-\/]*)\/([^-\/]*)$/)
		{
			my ($base, $mask) = ($1, $2);
			debug_message("checking against $ahp");
			$base = ipv6_normalise($base);
			goto unknown if $base eq '';
			goto unknown if $mask !~ /^\d+$/ || $mask < 0 || $mask > 128;
			my $m = ("\xFF" x ($mask / 8));
			$m .= chr ((-1 << (8 - $mask % 8)) & 255) if $mask % 8;
			$mask = $m . ("\0" x (16 - length ($m)));
			return 1 if ($client & $mask) eq ($base & $mask);
		}
		# range of hosts (start & end)
		elsif ($ahp =~ /^([^-\/]*)-([^-\/]*)$/)
		{
			my ($start, $end) = ($1, $2);
			debug_message("checking against $start to $end");
			$start = ipv6_normalise($start);
			$end = ipv6_normalise($end);
			goto unknown if $start eq '' || $end eq '';
			return 1 if $client ge $start && $client le $end;
		}
		# unknown
		else
		{
			unknown:
			debug_message("Alert: $cfitem ($ahp) is bad");
            &sendrsp(500, "Configuration error");
			exit(4);
		}
	}
	return 0; # failed
}

sub sendrsp {
    my $code=shift;
    my $msg=shift;
    $msg="" if !defined($msg);
    
    my $initmsg=
    $cgi_mode ? 
    "Status: $code $msg\r\n" :
    "HTTP/1.1 $code $msg\r\n";
    
    $initmsg.="Connection: Keep-Alive\r\nAccept-Ranges: bytes\r\nKeep-Alive: timeout=15, max=100\r\n" if ($code ne 403);

    #debug_message("Sending Response: $initmsg");
    print $con $initmsg;

    my $altbit=0;
    for(@_) {
        $altbit=!$altbit;
        if($altbit) {
            #debug_message("$_: ");
            print $con $_.": ";
        }
        else {
            #debug_message($_."\r\n);
            print $con $_."\r\n";
        }
    }
    print $con "\r\n";

}

# DOS attack safe input reader
my @reqLineBuf;
my $reqTail;
sub getRequestLine {
    if($cgi_path) { 
        push(@reqLineBuf, "GET $cgi_path", "", undef); # undef stops operation
        undef $cgi_path; # don't re-add it
    }
    if(! @reqLineBuf) {
        my $buf="";

        # after every read at least one line MUST have been found. Read length
        # is large enough.

        my $n=sysread($source, $buf, 1024);
        $buf=$reqTail.$buf if(defined($reqTail));
        undef $reqTail;

        # pushes the lines found into the buffer. The last one may be incomplete,
        # extra handling below
        push(@reqLineBuf, split(/\r\n/, $buf, 1000) );

        # buf did not end in a line terminator so the last line is an incomplete
        # chunk. Does also work if \r and \n are separated
        if(substr($buf, -2) ne "\r\n") {
            $reqTail=pop(@reqLineBuf);
        }
    }
    return shift(@reqLineBuf);
}

# runs the get or head operations on the user agent
sub ua_act {
    my ($only_head, $vhost, $uri) = @_;

    my $url="http://$vhost$uri";
    
    &setup_agent;

    my $do_hopping = (exists $pathmap{$vhost});

    my $response;
    my $hostcand;

    RETRY_ACTION:

    # make the virtual hosts real. The list is reduced which is not so smart,
    # but since the fetcher process dies anyway it does not matter.
    if($do_hopping) {
        $hostcand = shift(@{$pathmap{$vhost}});
        debug_message("Candidate: $hostcand");
        $url=($hostcand =~ /^http:/?"" : "http://").$hostcand.$uri;
        #$url="http://$hostcand$uri";
        #$url="$hostcand$uri" if not $hostcand =~ /:\/\//;
    }

    debug_message("download agent: getting $url");
    
    if($only_head) {
        $response = $ua->head($url);
    }
    else {
        $response = $ua->get($url, ':content_cb' => \&get_callback, ':read_size_hint' => $getBufLen);
    }

    if($do_hopping) {
        # if okay or the last candidate failes, put it back into the list
        if($response->is_success || ! @{$pathmap{$vhost}} ) { 
            unshift(@{$pathmap{$vhost}}, $hostcand);
        }
        else {
            goto RETRY_ACTION;
        }
    }

    return $response;
}


sub setup_ownership {
    my $uid=$$cfg{user};
    my $gid=$$cfg{group};

    if($chroot) {
        if($uid || $gid) {
            # open them now, before it is too late
            # FIXME: reopening won't work, but the lose of file handles needs to be
            # made reproducible first
            &open_log_files;
        }
        chroot $chroot || die "Unable to chroot, aborting.\n";
        chdir $chroot;
    }

    if($gid) {
        if($gid=~/^\d+$/) {
            my $name=getgrgid($gid);
            die "Unknown group ID: $gid (exiting)\n" if !$name;
        }
        else {
            $gid=getgrnam($gid);
            die "No such group (exiting)\n" if !defined($gid);
        }
        $) = $gid;
        $( = $gid;
        $) =~ /^$gid\b/ && $( =~ /^$gid\b/ || barf("Unable to change group id");
    }

    if($uid) {
        if($uid=~/^\d+$/) {
            my $name=getpwuid($uid);
            die "Unknown user ID: $uid (exiting)\n" if !$name;
        }
        else {
            $uid=getpwnam($uid);
            die "No such user (exiting)\n" if !defined($uid);
        }
        $> = $uid;
        $< = $uid;
        $> == $uid && $< == $uid || barf("Unable to change user id");
    }
}
