/* -*- c-file-style: "java"; indent-tabs-mode: nil; fill-column: 78 -*-
 * 
 * distcc -- A simple distributed compiler system
 * $Header: /data/cvs/distcc/src/distcc.c,v 1.143 2003/01/27 12:13:06 mbp Exp $ 
 *
 * Copyright (C) 2002, 2003 by Martin Pool <mbp@samba.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */


			/* 4: The noise of a multitude in the
			 * mountains, like as of a great people; a
			 * tumultuous noise of the kingdoms of nations
			 * gathered together: the LORD of hosts
			 * mustereth the host of the battle.
			 *		-- Isaiah 13 */



#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>

#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "distcc.h"
#include "trace.h"
#include "io.h"
#include "rpc.h"
#include "exitcode.h"
#include "util.h"
#include "clinet.h"
#include "hosts.h"
#include "bulk.h"
#include "tempfile.h"
#include "strip.h"
#include "implicit.h"
#include "exec.h"
#include "where.h"
#include "lock.h"
#include "cpp.h"


/* Name of this program ("distcc"), exported with external linkage for
 * trace.c.  Nothing in this file reads it. */
const char *rs_program_name = "distcc";


/**
 * @file
 *
 * Entry point for the distcc client.
 *
 * In most parts of this program, functions return 0 for success and something
 * from exitcode.h for failure.  However it is not completely consistent.
 *
 * @todo Make absolutely sure that if we fail, the .o file is removed.
 * Perhaps it would be better to receive to a temporary file and then
 * rename into place?  On the other hand, gcc seems to just write
 * directly, and if we fail or crash then Make ought to know not to
 * use it.
 *
 * @todo Count the preprocessor, and any compilations run locally, against the
 * load of localhost.  In doing this, make sure that we cannot deadlock
 * against a load limit, by having a case where we need to hold one lock and
 * take another to make progress.  I don't think there should be any such case
 * -- we can release the cpp lock before starting the main compiler.
 *
 * @todo If we have produced a .i file and need to fall back to running
 * locally then use that rather than the original source.
 **/


/**
 * Print the command-line help.
 *
 * Calls dcc_show_version() and dcc_show_copyright() first, then writes the
 * usage text to stdout with printf().
 **/
static void dcc_show_usage(void)
{
    /* printf format; its single %d is filled in with DISTCC_DEFAULT_PORT. */
    static const char usage_fmt[] =
"Usage:\n"
"   distcc [COMPILER] [compile options] -o OBJECT -c SOURCE\n"
"   distcc --help\n"
"\n"
"Options:\n"
"   COMPILER                   defaults to \"cc\"\n"
"   --help                     explain usage and exit\n"
"   --version                  show version and exit\n"
"\n"
"Environment variables:\n"
"   DISTCC_HOSTS=\"HOST ...\"\n"
"            list of volunteer hosts, should include localhost\n"
"   DISTCC_VERBOSE=1           give debug messages\n"
"   DISTCC_LOG=LOGFILE         send messages here, not stderr\n"
"   DISTCC_TCP_CORK=0          disable TCP corks\n"
"\n"
"Host specifications:\n"
"   localhost                  run in place\n"
"   HOST                       TCP connection, port %d\n"
"   HOST:PORT                  TCP connection, specified port\n"
"\n"
"distcc distributes compilation jobs across volunteer machines running\n"
"distccd.  Jobs that cannot be distributed, such as linking or \n"
"preprocessing are run locally.  distcc should be used with make's -jN\n"
"option to execute in parallel on several machines.\n";

    dcc_show_version("distcc");
    dcc_show_copyright();

    printf(usage_fmt, DISTCC_DEFAULT_PORT);
}


/*
 * Send the header that opens a request: a "DIST" token carrying PROTO_VER.
 *
 * Returns whatever dcc_x_token_int() returns (0 is treated as success by the
 * caller).
 */
static int dcc_x_req_header(int fd)
{
    int rc = dcc_x_token_int(fd, "DIST", PROTO_VER);

    return rc;
}



/**
 * Transmit an argument vector.
 *
 * Sends an "ARGC" token carrying the number of arguments (as counted by
 * dcc_argv_len()), and then for each argument an "ARGV" token carrying its
 * string length followed by the string itself.
 *
 * @param fd    Descriptor to write to.
 * @param argv  Argument vector to send.
 *
 * @return 0 on success.  On failure, the nonzero code reported by the helper
 * that failed is passed through unchanged, matching dcc_x_req_header() and
 * dcc_send_job(), rather than being collapsed to -1.
 **/
static int dcc_x_argv(int fd, char **argv)
{
    int i;
    int argc;
    int ret;

    argc = dcc_argv_len(argv);

    if ((ret = dcc_x_token_int(fd, "ARGC", argc)))
        return ret;

    for (i = 0; i < argc; i++) {
        /* Length first, then the bytes of the argument. */
        if ((ret = dcc_x_token_int(fd, "ARGV", (unsigned) strlen(argv[i])))
            || (ret = dcc_write_str(fd, argv[i])))
            return ret;
    }

    return 0;
}


/**
 * Send one complete compile request over @p net_fd: request header, argument
 * vector, and the preprocessed source as a "DOTI" file.
 *
 * @param net_fd    Descriptor to write the request to.
 * @param argv      Compiler command to transmit.
 * @param cpp_pid   If nonzero, pid of the preprocessor; it is collected
 *                  before the source is sent.
 * @param status    Receives the preprocessor's wait status when @p cpp_pid is
 *                  nonzero; otherwise left untouched.
 * @param cpp_fname Name of the preprocessed source file.
 *
 * @return 0 if the request was sent, and also 0 if the preprocessor failed
 * (in which case nothing after the argv is sent and @p *status holds the
 * failure).  Nonzero on any other error.
 **/
static int dcc_send_job(int net_fd,
                        char **argv,
                        pid_t cpp_pid,
                        int *status,
                        const char *cpp_fname)
{
    long cpp_user_us, cpp_sys_us;
    int rc;

    rc = dcc_x_req_header(net_fd);
    if (rc)
        return rc;

    rc = dcc_x_argv(net_fd, argv);
    if (rc)
        return rc;

    if (cpp_pid != 0) {
        /* The preprocessor may still be running; collect it and account for
         * its resource usage before touching its output. */
        rc = dcc_collect_child(cpp_pid, status, &cpp_user_us, &cpp_sys_us);
        if (rc)
            return rc;

        rc = dcc_report_rusage("cpp", cpp_user_us, cpp_sys_us);
        if (rc)
            return rc;

        /* A failed cpp would presumably fail the same way if the command
         * were rerun locally, so there is nothing to gain from a fallback.
         * Critique the status and report the job as complete. */
        if (dcc_critique_status(*status, "cpp", dcc_gethostname()) != 0)
            return 0;
    }

    /* TODO: Add a test case for the marginal case where cpp returns 0 but
     * does not create an output file. */
    if (access(cpp_fname, R_OK) != 0) {
        rs_log_error("can't read cpp output \"%s\": %s",
                     cpp_fname, strerror(errno));
        return EXIT_IO_ERROR;
    }

    rc = dcc_x_file(net_fd, cpp_fname, "DOTI", NULL);
    if (rc)
        return rc;

    rs_trace("client finished sending request to server");
    return 0;
}


/**
 * Wrapper around dcc_send_job() that corks the socket for the duration of
 * the send and always uncorks it afterwards.
 *
 * Uncorking matters in both outcomes.  On success it makes sure the whole
 * request is pushed to the other side promptly.  On failure it works around
 * a Linux 2.2 bug where a socket closed while corked gets stuck in
 * FIN_WAIT1.  This is only a partial cure: if distcc crashes or is killed
 * while corked, the same situation arises.
 *
 * http://marc.theaimsgroup.com/?l=linux-netdev&r=1&b=200209&w=2
 *
 * Parameters and return value are exactly those of dcc_send_job().
 **/
static int dcc_send_job_corked(int net_fd,
                               char **argv,
                               pid_t cpp_pid,
                               int *status,
                               const char *cpp_fname)
{
    int send_result;

    tcp_cork_sock(net_fd, 1);   /* cork */
    send_result = dcc_send_job(net_fd, argv, cpp_pid, status, cpp_fname);
    tcp_cork_sock(net_fd, 0);   /* uncork, whatever the outcome */

    return send_result;
}


/**
 * Read the server's reply from @p net_fd.
 *
 * In order: the result header, the compiler status, an "SERR" section whose
 * body is passed to dcc_r_file_body() with STDERR_FILENO, an "SOUT" section
 * passed likewise with STDOUT_FILENO, and finally a "DOTO" section holding
 * the object file.
 *
 * @param net_fd        Descriptor to read the reply from.
 * @param status        Passed to dcc_r_cc_status() to receive the compiler
 *                      status.
 * @param output_fname  Where a non-empty object file is stored.
 *
 * @return 0 on success, otherwise the nonzero code of the step that failed.
 **/
static int dcc_retrieve_results(int net_fd, int *status, const char *output_fname)
{
    int section_len;
    int object_len;
    int rc;

    rc = dcc_r_result_header(net_fd);
    if (rc)
        return rc;

    rc = dcc_r_cc_status(net_fd, status);
    if (rc)
        return rc;

    rc = dcc_r_token_int(net_fd, "SERR", &section_len);
    if (rc)
        return rc;

    rc = dcc_r_file_body(STDERR_FILENO, net_fd, section_len);
    if (rc)
        return rc;

    rc = dcc_r_token_int(net_fd, "SOUT", &section_len);
    if (rc)
        return rc;

    rc = dcc_r_file_body(STDOUT_FILENO, net_fd, section_len);
    if (rc)
        return rc;

    rc = dcc_r_token_int(net_fd, "DOTO", &object_len);
    if (rc)
        return rc;

    /* The .o section is always read, even when the compiler failed: the
     * protocol stays uniform and the network is always drained.  On failure
     * the server sends a 0 byte file anyhow, which is not written out. */
    if (object_len == 0) {
        rs_log_notice("skipping retrieval of 0 byte object file %s",
                      output_fname);
        return 0;
    }

    return dcc_r_file(net_fd, output_fname, object_len);
}


/**
 * Run one compilation on a remote host.
 *
 * By the time this is called the preprocessor has already been started in
 * the background; it may or may not have finished.  The intent is for
 * preprocessing to overlap with connection setup, which can take a while but
 * needs little CPU.
 *
 * If this function fails, the caller retries the compilation locally.
 *
 * @param argv Compiler command to run.
 *
 * @param cpp_fname Filename of the preprocessed source.  May not be complete
 * yet, depending on @p cpp_pid.
 *
 * @param output_fname File that the object code should be delivered to.
 *
 * @param cpp_pid If nonzero, the pid of the preprocessor, which must be
 * allowed to complete before the input file is sent.
 *
 * @param host Definition of the host to send this job to.
 *
 * @param status Cleared to 0 on entry.  On return holds the wait-status of
 * the preprocessor if it failed, otherwise that of the remote compiler.
 *
 * @return 0 on success, otherwise error.  Returning zero does not
 * necessarily imply that the remote compiler itself succeeded, only that
 * there were no communication problems; check @p status for that.
 **/
static int dcc_compile_remote(char **argv,
                              char *cpp_fname, char *output_fname,
                              pid_t cpp_pid,
                              const struct dcc_hostdef *host,
                              int *status)
{
    int sock;
    int rc;

    *status = 0;

    dcc_note_execution(host->hostname, argv);

    rc = dcc_open_socket_out(host->hostname, host->port, &sock);
    if (rc != 0)
        return rc;

    /* Sending waits for cpp and leaves its status in *status.  If cpp
     * failed, the send returns 0 without transmitting the source, so there
     * is no reply to wait for and the results are only fetched when both
     * the send and cpp succeeded. */
    rc = dcc_send_job_corked(sock, argv, cpp_pid, status, cpp_fname);
    if (rc == 0 && *status == 0)
        rc = dcc_retrieve_results(sock, status, output_fname);

    /* Close the socket now so that the server can terminate instead of
     * waiting for us to finish the rest of our work. */
    dcc_close(sock);

    return rc;
}


/**
 * Run the compiler on this machine; the counterpart of
 * dcc_compile_remote().
 *
 * The server does much the same job but does not use this routine, because
 * it wants to overlap running the compiler with reading the input from the
 * network.
 *
 * Historically this routine exec()ed the compiler in place of distcc, which
 * saves creating and scheduling another process.  The drawback is that
 * temporary files could then not be cleaned up and (less importantly)
 * resource usage could not be logged, so a child is spawned instead.
 *
 * @param argv Compiler command to run; argv[0] is used as the label when
 * reporting resource usage.
 *
 * @return Nonzero if spawning or collecting the child failed; otherwise the
 * result of dcc_critique_status() on the child's wait status.
 **/
static int dcc_compile_local(char *argv[])
{
    pid_t child;
    int rc;
    int wait_status;
    long user_us, sys_us;

    /* FIXME: If we first chose a remote machine and are only here as a
     * fallback (e.g. after a network error), the client runs holding both
     * the remote and the local lock.  That should be harmless but skews
     * load balancing.  Probably acceptable, since the remote machine is
     * presumably in trouble anyhow. */
    dcc_lock_local("compiler");

    dcc_note_execution("localhost", argv);

    /* No file descriptors are redirected for a local run, so a command used
     * in a pipeline (cpp, for example) keeps working. */
    rc = dcc_spawn_child("compile", argv, &child, NULL, NULL, NULL);
    if (rc != 0)
        return rc;

    rc = dcc_collect_child(child, &wait_status, &user_us, &sys_us);
    if (rc)
        return rc;

    dcc_report_rusage(argv[0], user_us, sys_us);

    return dcc_critique_status(wait_status, "compile", dcc_gethostname());
}


/**
 * Execute the commands in argv remotely or locally as appropriate.
 *
 * We may need to run cpp locally; we can do that in the background
 * while trying to open a remote connection.
 *
 * This function is slightly inefficient when it falls back to running
 * gcc locally, because cpp may be run twice.  Perhaps we could adjust
 * the command line to pass in the .i file.  On the other hand, if
 * something has gone wrong, we should probably take the most
 * conservative course and run the command unaltered.  It should not
 * be a big performance problem because this should occur only rarely.
 *
 * @param argv Command to execute.  Does not include 0='distcc'.
 * Treated as read-only, because it is a pointer to the program's real
 * argv.  (The local pointer itself may be replaced by dcc_scan_args().)
 *
 * @param status On the remote path, receives the waitstatus of the remote
 * compiler or of the preprocessor, as set by dcc_compile_remote().
 * NOTE(review): on the local paths (run_local / fallback) this function
 * never writes @p *status, because dcc_compile_local() keeps its own status
 * variable; callers must not rely on it there.
 *
 * @return On the remote path, the result of dcc_critique_status() on
 * @p *status; otherwise whatever dcc_compile_local() returns.
 **/
static int dcc_build_somewhere(char *argv[], int *status)
{
    char *input_fname, *output_fname, *cpp_fname;
    char **argv_stripped;
    pid_t cpp_pid = 0;
    int ret;
    struct dcc_hostdef *host = NULL;

    if (dcc_scan_args(argv, &input_fname, &output_fname, &argv) != 0) {
        /* we need to scan the arguments even if we already know it's
         * local, so that we can pick up distcc client options. */
        goto run_local;
    }

    if ((ret = dcc_pick_buildhost("compile", &host)) != 0) {
        /* doesn't happen at the moment */
        goto fallback;
    }

    if (host->mode == DCC_MODE_LOCAL)
        goto run_local;

    /* The comparison belongs outside the assignment: with the parenthesis
     * in the wrong place, ret would receive the 0/1 result of "!= 0"
     * instead of the code returned by dcc_cpp_maybe(). */
    if ((ret = dcc_cpp_maybe(argv, input_fname, &cpp_fname, &cpp_pid)) != 0)
        goto fallback;

    if ((ret = dcc_strip_local_args(argv, &argv_stripped)))
        goto fallback;

    if ((ret = dcc_compile_remote(argv_stripped, cpp_fname, output_fname,
                                  cpp_pid, host, status)) != 0) {
        /* Returns zero if we successfully ran the compiler, even if
         * the compiler itself bombed out. */
        goto fallback;
    }

    return dcc_critique_status(*status, argv[0], host->hostname);

  fallback:
    /* "You guys are so lazy!  Do I have to do all the work myself??"
     *
     * host is still NULL if picking a build host failed, hence the guard. */
    rs_log_warning("failed to distribute to \"%s\", running locally instead",
                   host && host->hostname ? host->hostname : "(unknown)");

  run_local:
    return dcc_compile_local(argv);
}


/**
 * Send trace to append to the file specified by DISTCC_LOG.  If
 * that's something you didn't want to write to, tough.
 *
 * The exact setting of log level is a little strange, but for a good
 * reason: if you ask for verbose (DISTCC_VERBOSE), you get everything.
 * Otherwise, if you set a file and it can be opened, you get INFO and
 * above.  Otherwise, you only get WARNING messages.  In practice this seems
 * to be a nice balance.
 *
 * If DISTCC_LOG is set but the file cannot be opened, messages go to stderr
 * at the WARNING level, exactly as if no file had been requested, and the
 * failure is reported once a logger is in place.  The logger is never
 * registered on an invalid descriptor.
 **/
static void dcc_set_trace_from_env(void)
{
    const char *logfile;
    int fd = STDERR_FILENO;
    int level = RS_LOG_WARNING;
    int open_failed = 0;
    int open_errno = 0;

    if ((logfile = getenv("DISTCC_LOG"))) {
        fd = open(logfile, O_WRONLY|O_APPEND|O_CREAT, 0666);
        if (fd == -1) {
            /* Save errno straight away; later calls may overwrite it. */
            open_errno = errno;
            open_failed = 1;
            fd = STDERR_FILENO;
        } else {
            level = RS_LOG_INFO;
        }
    }

    rs_trace_set_level(level);
    rs_add_logger(rs_logger_file, RS_LOG_DEBUG, NULL, fd);

    if (open_failed) {
        /* Reported only now, after the stderr logger has been registered. */
        rs_log_error("failed to open logfile %s: %s",
                     logfile, strerror(open_errno));
    }

    if (dcc_getenv_bool("DISTCC_VERBOSE", 0)) {
        rs_trace_set_level(RS_LOG_DEBUG);
    }
}


/**
 * distcc client entry point.
 *
 * This is typically called by make in place of the real compiler.
 *
 * Performs basic setup, handles the --help and --version arguments (or a
 * missing argument, which is treated like --help), and then kicks off
 * dcc_build_somewhere().  The process is ended through dcc_exit() with the
 * value that dcc_build_somewhere() returns.
 **/
int main(int argc, char **argv)
{
    char **compiler_args;
    int status;
    int wants_help;

    /* Registered first so that every exit() path runs the tempfile cleanup. */
    atexit(dcc_cleanup_tempfiles);

    dcc_set_trace_from_env();

    /* argv[1] is only examined when it exists. */
    wants_help = (argc <= 1) || strcmp(argv[1], "--help") == 0;
    if (wants_help) {
        dcc_show_usage();
        exit(0);
    }

    if (strcmp(argv[1], "--version") == 0) {
        dcc_show_version("distcc");
        exit(0);
    }

    dcc_recursion_safeguard();

    dcc_find_compiler(argv, &compiler_args);

    dcc_exit(dcc_build_somewhere(compiler_args, &status));
}
