/* -*- c-file-style: "java"; indent-tabs-mode: nil -*-
 * 
 * distcc -- A simple distributed compiler system
 * $Header: /data/cvs/distcc/src/where.c,v 1.20 2002/06/21 09:48:06 mbp Exp $ 
 *
 * Copyright (C) 2002 by Martin Pool <mbp@samba.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */


                /* His hand is stretched out, and who shall turn it
                 * back?  -- Isaiah 14:27 */

    
/**
 * @file
 *
 * Routines to look at <tt>$DISTCC_HOSTS</tt> and decide where to
 * run a job.
 *
 * We use a simple locking algorithm to try to make sure work is fairly
 * evenly distributed across all hosts.
 *
 * For each destination host, we keep a lock file in the temporary
 * directory.  A lock is held on that file all the time that a client
 * is connected to it, so that other invocations of distcc on the same
 * client machine by the same user can synchronize with it.
 *
 * There are an unbounded number of lock files for each volunteer,
 * from 0 up.  We repeatedly walk through all the volunteers in order
 * until we find a lock file that is not locked; we then try to
 * connect to that server.
 *
 * So whatever -j value you give to make, you should end up with
 * roughly that number of tasks running at any time, except that
 * remote tasks may be deferred in their accept queue until the
 * machine is ready to serve them.
 *
 * We use locks rather than e.g. deleting files because we want to
 * make sure that the lock will be removed if the client terminates
 * unexpectedly.  There is in fact no explicit unlock: the lock goes
 * away when the client terminates.
 *
 * The files themselves (as opposed to the locks on them) are never
 * cleaned up; since locking & creation is nonatomic I can't think of
 * a clean way to do it.  There shouldn't be many of them, and dead
 * ones will be caught by the tmpreaper.  In any case they're zero
 * bytes.
 *
 * Semaphores might work well here, but the interface is a bit ugly
 * and they have a reputation for being nonportable.
 */

#define _GNU_SOURCE

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>

#include <sys/stat.h>
#include <sys/file.h>

#include "distcc.h"
#include "trace.h"
#include "io.h"
#include "util.h"
#include "rpc.h"
#include "hosts.h"

/** Capacity of ::all_hosts: the most host names that will be stored. */
#define MAX_BUILDHOSTS 256

/**
 * Host name pointers stored by dcc_parse_hosts_env().  The array is
 * zeroed at the start of every parse; the strings live in a heap copy
 * of <tt>$DISTCC_HOSTS</tt> made by that function.
 */
char *all_hosts[MAX_BUILDHOSTS];

/**
 * How many host names dcc_parse_hosts_env() stored in ::all_hosts.
 * Only meaningful after that function has returned 0.
 */
int n_hosts;


/* TODO: Write a test harness for the host selection algorithm.
 * Perhaps a really simple simulation of machines taking different
 * amounts of time to build stuff? */


/**
 * Parse <tt>$DISTCC_HOSTS</tt> into ::all_hosts and ::n_hosts.
 *
 * The variable is a list of host names separated by single space
 * characters.  Empty fields (produced by leading, trailing or repeated
 * spaces, or by an empty variable) are skipped rather than being
 * recorded as hosts with an empty name.  At most MAX_BUILDHOSTS names
 * are stored; any further names are ignored.
 *
 * On success the entries of ::all_hosts point into a heap copy of the
 * environment string.  That copy is deliberately not freed here,
 * because the host names must stay valid after this function returns.
 *
 * @return 0 if at least one host name was stored; -1 if the variable
 * is unset, contains no host names, contains a name with a '/' in it,
 * or memory could not be allocated.  On every failure path ::n_hosts is
 * 0, ::all_hosts is all NULL and the temporary copy has been freed.
 */
static int dcc_parse_hosts_env(void)
{
    const char *where;
    char *copy;
    char *running;
    char *token;

    /* Start from a clean slate so a failed parse never leaves stale
     * state from an earlier call behind. */
    n_hosts = 0;
    memset(all_hosts, 0, sizeof all_hosts);

    where = getenv("DISTCC_HOSTS");
    if (!where)
        goto dud;

    copy = strdup(where);
    if (!copy) {
        rs_log_error("failed to strdup");
        return -1;
    }

    /* strsep() advances `running', so `copy' is kept separately to be
     * able to free the buffer on the error paths. */
    running = copy;
    while (n_hosts < MAX_BUILDHOSTS
           && (token = strsep(&running, " ")) != NULL) {
        /* Adjacent separators yield empty fields; they are not hosts. */
        if (*token == '\0')
            continue;

        /* Host names end up inside a lock file name, so a '/' must
         * not be allowed through. */
        if (strchr(token, '/')) {
            rs_log_error("$DISTCC_HOSTS contains naughty character '/'");
            goto fail;
        }

        all_hosts[n_hosts++] = token;
    }

    if (n_hosts == 0) {
        free(copy);
        goto dud;
    }

    return 0;

fail:
    /* The stored pointers refer into `copy'; clear them before the
     * buffer goes away so nothing dangling is left behind. */
    memset(all_hosts, 0, sizeof all_hosts);
    n_hosts = 0;
    free(copy);
    return -1;

dud:
    rs_log_warning("$DISTCC_HOSTS is empty or undefined; can't distribute work");
    return -1;
}


/**
 * Try, without blocking, to take an exclusive lock on one lock file
 * belonging to @p host.
 *
 * The lock file is named <tt>lock_HOST_NNNNNNN</tt> inside the
 * directory returned by dcc_get_tempdir(), where NNNNNNN is @p iter
 * zero-padded to seven digits.  It is created if it does not exist.
 *
 * On success the file descriptor is intentionally left open and is
 * not returned to the caller: the lock lives exactly as long as the
 * descriptor does.  When the lock cannot be taken the descriptor is
 * closed again, so that probing many busy slots does not accumulate
 * open files.
 *
 * @param host  Host name used to build the lock file name.
 * @param iter  Slot number used to build the lock file name.
 *
 * @return 0 if the lock was obtained; -1 if the file name could not
 * be allocated, the file could not be created, the file is already
 * locked, or locking failed for another reason.
 */
static int dcc_try_lock_host(const char *host, int iter)
{
    const char *tempdir;
    char *fname = NULL;
    int fd;
    int ret;
    int lock_errno;

    tempdir = dcc_get_tempdir();

    /* When asprintf() fails the contents of fname are not defined, so
     * it must be neither used nor freed. */
    if (asprintf(&fname, "%s/lock_%s_%07d", tempdir, host, iter) == -1) {
        rs_log_error("failed to allocate lock file name");
        return -1;
    }

    /* XXX: tridge warns that O_CREAT ought to use O_EXCL */
    /* create if it doesn't exist */
    if ((fd = creat(fname, 0600)) == -1) {
        rs_log_error("failed to creat %s: %s", fname, strerror(errno));
        goto bomb;
    }

#if defined(HAVE_FLOCK)
    ret = flock(fd, LOCK_EX|LOCK_NB);
#elif defined(HAVE_LOCKF)
    ret = lockf(fd, F_TLOCK, 0);
#else
#  error "No supported lock method.  Please port this code."
#endif
    if (ret != -1) {
        rs_trace("locked %s", fname);
        free(fname);
        return 0;
    }

    /* Capture errno before logging or close() get a chance to change it. */
    lock_errno = errno;
    switch (lock_errno) {
#ifdef EWOULDBLOCK
    case EWOULDBLOCK:
#endif
#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
    case EAGAIN:
#endif
        rs_trace("%s already locked", fname);
        break;
    default:
        rs_log_error("flock %s failed: %s", fname, strerror(lock_errno));
        break;
    }

    /* We did not get the lock, so nothing depends on this descriptor
     * staying open; release it instead of leaking it. */
    close(fd);

bomb:
    free(fname);
    return -1;
}


int dcc_pick_buildhost(char **buildhost)
{
    int i_try, i_host;

    if (dcc_parse_hosts_env() == -1)
        return -1;
    
    for (i_try = 0; i_try < 50; i_try++) {
        for (i_host = 0; i_host < n_hosts; i_host++) {
            *buildhost = all_hosts[i_host];
            if (dcc_try_lock_host(*buildhost, i_try) == 0) {
                rs_trace("building on %s", *buildhost);
                return 0;
            }
        }
    }

    rs_log_error("couldn't lock any host!");
    return -1;
}
