/* -*- c-file-style: "java"; indent-tabs-mode: nil -*-
 * 
 * distcc -- A simple distributed compiler system
 * $Header: /data/cvs/distcc/src/where.c,v 1.42 2002/11/12 20:01:18 mbp Exp $ 
 *
 * Copyright (C) 2002 by Martin Pool <mbp@samba.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */


                /* His hand is stretched out, and who shall turn it back?
                 * -- Isaiah 14:27 */

    
/**
 * @file
 *
 * Routines to decide on which machine to run a distributable job.
 *
 * We choose from the defined hosts, allowing for the fact that some
 * machines can run more tasks than others.
 *
 * We use a simple locking algorithm to try to make sure work is
 * fairly evenly distributed across all slots.
 *
 * For each slot, we keep a lock file in the temporary directory.  A
 * lock is held on that file all the time that a client is connected
 * to it, so that other invocations of distcc on the same client
 * machine by the same user can synchronize with it.
 *
 * So whatever -j value you give to make, you should end up with
 * roughly that number of tasks running at any time, except that
 * remote tasks may be deferred in their accept queue until the
 * machine is ready to serve them.
 *
 * We use locks rather than e.g. a database or a central daemon
 * because we want to make sure that the lock will be removed if the
 * client terminates unexpectedly.  There is in fact no explicit
 * unlock: the lock goes away when the client terminates.
 *
 * The files themselves (as opposed to the locks on them) are never
 * cleaned up; since creating a file and locking it are not a single
 * atomic step, I can't think of a clean way to do it.  There shouldn't
 * be many of them, and dead ones will be caught by the tmpreaper.  In
 * any case they're zero bytes.
 *
 * Semaphores might work well here, but the interface is a bit ugly
 * and they have a reputation for being nonportable.
 *
 * TODO: Write a test harness for the host selection algorithm.
 * Perhaps a really simple simulation of machines taking different
 * amounts of time to build stuff?
 *
 * FIXME: If we don't get any of the locks, pause, rather than dying.
 * This is in fact a special case of load-limiting.  This can
 * sometimes make the concurrent-compile case fail if too many tasks
 * get scheduled in a narrow window.
 */

#define _GNU_SOURCE

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>

#include <sys/stat.h>
#include <sys/file.h>

#include "distcc.h"
#include "trace.h"
#include "util.h"
#include "hosts.h"
#include "tempfile.h"
#include "lock.h"
#include "where.h"


/* TODO: Write a test harness for the host selection algorithm.
 * Perhaps a really simple simulation of machines taking different
 * amounts of time to build stuff?
 *
 * FIXME: If we don't get any of the locks, pause, rather than dying.
 * This is in fact a special case of load-limiting.  This can
 * sometimes make the concurrent-compile case fail if too many tasks
 * get scheduled in a narrow window.
 *
 * TODO: Perhaps take a lock on a host only while trying to connect to
 * it.  If we don't use fifos on the server, this ought to make sure
 * that one task gets a monopoly on network bandwidth to that server.
 * If the server locks us out until it has room, then only that single
 * task will be blocked.  */

/**
 * Choose the machine on which to run a job.
 *
 * The candidate hosts are obtained from dcc_parse_hosts_env() and
 * handed to dcc_lock_one(), which picks one of them.  If the host
 * list cannot be parsed, or comes back empty, we fall back to
 * dcc_hostdef_local (without going through dcc_lock_one()) rather
 * than giving up.
 *
 * @param what      Label for the job; passed through to dcc_lock_one().
 * @param buildhost Receives the chosen host definition.
 *
 * @return 0 when falling back to the local host; otherwise whatever
 * dcc_lock_one() returns.
 **/
int dcc_pick_buildhost(const char *what,
                       struct dcc_hostdef **buildhost)
{
    struct dcc_hostdef *hostlist = NULL;
    int n_hosts = 0;
    int ret;

    ret = dcc_parse_hosts_env(&hostlist, &n_hosts);
    if (ret != 0 || hostlist == NULL) {
        /* Either an error occurred or there is nothing to choose
         * from.  Be helpful and build locally rather than giving up.
         * An empty list must not be passed on to dcc_lock_one(): with
         * no slots to try it could never obtain a lock. */
        rs_trace("no usable host list (ret=%d); building locally", ret);
        *buildhost = (struct dcc_hostdef *) dcc_hostdef_local;
        return 0;
    }

    return dcc_lock_one(what, hostlist, buildhost);
}


/**
 * Try to take a lock on some slot of some host in @p hostlist.
 *
 * Slots are tried in rounds: slot 0 of every host in list order, then
 * slot 1 of every host, and so on, skipping any host that has no slot
 * with the current index.  The first lock obtained wins.  If no lock
 * is obtained in any round, the decision is handed over to
 * dcc_random_block().
 *
 * @param what      Label for the job, used in the trace message.
 * @param hostlist  Head of the list of candidate hosts.
 * @param buildhost Receives the host whose slot was locked.
 *
 * @return 0 once a lock is held; otherwise whatever
 * dcc_random_block() returns.
 **/
int dcc_lock_one(const char *what,
                 struct dcc_hostdef *hostlist,
                 struct dcc_hostdef **buildhost)
{
    /* Upper bound on the slot index tried on any one host. */
    enum { max_rounds = 50 };
    int slot = 0;

    while (slot < max_rounds) {
        struct dcc_hostdef *host = hostlist;

        while (host != NULL) {
            /* Only hosts that actually have this slot are tried.  The
             * third argument is 0 here, as opposed to the 1 used for
             * the blocking lock in dcc_random_block(). */
            if (slot < host->n_slots
                && dcc_lock_host(host, slot, 0) == 0) {
                rs_trace("%s on %s", what, host->hostname);
                *buildhost = host;
                return 0;
            }
            host = host->next;
        }
        slot++;
    }

    return dcc_random_block(what, hostlist, buildhost);
}


/**
 * Add up the n_slots fields of every host on @p hostlist.
 *
 * @return The sum, which is 0 for an empty list.
 **/
static int dcc_count_slots(struct dcc_hostdef *hostlist)
{
    int n_total = 0;
    struct dcc_hostdef *cur = hostlist;

    while (cur) {
        n_total += cur->n_slots;
        cur = cur->next;
    }

    return n_total;
}


/**
 * Pick one slot out of all the slots on @p hostlist, and block waiting
 * for it to be free.
 *
 * The choice is not truly random: the slot number is derived from the
 * sum of this process's id and its parent's id, taken modulo the
 * total number of slots.  That number is then mapped onto a host and
 * a slot index within that host by walking the list.
 *
 * @param what      Label for the job, used in trace messages.
 * @param hostlist  Head of the list of candidate hosts.
 * @param buildhost Receives the chosen host.  It is set before the
 *                  lock is requested.
 *
 * @return The result of dcc_lock_host() for the chosen slot.  If
 * there are no slots at all, or no host matches the chosen slot,
 * rs_fatal() is called; should that ever return, the result is -1.
 **/
int dcc_random_block(const char *what,
                     struct dcc_hostdef *hostlist,
                     struct dcc_hostdef **buildhost)
{
    int total_slots;
    int s;
    struct dcc_hostdef *h;

    total_slots = dcc_count_slots(hostlist);
    if (total_slots <= 0) {
        /* Nothing to choose from.  Bail out with a diagnostic instead
         * of computing a remainder by zero below. */
        rs_trace("total_slots=%d", total_slots);
        rs_fatal("no slots available to lock");
        return -1;
    }

    s = (getppid() + getpid()) % total_slots;
    rs_trace("total_slots=%d, s=%d", total_slots, s);

    for (h = hostlist; h; h = h->next) {
        if (s < h->n_slots) {
            int ret;

            *buildhost = h;
            rs_trace("blocking lock for %s on %s slot %d", what,
                     h->hostname, s);
            ret = dcc_lock_host(h, s, 1);
            rs_trace("returned from blocking lock");
            return ret;
        }

        /* Not on this host: make s relative to the next one. */
        s -= h->n_slots;
    }

    rs_fatal("somehow failed to lock anything");
    /* Keep every path of this non-void function returning a value,
     * whether or not rs_fatal() comes back. */
    return -1;
}




/**
 * Take a lock on localhost.  Used to get the right balance of jobs
 * when some of them must be local.
 *
 * This runs dcc_lock_one() with dcc_hostdef_local as the host list;
 * the host it reports back is discarded.
 *
 * @param what Label for the job; passed through to dcc_lock_one().
 *
 * @return Whatever dcc_lock_one() returns.
 **/
int dcc_lock_local(const char *what)
{
    struct dcc_hostdef *ignored_host;
    int ret;

    ret = dcc_lock_one(what, dcc_hostdef_local, &ignored_host);
    return ret;
}
