/*   $Source: bitbucket.org:berkeleylab/gasnet.git/other/amudp/amudp_spmd.cpp $
 * Description: AMUDP Implementations of SPMD operations (bootstrapping and parallel job control)
 * Copyright 2000, Dan Bonachea <bonachea@cs.berkeley.edu>
 */

#include <amudp_internal.h>
#include <amudp_spmd.h>

#include <errno.h>
#include <stdio.h>
#if PLATFORM_OS_MSWINDOWS
  #undef sched_yield
  #define sched_yield() Sleep(0)
  #define sleep(x) Sleep(x*1000)
  #include <process.h>
#else
  #include <unistd.h>
  #if PLATFORM_ARCH_CRAYT3E || PLATFORM_OS_SUPERUX || PLATFORM_OS_NETBSD || \
      PLATFORM_OS_MTA || PLATFORM_OS_BLRTS || PLATFORM_OS_CATAMOUNT || PLATFORM_OS_OPENBSD
    /* these implement sched_yield() in libpthread only, which we may not want */
    #undef sched_yield
    #define sched_yield() sleep(0)
  #else
    #include <sched.h>
  #endif
  #if (PLATFORM_OS_LINUX || PLATFORM_OS_UCLINUX) && !defined(__USE_GNU)
    /* some Linuxes need this to pull in F_SETSIG */
    #define __USE_GNU
    #include <fcntl.h>
    #undef __USE_GNU
  #else
    #include <fcntl.h>
  #endif
  #ifdef AMUDP_BLCR_ENABLED
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <unistd.h>
  #endif
#endif

extern char **environ; 

#include "sockutil.h"
#include "socklist.h"
#include "sig.h"

#define FD_STDIN 0
#define FD_STDOUT 1
#define FD_STDERR 2

#ifndef FREEZE_SLAVE
#define FREEZE_SLAVE  0
#endif
volatile bool amudp_frozen = true;
/* Park the calling process until amudp_frozen is cleared (e.g. by a debugger).
 * The depth==0 entry recurses exactly once before spinning; this extra call
 * level exists so that a full stack frame is available to the debugger. */
static void _freezeForDebugger(int depth) {
  if (depth == 0) {
    _freezeForDebugger(1);
    return;
  }

  /* volatile counter: gives the loop an observable side effect, and a value to inspect */
  volatile int spins = 0;
  while (amudp_frozen) {
    spins++;
    sleep(1);
  }
}
/* Announce this process's host name and pid on stderr, then block in
 * _freezeForDebugger() until amudp_frozen is cleared. */
static void freezeForDebugger() {
  /* Zero-filled, and one byte larger than the length handed to gethostname(),
   * so the name stays NUL-terminated even if the call truncates it without
   * writing a terminator. */
  char name[256] = {0};
  const char *host = name;
  if (gethostname(name, sizeof(name) - 1) != 0) {
    host = "(unknown)"; /* lookup failed: never print the unset buffer */
  }
  fprintf(stderr,"slave frozen for debugger: host=%s  pid=%i\n", host, (int)getpid()); fflush(stderr);
  _freezeForDebugger(0);
}

/* Tracing helpers. When AMUDP_DEBUG_VERBOSE is enabled each macro prints msg to
 * stderr (prefixed with "slave <AMUDP_SPMDMYPROC>" or "master") and flushes;
 * otherwise both expand to an empty do/while statement. */
#if AMUDP_DEBUG_VERBOSE
  #define DEBUG_SLAVE(msg)  do { fprintf(stderr,"slave %i: %s\n", AMUDP_SPMDMYPROC, msg); fflush(stderr); } while(0)
  #define DEBUG_MASTER(msg) do { fprintf(stderr,"master: %s\n", msg); fflush(stderr); } while(0)
#else
  #define DEBUG_SLAVE(msg)  do {} while(0) /* prevent silly warnings about empty statements */
  #define DEBUG_MASTER(msg) do {} while(0)
#endif

#define AMUDP_SPMDSLAVE_ARGS "AMUDP_SLAVE_ARGS"

static int AMUDP_SPMDShutdown(int exitcode);

/* master only */
  static SOCKET AMUDP_SPMDListenSocket = INVALID_SOCKET; /* TCP bootstrapping listener */
  /* TCP listeners on which the master accepts the slaves' stdin/stdout/stderr routing
   * connections; each is closed and reset to INVALID_SOCKET by setupStdSocket() once
   * AMUDP_SPMDNUMPROCS connections have been accepted on it */
  static SOCKET AMUDP_SPMDStdinListenSocket = INVALID_SOCKET; 
  static SOCKET AMUDP_SPMDStdoutListenSocket = INVALID_SOCKET; 
  static SOCKET AMUDP_SPMDStderrListenSocket = INVALID_SOCKET; 
  static SOCKET *AMUDP_SPMDSlaveSocket = NULL; /* table of TCP control sockets, indexed by procid */
  static en_t *AMUDP_SPMDTranslation_name = NULL; /* endpoint name reported by each slave, indexed by procid */
  static tag_t *AMUDP_SPMDTranslation_tag = NULL; /* per-proc tags, indexed by procid; network byte order */
  int AMUDP_SPMDSpawnRunning = FALSE; /* true while spawn is active */
  int AMUDP_SPMDRedirectStdsockets; /* true if stdin/stdout/stderr should be redirected */

/* slave only */
  SOCKET AMUDP_SPMDControlSocket = INVALID_SOCKET; 
  static ep_t AMUDP_SPMDEndpoint = NULL;
  static eb_t AMUDP_SPMDBundle = NULL;
  static en_t AMUDP_SPMDName = {0};
  /* set to TRUE by AMUDP_SPMDControlSocketCallback() (built only with USE_ASYNC_TCP_CONTROL) */
  volatile int AMUDP_SPMDIsActiveControlSocket = 0; 
  /* NOTE(review): presumably the sockets that replace the slave's standard streams
   * when output routing is enabled - their use is not visible here, confirm */
  static SOCKET newstdin = INVALID_SOCKET;
  static SOCKET newstdout = INVALID_SOCKET;
  static SOCKET newstderr = INVALID_SOCKET;
  static int AMUDP_SPMDMYPROC = AMUDP_PROCID_NEXT; /* -1 requests next avail procid */
  static volatile int AMUDP_SPMDBarrierDone = 0; /* flag barrier as complete */
  static volatile int AMUDP_SPMDGatherDone = 0;  /* flag gather as complete */
  static volatile int AMUDP_SPMDGatherLen = 0;
  static void * volatile AMUDP_SPMDGatherData = NULL;
  int AMUDP_SPMDwakeupOnControlActivity = 0;
  int AMUDP_FailoverAcksOutstanding = 0;


/* master & slave */
  /* tested by AMUDP_SPMDStartup() to reject a repeated call, and by the inquiry
   * functions (AMUDP_SPMDNumProcs/MyProc/IsWorker) to detect use before startup */
  static int AMUDP_SPMDStartupCalled = 0;
  static int AMUDP_SPMDNUMPROCS = -1; /* number of processes in the job; -1 until known */
  /* flattened snapshot of the master's environment: consecutive NUL-terminated
   * 'key=value' strings followed by one extra terminating NUL */
  static char *AMUDP_SPMDMasterEnvironment = NULL;

#ifdef AMUDP_BLCR_ENABLED
/* checkpoint/restart */
  /* when nonzero, the master passes the flag value -1 ("slave performing restart")
   * to the slaves in the AMUDP_SLAVE_ARGS environment variable */
  int AMUDP_SPMDRestartActive = 0;
  static int AMUDP_SPMDNetworkDepth = 0;
#else
  /* without BLCR support the restart flag is a compile-time constant 0 */
  #define AMUDP_SPMDRestartActive 0
#endif

// Bootstrap record sent by the master to every slave over the TCP control socket
// (preceded on the wire by an int32 holding sizeof this struct).
// used to pass info - always stored in network byte order
// fields carefully ordered by size to avoid cross-platform struct packing differences
// (do not reorder or resize fields: the raw struct bytes are what is transmitted)
typedef struct {
  double faultInjectionRate; // AMUDP_FaultInjectionRate (master reads it from the FAULT_RATE setting, 0.0 if unset)

  uint64_t networkpid;  // globally unique pid (master builds it as masterIP<<32 | low 16 bits of master pid)

  tag_t tag;            // tag for this processor (master builds it as networkpid | procid<<16)
  
  int32_t procid;       // id for this processor
  int32_t numprocs;     // num procs in job

  int32_t depth;        // network depth
  uint32_t environtablesz; // size of environment table we're about to send (bytes, including the final NUL)

  // TCP ports of the master's std stream listeners; the master sends 0 in all three
  // when stdin/stdout/stderr redirection is disabled
  uint16_t stdinMaster; // port of stdin listener
  uint16_t stdoutMaster; // port of stdout listener
  uint16_t stderrMaster; // port of stderr listener
  uint16_t _pad1; // ensure platform-independent table size

} AMUDP_SPMDBootstrapInfo_t;

/*
  Protocol for TCP bootstrapping/control sockets
  initialization: 
    slave->master (int32) - send my procid for init
    slave->master (en_t) - send my endpoint name for init
   if received procid == AMUDP_PROCID_ALLOC
    master->slave (int32 next_rank++)
   else
    master->slave (int32 sizeof(AMUDP_SPMDBootstrapInfo_t))
    master->slave (AMUDP_SPMDBootstrapInfo_t) 
    master->slave (AMUDP_SPMDTranslation_name (variable size)) 
    master->slave (AMUDP_SPMDTranslation_tag (variable size)) 
    master->slave (AMUDP_SPMDMasterEnvironment (variable size)) 

  master->slave messages
    "E"(int32 exitcode) - die now with this exit code
    "F"(int32 i)(old en_t)(new en_t) - slave i's NIC just failed over to new en_t
    "A"(int32 i) - (to slave i) slave acknowledged fail-over of slave i's NIC
    "B" - barrier complete
    "G"(int32 perproclen)(data) - end an AllGather, here's the result

  slave->master messages
    "E"(int32 exitcode) - exit with this code
    "F"(int32 i)(old en_t)(new en_t) - slave i's NIC just failed over to new en_t
    "A"(int32 i) - acknowledge fail-over of slave i's NIC
    "B" - enter barrier
    "G"(int32 i)(int32 perproclen)(data) - slave i begin an AllGather, here's the length and my data
*/
/* ------------------------------------------------------------------------------------ 
 *  misc helpers
 * ------------------------------------------------------------------------------------ */
/* Push all pending stdio output out of this process.
 *   context - label used in the fatal error message if a flush fails;
 *             NULL selects the default label "flushStreams()".
 * Any fflush() failure is fatal (reported through perror + AMUDP_FatalErr).
 * When the FS_SYNC setting starts with '1', 'y' or 'Y' a filesystem-wide sync
 * is also requested.  The setting is read once and cached for later calls. */
static void flushStreams(const char *context) {
  static int do_sync = -1; /* -1: FS_SYNC not consulted yet; afterwards 0 or 1 */
  const char *where = context ? context : "flushStreams()";

  /* passing NULL to fflush flushes all open FILE streams */
  if (fflush(NULL) != 0) {
    perror("fflush");
    AMUDP_FatalErr("failed to fflush(NULL) in %s", where); 
  }
  if (fflush(stdout) != 0) {
    perror("fflush");
    AMUDP_FatalErr("failed to flush stdout in %s", where); 
  }
  if (fflush(stderr) != 0) {
    perror("fflush");
    AMUDP_FatalErr("failed to flush stderr in %s", where); 
  }

  /* fsync() errors are deliberately ignored (e.g. the output may be a console) */
  fsync(STDOUT_FILENO);
  fsync(STDERR_FILENO);

  if (do_sync < 0) {
    /* Approximate match to GASNet's acceptance of 'Y|YES|y|yes|1' */
    const char *setting = AMUDP_getenv_prefixed_withdefault("FS_SYNC", "NO");
    switch (setting[0]) {
      case '1':
      case 'y':
      case 'Y':
        do_sync = 1;
        break;
      default:
        do_sync = 0;
        break;
    }
  }
  if (do_sync) {
  #if PLATFORM_OS_MTA
    mta_sync();
  #elif !PLATFORM_OS_CATAMOUNT
    sync();
  #endif
  }
  sched_yield();
}
//------------------------------------------------------------------------------------
/* Render endpoint name `en` as text into the caller-supplied buffer.
 *   buf - destination, must be non-NULL; it is written with sprintf, so no
 *         bounds checking is performed and the caller must size it generously.
 * Returns buf.  The text is "(ip:port)", or under UETH "(fixed: N variable: N)". */
extern char *AMUDP_enStr(en_t en, char *buf) {
  AMUDP_assert(buf != NULL);
#ifdef UETH
  sprintf(buf, "(fixed: %i variable: %i)", en.fixed, en.variable.index);
#else
  {
    /* view the endpoint name as a socket address to extract IP and port */
    SockAddr addr((sockaddr*)&en);
    sprintf(buf, "(%s:%i)", addr.IPStr(), addr.port());
  }
#endif
  return buf;
}
/* Render `tag` as "0x" followed by 16 lowercase hex digits (upper 32 bits
 * first) into the caller-supplied buffer.
 *   buf - destination, must be non-NULL and hold at least 19 bytes
 *         (2 + 16 digits + NUL); written with sprintf, no bounds checking.
 * Returns buf. */
extern char *AMUDP_tagStr(tag_t tag, char *buf) {
  AMUDP_assert(buf != NULL);
  /* %x consumes an unsigned int, so pass unsigned halves rather than int */
  const unsigned int hi = (unsigned int)(uint32_t)(tag >> 32);
  const unsigned int lo = (unsigned int)(uint32_t)(tag & 0xFFFFFFFF);
  sprintf(buf, "0x%08x%08x", hi, lo);
  return buf;
}
/* Return the string produced by errorCodeString() for the code currently
 * reported by getSocketErrorCode(). */
extern const char *sockErrDesc() {
  return errorCodeString(getSocketErrorCode());
}
//------------------------------------------------------------------------------------
/* Accept one pending connection on a std stream listener (master side).
 *   ls      - the listener; closed and set to INVALID_SOCKET once `list`
 *             holds AMUDP_SPMDNUMPROCS connections
 *   list    - connections accepted so far for this particular stream
 *   allList - every socket the master is watching
 * Activity arriving after the list is already full is reported via AMUDP_Err. */
static void setupStdSocket(SOCKET& ls, SocketList& list, SocketList& allList) {
  const int connected = (int)list.getCount();
  if (connected >= AMUDP_SPMDNUMPROCS) {
    AMUDP_Err("master detected some unrecognized activity on a std listener");
    return;
  }

  SockAddr peer;
  SOCKET conn = accept_socket(ls, peer);
  list.insert(conn);
  allList.insert(conn);

  if ((int)list.getCount() != AMUDP_SPMDNUMPROCS) return; /* still expecting more connections */

  /* every process has connected on this stream: retire the listener */
  close_socket(ls);
  allList.remove(ls);
  ls = INVALID_SOCKET;
}
//------------------------------------------------------------------------------------
/* Forward pending output from the slaves' std output sockets to a local stream.
 *   fd       - destination stream (the master passes stdout or stderr)
 *   psockset - read set filled in by select(); only sockets of `list` that are
 *              set in it are serviced
 *   list     - the output sockets for this stream
 *   allList  - every socket the master is watching
 *   nproc    - capacity of the scratch socket array
 * At most AMUDP_STD_BUFSZ bytes are read from each ready socket per call.
 * A socket that reports end-of-stream or a recv() error is removed from both
 * lists so the master stops selecting on it.
 * Not reentrant: the scratch array and the receive buffer are function-static,
 * and the scratch array is sized from `nproc` on the first call only. */
static void handleStdOutput(FILE *fd, fd_set *psockset, SocketList& list, SocketList& allList, int nproc) {
  int numset;
  static SOCKET *tempSockArr = NULL;
  if (!tempSockArr) tempSockArr = (SOCKET *)AMUDP_malloc(sizeof(SOCKET)*nproc);
  if ((numset = list.getIntersection(psockset, tempSockArr, nproc))) { // we have some active std sockets
    for (int i=0; i < numset; i++) {
      SOCKET s = tempSockArr[i];
      AMUDP_assert(FD_ISSET(s, psockset));
      #ifndef AMUDP_STD_BUFSZ
      #define AMUDP_STD_BUFSZ 1024
      #endif
      static char buf[AMUDP_STD_BUFSZ+1]; // +1 leaves room for the terminator added below
      ssize_t sz = recv(s, buf, AMUDP_STD_BUFSZ, 0);
      AMUDP_assert(sz <= AMUDP_STD_BUFSZ);
      if (sz == SOCKET_ERROR) {
        DEBUG_MASTER("recv error in handleStdOutput, closing.");
        // forget the socket before closing it: a closed descriptor left in the
        // lists would be handed to the master's next select() call
        list.remove(s);
        allList.remove(s);
        close_socket(s);
      } else if (sz == 0) { // socket closed
        DEBUG_MASTER("dropping a std output socket...");
        // NOTE(review): the descriptor is not closed on this path; whether it is
        // released elsewhere is not visible from here - confirm
        list.remove(s);
        allList.remove(s);
      } else {
        AMUDP_assert(sz > 0);
        buf[sz] = '\0';
        #if AMUDP_DEBUG_VERBOSE
          fprintf(fd, "got some output: %s%s", buf, (buf[sz-1]=='\n'?"":"\n"));
        #else
          fwrite(buf, 1, sz, fd);
        #endif
        fflush(fd);
      }
    }
  }
}
//------------------------------------------------------------------------------------
#if USE_ASYNC_TCP_CONTROL
  /* Signal handler for AMUDP_SIGIO: records that the control socket has
   * activity by setting AMUDP_SPMDIsActiveControlSocket, then re-registers
   * itself as the handler for AMUDP_SIGIO.  `sig` is unused. */
  static void AMUDP_SPMDControlSocketCallback(int sig) {
    AMUDP_SPMDIsActiveControlSocket = TRUE;
    #if AMUDP_DEBUG_VERBOSE
      /* NOTE(review): stdio calls inside a signal handler are not async-signal-safe;
       * this is limited to verbose debug builds */
      fprintf(stderr, "got an AMUDP_SIGIO signal\n");fflush(stderr);
    #endif
    reghandler(AMUDP_SIGIO, AMUDP_SPMDControlSocketCallback);
  }
#endif
/* ------------------------------------------------------------------------------------ 
 *  basic inquiries
 * ------------------------------------------------------------------------------------ */
/* Return the number of processes in the SPMD job.
 * Before AMUDP_SPMDStartup() has been called, reports an error via AMUDP_Err
 * and returns -1. */
extern int AMUDP_SPMDNumProcs() {
  if (AMUDP_SPMDStartupCalled) {
    AMUDP_assert(AMUDP_SPMDNUMPROCS >= 1);
    return AMUDP_SPMDNUMPROCS;
  }
  AMUDP_Err("called AMUDP_SPMDNumProcs before AMUDP_SPMDStartup()");
  return -1;
}
/* ------------------------------------------------------------------------------------ */
/* Return this process's id within the SPMD job.
 * Before AMUDP_SPMDStartup() has been called, reports an error via AMUDP_Err
 * and returns -1. */
extern int AMUDP_SPMDMyProc() {
  if (AMUDP_SPMDStartupCalled) {
    AMUDP_assert(AMUDP_SPMDMYPROC >= 0);
    return AMUDP_SPMDMYPROC;
  }
  AMUDP_Err("called AMUDP_SPMDMyProc before AMUDP_SPMDStartup()");
  return -1;
}
/* ------------------------------------------------------------------------------------ */
/* Report whether this process is an SPMD worker.
 * Returns 1 once startup has been called; before that, returns 1 only when the
 * AMUDP_SLAVE_ARGS environment variable is set and parses (via atoi) to a
 * nonzero value, otherwise 0.  `argv` is accepted but not examined. */
extern int AMUDP_SPMDIsWorker(char **argv) {
  if (AMUDP_SPMDStartupCalled) return 1;

  const char *slave_args = getenv(AMUDP_SPMDSLAVE_ARGS);
  if (slave_args == NULL) return 0;
  return atoi(slave_args) != 0;
}
/* ------------------------------------------------------------------------------------ */
extern int AMUDP_SPMDStartup(int *argc, char ***argv,
                             int nproc, int networkdepth, 
                             amudp_spawnfn_t spawnfn,
                             uint64_t *networkpid,
                             eb_t *eb, ep_t *ep) {
  
  if (AMUDP_SPMDStartupCalled) AMUDP_RETURN_ERR(RESOURCE);
  /* we need a separate socklibinit for master 
     and to prevent AM_Terminate from murdering all our control sockets */
  if (!socklibinit()) AMUDP_RETURN_ERRFR(RESOURCE, AMUDP_SPMDStartup, "socklibinit() failed");

  const char *env_var = getenv(AMUDP_SPMDSLAVE_ARGS);
  const int slave_flag = env_var ? atoi(env_var) : 0;

  /* ------------------------------------------------------------------------------------ 
   *  I'm a master 
   * ------------------------------------------------------------------------------------ */
  if (! slave_flag) {
    int usingdefaultdegree = 0;
    uint64_t npid;
    if (nproc < 0 || nproc > AMUDP_MAX_SPMDPROCS) AMUDP_RETURN_ERR(BAD_ARG);

    if (!argc || !argv) AMUDP_RETURN_ERR(BAD_ARG);

    #if AMUDP_DEBUG_VERBOSE
      AMUDP_SilentMode = 0;
    #else
      AMUDP_SilentMode = !AMUDP_getenv_prefixed("VERBOSEENV");
    #endif

    /* defaulting */
    if (networkdepth < 0) AMUDP_RETURN_ERR(BAD_ARG);
    if (networkdepth == 0) {
      networkdepth = atoi(
        AMUDP_getenv_prefixed_withdefault("NETWORKDEPTH", _STRINGIFY(AMUDP_DEFAULT_NETWORKDEPTH)));
      if (networkdepth <= 0) networkdepth = AMUDP_DEFAULT_NETWORKDEPTH;
    }

    if (nproc == 0) { /* default to read from args */
      if (*argc > 1) nproc = atoi((*argv)[1]);
      if (nproc < 1) {
        fprintf(stderr, 
          "AMUDP SPMD Runtime Layer v%s, Copyright 2001, Dan Bonachea\n"
          "This program requires you specify the parallel degree\n"
          "as the first argument to %s\n" 
          , AMUDP_LIBRARY_VERSION_STR, (*argv)[0]);
        exit(1);
        AMUDP_RETURN_ERR(BAD_ARG);
      }
      
      usingdefaultdegree = 1;

      /* readjust params */
      (*argv)[1] = (*argv)[0];
      (*argv)++;
      (*argc)--;
    }

    AMUDP_SPMDNUMPROCS = nproc;

    { /* check job size */
      int maxtranslations = 0;
      int temp = AM_MaxNumTranslations(&maxtranslations);
      if (temp != AM_OK) {
        AMUDP_Err("Failed to AM_MaxNumTranslations() in AMUDP_SPMDStartup");
        AMUDP_RETURN(temp);
      } else if (AMUDP_SPMDNUMPROCS > maxtranslations) {
        AMUDP_Err("Too many nodes: AM_MaxNumTranslations (%d) less than number of requested nodes (%d)",
                maxtranslations, AMUDP_SPMDNUMPROCS);
        AMUDP_RETURN_ERR(RESOURCE);
      }
    }

    if (argv && !spawnfn && *argc > 1 && strlen((*argv)[1]) == 1) {
      for (int i=0; AMUDP_Spawnfn_Desc[i].abbrev; i++) {
        if (toupper((*argv)[1][0]) == toupper(AMUDP_Spawnfn_Desc[i].abbrev)) {
          spawnfn = AMUDP_Spawnfn_Desc[i].fnptr;
          break;
        }
      }
      if (spawnfn) {
        /* readjust params */
        (*argv)[1] = (*argv)[0];
        (*argv)++;
        (*argc)--;
      }
    }
    if (!spawnfn) {
      const char *spawnfn_str = AMUDP_getenv_prefixed_withdefault("SPAWNFN","S");
      if (spawnfn_str) {
        for (int i=0; AMUDP_Spawnfn_Desc[i].abbrev; i++) {
          if (toupper(spawnfn_str[0]) == toupper(AMUDP_Spawnfn_Desc[i].abbrev)) {
            spawnfn = AMUDP_Spawnfn_Desc[i].fnptr;
            break;
          }
        }
      }
    }
    if (!spawnfn) {
      fprintf(stderr, 
        "AMUDP SPMD Runtime Layer v%s, Copyright 2001, Dan Bonachea\n"
        "Usage: %s%s <spawnfn> program args...\n"
        " <spawnfn> = one of the following mechanisms for spawning remote workers:\n"
        , AMUDP_LIBRARY_VERSION_STR, (*argv)[0], (usingdefaultdegree?" <paralleldegree>":""));
      for (int i=0; AMUDP_Spawnfn_Desc[i].abbrev; i++) {
        fprintf(stderr, "    '%c'  %s\n",  
              toupper(AMUDP_Spawnfn_Desc[i].abbrev), AMUDP_Spawnfn_Desc[i].desc);
      }
      exit(1);
      AMUDP_RETURN_ERR(BAD_ARG);
    }

    // setup bootstrap info 
    AMUDP_SPMDBootstrapInfo_t bootstrapinfo;
    memset(&bootstrapinfo, 0, sizeof(bootstrapinfo)); // prevent valgrind warnings about sending uninit padding
    bootstrapinfo.numprocs = hton32(AMUDP_SPMDNUMPROCS);
    bootstrapinfo.depth = hton32(networkdepth);

    { char *faultRate = AMUDP_getenv_prefixed_withdefault("FAULT_RATE", "0.0");
      if (faultRate && atof(faultRate) != 0.0) {      
        bootstrapinfo.faultInjectionRate = atof(faultRate);
      } else bootstrapinfo.faultInjectionRate = 0.0;
      hton64a(&bootstrapinfo.faultInjectionRate);
    }

    const char *masterHostname = getMyHostName();
    if (!AMUDP_SilentMode) 
      printf("master host name: %s\n", masterHostname); fflush(stdout);

    // TCP socket lists
    SocketList allList(AMUDP_SPMDNUMPROCS*4+10); // a list of all active sockets
    SocketList coordList(AMUDP_SPMDNUMPROCS);    // a list of all coordination sockets
    SocketList stdinList(AMUDP_SPMDNUMPROCS);    // a list of all stdin routing sockets
    SocketList stdoutList(AMUDP_SPMDNUMPROCS);   // a list of all stdout routing sockets
    SocketList stderrList(AMUDP_SPMDNUMPROCS);   // a list of all stderr routing sockets
    AMUDP_SPMDSlaveSocket = (SOCKET*)AMUDP_malloc(AMUDP_SPMDNUMPROCS * sizeof(SOCKET));

    try {
      // create our TCP listen ports 
      unsigned short anyport = 0;
      AMUDP_SPMDListenSocket = listen_socket(anyport, false);
      AMUDP_SPMDStdinListenSocket = listen_socket(anyport, false);
      AMUDP_SPMDStdoutListenSocket = listen_socket(anyport, false);
      AMUDP_SPMDStderrListenSocket = listen_socket(anyport, false);
      bootstrapinfo.stdinMaster = hton16(getsockname(AMUDP_SPMDStdinListenSocket).port());
      bootstrapinfo.stdoutMaster = hton16(getsockname(AMUDP_SPMDStdoutListenSocket).port());
      bootstrapinfo.stderrMaster = hton16(getsockname(AMUDP_SPMDStderrListenSocket).port());
    } catch (xBase &exn) {
      AMUDP_RETURN_ERRFR(RESOURCE, AMUDP_SPMDStartup, exn.why());
    }

    allList.insert(AMUDP_SPMDListenSocket);
    allList.insert(AMUDP_SPMDStdinListenSocket);
    allList.insert(AMUDP_SPMDStdoutListenSocket);
    allList.insert(AMUDP_SPMDStderrListenSocket);

    { /* flatten a snapshot of the master's environment for transmission to slaves
       * here we assume the standard representation where a pointer to the environment 
       * is stored in a global variable 'environ' and the environment is represented as an array 
       * of null-terminated strings where each has the form 'key=value' and value may be empty, 
       * and the final string pointer is a NULL pointer
       * we flatten this into a list of null-terminated 'key=value' strings, 
       * terminated with a double-null
       */
      int i;
      int totalEnvSize = 0;
      for(i = 0; environ[i]; i++) 
        totalEnvSize += strlen(environ[i]) + 1;
      totalEnvSize++;

      AMUDP_SPMDMasterEnvironment = (char *)AMUDP_malloc(totalEnvSize);
      char *p = AMUDP_SPMDMasterEnvironment;
      p[0] = '\0';
      for(i = 0; environ[i]; i++) {
        strcpy(p, environ[i]);
        p += strlen(p) + 1;
      }
      *p = '\0';
      AMUDP_assert((p+1) - AMUDP_SPMDMasterEnvironment == totalEnvSize);
      bootstrapinfo.environtablesz = hton32(totalEnvSize);
    }

    // find the master addr
    SockAddr masterAddr = getsockname(AMUDP_SPMDListenSocket);
    const char *masterIPstr = AMUDP_getenv_prefixed_withdefault("MASTERIP", "");
    if (*masterIPstr) masterAddr = SockAddr(masterIPstr, masterAddr.port());
    if (masterAddr.IP() == 0) {
      try { /* requires master can resolve its own address */
        SockAddr dnsAddr = DNSLookup(getMyHostName());
        masterAddr = SockAddr(dnsAddr.IP(), masterAddr.port());
      } catch (xBase &exn) {
        AMUDP_Warn("Master %s failed to resolve its own hostname: %s%s",
          getMyHostName(),exn.why(),
          (USE_NUMERIC_MASTER_ADDR?"\nTry setting AMUDP_MASTERIP":"")); 
        AMUDP_RETURN_ERRFR(RESOURCE, AMUDP_SPMDStartup, exn.why());
      }
    }

    // setup NULL-terminated array of extra environment vars for slave
    // Currently have only one such variable, "AMUDP_SLAVE_ARGS":
    //          flag[,master,[network]]
    //     flag: zero = this is not a slave
    //           positive = this is a slave and value is verbosity (1 = not verbose)
    //           -1 = this is a slave performing restart
    //   master: IP or hostname of the master node (require if flag != 0)
    //  network: value of [PREFIX]_WORKERIP if given
    char slave_env[1024] = AMUDP_SPMDSLAVE_ARGS "=";
    strncat(slave_env,
            (AMUDP_SPMDRestartActive ? "-1," : (AMUDP_SilentMode ? "1," : "2,")),
            sizeof(slave_env) - 1);
    ssize_t remain = sizeof(slave_env) - (strlen(slave_env) + 1);
    if (*masterIPstr) {
      strncat(slave_env, masterAddr.FTPStr(), remain);
    }
    else {
      #if USE_NUMERIC_MASTER_ADDR
        strncat(slave_env, masterAddr.FTPStr(), remain);
      #else
        char *tmp = slave_env + strlen(slave_env);
        snprintf(tmp, remain, "%s:%i", masterHostname, masterAddr.port());
      #endif
    }
    remain = sizeof(slave_env) - (strlen(slave_env) + 1);
    strncat(slave_env, ",", remain);
    // append WORKERIP which it is needed before the master env is sent
    { char *network = AMUDP_getenv_prefixed_withdefault("WORKERIP","");
      if (network && network[0]) {
        #if HAVE_GETIFADDRS
          strncat(slave_env, network, remain-1);
        #else
          fprintf(stderr,"AMUDP: Warning: WORKERIP set in the environment, but your platform "
                         "lacks the required getifaddrs() support.  Ignoring WORKERIP.\n");
        #endif
      }
    }
    if (!remain) { // ran out of space!
      AMUDP_FatalErr("Error assembling arguments to SPMD worker threads. Exiting...");
    }
    char *extra_env[2] = { slave_env, NULL };

    { int masterpid = getpid();
      uint32_t masterIP = masterAddr.IP();
      npid = ((uint64_t)masterIP) << 32 | 
             (((uint64_t)masterpid) & 0xFFFF);
      bootstrapinfo.networkpid = hton64(npid);
      if (networkpid) *networkpid = npid;
    }

    // create and initialize the translation table that we'll fill in as slaves connect
    AMUDP_SPMDTranslation_name = (en_t*)AMUDP_malloc(AMUDP_SPMDNUMPROCS*sizeof(en_t));
    AMUDP_SPMDTranslation_tag = (tag_t*)AMUDP_malloc(AMUDP_SPMDNUMPROCS*sizeof(tag_t));
    for (int i=0; i < AMUDP_SPMDNUMPROCS; i++) {
      AMUDP_SPMDSlaveSocket[i] = INVALID_SOCKET;
      AMUDP_SPMDTranslation_tag[i] = hton64(npid | ((uint64_t)i) << 16);
    }

    AMUDP_SPMDRedirectStdsockets = strcmp(AMUDP_getenv_prefixed_withdefault("ROUTE_OUTPUT",(DISABLE_STDSOCKET_REDIRECT?"0":"1")),"0");

    // call system-specific spawning routine
    AMUDP_SPMDSpawnRunning = TRUE;
    if (!spawnfn(AMUDP_SPMDNUMPROCS, *argc, *argv, extra_env))
      AMUDP_FatalErr("Error spawning SPMD worker threads. Exiting...");
    AMUDP_SPMDSpawnRunning = FALSE;

    if (!AMUDP_SPMDRedirectStdsockets) {
      // spawn function disabled our stdsocket redirect - signal the slaves of this fact
      bootstrapinfo.stdinMaster = hton16(0);
      bootstrapinfo.stdoutMaster = hton16(0);
      bootstrapinfo.stderrMaster = hton16(0);
    }
#if !PLATFORM_OS_MSWINDOWS
    else {
      // Insurance against strangely intermixed stdout/stderr
      int rc;
      rc = fcntl(STDOUT_FILENO, F_GETFL, 0);
      if (rc >= 0) (void)fcntl(STDOUT_FILENO, F_SETFL, rc | O_APPEND);
      rc = fcntl(STDERR_FILENO, F_GETFL, 0);
      if (rc >= 0) (void)fcntl(STDERR_FILENO, F_SETFL, rc | O_APPEND);
    }
#endif 

    // main communication loop for master
    try {
      int numSlavesAttached = 0;

      fd_set sockset;
      fd_set* psockset = &sockset;
      int numset; // helpers for coord socket
      SOCKET *tempSockArr = (SOCKET*)AMUDP_malloc(sizeof(SOCKET)*AMUDP_SPMDNUMPROCS);
      while (1) {
       #ifdef FD_SETSIZE /* Should always be present, but just in case */
        if (allList.getMaxFd() >= FD_SETSIZE)
          AMUDP_FatalErr("Open sockets exceed FD_SETSIZE. Exiting...");
       #endif
        allList.makeFD_SET(psockset);

        if (select(allList.getMaxFd()+1, psockset, NULL, NULL, NULL) == -1) { // block for activity
          perror("select");
          exit(1);
        }
        //------------------------------------------------------------------------------------
        // stdin/stderr/stdout listeners - incoming connections
        // must continue to re-select after accepting a connection that might alias a closed listener socket id
        if (AMUDP_SPMDStdinListenSocket != INVALID_SOCKET &&
            FD_ISSET(AMUDP_SPMDStdinListenSocket, psockset))  
           { setupStdSocket(AMUDP_SPMDStdinListenSocket, stdinList, allList); continue; }
        if (AMUDP_SPMDStdoutListenSocket != INVALID_SOCKET &&
            FD_ISSET(AMUDP_SPMDStdoutListenSocket, psockset)) 
           { setupStdSocket(AMUDP_SPMDStdoutListenSocket, stdoutList, allList); continue; }
        if (AMUDP_SPMDStderrListenSocket != INVALID_SOCKET &&
            FD_ISSET(AMUDP_SPMDStderrListenSocket, psockset)) 
           { setupStdSocket(AMUDP_SPMDStderrListenSocket, stderrList, allList); continue; }
        //------------------------------------------------------------------------------------
        // stdout/err sockets - must come before possible exit to drain output
        handleStdOutput(stdout, psockset, stdoutList, allList, AMUDP_SPMDNUMPROCS);
        handleStdOutput(stderr, psockset, stderrList, allList, AMUDP_SPMDNUMPROCS);
        // stdin (illegal to receive anything here)
        if ((numset = stdinList.getIntersection(psockset, tempSockArr, AMUDP_SPMDNUMPROCS))) {
          for (int i=0; i < numset; i++) {
            SOCKET s = tempSockArr[i];
            AMUDP_assert(FD_ISSET(s, psockset));
            if (isClosed(s)) {
              DEBUG_MASTER("dropping a stdinList socket...");
              stdinList.remove(s);
              allList.remove(s);
            } else {
              AMUDP_Err("Master got illegal input on a stdin socket");
              stdinList.remove(s); // prevent subsequent warnings
              allList.remove(s);
            }
          }
        }
        //------------------------------------------------------------------------------------
        // coordination listener
        if (AMUDP_SPMDListenSocket != INVALID_SOCKET && 
            FD_ISSET(AMUDP_SPMDListenSocket, psockset)) { // incoming connection on coordination socket
          //DEBUG_MASTER("got some activity on AMUDP_SPMDListenSocket");
          if (numSlavesAttached < AMUDP_SPMDNUMPROCS) { // attach a slave
            SockAddr remoteAddr;
            SOCKET newcoord = accept_socket(AMUDP_SPMDListenSocket, remoteAddr);

            #if USE_COORD_KEEPALIVE
            { // make sure we get connection termination notification in a timely manner
              int val = 1;
              if (setsockopt(newcoord, SOL_SOCKET, SO_KEEPALIVE, (char *)&val, sizeof(int)) == SOCKET_ERROR)
                DEBUG_MASTER("failed to setsockopt(SO_KEEPALIVE) on coord socket");
            }
            #endif

            { // receive bootstrapping info
              static int32_t next_procid = 0;
              int32_t procid, procid_nb;
              en_t name;

              recvAll(newcoord, &procid_nb, sizeof(procid_nb));
              recvAll(newcoord, &name, sizeof(name));
              procid = ntoh32(procid_nb);
              if (procid == AMUDP_PROCID_ALLOC) {
                // This is a request (e.g. by a spawner) for a procid assignment
                procid = next_procid++;
                procid_nb = hton32(procid);
                sendAll(newcoord, &procid_nb, sizeof(procid_nb));
                shutdown(newcoord, SHUT_RDWR);
                close_socket(newcoord);
              } else {
                // This is a slave connecting
                if (procid == AMUDP_PROCID_NEXT) procid = next_procid++;
                AMUDP_SPMDSlaveSocket[procid] = newcoord;
                AMUDP_SPMDTranslation_name[procid] = name;
                coordList.insert(newcoord);
                allList.insert(newcoord);
                numSlavesAttached++;
              }
            }

            if (numSlavesAttached == AMUDP_SPMDNUMPROCS) { // all have now reported in, so we can begin computation
              // close listener
              close_socket(AMUDP_SPMDListenSocket);
              allList.remove(AMUDP_SPMDListenSocket);
              AMUDP_SPMDListenSocket = INVALID_SOCKET;

              int32_t bootstrapinfosz_nb = hton32(sizeof(bootstrapinfo));
              // transmit bootstrapping info
              for (int i=0; i < AMUDP_SPMDNUMPROCS; i++) {
                // fill out process-specific bootstrap info
                bootstrapinfo.procid = hton32(i);
                bootstrapinfo.tag = AMUDP_SPMDTranslation_tag[i];
                // send it
                sendAll(AMUDP_SPMDSlaveSocket[i], &bootstrapinfosz_nb, sizeof(int32_t));
                sendAll(AMUDP_SPMDSlaveSocket[i], &bootstrapinfo, sizeof(bootstrapinfo));
                sendAll(AMUDP_SPMDSlaveSocket[i], AMUDP_SPMDTranslation_name, AMUDP_SPMDNUMPROCS*sizeof(en_t));
                sendAll(AMUDP_SPMDSlaveSocket[i], AMUDP_SPMDTranslation_tag, AMUDP_SPMDNUMPROCS*sizeof(tag_t));
                sendAll(AMUDP_SPMDSlaveSocket[i], AMUDP_SPMDMasterEnvironment, ntoh32(bootstrapinfo.environtablesz));
              }
              if (!AMUDP_SilentMode) {
                printf("Endpoint table (nproc=%i):\n", AMUDP_SPMDNUMPROCS);
                for (int j=0; j < AMUDP_SPMDNUMPROCS; j++) {
                  char temp[80];
                  printf(" P#%i:\t%s", j, AMUDP_enStr(AMUDP_SPMDTranslation_name[j], temp));
                  printf("\ttag: %s\n", AMUDP_tagStr(ntoh64(AMUDP_SPMDTranslation_tag[j]), temp));
                }
                fflush(stdout);
              }
            }
          } else AMUDP_Err("master detected some unrecognized activity on AMUDP_SPMDListenSocket");
        }
        //------------------------------------------------------------------------------------
        // coord sockets
        if ((numset = coordList.getIntersection(psockset, tempSockArr, AMUDP_SPMDNUMPROCS))) { // we have some active coord sockets
          //DEBUG_MASTER("got some activity on coord sockets");
          for (int i=0; i < numset; i++) {
            SOCKET s = tempSockArr[i];
            AMUDP_assert(FD_ISSET(s, psockset));
            if (isClosed(s)) {
              DEBUG_MASTER("dropping a coordList socket...\n");
              coordList.remove(s);
              allList.remove(s);

              #if ABORT_JOB_ON_NODE_FAILURE
                int exitCode = -1;
                int32_t exitCode_nb = hton32(exitCode);
                for (int i=0; i < (int)coordList.getCount(); i++) {
                  sendAll(coordList[i], "E");
                  sendAll(coordList[i], &exitCode_nb, sizeof(int32_t));
                  close_socket(coordList[i]);
                }
                if (!socklibend()) AMUDP_Err("master failed to socklibend()");
                DEBUG_MASTER("Lost a worker process - job aborting...");
                exit(exitCode);
              #endif
              continue;
            }
            char command;
            recvAll(s, &command, 1);
            switch(command) {
              case 'B': { // enter barrier
                static int AMUDP_SPMDBarrierCount = 0; /* number of processors that have entered barrier */
                AMUDP_SPMDBarrierCount++;
                if (AMUDP_SPMDBarrierCount == AMUDP_SPMDNUMPROCS) { // barrier complete
                  DEBUG_MASTER("Completed barrier");
                  // broadcast completion message
                  for (int i=0; i < (int)coordList.getCount(); i++) {
                    sendAll(coordList[i], "B");
                  }
                  AMUDP_SPMDBarrierCount = 0;
                }
                break;
              }

              case 'G': { // enter gather
                static int AMUDP_SPMDGatherCount = 0; /* number of processors that have sent gather messages */
                static int AMUDP_SPMDGatherLen = 0;
                static char *AMUDP_SPMDGatherBuf = NULL;
                int32_t len=0;
                int32_t len_nb=0;
                int32_t id=0;
                int32_t id_nb=0;
                try {
                  recvAll(s, &id_nb, sizeof(int32_t));
                  recvAll(s, &len_nb, sizeof(int32_t));
                } catch (xSocket& exn) {
                  AMUDP_Err("got exn while reading gather len: %s", exn.why());
                }
                id = ntoh32(id_nb);
                len = ntoh32(len_nb);
                AMUDP_assert(id >= 0 && id < AMUDP_SPMDNUMPROCS && len > 0);
                if (AMUDP_SPMDGatherCount == 0) { // first slave to report
                  AMUDP_assert(AMUDP_SPMDGatherBuf == NULL && AMUDP_SPMDGatherLen == 0);
                  AMUDP_SPMDGatherLen = len;
                  AMUDP_SPMDGatherBuf = (char *)AMUDP_malloc(AMUDP_SPMDGatherLen*AMUDP_SPMDNUMPROCS);
                } else AMUDP_assert(len == AMUDP_SPMDGatherLen);
                try {
                  recvAll(s, &(AMUDP_SPMDGatherBuf[AMUDP_SPMDGatherLen*id]), AMUDP_SPMDGatherLen);
                } catch (xSocket& exn) {
                  AMUDP_Err("got exn while reading gather data: %s", exn.why());
                }
                AMUDP_SPMDGatherCount++;
                if (AMUDP_SPMDGatherCount == AMUDP_SPMDNUMPROCS) { // gather complete
                  DEBUG_MASTER("Completed gather");
                  hton32a(&len);
                  // broadcast completion data
                  for (int i=0; i < (int)coordList.getCount(); i++) {
                    sendAll(coordList[i], "G");
                    sendAll(coordList[i], &len_nb, sizeof(int32_t));
                    sendAll(coordList[i], AMUDP_SPMDGatherBuf, AMUDP_SPMDGatherLen*AMUDP_SPMDNUMPROCS);
                  }
                  AMUDP_free(AMUDP_SPMDGatherBuf);
                  AMUDP_SPMDGatherBuf = NULL;
                  AMUDP_SPMDGatherCount = 0;
                  AMUDP_SPMDGatherLen = 0;
                }
                break;
              }

            #ifdef UETH
              case 'F': { // NIC fail-over
                // get relevant en_t's
                en_t olden;
                en_t newen;
                int failedidx=-1;
                int32_t failedidx_nb=-1;
                try {
                  recvAll(s, &failedidx_nb, sizeof(int32_t));
                  recvAll(s, &olden, sizeof(en_t));
                  recvAll(s, &newen, sizeof(en_t));
                } catch (xSocket& exn) {
                  AMUDP_Err("got exn while reading fail-over addresses: %s", exn.why());
                }
                failedidx = ntoh32(failedidx_nb);
                if (failedidx < 0 || failedidx >= AMUDP_SPMDNUMPROCS)
                  AMUDP_Err("unrecognized endpoint received in fail-over message");
                if (!enEqual(AMUDP_SPMDTranslation_name[failedidx], olden)) 
                  AMUDP_Err("mismatched slaveid in fail-over message");
                // update our local table 
                AMUDP_SPMDTranslation_name[failedidx] = newen;
                // tell all slaves about the change
                for (int i=0; i < (int)coordList.getCount(); i++) {
                  sendAll(coordList[i], "F");
                  sendAll(coordList[i], failedidx_nb, sizeof(int32_t));
                  sendAll(coordList[i], &olden, sizeof(en_t));
                  sendAll(coordList[i], &newen, sizeof(en_t));
                }
                if (!AMUDP_SilentMode) {
                  char temp[80];
                  printf("master: processed NIC failover on slave %i: ", failedidx);
                  printf("%s ->", AMUDP_enStr(olden, temp));
                  printf(" %s\n", AMUDP_enStr(newen, temp));
                }
                break;
              }

              case 'A': { // NIC fail-over acknowledgement - bounce to slave
                // get relevant en_t's
                int failedidx=-1;
                int32_t failedidx_nb=-1;
                try {
                  recvAll(s, &failedidx_nb, sizeof(int32_t));
                  failedidx = ntoh32(failedidx_nb);

                  AMUDP_assert(failedidx > 0 && failedidx < AMUDP_SPMDNUMPROCS);

                  sendAll(coordList[failedidx], "A");
                  sendAll(coordList[failedidx], &failedidx_nb, sizeof(int32_t));
                } catch (xSocket& exn) {
                  AMUDP_Err("got exn while handling fail-over ack: %s", exn.why());
                }
                break;
              }
            #endif

              case 'E': { // exit code
                // get slave terminate code
                int32_t exitCode_nb = -1;
                int exitCode = -1;
                try {
                  recvAll(s, &exitCode_nb, sizeof(int32_t));
                } catch (xSocket& exn) {
                  AMUDP_Err("got exn while reading exit code: %s", exn.why());
                }
                exitCode = ntoh32(exitCode_nb);
                // tell all other slaves to terminate
                // TODO: perhaps use an active message for this? for now, just rely on coord socket dying
                exitCode_nb = hton32(0);
                for (int i=0; i < (int)coordList.getCount(); i++) {
                  sendAll(coordList[i], "E");
                  sendAll(coordList[i], &exitCode_nb, sizeof(int32_t));
                  close_socket(coordList[i]);
                }
                /* bug 2029 - wait for any final stdout/stderr to arrive before shutdown */
                uint64_t wait_iter = 0;
                while (stdoutList.getCount() || stderrList.getCount()) { // await final output
                  if (!AMUDP_SilentMode && (!wait_iter++)) {
                    printf("Awaiting final slave outputs...\n");
                    fflush(stdout);
                  }
                  if (stdoutList.getCount()) {
                    stdoutList.makeFD_SET(psockset);
                    handleStdOutput(stdout, psockset, stdoutList, allList, stdoutList.getCount());
                  }
                  if (stderrList.getCount()) {
                    stderrList.makeFD_SET(psockset);
                    handleStdOutput(stderr, psockset, stderrList, allList, stderrList.getCount());
                  }
                  sched_yield();
                }
                if (!socklibend()) AMUDP_Err("master failed to socklibend()");
                if (!AMUDP_SilentMode) {
                  printf("Exiting after AMUDP_SPMDExit(%i)...\n", exitCode);
                  fflush(stdout);
                }
                exit(exitCode);
                break;
              }

              default:
                AMUDP_Err("master got an unknown command on coord socket: %c", command);
            }
          }
          if (coordList.getCount() == 0) {
            DEBUG_MASTER("Exiting after losing all worker slave connections (noone called AMUDP_Exit())\n");
            exit(0); // program exit, noone called terminate
          }
        }
        //------------------------------------------------------------------------------------
      } // loop
    } catch (xSocket& exn) {
      AMUDP_FatalErr("Master got an xSocket: %s", exn.why());
    } catch (xBase& exn) {
      AMUDP_FatalErr("Master got an xBase: %s", exn.why());
    }
  }
  /* ------------------------------------------------------------------------------------ 
   *  I'm a worker slave 
   * ------------------------------------------------------------------------------------ */
  else {  
    #ifdef AMUDP_BLCR_ENABLED
      // Restart Step 1: Gets procid from master and restarts corresponding context file
      const int doRunRestart = (slave_flag < 0);
      // Not either of the restart cases:
      const int doFullBoostrap = !(doRunRestart || AMUDP_SPMDRestartActive);
    #else
      #define doRunRestart 0
      #define doFullBoostrap 1
    #endif

    int temp;

    /* propagate verbosity setting from master */
    AMUDP_SilentMode = (slave_flag < 2); // TODO: values >2 for more verbose

    if (doFullBoostrap) {
    #if FREEZE_SLAVE
      freezeForDebugger();
    #else
      /* do *not* use prefixed getenv here - want an independent freeze point */
      if (getenv("AMUDP_FREEZE")) freezeForDebugger();
    #endif
    }

    if (!eb || !ep) AMUDP_RETURN_ERR(BAD_ARG);
    if (doFullBoostrap && AM_Init() != AM_OK) {
      AMUDP_Err("Failed to AM_Init() in AMUDP_SPMDStartup");
      AMUDP_RETURN_ERRFR(RESOURCE, AMUDP_SPMDStartup, "AM_Init() failed");
    }

    // parse special env var with our arguments
    char * slave_args = strdup(env_var);
    SockAddr masterAddr;
    { // Strip required "flag," off beginning
      char *endptr;
      (void) strtol(slave_args, &endptr, 0);
      if (! endptr || (',' != endptr[0])) AMUDP_Err("Malformed arguments '%s' to slave process", env_var);
      slave_args = endptr + 1;
    }
  #if HAVE_GETIFADDRS
    // extract appended WORKERIP which it is needed before the master env is sent
    const char *network = "";
    { char *delimiter = strrchr(slave_args,',');
      if (delimiter != NULL) {
        network = delimiter+1;
        *delimiter = '\0';
      }
    }
  #endif
    { // extract master's address
      if (strchr(slave_args,',')) {
        masterAddr = SockAddr(slave_args);
      } else {
        char *IPStr = (char *)AMUDP_malloc(strlen(slave_args)+10);
        strcpy(IPStr, slave_args);
        char *portStr = strchr(IPStr, ':');
        if (!portStr) {
          AMUDP_Err("Malformed address argument passed to slave:'%s' (missing port)", slave_args);
          AMUDP_RETURN_ERR(BAD_ARG);
        }
        int masterPort = atoi(portStr+1);
        if (masterPort < 1 || masterPort > 65535) {
          AMUDP_Err("Malformed address argument passed to slave:'%s' (bad port=%i)", slave_args, masterPort);
          AMUDP_RETURN_ERR(BAD_ARG);
        }
        (*portStr) = '\0';
        try {
          masterAddr = SockAddr((uint32_t)DNSLookup(IPStr).IP(), (uint16_t)masterPort);
        } catch (xSocket &exn) {
          AMUDP_RETURN_ERRFR(RESOURCE, AMUDP_SPMDStartup, "slave failed DNSLookup on master host name");
        }
        AMUDP_free(IPStr);
      }
    }

    try {
      if (!AMUDP_SilentMode) {
        fprintf(stderr, "slave connecting to %s:%i\n", masterAddr.IPStr(), masterAddr.port());
        fflush(stderr);
      }

      AMUDP_SPMDControlSocket = connect_socket(masterAddr);

      #if USE_COORD_KEEPALIVE
      { // make sure we get connection termination notification in a timely manner
        int val = 1;
        if (setsockopt(AMUDP_SPMDControlSocket, SOL_SOCKET, SO_KEEPALIVE, (char *)&val, sizeof(int)) == SOCKET_ERROR)
          DEBUG_MASTER("failed to setsockopt(SO_KEEPALIVE) on coord socket");
      }
      #endif

      #ifdef AMUDP_BLCR_ENABLED
        if (doRunRestart) {
          // construct args for use by the caller
          static const char *new_argv[] =  { (*argv)[0], /* spawner */
                                             (*argv)[1], /* DIR     */
                                             env_var+1,  /* env_var w/ "-1" -> "1"  */
                                             NULL };
          *argc = 4;
          *argv = (char**)new_argv;

          // Get procid from master and return it to the caller
          int32_t procid_nb = hton32(AMUDP_PROCID_ALLOC);
          sendAll(AMUDP_SPMDControlSocket, &procid_nb, sizeof(procid_nb));
          sendAll(AMUDP_SPMDControlSocket, &AMUDP_SPMDName, sizeof(AMUDP_SPMDName));
          recvAll(AMUDP_SPMDControlSocket, &procid_nb, sizeof(procid_nb));
          shutdown(AMUDP_SPMDControlSocket, SHUT_RDWR);
          close_socket(AMUDP_SPMDControlSocket);

          return ntoh32(procid_nb);
        }
      #endif // AMUDP_BLCR_ENABLED

      #ifndef UETH
        /* here we assume the interface used to contact the master is the same 
           one to be used for UDP endpoints */
        SockAddr myinterface = getsockname(AMUDP_SPMDControlSocket);
        #if HAVE_GETIFADDRS // allow user to override our same-interface assumption
          network = AMUDP_getenv_prefixed_withdefault("WORKERIP",network);
          if (network && network[0]) {
            SockAddr networkaddr(network, 0);
            if (! getIfaceAddr(networkaddr, myinterface)) {
              AMUDP_Err("Failed to find interface on requested subnet %s", network);
              AMUDP_RETURN(AM_ERR_RESOURCE);
            }
          }
        #endif
        if (!AMUDP_SilentMode) {
          fprintf(stderr, "slave using IP %s\n", myinterface.IPStr());
          fflush(stderr);
        }
        AMUDP_SetUDPInterface(myinterface.IP());
      #endif
        
      /* create endpoint and get name */
      temp = AM_AllocateBundle(AM_SEQ, &AMUDP_SPMDBundle);
      if (temp != AM_OK) {
        AMUDP_Err("Failed to create bundle in AMUDP_SPMDStartup");
        AMUDP_RETURN(temp);
      }
      temp = AM_AllocateEndpoint(AMUDP_SPMDBundle, &AMUDP_SPMDEndpoint, &AMUDP_SPMDName);
      if (temp != AM_OK) {
        AMUDP_Err("Failed to create endpoint in AMUDP_SPMDStartup");
        AMUDP_RETURN(temp);
      }

      // send our procid and endpoint name to the master
      int32_t procid_nb = hton32(AMUDP_SPMDMYPROC);
      sendAll(AMUDP_SPMDControlSocket, &procid_nb, sizeof(procid_nb));
      sendAll(AMUDP_SPMDControlSocket, &AMUDP_SPMDName, sizeof(AMUDP_SPMDName));

      // get information from master 
      // get the bootstrap info and translation table
      AMUDP_SPMDBootstrapInfo_t bootstrapinfo;
      int32_t bootstrapinfosz_nb;
      recvAll(AMUDP_SPMDControlSocket, &bootstrapinfosz_nb, sizeof(int32_t));
      int32_t bootstrapinfosz = ntoh32(bootstrapinfosz_nb);
      AMUDP_assert(bootstrapinfosz == sizeof(AMUDP_SPMDBootstrapInfo_t));
      recvAll(AMUDP_SPMDControlSocket, &bootstrapinfo, sizeof(AMUDP_SPMDBootstrapInfo_t));
      
      // unpack the bootstrapping info
      if (doFullBoostrap) {
        AMUDP_SPMDNUMPROCS = ntoh32(bootstrapinfo.numprocs);
        AMUDP_SPMDMYPROC = ntoh32(bootstrapinfo.procid);
      } else {
        if (AMUDP_SPMDNUMPROCS != (int32_t)ntoh32(bootstrapinfo.numprocs)) {
          AMUDP_Err("Restarting with wrong numprocs in AMUDP_SPMDStartup");
          AMUDP_RETURN_ERR(BAD_ARG);
        }
        if (AMUDP_SPMDMYPROC != (int32_t)ntoh32(bootstrapinfo.procid)) {
          AMUDP_Err("Restarting with wrong procid in AMUDP_SPMDStartup");
          AMUDP_RETURN_ERR(BAD_ARG);
        }
      }
      if (networkpid) *networkpid = ntoh64(bootstrapinfo.networkpid);

      // sanity checking on bootstrap info
      AMUDP_assert(AMUDP_SPMDNUMPROCS > 0 && AMUDP_SPMDNUMPROCS < AMUDP_MAX_SPMDPROCS);
      AMUDP_assert(AMUDP_SPMDMYPROC >= 0 && AMUDP_SPMDMYPROC < AMUDP_SPMDNUMPROCS);

      en_t *tempTranslation_name = (en_t *)AMUDP_malloc(AMUDP_SPMDNUMPROCS*sizeof(en_t));
      tag_t *tempTranslation_tag = (tag_t *)AMUDP_malloc(AMUDP_SPMDNUMPROCS*sizeof(tag_t));
      AMUDP_assert(tempTranslation_name && tempTranslation_tag);
      recvAll(AMUDP_SPMDControlSocket, tempTranslation_name, AMUDP_SPMDNUMPROCS*sizeof(en_t));
      recvAll(AMUDP_SPMDControlSocket, tempTranslation_tag, AMUDP_SPMDNUMPROCS*sizeof(tag_t));

      AMUDP_assert(ntoh64(tempTranslation_tag[AMUDP_SPMDMYPROC]) == ntoh64(bootstrapinfo.tag));
      AMUDP_assert(enEqual(tempTranslation_name[AMUDP_SPMDMYPROC], AMUDP_SPMDName));

      // setup translation table
      for (int i = 0; i < AMUDP_SPMDNUMPROCS; i++) {
        temp = AM_Map(AMUDP_SPMDEndpoint, i, tempTranslation_name[i], ntoh64(tempTranslation_tag[i]));
        if (temp != AM_OK) {
          AMUDP_Err("Failed to AM_Map() in AMUDP_SPMDStartup");
          AMUDP_RETURN(temp);
        }
      }

      AMUDP_free(tempTranslation_name);
      tempTranslation_name = NULL;
      AMUDP_free(tempTranslation_tag);
      tempTranslation_tag = NULL;

      // receive snapshot of master environment
      int environtablesz = ntoh32(bootstrapinfo.environtablesz);
      char *tempEnvironment = (char *)AMUDP_malloc(environtablesz);
      AMUDP_assert(tempEnvironment != NULL);
      recvAll(AMUDP_SPMDControlSocket, tempEnvironment, environtablesz);
      if (doFullBoostrap) {
        AMUDP_SPMDMasterEnvironment = tempEnvironment;
      } else  {
        // On restart we keep the environment from the initial run
        AMUDP_assert(AMUDP_SPMDMasterEnvironment != NULL);
        AMUDP_free(tempEnvironment);
      }
      
      /* allocate network buffers */
      if (doFullBoostrap) networkdepth = ntoh32(bootstrapinfo.depth);
      temp = AM_SetExpectedResources(AMUDP_SPMDEndpoint, AMUDP_SPMDNUMPROCS, networkdepth);
      if (temp != AM_OK) {
        AMUDP_Err("Failed to AM_SetExpectedResources() in AMUDP_SPMDStartup");
        AMUDP_RETURN(temp);
      }
      #ifdef AMUDP_BLCR_ENABLED
        AMUDP_SPMDNetworkDepth = networkdepth;
      #endif
      
      // set tag
      temp = AM_SetTag(AMUDP_SPMDEndpoint, ntoh64(bootstrapinfo.tag));
      if (temp != AM_OK) {
        AMUDP_Err("Failed to AM_SetTag() in AMUDP_SPMDStartup");
        AMUDP_RETURN(temp);
      }

      // BLCR-TODO: problems if we change spawner?
      #if !DISABLE_STDSOCKET_REDIRECT
        if (bootstrapinfo.stdinMaster) {
            // perform stdin/out/err redirection
            newstdin  = connect_socket(SockAddr(masterAddr.IP(),ntoh16(bootstrapinfo.stdinMaster)));
            newstdout = connect_socket(SockAddr(masterAddr.IP(),ntoh16(bootstrapinfo.stdoutMaster)));
            newstderr = connect_socket(SockAddr(masterAddr.IP(),ntoh16(bootstrapinfo.stderrMaster)));
            #if 0
              // disable buffering
              setvbuf(stdin, NULL, _IONBF, 0);
              setvbuf(stdout, NULL, _IONBF, 0);
              setvbuf(stderr, NULL, _IONBF, 0);
            #endif
            #if PLATFORM_OS_MSWINDOWS
              #if 0
              // not sure how to do this on Win32 yet - maybe use _fdopen() and/or _fileno()
              { FILE* newf;
                if( ( newf = fopen( "c:\\data", "w" ) ) == NULL ) {
                  puts( "Can't open file 'data'\n" );
                  exit( 1 );
                }
                if( ( newf = _fdopen( newstdout, "w" ) ) == NULL ) {
                  puts( "fdopen failed\n" );
                  exit( 1 );
                }
                if (dup2(_fileno(newf), FD_STDOUT) < 0) { // redirect stdout to socket
                  perror("dup2(stdout)");
                  _exit(1); 
                }
                printf("yomama\n");
                fflush(stdout);
                fclose(newf);
                exit(0);
              }
              #endif
            #else
              /* UNIX */
              if (dup2(newstdin, FD_STDIN) < 0) { // redirect stdout to socket
                perror("dup2(stdin)");
                _exit(1); 
              }
              if (dup2(newstdout, FD_STDOUT) < 0) { // redirect stdout to socket
                perror("dup2(stdout)");
                _exit(1); 
              }
              if (dup2(newstderr, FD_STDERR) < 0) { // redirect stdout to socket
                perror("dup2(stderr)");
                _exit(1); 
              }
            #endif
        }
     #endif

      AMUDP_FaultInjectionRate = bootstrapinfo.faultInjectionRate;
      ntoh64a(&AMUDP_FaultInjectionRate);
      if (AMUDP_FaultInjectionRate != 0.0) {
        AMUDP_FaultInjectionEnabled = 1;
        fprintf(stderr, "*** Warning: AMUDP running with fault injection enabled. Rate = %6.2f %%\n",
          100.0 * AMUDP_FaultInjectionRate);
        fflush(stderr);
      }
    } catch (xSocket& exn) {
      AMUDP_FatalErr("Got an xSocket while spawning slave process: %s", exn.why());
    }

    *eb = AMUDP_SPMDBundle;
    *ep = AMUDP_SPMDEndpoint;
    AMUDP_SPMDStartupCalled = 1;

    /* Ensure that any children we fork() won't appear to be slaves */
    #if 1
      unsetenv(AMUDP_SPMDSLAVE_ARGS);
    #else
      putenv((char*)AMUDP_SPMDSLAVE_ARGS "=0");
    #endif

    #if USE_ASYNC_TCP_CONTROL
      // enable async notification
      reghandler(AMUDP_SIGIO, AMUDP_SPMDControlSocketCallback);
      if (fcntl(AMUDP_SPMDControlSocket, F_SETOWN, getpid())) {
        perror("fcntl(F_SETOWN, getpid())");
        AMUDP_FatalErr("Failed to fcntl(F_SETOWN, getpid()) on TCP control socket - try disabling USE_ASYNC_TCP_CONTROL");
      }
      if (fcntl(AMUDP_SPMDControlSocket, F_SETSIG, AMUDP_SIGIO)) {
        perror("fcntl(F_SETSIG)");
        AMUDP_FatalErr("Failed to fcntl(F_SETSIG, AMUDP_SIGIO) on TCP control socket - try disabling USE_ASYNC_TCP_CONTROL");
      }
      if (fcntl(AMUDP_SPMDControlSocket, F_SETFL, O_ASYNC|O_NONBLOCK)) { 
        perror("fcntl(F_SETFL, O_ASYNC|O_NONBLOCK)");
        AMUDP_FatalErr("Failed to fcntl(F_SETFL, O_ASYNC|O_NONBLOCK) on TCP control socket - try disabling USE_ASYNC_TCP_CONTROL");
      }
    #endif

    if (!AMUDP_SilentMode) {
      char temp[80];
      tag_t tag;
      AM_GetTag(AMUDP_SPMDEndpoint, &tag);
      fprintf(stderr, "Slave %i/%i starting (tag=%s)...\n", 
        AMUDP_SPMDMyProc(), AMUDP_SPMDNumProcs(), AMUDP_tagStr(tag, temp));
      fflush(stderr);
    }

    return AM_OK;
  }
  /* ------------------------------------------------------------------------------------ */
  AMUDP_FatalErr("never reach here");
  return AM_OK;
}

/* ------------------------------------------------------------------------------------ 
 *  worker control handler
 * ------------------------------------------------------------------------------------ */
// Called by a slave to service all traffic currently waiting on the TCP control socket.
//   controlMessagesServiced - optional out-parameter (may be NULL); set to the number of
//                             control messages serviced by this call (0 if none)
// Every return statement in this function returns AM_OK; unrecoverable conditions are
// routed through AMUDP_SPMDShutdown() / AMUDP_FatalErr() instead of an error code.
// Commands handled (one byte each, followed by a command-specific payload):
//   'B' barrier complete   - sets AMUDP_SPMDBarrierDone
//   'G' gather complete    - reads the gathered data into AMUDP_SPMDGatherData, sets AMUDP_SPMDGatherDone
//   'F' NIC fail-over      - (UETH only) re-maps olden -> newen in the local tables, then acks with 'A'
//   'A' fail-over ack      - (UETH only) decrements AMUDP_FailoverAcksOutstanding
//   'E' exit               - reads the exit code and calls AMUDP_SPMDShutdown() with it
extern int AMUDP_SPMDHandleControlTraffic(int *controlMessagesServiced) {
  // zero the count first so the out-parameter is well-defined on the early-return paths too
  if (controlMessagesServiced) *controlMessagesServiced = 0;
  if (AMUDP_SPMDControlSocket == INVALID_SOCKET) return AM_OK; // not running in SPMD mode
  #if USE_ASYNC_TCP_CONTROL
    if (!AMUDP_SPMDIsActiveControlSocket) return AM_OK; // nothing to do
    ASYNC_TCP_DISABLE();
    AMUDP_SPMDIsActiveControlSocket = FALSE; 
  #endif 
  
  while (1) { // service everything waiting
    try {
      if (!inputWaiting(AMUDP_SPMDControlSocket)) {
        ASYNC_TCP_ENABLE();
        return AM_OK; // nothing more to do
      }
    } catch (xBase &exn) {
      // the error is only reported here; the isClosed()/recvAll() below still run on the socket
      AMUDP_Err("Error checking AMUDP_SPMDControlSocket: %s", exn.why()); // probably conn reset
    }

    try {
      SOCKET s = AMUDP_SPMDControlSocket;

      if (isClosed(s)) {
        DEBUG_SLAVE("master control socket slammed shut. Exiting...\n");
        AMUDP_SPMDShutdown(1); // NOTE(review): presumably does not return - confirm
      }

      // there's something waiting on the control socket for us - grab it
      char command;
      recvAll(s, &command, 1);
      switch(command) {
        case 'B': { // barrier complete
          AMUDP_assert(!AMUDP_SPMDBarrierDone);
          AMUDP_SPMDBarrierDone = 1; // flag completion
          break;
        }

        case 'G': { // gather complete
          AMUDP_assert(!AMUDP_SPMDGatherDone && AMUDP_SPMDGatherLen > 0 && AMUDP_SPMDGatherData != NULL);
          try {
            // payload: per-process length (network byte order), then the data for all processes
            int32_t len_nb = -1;
            recvAll(s, &len_nb, sizeof(int32_t));
            int32_t len = ntoh32(len_nb);
            AMUDP_assert(len == AMUDP_SPMDGatherLen);
            recvAll(s, AMUDP_SPMDGatherData, AMUDP_SPMDGatherLen*AMUDP_SPMDNUMPROCS);
          } catch (xSocket& exn) {
            AMUDP_FatalErr("got exn while reading gather data: %s", exn.why());
          }
          AMUDP_SPMDGatherDone = 1; // flag completion
          break;
        }

      #ifdef UETH
        case 'F': { // NIC fail-over
          // get relevant en_t's
          en_t olden;
          en_t newen;
          int32_t failidx_nb = -1;
          int failidx = -1;
          try {
            recvAll(s, &failidx_nb, sizeof(int32_t));
            recvAll(s, &olden, sizeof(en_t));
            recvAll(s, &newen, sizeof(en_t));
          } catch (xSocket& exn) {
            AMUDP_FatalErr("got exn while reading fail-over addresses: %s", exn.why());
          }
          failidx = ntoh32(failidx_nb);

          // this update could be rather slow, but we expect it to run extremely infrequently
          DEBUG_SLAVE("Received a NIC fail-over notification. Updating tables...");

          // update all translation tables
          for (int i = 0; i < AMUDP_SPMDBundle->n_endpoints; i++) {
            ep_t ep = AMUDP_SPMDBundle->endpoints[i];
            AMUDP_assert(ep);

            for (int j = 0; j < AMUDP_MAX_NUMTRANSLATIONS; j++) {
              if (ep->translation[j].inuse) {
                if (enEqual(ep->translation[j].name, olden)) { // need to re-map
                  ep->translation[j].name = newen;
                }
              }
            }
            for (int procid = 0; procid < ep->P; procid++) {
              if (enEqual(ep->perProcInfo[procid].remoteName, olden)) { // need to re-map
                AMUDP_assert(procid == failidx);
                ep->perProcInfo[procid].remoteName = newen;
                /* need to remap the preset request/reply destinations */
                for (int inst = 0; inst < ep->depth; inst++) {
                  amudp_bufdesc_t *reqdesc = GET_REQ_DESC(ep, procid, inst);
                  amudp_bufdesc_t *repdesc = GET_REP_DESC(ep, procid, inst);
                  amudp_buf_t *reqbuf = GET_REQ_BUF(ep, procid, inst);
                  amudp_buf_t *repbuf = GET_REP_BUF(ep, procid, inst);

                  if (reqdesc->transmitCount > 0)
                    ueth_cancel_send(reqbuf, reqbuf->bufhandle);
                  if (ueth_set_packet_destination(reqbuf, &newen) != UETH_OK)
                    AMUDP_Err("ueth_set_packet_destination failed on NIC fail-over");

                  if (repdesc->transmitCount > 0)
                    ueth_cancel_send(repbuf, repbuf->bufhandle);
                  if (ueth_set_packet_destination(repbuf, &newen) != UETH_OK)
                    AMUDP_Err("ueth_set_packet_destination failed on NIC fail-over");
                }
              }
            }

            // messages already accepted into rx buffers are not rewritten here:
            // UETH has no explicit receive buffers we can see
          }
          DEBUG_SLAVE("Update complete.");
          if (!AMUDP_SilentMode) {
            char temp[80];
            printf("slave: handled NIC failover: ");
            printf("%s ->", AMUDP_enStr(olden, temp));
            printf(" %s\n", AMUDP_enStr(newen, temp));
          }

          try { // send acknowledgement to master (echoes the index in network byte order)
            sendAll(s, "A");
            sendAll(s, &failidx_nb, sizeof(int32_t));
          } catch (xSocket& exn) {
            AMUDP_Err("Slave got an xSocket sending failure ACK: %s. Exiting...", exn.why());
            AMUDP_SPMDShutdown(1);
          }
          break;
        }
        case 'A': { // NIC fail-over acknowledgement - record an ACK
          int32_t failedidx_nb = -1;
          int failedidx = -1;
          try {
            recvAll(s, &failedidx_nb, sizeof(int32_t));
            failedidx = ntoh32(failedidx_nb);
            // acks are only expected for a fail-over that this process initiated
            AMUDP_assert(failedidx == AMUDP_SPMDMYPROC);
            AMUDP_assert(AMUDP_FailoverAcksOutstanding > 0);

            AMUDP_FailoverAcksOutstanding--;
          } catch (xSocket& exn) {
            AMUDP_Err("got exn while handling fail-over ack: %s", exn.why());
          }
          break;
        }
      #endif

        case 'E': { // exit code
          // get slave terminate code
          int32_t exitCode_nb = -1;
          int exitCode = -1; // used as-is if the read below fails
          try {
            recvAll(s, &exitCode_nb, sizeof(int32_t));
            exitCode = ntoh32(exitCode_nb);
          } catch (xSocket& exn) {
            AMUDP_Err("got exn while reading exit code: %s", exn.why());
          }
          if (!AMUDP_SilentMode) {
            printf("Exiting after exit signal from master (%i)...\n", exitCode);
            fflush(stdout);
          }
          AMUDP_SPMDShutdown(exitCode);
          break;
        }

        default:
          AMUDP_FatalErr("slave got an unknown command on coord socket: %c", command);
        }
    } catch (xSocket& exn) {
      AMUDP_Err("Slave got an xSocket: %s. Exiting...", exn.why());
      AMUDP_SPMDShutdown(1);
    } catch (xBase& exn) {
      AMUDP_Err("Slave got an xBase: %s. Exiting...", exn.why());
      AMUDP_SPMDShutdown(1);
    }
    if (controlMessagesServiced) (*controlMessagesServiced)++;
  }
}
/* ------------------------------------------------------------------------------------ 
 *  handler for NIC fail-over
 * ------------------------------------------------------------------------------------ */
#ifdef UETH
// Handler for a local NIC fail-over (UETH builds only).
//   address - the new address of this process's endpoint; must be non-NULL
// Steps performed:
//   1. record the new name locally (AMUDP_UETH_endpoint->name and AMUDP_SPMDName)
//   2. send an 'F' message (our procid, old name, new name) to the master over the control socket
//   3. re-issue the destination on every preset request/reply buffer of the endpoint
//   4. fix up packets already received via ueth_fixup_recv()
//   5. service control traffic until AMUDP_FailoverAcksOutstanding drops to zero
extern void AMUDP_SPMDAddressChangeCallback(ueth_addr_t *address) {
  DEBUG_SLAVE("AMUDP_SPMDAddressChangeCallback() called.. Fail-over starting...");
  AMUDP_assert(AMUDP_UETH_endpoint);
  AMUDP_assert(address);
  int32_t failid_nb = hton32(AMUDP_SPMDMYPROC); // our procid, in network byte order
  en_t olden = AMUDP_UETH_endpoint->name;
  en_t newen = *address;

  AMUDP_UETH_endpoint->name = newen;
  AMUDP_SPMDName = newen;

  // send change to master, who will propagate new address info to peers and back to us
  // we update our translation table on receiving the reflection from the master
  AMUDP_FailoverAcksOutstanding = AMUDP_SPMDNUMPROCS; // one ack expected per process
  try {
    ASYNC_TCP_DISABLE();
    sendAll(AMUDP_SPMDControlSocket, "F");
    // size must match the int32_t that the master reads for this field
    sendAll(AMUDP_SPMDControlSocket, &failid_nb, sizeof(int32_t));
    sendAll(AMUDP_SPMDControlSocket, &olden, sizeof(en_t));
    sendAll(AMUDP_SPMDControlSocket, &newen, sizeof(en_t));
    ASYNC_TCP_ENABLE();
  } catch (xSocket& exn) {
    AMUDP_Err("Slave got an xSocket: %s. Exiting...", exn.why());
    AMUDP_SPMDShutdown(1);
  } catch (xBase& exn) {
    AMUDP_Err("Slave got an xBase: %s. Exiting...", exn.why());
    AMUDP_SPMDShutdown(1);
  }

  ep_t ep = AMUDP_UETH_endpoint;
  /* need to remap all preset request/reply destinations for failed node */
  for (int inst = 0; inst < ep->depth; inst++) {
    for (int procid = 0; procid < ep->P; procid++) {
      amudp_bufdesc_t *reqdesc = GET_REQ_DESC(ep, procid, inst);
      amudp_bufdesc_t *repdesc = GET_REP_DESC(ep, procid, inst);
      amudp_buf_t *reqbuf = GET_REQ_BUF(ep, procid, inst);
      amudp_buf_t *repbuf = GET_REP_BUF(ep, procid, inst);

      // cancel any in-flight send before re-setting the buffer's destination
      if (reqdesc->transmitCount > 0)
        ueth_cancel_send(reqbuf, reqbuf->bufhandle);
      if (ueth_set_packet_destination(reqbuf, &ep->perProcInfo[procid].remoteName) != UETH_OK)
        AMUDP_Err("ueth_set_packet_destination failed on NIC fail-over");

      if (repdesc->transmitCount > 0)
        ueth_cancel_send(repbuf, repbuf->bufhandle);
      if (ueth_set_packet_destination(repbuf, &ep->perProcInfo[procid].remoteName) != UETH_OK)
        AMUDP_Err("ueth_set_packet_destination failed on NIC fail-over");
    }
  }

  // update any messages already accepted into the rx buffers
  int packetschanged = ueth_fixup_recv(&olden, &newen);
  if (packetschanged < 0)
    AMUDP_Err("ueth_fixup_recv failed on NIC fail-over");

  // wait until all the slaves receive the failover notification and acknowledge
  // before we allow this node to continue transmitting on the new NIC
  // can't use a regular barrier here because it may cause us to poll
  while (AMUDP_FailoverAcksOutstanding > 0) {
    int junk=0;
    AMUDP_SPMDHandleControlTraffic(&junk);
    sched_yield();
  }

  DEBUG_SLAVE("Fail-over complete.");
}
#endif
/* ------------------------------------------------------------------------------------ 
 *  process termination
 * ------------------------------------------------------------------------------------ */
/* callback invoked with the exit code by AMUDP_SPMDShutdown(); NULL means none registered */
static amudp_exitcallback_t AMUDP_SPMDExitCallback = NULL;
/* Register fp as the exit callback (replacing any previous one; NULL clears it).
 * Always returns AM_OK.
 */
extern int AMUDP_SPMDSetExitCallback(amudp_exitcallback_t fp) {
  AMUDP_SPMDExitCallback = fp;
  return AM_OK;
}
extern "C" {
  /* function called by AMUDP_SPMDShutdown() to terminate this process with the
   * given exit code; defaults to _exit, given C linkage so it can be overridden
   * from outside this file */
  void (*AMUDP_SPMDkillmyprocess)(int) = &_exit;
}

/* Shut down this process:
 *  - runs the registered exit callback (if any) with exitcode
 *  - calls AM_Terminate() to release AM resources
 *  - closes the stdio streams, the newstdin/newstdout/newstderr sockets and the control socket
 *  - finally terminates the process via AMUDP_SPMDkillmyprocess(exitcode)
 * Not re-entrant: a second entry triggers a fatal error.
 * Not expected to return; reaching the code after the kill call is reported as fatal.
 */
static int AMUDP_SPMDShutdown(int exitcode) {
  ASYNC_TCP_DISABLE_IGNOREERR(); /* (bug 765) prevent race where master has already reset async control socket */
  /* this function is not re-entrant - if someone tries, something is seriously wrong */
  { static int shutdownInProgress = FALSE;
    if (shutdownInProgress) AMUDP_FatalErr("recursive failure in AMUDP_SPMDShutdown"); 
    shutdownInProgress = TRUE;
  }

  /* flush before handing control to the user callback */
  flushStreams("AMUDP_SPMDShutdown");

  if (AMUDP_SPMDExitCallback) (*AMUDP_SPMDExitCallback)(exitcode);

  /* important to make this call to release resources */
  if (AM_Terminate() != AM_OK) 
    AMUDP_Err("failed to AM_Terminate() in AMUDP_SPMDExit()");

  /* flush again to pick up anything written by the callback or AM_Terminate() */
  flushStreams("AMUDP_SPMDShutdown");

  /* close the standard streams; failures are only reported in verbose debug builds */
  if (fclose(stdin)) {
    #if AMUDP_DEBUG_VERBOSE
      AMUDP_Warn("failed to fclose stdin in AMUDP_SPMDExit()"); 
      perror("fclose");
    #endif
  }
  if (fclose(stdout)) {
    #if AMUDP_DEBUG_VERBOSE
      AMUDP_Warn("failed to fclose stdout in AMUDP_SPMDExit()"); 
      perror("fclose");
    #endif
  }
  if (fclose(stderr)) {
    #if AMUDP_DEBUG_VERBOSE
      AMUDP_Warn("failed to fclose stderr in AMUDP_SPMDExit()"); 
      perror("fclose");
    #endif
  }

  /* use normal shutdown and closesocket to ignore errors */
  if (newstdin != INVALID_SOCKET) {
    shutdown(newstdin, SHUT_RDWR);
    closesocket(newstdin); 
  }
  if (newstdout != INVALID_SOCKET) {
    shutdown(newstdout, SHUT_RDWR);
    closesocket(newstdout); 
  }
  if (newstderr != INVALID_SOCKET) {
    shutdown(newstderr, SHUT_RDWR);
    closesocket(newstderr);
  }

  sched_yield();

  /* the control socket is closed last, and without an explicit shutdown() */
  if (AMUDP_SPMDControlSocket != INVALID_SOCKET) {
    closesocket(AMUDP_SPMDControlSocket);
  }

  /* NOTE(review): stderr was fclose()d above, so this report (and DEBUG_SLAVE below)
   * may not be visible - confirm how AMUDP_Err emits its output */
  if (!socklibend()) AMUDP_Err("slave failed to socklibend()");

  AMUDP_SPMDStartupCalled = 0;
  DEBUG_SLAVE("exiting..");
  AMUDP_SPMDkillmyprocess(exitcode);
  /* only reached if the kill function returned */
  AMUDP_FatalErr("AMUDP_SPMDkillmyprocess failed");
  return AM_OK;
}

/* Exit the parallel job from this slave:
 *  - returns AM_ERR_NOT_INIT (via AMUDP_RETURN_ERR) if SPMD startup has not been called
 *  - sends an "E" message followed by the exit code (network byte order) to the master
 *    over the control socket, then drains the socket until the master closes it
 *  - finally calls AMUDP_SPMDShutdown() to terminate this process
 * Not re-entrant: a second entry triggers a fatal error.
 * Not expected to return on the success path.
 */
extern int AMUDP_SPMDExit(int exitcode) {
  DEBUG_SLAVE("AMUDP_SPMDExit");
  if (!AMUDP_SPMDStartupCalled) AMUDP_RETURN_ERR(NOT_INIT);

  ASYNC_TCP_DISABLE_IGNOREERR(); /* (bug 765) prevent race where master has already reset async control socket */
  /* this function is not re-entrant - if someone tries, something is seriously wrong */
  { static int exitInProgress = FALSE;
    if (exitInProgress) AMUDP_FatalErr("recursive failure in AMUDP_SPMDExit"); 
    exitInProgress = TRUE;
  }

  flushStreams("AMUDP_SPMDExit");

  sched_yield();

  /* notify master we're exiting */

  // We disable exceptions on the following sendALL calls because the C++
  // spec warns that exceptions may not be usable in signal handlers, and
  // GASNet calls here when handling a fatal or termination signal.
  /* try */ {
    int exitcode_nb = hton32(exitcode);
    sendAll(AMUDP_SPMDControlSocket, "E", -1, 0);
    sendAll(AMUDP_SPMDControlSocket, &exitcode_nb, sizeof(int32_t), 0);
    while (1) { // swallow everything and wait for master to close
      char temp;
      int retval = recv(AMUDP_SPMDControlSocket, &temp, 1, 0); 
      /* 0 == orderly close by the peer, SOCKET_ERROR == connection failure: either way we are done */
      if (retval == 0 || retval == SOCKET_ERROR) break;
    }
  } /* catch (xBase& ) { } */ // ignore errors that may happen on conn reset

  AMUDP_SPMDStartupCalled = 0;
  DEBUG_SLAVE("AMUDP_SPMDShutdown..");
  /* exit this proc gracefully */
  /* NOTE(review): the local process is shut down with code 0 rather than exitcode;
   * the real exit code was already reported to the master above - confirm this is intentional */
  AMUDP_SPMDShutdown(0);
  AMUDP_FatalErr("AMUDP_SPMDShutdown failed");
  return AM_OK;
}
/* ------------------------------------------------------------------------------------ 
 *  poll-wait for a flag to become non-zero as a result of a control message
 * ------------------------------------------------------------------------------------ */
static void AMUDP_SPMDWaitForControl(volatile int *done) {
  /* Poll the SPMD bundle until *done becomes non-zero.
   * Two strategies, selected at compile time:
   *   blocking - sleep in AM_WaitSema() and get woken by control activity
   *   polling  - sleep in select() with a timeout that doubles until it exceeds 10ms
   */
  #if USE_BLOCKING_SPMD_BARRIER
    int savedmask;
    AM_GetEventMask(AMUDP_SPMDBundle, &savedmask);

    AM_Poll(AMUDP_SPMDBundle);
    for (;;) {
      if (*done) break;
      /* arm the wakeup, block, then disarm and service whatever arrived */
      AM_SetEventMask(AMUDP_SPMDBundle, AM_NOTEMPTY);
      AMUDP_SPMDwakeupOnControlActivity = 1;
      AM_WaitSema(AMUDP_SPMDBundle);
      AMUDP_SPMDwakeupOnControlActivity = 0;
      AM_Poll(AMUDP_SPMDBundle);
    }

    /* put back the caller's event mask */
    AM_SetEventMask(AMUDP_SPMDBundle, savedmask);
  #else
    uint32_t napusec = 100; /* current sleep interval in microseconds */

    AM_Poll(AMUDP_SPMDBundle);
    while (*done == 0) {
      struct timeval nap;
      nap.tv_sec  = napusec / 1000000;
      nap.tv_usec = napusec % 1000000;
      select(1, NULL, NULL, NULL, &nap); /* sleep a little while */

      AM_Poll(AMUDP_SPMDBundle);

      /* exponential back-off, no further growth once past 10ms */
      if (napusec < 10000) napusec += napusec;
    }
  #endif
}
/* ------------------------------------------------------------------------------------ 
 *  barrier
 * ------------------------------------------------------------------------------------ */
/* Block until the barrier completes.
 * Sends a "B" message to the master over the control socket, then waits until
 * AMUDP_SPMDBarrierDone becomes non-zero (set elsewhere in response to control traffic).
 * Returns AM_OK on success, or AM_ERR_NOT_INIT (via AMUDP_RETURN_ERR) if called before
 * AMUDP_SPMDStartup().
 */
extern int AMUDP_SPMDBarrier() {
  if (!AMUDP_SPMDStartupCalled) {
    AMUDP_Err("called AMUDP_SPMDBarrier before AMUDP_SPMDStartup()");
    AMUDP_RETURN_ERR(NOT_INIT);
  }

  flushStreams("AMUDP_SPMDBarrier");
  /* a non-zero flag here would mean a previous barrier was not consumed */
  AMUDP_assert(AMUDP_SPMDBarrierDone == 0);
  /* async control-socket handling is disabled around the send */
  ASYNC_TCP_DISABLE();
  sendAll(AMUDP_SPMDControlSocket, "B");
  ASYNC_TCP_ENABLE();

  AMUDP_SPMDWaitForControl(&AMUDP_SPMDBarrierDone);

  /* re-arm the flag for the next barrier */
  AMUDP_SPMDBarrierDone = 0;
  DEBUG_SLAVE("Leaving barrier");
  return AM_OK;
}
/* ------------------------------------------------------------------------------------ 
 *  AMUDP_SPMDAllGather: gather len bytes from source buf on each node, concatenate them and write 
 *  them into the dest buffer (which must have length len*numnodes) in rank order
 * ------------------------------------------------------------------------------------ */
extern int AMUDP_SPMDAllGather(void *source, void *dest, size_t len) {
  /* Parameters:
   *   source - this node's contribution (len bytes), must be non-NULL
   *   dest   - receives the gathered result, must be non-NULL
   *   len    - contribution size in bytes; must be non-zero and fit in the
   *            32-bit length field of the control message
   * Returns AM_OK on success, AM_ERR_NOT_INIT before startup, AM_ERR_BAD_ARG on bad arguments.
   */
  if (!AMUDP_SPMDStartupCalled) {
    AMUDP_Err("called AMUDP_SPMDAllGather before AMUDP_SPMDStartup()");
    AMUDP_RETURN_ERR(NOT_INIT);
  }
  if (source == NULL) AMUDP_RETURN_ERR(BAD_ARG);
  if (dest == NULL) AMUDP_RETURN_ERR(BAD_ARG);
  if (len == 0) AMUDP_RETURN_ERR(BAD_ARG);
  /* the length travels as a 32-bit header field: reject sizes that would be
   * silently truncated and desynchronize the control stream */
  if (len > (size_t)0x7FFFFFFF) AMUDP_RETURN_ERR(BAD_ARG);

  AMUDP_assert(AMUDP_SPMDGatherDone == 0);
  /* publish the destination before sending the request */
  AMUDP_SPMDGatherData = dest;
  AMUDP_SPMDGatherLen = len;
  int32_t myid_nb = hton32(AMUDP_SPMDMYPROC);
  int32_t mylen_nb = hton32((int32_t)len);

  /* message layout: "G", rank (32-bit, network order), length (32-bit, network order), payload */
  ASYNC_TCP_DISABLE();
  sendAll(AMUDP_SPMDControlSocket, "G");
  sendAll(AMUDP_SPMDControlSocket, &myid_nb, sizeof(myid_nb));
  sendAll(AMUDP_SPMDControlSocket, &mylen_nb, sizeof(mylen_nb));
  sendAll(AMUDP_SPMDControlSocket, source, len);
  ASYNC_TCP_ENABLE();
  
  AMUDP_SPMDWaitForControl(&AMUDP_SPMDGatherDone);

  /* re-arm the flag for the next gather */
  AMUDP_SPMDGatherDone = 0;
  DEBUG_SLAVE("Leaving gather");
  return AM_OK;
}

/* ------------------------------------------------------------------------------------ 
 *  global getenv()
 * ------------------------------------------------------------------------------------ */
extern const char* AMUDP_SPMDgetenvMaster(const char *keyname) {
  /* Look up keyname in the environment block stored in AMUDP_SPMDMasterEnvironment.
   * The block is scanned as consecutive NUL-terminated "KEY=VALUE" strings,
   * ended by an empty string.
   * Returns a pointer to the value inside that block, or NULL if keyname is NULL,
   * not present, or SPMD startup has not been called.
   */
  if (!AMUDP_SPMDStartupCalled) {
    AMUDP_Err("called AMUDP_SPMDgetenvMaster before AMUDP_SPMDStartup()");
    return NULL;
  }

  AMUDP_assert(AMUDP_SPMDMasterEnvironment != NULL);
  if (keyname == NULL) return NULL;

  const int namelen = strlen(keyname);
  for (char *entry = AMUDP_SPMDMasterEnvironment; *entry != '\0'; entry += strlen(entry) + 1) {
    /* match only when the name is followed directly by '=' (no prefix matches) */
    if (strncmp(keyname, entry, namelen) == 0 && entry[namelen] == '=') {
      return entry + namelen + 1;
    }
  }
  return NULL; // not found
}

/* Look up an environment setting under three candidate names, in priority order:
 *   <AMUDP_ENV_PREFIX_STR>_<basekey>, AMUDP_<basekey>, <basekey>
 * The master's environment is consulted once SPMD startup has completed and it is
 * available; otherwise the local process environment (getenv) is used.
 * Returns the value of the highest-priority name that is set, or NULL if none is set
 * or basekey is NULL/empty. A warning is printed to stderr when a lower-priority name
 * is also set to a different value.
 * Candidate names that do not fit in the internal buffer are skipped rather than
 * overflowing it or being looked up in truncated form.
 */
extern char *AMUDP_getenv_prefixed(const char *basekey) {
  char key[3][255];
  const char *val[3];
  int winner = -1;

  if (basekey == NULL || !*basekey) return NULL;

  const bool usemaster = (AMUDP_SPMDStartupCalled && AMUDP_SPMDMasterEnvironment != NULL);
  const char * const prefix[3] = { AMUDP_ENV_PREFIX_STR, "AMUDP", NULL }; /* NULL == bare key */

  for (int i=0; i < 3; i++) {
    int n;
    if (prefix[i] != NULL) n = snprintf(key[i], sizeof(key[i]), "%s_%s", prefix[i], basekey);
    else                   n = snprintf(key[i], sizeof(key[i]), "%s", basekey);

    if (n < 0 || (size_t)n >= sizeof(key[i])) {
      /* name too long for the buffer: a truncated name could match the wrong variable */
      key[i][0] = '\0';
      val[i] = NULL;
    } else if (usemaster) {
      val[i] = AMUDP_SPMDgetenvMaster(key[i]);
    } else {
      val[i] = getenv(key[i]);
    }
  }

  for (int i=0; i < 3; i++) {
    if (val[i] != NULL) {
      if (winner == -1) winner = i;
      else if (strcmp(val[winner], val[i])) {
        fprintf(stderr,"AMUDP: Warning: both $%s and $%s are set, to different values. Using the former.\n",
          key[winner], key[i]);
      }
    }
  }
  if (winner == -1) return NULL;
  else return (char *)val[winner];
}

/* Like AMUDP_getenv_prefixed(), but returns defaultval (which must be non-NULL)
 * when the setting is not present.
 * The chosen value is also reported: through gasnett_envstr_display() when that
 * is available, otherwise on stderr (from proc 0 or before the proc id is known)
 * when verbose environment reporting is enabled via AMUDP_DEBUG_VERBOSE or the
 * VERBOSEENV setting.
 */
extern char *AMUDP_getenv_prefixed_withdefault(const char *basekey, const char *defaultval) {
  static int firsttime = 1;
  static int verboseenv = 0;
  char * retval = NULL;
  int usingdefault = 0;
  const char *dflt = "";
  if (firsttime) {
    /* decide once whether settings should be echoed */
    #if AMUDP_DEBUG_VERBOSE
      verboseenv = 1;
    #else
      verboseenv = !!AMUDP_getenv_prefixed("VERBOSEENV");
    #endif
    firsttime = 0;
  }
  AMUDP_assert(defaultval != NULL);
  retval = AMUDP_getenv_prefixed(basekey);
  if (retval == NULL) {
    retval = (char *)defaultval;
    usingdefault = 1; /* previously never set, so the display hook was always told "not default" */
    dflt = "   (default)";
  }
#ifdef gasnett_envstr_display
  { char displaykey[255];
    (void)dflt; (void)verboseenv; /* only used by the fallback reporting path */
    /* bounded: basekey is caller-supplied and may be arbitrarily long */
    snprintf(displaykey, sizeof(displaykey), "%s_%s", AMUDP_ENV_PREFIX_STR, basekey);
    gasnett_envstr_display(displaykey, retval, usingdefault);
  }
#else
  (void)usingdefault; /* only used by the gasnett_envstr_display path */
  if (verboseenv && (AMUDP_SPMDMYPROC == -1 || AMUDP_SPMDMYPROC == 0)) {
    const char *displayval = retval;
    char displaykey[255];
    int width;
    if (strlen(retval) == 0) displayval = "*empty*";
    AMUDP_assert(strlen(basekey)+strlen(AMUDP_ENV_PREFIX_STR) < 200);
    snprintf(displaykey, sizeof(displaykey), "%s_%s", AMUDP_ENV_PREFIX_STR, basekey);
    /* pad the "(default)" tag out to column 55, but never below 10 columns.
     * Done with an explicit comparison: the subtraction on size_t operands wraps
     * around to a huge value whenever key+value exceed 55 characters */
    { size_t used = strlen(displaykey) + strlen(displayval);
      width = (used <= 45) ? (int)(55 - used) : 10;
    }
    fprintf(stderr, "ENV parameter: %s = %s%*s\n", displaykey, displayval, width, dflt);
    fflush(stderr);
  }
#endif
  return retval;
}

#ifdef AMUDP_BLCR_ENABLED
/* ------------------------------------------------------------------------------------
 *  checkpoint/restart
 * ------------------------------------------------------------------------------------ */
extern void AMUDP_SPMDRunRestart(char *argv0, char *dir, int nproc) {
  // BLCR-TODO: return errors on bad args?
  AMUDP_assert(argv0 != NULL);
  AMUDP_assert(dir != NULL);
  AMUDP_assert(nproc > 0);
  {
    eb_t eb; ep_t ep;
    int argc = 2;
    char **argv = (char**)AMUDP_malloc(3*sizeof(char*));
    argv[0] = argv0;
    argv[1] = dir;
    argv[2] = NULL;
    AMUDP_SPMDRestartActive = 1;
    AMUDP_SPMDStartup(&argc, &argv, nproc, 0, NULL, NULL, &eb, &ep);
    AMUDP_FatalErr("never reach here");
  }
}
extern int AMUDP_SPMDRestartProcId(int *argc, char ***argv) {
  const char *env_var = getenv(AMUDP_SPMDSLAVE_ARGS);
  const int slave_flag = env_var ? atoi(env_var) : 0;
  AMUDP_assert(argv != NULL);
  if (!AMUDP_SPMDStartupCalled && slave_flag == -1) {
    eb_t eb; ep_t ep;
    return AMUDP_SPMDStartup(argc, argv, 0, 0, NULL, NULL, &eb, &ep);
  }
  return -1;
}
static int AMUDP_SPMDReStartup(int fd, eb_t *eb, ep_t *ep) {
  struct stat st;
  int temp;

  AMUDP_SPMDRestartActive = 1;

  // Get location of new master from our special fd
  temp = fstat(fd, &st);
  if (temp < 0) {
    AMUDP_Err("Failed to read restart-master");
    exit(1);
  }
  size_t len = st.st_size;
  char *env_var = (char*)AMUDP_malloc(sizeof(char)*len);
  size_t rc = read(fd, env_var, len);
  if (rc != len) {
    AMUDP_Err("Failed to read restart env_var");
    AMUDP_RETURN(temp);
  }
  AMUDP_assert(env_var[len-1] == '\0');
  setenv(AMUDP_SPMDSLAVE_ARGS, env_var, 1);
  AMUDP_free(env_var);

  // Free old bundle
  temp = AM_FreeBundle(AMUDP_SPMDBundle);
  if (temp != AM_OK) {
    AMUDP_Err("Failed to free bundle in AMUDP_SPMDStartup");
    AMUDP_RETURN(temp);
  }

  // Re-bootstrap from the new master
  AMUDP_SPMDStartupCalled = 0;
  temp = AMUDP_SPMDStartup(NULL, NULL,
                           0, AMUDP_SPMDNetworkDepth,
                           NULL, NULL, eb, ep);

  AMUDP_SPMDRestartActive = 0;
  return temp;
}
/* ------------------------------------------------------------------------------------ */
#include "libcr.h"
/* Take a BLCR checkpoint of this process into <dir>/context.<procid>.
 *   eb, ep - passed to AMUDP_SPMDReStartup() when execution resumes from a restart
 *   dir    - directory for the context file, must be non-NULL
 * All outstanding requests are drained and a barrier is performed first.
 * Returns 0 when the checkpoint was written and execution continues,
 *         1 when execution is resuming from a restart of this checkpoint,
 *        -1 on error.
 */
int AMUDP_SPMDCheckpoint(eb_t *eb, ep_t *ep, const char *dir) {
  AMUDP_assert(dir != NULL);

  /* Drain all sends */
  for (int i = 0; i < AMUDP_SPMDBundle->n_endpoints; i++) {
    ep_t drain_ep = AMUDP_SPMDBundle->endpoints[i]; /* distinct name: 'ep' is a parameter */
    AMUDP_assert(drain_ep);
    while (drain_ep->outstandingRequests) {
      AM_Poll(AMUDP_SPMDBundle);
    }
  }
  AMUDP_SPMDBarrier();

  /* Start -- equivalent to gasnet_checkpoint_create(dir) */
  size_t len = strlen(dir) + 19; // 19 = "/context.123456789\0"
  char *buf = (char*)AMUDP_malloc(len);
  snprintf(buf, len, "%s/context.%d", dir, AMUDP_SPMDMYPROC);

  int contextFd = -1;
  {
    const int flags = O_WRONLY|O_APPEND|O_CREAT|O_EXCL|O_LARGEFILE|O_TRUNC;
    const int mode = S_IRUSR;
    contextFd = open(buf, flags, mode);
    if (contextFd < 0) {
      fprintf(stderr, "Failed to create '%s' errno=%d(%s)\n", buf, errno, strerror(errno));
      AMUDP_free(buf);
      return -1;
    }
  }
  AMUDP_free(buf);
  /* End -- equivalent to gasnet_checkpoint_create(dir) */

  // Open a "masterFd" socket, and write fileno to start of the context file
  int masterFd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
  if (masterFd < 0) {
    fprintf(stderr, "Failed to create checkpoint socket errno=%d(%s)\n", errno, strerror(errno));
    (void)close(contextFd);
    return -1;
  }
  if (write(contextFd, &masterFd, sizeof(masterFd)) != (ssize_t)sizeof(masterFd)) {
    fprintf(stderr, "Failed to write checkpoint header errno=%d(%s)\n", errno, strerror(errno));
    (void)close(masterFd);
    (void)close(contextFd);
    return -1;
  }

  /* Start -- equivalent to gasnet_checkpoint_write(contextFD) */
  cr_checkpoint_args_t cr_args;
  cr_checkpoint_handle_t cr_handle;
  int retval;
  int rc;

  cr_initialize_checkpoint_args_t(&cr_args);
  cr_args.cr_scope  = CR_SCOPE_TREE;
  cr_args.cr_target = 0; /* self */
  cr_args.cr_fd = contextFd;

  rc = cr_request_checkpoint(&cr_args, &cr_handle);
  if (rc < 0) {
    // cr_handle is not valid on failure, so it must not be waited on or reaped
    fprintf(stderr, "Failed to request checkpoint errno=%d(%s)\n", errno, strerror(errno));
    (void)close(masterFd);
    (void)close(contextFd);
    return -1;
  }

  do { // This loop is necessary because checkpointing self causes EINTR
    rc = cr_wait_checkpoint(&cr_handle, NULL);
    // BLCR-TODO: error checking for cr_wait_checkpoint()
  } while ((rc < 0) && (errno == EINTR));

  rc = cr_reap_checkpoint(&cr_handle);
  if (rc >= 0) {
    (void)close(cr_args.cr_fd);
    retval = 0; // Continue case
  } else if (errno == CR_ERESTARTED) {
    retval = 1; // Restart case
  } else {
    (void)close(contextFd); // don't leak the context file descriptor on failure
    retval = -1; // ERROR case
  }
  /* END -- equivalent to gasnet_checkpoint_write(contextFD) */

  if (0 == retval) {
    // Continue case
    // Nothing to do here
  } else if (1 == retval) {
    // Restart case
    AMUDP_SPMDReStartup(masterFd, eb, ep);
    // BLCR-TODO: error checking for ReStartup
  } else {
    // ERROR case
    // BLCR-TODO: error handling/reporting
  }

  (void)close(masterFd);

  return retval;
}
#endif // AMUDP_BLCR_ENABLED
/* ------------------------------------------------------------------------------------ */
