R : Copyright 2005, The R Foundation for Statistical Computing Version 2.1.1 (2005-06-20), ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for a HTML browser interface to help. Type 'q()' to quit R. > ### *
> ### > attach(NULL, name = "CheckExEnv") > assign(".CheckExEnv", as.environment(2), pos = length(search())) # base > ## add some hooks to label plot pages for base and grid graphics > setHook("plot.new", ".newplot.hook") > setHook("persp", ".newplot.hook") > setHook("grid.newpage", ".gridplot.hook") > > assign("cleanEx", + function(env = .GlobalEnv) { + rm(list = ls(envir = env, all.names = TRUE), envir = env) + RNGkind("default", "default") + set.seed(1) + options(warn = 1) + delayedAssign("T", stop("T used instead of TRUE"), + assign.env = .CheckExEnv) + delayedAssign("F", stop("F used instead of FALSE"), + assign.env = .CheckExEnv) + sch <- search() + newitems <- sch[! sch %in% .oldSearch] + for(item in rev(newitems)) + eval(substitute(detach(item), list(item=item))) + missitems <- .oldSearch[! .oldSearch %in% sch] + if(length(missitems)) + warning("items ", paste(missitems, collapse=", "), + " have been removed from the search path") + }, + env = .CheckExEnv) > assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now > assign("ptime", proc.time(), env = .CheckExEnv) > grDevices::postscript("biopara-Examples.ps") > assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv) > options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly")) > options(warn = 1) > library('biopara') > > assign(".oldSearch", search(), env = .CheckExEnv) > assign(".oldNS", loadedNamespaces(), env = .CheckExEnv) > cleanEx(); ..nameEx <- "biopara" > > ### * biopara > > flush(stderr()); flush(stdout()) > > ### Name: biopara > ### Title: Distributed Parallel System for R > ### Aliases: biopara > ### Keywords: misc utilities optimize manip data > > ### ** Examples > > #These examples assume a master called my.server.edu running on port 39000 and a client > #1.2.3.4 using return port 40000. The client port is chosen arbitrarily from free ports by the user > #The request is for 5 runs of myfxn(a,b). 
This will place the output of myfxn(a,b) into > #variable "f" as a list in the order of the run iterator. Please note that one must > #have already defined a and b in a setenv (described below) in order for the cluster > #to be aware of the values of a and b. > ## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),5,list("myfxn(a,b)")); > #This is a request to produce 3 runs of all different functions. Nruns is ignored and the return > #list is in the order of the request list. > ## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),5,list("myfxn(a)","1+1","sum(4)")) > #The following will fail unless the client and master are on the same node. The reference of > #localhost is evaluated on the master computer and the return connection will not be established. > ## Not run: f<-biopara(list("my.server.edu",39000),list("localhost",40000),5,list("myfxn(a)")) > #How to export your environment to the cluster of workers without fine tuning > #This will collect your username from your client machine and attach the environment to this label > ## Not run: f<- biopara(list("my.server.edu",39000),list("1.2.3.4",40000),0,list("setenv")) > #How to export your environment using fine tuning to export the list mylist and the function myfxn > ## Not run: f<- biopara(list("my.server.edu",39000),list("1.2.3.4",40000),list("mylist","myfxn"),list("setenv")) > #This specifies to the system to attach your environment to tag "myusername" instead > #This has no other system effect as everything is being evaluated under the user executing biopara > #Please note that you must use this same tag in order to manipulate this environment in the future > #Also, any runs must also bear this tag to take advantage of this environment > ## Not run: f<-biopara(list("my.server.edu",39000,"myusername"),list("1.2.3.4",40000),5,list("myfxn(a,b)")) > #This is a request to list your environment on the cluster. 
> ## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("ls()")) > #Here is a special command "reset" that requests the cluster to clear one's environment > ## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("reset")) > #This is a special command "last" that lists cluster usage and timestamps of user access > ## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("last")) > #Here is a special command "numservers" that queries the cluster for the number of active workers. > #This is useful when determining how to break up a large computation for maximum parallelism > ## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("numservers")) > #Here is a special command "hosts" that returns a list of connections from the master. > #This is useful for determining if certain hosts are part of the worker ring > ## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("hosts")) > > #Master process configuration > > #This is a special command to display the master's configuration instructions > biopara("help") [1] In order to operate the master, you will need to define 5 variables. [1] These are: [1] 1. Master Working Directory [1] 2. Hostname of Master [1] 3. Port Number for Master to communicate with workers on [1] 4. Port Number for Master to communicate with clients on [1] 5. List of objects (worker's hostname,worker R install dir,worker tmpdir, worker port) [1] [1] bioparamastername is the hostname of the master as the workers would see it: [1] bioparamastername <- "master-host-name"; [1] bioparamasterport is the port used by the master to recieve worker connections [1] bioparamasterport <- 36000; [1] bioparaclientport is the port used by clients to connect to master. 
[1] bioparaclientport <- 39000; [1] format for the main config list is [1] list(list("computer-name",worker-out-port,"worker working directory",worker-in-port,"command that can be executed from master to launch worker session on remote computer"),....) [1] Example: for 2 workers compute-0-0 and compute-0-1, [1] both having R installed in /opt/r, with working directory in /tmp and using port 38000. [1] [1] bioparaconfig <- list(list("compute-0-0",37000,"/tmp",38000,"ssh -f compute-0-0 runwkr.sh"),list("compute-0-1",37000,"/tmp",38000,"ssh -f compute-0-1 runwkr.sh")); [1] [1] The following is an example command string that passes through ssh to accomplish the worker launch [1] "ssh -f compute-0-1 \\" echo -e 'source\\50\\42/tmp/biopara.r\\42\\51\\73biopara\\50 37000,\\42/tmp\\42,\\42rescluster2\\42,38000\\51\\73'|/opt/R/bin/R --vanilla \\" 1> /dev/null 2>/dev/null " [1] The octal allows the quotes, ()'s and ; characters to pass through ssh gracefully [1] The piping of stdin and stdout is necessary to not clutter the master screen [1] [1] After defining these elements you must either: [1] 1. Manually start your workers [1] 2. Rely on your start commands to start the workers [1] The master will only incorporate workers present at launchtime [1] Finally run the following command to start your master: [1] [1] biopara(bioparamastername,bioparamasterport,bioparaclientport,bioparaconfig) NULL > #This configures a master on my.server.edu using port 36000 to listen for worker connections > #and port 39000 for client connections. > #It is configured to connect to a worker on localhost listening on port 38000 and sending on 37000 > #This assumes the worker has already been started with those parameters prior to master launch. > #The start command is blank so the master will make no effort to establish any workers. > #The master will, however, attempt to contact the workers inside the config list. 
> bioparamastername <- "my.server.edu"; > bioparamasterport <- 36000; > bioparaclientport <- 39000; > bioparaconfig <- list(list("localhost",37000,"c:/",38000,"")); > ## Not run: biopara(bioparamastername,bioparamasterport,bioparaclientport,bioparaconfig) > #Here is a bioparaconfig for a pair of workers. > #The launch commands are ssh single-command background sessions to run a script. > #Included in the "help" command is an example using ssh, echo and piping that allows > #one to launch R and pipe it a commandline with arguments. > bioparaconfig<-list( + list("worker1.edu",42000,"/tmp",43000,"ssh -f worker1.edu ~myuser/runwkr.sh"), + list("worker2.edu",37000,"/temp",38000,"ssh -f worker2.edu /usr/local/R/runwkr.sh")); > > #Worker process configuration > > #Please note this is only necessary if the master command string cannot be used to launch > #the worker processes automatically. > #This launches a worker listening on port 38000 and transmitting on port 37000. > #This worker is configured for master on "localhost" and uses /tmp as its working directory > ## Not run: biopara(37000,"/tmp","localhost",38000) > #To launch a worker to listen to my.server.edu using ports 38000 and 37000 for communications > ## Not run: biopara(37000,"/tmp","my.server.edu",38000) > > #Single host test for system > > #You will need 3 instances of R for this test. It uses the / directory as temp; this is not > #recommended but should be sufficient for simple tests. 
> #Start by launching the worker > ## Not run: biopara(37000,"/","localhost",38000) > #Then start a master pointing at this one worker > ## Not run: biopara("localhost",36000,39000,list(list("localhost",37000,"/",38000,""))) > #Issue a simple run > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),5,list("sum(1,1)")) > #A more complex run > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),5,list("sum(1,1)","1+1","library(\"base\")")) > #A setenv for a single var > myvar<-3 > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("setenv")) > #A setenv for a function only exporting the function > myfunc <- function(a,b){a+b} > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),list("myfunc"),list("setenv")) > #Using the new variables and function > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),3,list("myfunc(myvar,myvar)")) > #To see your variables. You will see biopara holding variables as well. Do not redefine these. > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("ls()")) > #Clear your variables > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("reset")) > #See your worker process. 
It will be the entry before the 36000 and 39000 entries > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("hosts")) > > #How to use example function pboot with example from boot > library(boot) > data(city) > ratio <- function(d, w) sum(d$x * w)/sum(d$u * w) > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),list("myfunc"),list("setenv")) > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("numservers")) > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),out,list("data(city)")) > ## Not run: out<-pboot(list("localhost",39000),list("localhost",40000),5,city, ratio, R=999, stype="w") > > > > > cleanEx(); ..nameEx <- "pboot" > > ### * pboot > > flush(stderr()); flush(stdout()) > > ### Name: pboot > ### Title: Parallel Bootstrap > ### Aliases: pboot > ### Keywords: misc utilities optimize manip data > > ### ** Examples > > #These examples assume a master called my.server.edu running on port 39000 and a client > #1.2.3.4 using return port 40000. This can be configured by running the single machine > #example at the bottom of user function biopara. > #The examples are copied from the standard function boot and shown run through biopara in parallel. > > #We need to load the boot library to get the datasets. 
This will need to be done for the workers > library(boot) > > data(city); > ratio <- function(d, w) sum(d$x * w)/sum(d$u * w) > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("setenv")) > #Since we are using a data set directly, we will need to query the number of servers and send > #that many runs library and data > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("numservers")) > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),out,list("library(boot);data(city)")) > #Finally a call to pboot > ## Not run: out<-pboot(list("localhost",39000),list("localhost",40000),5,city, ratio, R=999, stype="w") > > #We do not have to call biopara on data here since the data set becomes a user defined object > data(gravity) > diff.means <- function(d, f) + { n <- nrow(d) + gp1 <- 1:table(as.numeric(d$series))[1] + m1 <- sum(d[gp1,1] * f[gp1])/sum(f[gp1]) + m2 <- sum(d[-gp1,1] * f[-gp1])/sum(f[-gp1]) + ss1 <- sum(d[gp1,1]^2 * f[gp1]) - + (m1 * m1 * sum(f[gp1])) + ss2 <- sum(d[-gp1,1]^2 * f[-gp1]) - + (m2 * m2 * sum(f[-gp1])) + c(m1-m2, (ss1+ss2)/(sum(f)-2)) + } > grav1 <- gravity[as.numeric(gravity[,2])>=7,] > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("setenv")) > ## Not run: out<-pboot(list("localhost",39000),list("localhost",40000),5,grav1, diff.means, R=999, stype="f", strata=grav1[,2]) > > data(nuclear) > nuke <- nuclear[,c(1,2,5,7,8,10,11)] > nuke.lm <- glm(log(cost)~date+log(cap)+ne+ ct+log(cum.n)+pt, data=nuke) > nuke.diag <- glm.diag(nuke.lm) > nuke.res <- nuke.diag$res*nuke.diag$sd > nuke.res <- nuke.res-mean(nuke.res) > nuke.data <- data.frame(nuke,resid=nuke.res,fit=fitted(nuke.lm)) > new.data <- data.frame(cost=1, date=73.00, cap=886, ne=0, ct=0, cum.n=11, pt=1) > new.fit <- predict(nuke.lm, new.data) > nuke.fun <- function(dat, inds, i.pred, fit.pred, x.pred) + { + assign(".inds", inds, envir=.GlobalEnv) + lm.b <- glm(fit+resid[.inds] 
~date+log(cap)+ne+ct+ + log(cum.n)+pt, data=dat) + pred.b <- predict(lm.b,x.pred) + remove(".inds", envir=.GlobalEnv) + c(coef(lm.b), pred.b-(fit.pred+dat$resid[i.pred])) + } > ## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("setenv")) > ## Not run: nuke.boot<-pboot(list("localhost",39000),list("localhost",40000),5,nuke.data, nuke.fun, R=999, m=1, fit.pred=new.fit, x.pred=new.data) > #The bootstrap prediction error for the first bootstrap > ## Not run: mean(nuke.boot[[1]][[2]][,8]^2) > #Basic bootstrap prediction limits on first bootstrap > ## Not run: new.fit-sort(nuke.boot[[1]][[2]][,8]^2)[c(975,25)] > > > > > ### *