### Name: biopara
### Title: Distributed Parallel System for R
### Aliases: biopara
### Keywords: misc utilities optimize manip data

### ** Examples

#These examples assume a master called my.server.edu running on port 39000 and a client 
#1.2.3.4 using return port 40000. The client port is chosen arbitrarily from free ports by the user
#The request is for 5 runs of myfxn(a,b). This will place the output of myfxn(a,b) into 
#variable "f" as a list in the order of the run iterator. Please note that one must
#have already defined a and b in a setenv (described below) in order for the cluster
#to be aware of the values of a and b.
## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),5,list("myfxn(a,b)"));
#This request runs three different expressions once each. Nruns is ignored and the
#return list is in the order of the request list.
## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),5,list("myfxn(a)","1+1","sum(4)"))
#The following will fail unless the client and master are on the same node. The reference of
#localhost is evaluated on the master computer and the return connection will not be established.
## Not run: f<-biopara(list("my.server.edu",39000),list("localhost",40000),5,list("myfxn(a)"))
#How to export your environment to the cluster of workers without fine tuning
#This will collect your username from your client machine and attach the environment to this label
## Not run: f<- biopara(list("my.server.edu",39000),list("1.2.3.4",40000),0,list("setenv"))
#How to export your environment using fine tuning to export the list mylist and the function myfxn 
## Not run: f<- biopara(list("my.server.edu",39000),list("1.2.3.4",40000),list("mylist","myfxn"),list("setenv"))
#This tells the system to attach your environment to the tag "myusername" instead of your client username.
#This has no other system effect as everything is being evaluated under the user executing biopara 
#Please note that you must use this same tag in order to manipulate this environment in the future
#Also, any runs must also bear this tag to take advantage of this environment
## Not run: f<-biopara(list("my.server.edu",39000,"myusername"),list("1.2.3.4",40000),5,list("myfxn(a,b)"))
#This is a request to list your environment on the cluster. 
## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("ls()"))
#Here is a special command "reset" that requests the cluster to clear one's environment
## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("reset"))
#This is a special command "last" that lists cluster usage and timestamps of user access
## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("last"))
#Here is a special command "numservers" that queries the cluster for the number of active workers.
#This is useful when determining how to break up a large computation for maximum parallelism
## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("numservers"))
#Here is a special command "hosts" that returns a list of connections from the master.
#This is useful for determining if certain hosts are part of the worker ring 
## Not run: f<-biopara(list("my.server.edu",39000),list("1.2.3.4",40000),1,list("hosts"))

#Master process configuration

#Special command: calling biopara with the single string "help" displays the
#master's configuration instructions.
biopara("help")
#Master on my.server.edu: workers connect on port 36000, clients on port 39000.
#The single config entry points at a worker on localhost that listens on port 38000
#and sends on port 37000; that worker must already be running with those parameters
#before the master is launched.
#The start command (last field) is blank, so the master makes no effort to launch
#any workers, but it still attempts to contact every worker in the config list.
bioparamastername <- "my.server.edu"
bioparamasterport <- 36000
bioparaclientport <- 39000
bioparaconfig <- list(
  list("localhost", 37000, "c:/", 38000, "")
)
## Not run: biopara(bioparamastername,bioparamasterport,bioparaclientport,bioparaconfig)
#A bioparaconfig describing a pair of workers.
#Each launch command is an ssh single-command background session that runs a script.
#The "help" command includes an example using ssh, echo and piping that allows
#one to launch R and pipe it a command line with arguments.
bioparaconfig <- list(
  list("worker1.edu", 42000, "/tmp", 43000,
       "ssh -f worker1.edu ~myuser/runwkr.sh"),
  list("worker2.edu", 37000, "/temp", 38000,
       "ssh -f worker2.edu /usr/local/R/runwkr.sh")
)

#Worker process configuration

#Please note this is only necessary if the master command string cannot be used to launch 
#the worker processes automatically. 
#This launches a worker listening on port 38000 and transmitting on port 37000.
#This worker is configured for master on "localhost" and uses /tmp as its working directory
## Not run: biopara(37000,"/tmp","localhost",38000)
#To launch a worker to listen to my.server.edu using ports 38000 and 37000 for communications
## Not run: biopara(37000,"/tmp","my.server.edu",38000)

#Single host test for system

#You will need 3 instances of R for this test. It uses the / directory as the temp directory;
#this is not recommended but should be sufficient for simple tests.
#Start by launching the worker
## Not run: biopara(37000,"/","localhost",38000)
#Then start a master pointing at this one worker
## Not run: biopara("localhost",36000,39000,list(list("localhost",37000,"/",38000,"")))
#Issue a simple run
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),5,list("sum(1,1)"))
#A more complex run
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),5,list("sum(1,1)","1+1","library(\"base\")"))
#A setenv for a single var
#Variable picked up by the setenv request that follows
myvar <- 3
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("setenv"))
#A setenv for a function only exporting the function
#Returns the sum of its two arguments; serves as the function exported by name
#in the setenv request that follows.
myfunc <- function(a, b) {
  a + b
}
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),list("myfunc"),list("setenv"))
#Using the new variables and function
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),3,list("myfunc(myvar,myvar)"))
#To see your variables. You will see biopara holding variables as well. Do not redefine these.
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("ls()"))
#Clear your variables
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("reset"))
#See your worker process. It will be the entry before the 36000 and 39000 entries
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("hosts"))

#How to use example function pboot with example from boot
#Attach the boot package, then load the city dataset that ships with it.
library(boot)
data(city)
#Weighted ratio statistic: the w-weighted sum of d$x divided by the w-weighted sum of d$u.
ratio <- function(d, w) {
  numerator <- sum(d$x * w)
  denominator <- sum(d$u * w)
  numerator / denominator
}
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),list("myfunc"),list("setenv"))
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),1,list("numservers"))
## Not run: out<-biopara(list("localhost",39000),list("localhost",40000),out,list("data(city)"))
## Not run: out<-pboot(list("localhost",39000),list("localhost",40000),5,city, ratio, R=999, stype="w")




