### Name: rm.impute
### Title: Imputation of Repeated Measures
### Aliases: rm.impute pbind
### Keywords: regression htest multivariate array

### ** Examples

## Not run: 
##D # Generate multiple imputes of the response matrix for later use
##D Y <- rm.impute(~treatment + pol(age,2)*sex, responses, 
##D                last=lastvisit, data=mydata)$Y
##D # Do some analysis for each imputation
##D fits <- vector('list',10)
##D for(i in 1:10) {
##D   y <- Y[,,i]
##D   fits[[i]] <- my.analysis(X,y)
##D }
##D 
##D # Function to generate a 4-variate equal correlation pattern response
##D # with missing-at-random responses; missingness is a function of x and
##D # previous responses.
##D #
##D # pna is a function that computes the probability that a subject
##D # drops out at the current visit.  For visit 1 pna is a function
##D # of treatment and baseline covariable x.  For visits > 1 pna is
##D # a function of the matrix of responses for all previous visits.
##D #
##D # If second=TRUE we also generate a second response variable having
##D # NAs in the same positions as this first one.  y2 is generated
##D # so that its NAs are completely unrelated to any y2 values if
##D # y2B.effect=0, as the pna function is only given the first 
##D # response variable.
##D # y2 is N(0,1) for treat='A' and N(y2.treat.effect,1) for treat='B'.
##D 
##D testdf <- function(n=1500, seed=7, pna, second=FALSE, y2.treat.effect=0) {
##D 
##D set.seed(seed)
##D treat <- sample(c('A','B'),n,TRUE)
##D x <- runif(n)
##D nt <- 4
##D 
##D mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) {
##D   Z <- matrix(rnorm(n * p), p, n)
##D   t(u + t(chol(S)) %*% Z)
##D }
##D 
##D # Generate multivariate normal errors for n subjects at nt times
##D # Assume equal correlations of rho=.5, independent subjects
##D 
##D rho   <- .5
##D y <- mvrnorm(n, p=nt, S=diag(rep(1-rho,nt))+rho)
##D 
##D y[treat=='B',] <- y[treat=='B',] + 1
##D 
##D cat('\n\nTreatment-specific means for last period in response variable 1 before generating NAs:\n')
##D print(tapply(y[,4], treat, mean, na.rm=TRUE))
##D 
##D y[runif(n) < pna(treat, x), 1] <- NA
##D y[is.na(y[,1]) | runif(n) < pna(treat, x, y[,1]),   2] <- NA
##D y[is.na(y[,2]) | runif(n) < pna(treat, x, y[,1:2]), 3] <- NA
##D y[is.na(y[,3]) | runif(n) < pna(treat, x, y[,1:3]), 4] <- NA
##D 
##D last <- rep(4, n)
##D last[is.na(y[,4])] <- 3
##D last[is.na(y[,3])] <- 2
##D last[is.na(y[,2])] <- 1
##D last[is.na(y[,1])] <- 0
##D 
##D cat('\nNumber of NAs for each time period:\n')
##D print(apply(y, 2, function(x)sum(is.na(x))))
##D cat('\n\nTreatment-specific means for last period in response variable 1 after excluding NAs:\n')
##D print(tapply(y[,4], treat, mean, na.rm=TRUE))
##D cat('\n\nNaive complete-case analysis:\n\n')
##D prn(ols(y[,4] ~ pol(x,2) + treat))
##D 
##D if(second) {
##D   y2 <- matrix(rnorm(n*4),ncol=4)
##D   y2[treat=='B',] <- y2[treat=='B',] + y2.treat.effect
##D   cat('\n\nTreatment-specific means for last period in response variable 2 before generating NAs:\n')
##D   print(tapply(y2[,4], treat, mean, na.rm=TRUE))
##D 
##D   y2[is.na(y[,1]),1] <- NA
##D   y2[is.na(y[,2]),2] <- NA
##D   y2[is.na(y[,3]),3] <- NA
##D   y2[is.na(y[,4]),4] <- NA
##D   cat('\n\nTreatment-specific means for last period in response variable 2 after excluding NAs:\n')
##D   print(tapply(y2[,4], treat, mean, na.rm=TRUE))
##D 
##D   y <- pbind(y1=y, y2=y2)
##D }
##D 
##D list(x=x, treat=treat, y=y, last=last)
##D }
##D 
##D pna <- function(treat, x, yprev) {
##D # In this model for the probability of dropout just before the
##D # current visit, the probability does not depend on the baseline
##D # covariable x.  For treat='B' the probability of dropout is a
##D # constant 0.1.  For treat='A' it is a curtailed quadratic
##D # function of the previous visit's response.
##D #
##D # If no previous responses available, we are at first follow-up visit
##D 
##D if(missing(yprev)) 0 else {
##D   if(is.matrix(yprev)) yprev <- yprev[,ncol(yprev)]
##D   ifelse(treat=='B', .1,
##D          pmax(0, pmin(1, .124 +.0835*yprev + .020868*yprev^2)))
##D   }
##D }
##D 
##D df <- testdf(pna = pna, second=TRUE)
##D 
##D g <- rm.impute(~ pol(x,2) + treat, df$y, last=df$last, 
##D                rformula=~ pol(x,2) + treat,
##D                n.impute=10, g=4, nk=3, 
##D                rinteraction='treat', rint.with='all',
##D                pr=TRUE, pra=TRUE, data=df, keep.prop=TRUE, keep.pfits=TRUE)
##D # Base propensity model is in.study ~ pol(x,2) + treat
##D # for visits 2,3,4, filled-in y's from previous visits will also be
##D # used as predictors, and these interact with treat.  
##D # Restricted cubic spline with 3 knots is assumed for the propensity models
##D # To fit the multiply-imputed last (4th) response an additive model
##D # in quadratic x and treat is used
##D 
##D g$fit[[1]]       # shows response fit for first response variable
##D                  # (y1), with variances adj. for imputation
##D page(g$Y)        # show all 10 imputations for both responses x 4 periods
##D 
##D # Check for the first imputation how well propensity matching achieved 
##D # balance in baseline and period 3 filled-in responses for
##D # dropouts and non-dropouts.  For continuous variables show ECDFs
##D # using the Hmisc ecdf function, for first 4 imputations.  Do this
##D # with and without stratifying on quintiles of propensity, and also
##D # show the estimated 3rd period response  vs. propensity stratified 
##D # by dropout status.  Use only first response (y1) for all of this.
##D 
##D for(imp in 1:4) {
##D   y3     <- g$Y[,3,1,imp]
##D   prop3  <- g$propensity[,3,imp]
##D   prop3g <- cut2(prop3,g=5)
##D   ti <- paste('Imputation',imp)
##D   print(ecdf(~ y3, groups=df$last >= 3, subset=unclass(prop3g)<5))
##D   title(ti)
##D   print(ecdf(~ y3 | prop3g, groups=df$last >= 3, 
##D              subset=unclass(prop3g)<5))
##D   # Not enough dropouts in highest quintile of propensity completing
##D   # visit 3
##D   title(ti)
##D   plsmo(prop3, y3, group=df$last >= 3, datadensity=TRUE, col=1:2)
##D   title(ti)
##D }
##D 
##D # Examine propensity fit for sixth imputation, 4th response
##D f <- g$pfits[4,6][[1]]
##D dfr <- as.data.frame(df)
##D # Edit names of dfr so that responses called y.1, y.2, etc.
##D # For this example, these are already OK
##D dd <- datadist(dfr)
##D options(datadist='dd')   
##D # datadist makes plot below work without specifying variable settings
##D plot(f, y.3=NA, treat=NA, conf.int=FALSE)
##D 
##D # Analyze multiple response variables.  Both systolic.bp and
##D # diastolic.bp are matrices (columns = time periods)
##D 
##D f <- rm.impute(~treatment + pol(age,2)*sex,
##D                pbind(systolic.bp, diastolic.bp),
##D                last=lastvisit, data=mydata)
##D 
##D # To deal with a continuous and a binary endpoint you can specify
##D # pbind(sysbolic.bp, stroke), fitter=list(ols, lrm)
## End(Not run)



