### Name: plot.Design
### Title: Plot Effects of Variables
### Aliases: plot.Design print.plot.Design perimeter lines.perimeter Legend
###   Legend.default Legend.plot.Design datadensity datadensity.plot.Design
### Keywords: models hplot htest

### ** Examples

n <- 1000    # define sample size
set.seed(17) # so can reproduce the results
age            <- rnorm(n, 50, 10)
blood.pressure <- rnorm(n, 120, 15)
cholesterol    <- rnorm(n, 200, 25)
sex            <- factor(sample(c('female','male'), n,TRUE))
label(age)            <- 'Age'      # label is in Hmisc
label(cholesterol)    <- 'Total Cholesterol'
label(blood.pressure) <- 'Systolic Blood Pressure'
label(sex)            <- 'Sex'
units(cholesterol)    <- 'mg/dl'   # uses units.default in Hmisc
units(blood.pressure) <- 'mmHg'

# Specify population model for log odds that Y=1
L <- .4*(sex=='male') + .045*(age-50) +
  (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)

ddist <- datadist(age, blood.pressure, cholesterol, sex)
options(datadist='ddist')

fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)),
               x=TRUE, y=TRUE)

par(mfrow=c(2,2))
plot(fit)                # Plot effects of all 4 predictors
par(mfrow=c(1,2))
plot(fit, name=c('age','cholesterol'))   # Make 2 plots
par(mfrow=c(1,1))
plot(fit, age=seq(20,80,length=100), sex=NA, conf.int=FALSE)
                         # Plot relationship between age and log
                         # odds, separate curve for each sex,
                         # no C.I.
z <- plot(fit, age=NA, sex=NA, label.curves=FALSE)
                         # use label.curves=list(keys=c('a','b'))'
                         # to use 1-letter abbreviations
datadensity(z, age, sex) # rug plots (1-dimensional scatterplots)
                         # on each treatment curve, with treatment-
                         # specific density of age
plot(fit, age=seq(20,80,length=100), sex='male')  # works if datadist not used
plot(fit, age=NA, cholesterol=NA)# 3-dimensional perspective plot for age,
                         # cholesterol, and log odds using default
                         # ranges for both variables
boundaries <- perimeter(age, cholesterol, lowess=TRUE)
plot(age, cholesterol)   # show bivariate data density
lines(boundaries)        # and perimeter that will be used for 3-D plot
z <- plot(fit, age=NA, cholesterol=NA, perim=boundaries, method='image')
                         # draws image() plot
                         # don't show estimates where data are sparse
                         # doesn't make sense here since vars don't interact
if(!.R.)Legend(z, fun=plogis, at=qlogis(c(.01,.05,.1,.2,.3,.4,.5)),
               zlab='Probability')   # gray scale or color legend for prob.
plot(fit, age=NA, fun=function(x) 1/(1+exp(-x)) , # or fun=plogis
     ylab="Prob", conf.int=.9)    # Plot estimated probabilities instead of
                                  # log odds

# Plot the age effect as an odds ratio
# comparing the age shown on the x-axis to age=30 years

ddist$limits$age[2] <- 30    # make 30 the reference value for age
# Could also do: ddist$limits["Adjust to","age"] <- 30
fit <- update(fit)   # make new reference value take effect
plot(fit, age=NA, ref.zero=TRUE, fun=exp, ylab='Age=x:Age=30 Odds Ratio')
abline(h=1, lty=2, col=2); abline(v=30, lty=2, col=2)

# Make two curves, and plot the predicted curves as two trellis panels
w <- plot(fit, age=NA, sex=NA)   # Would be nice if a pl=FALSE option was avail.
z <- data.frame(w$x.xbeta)     # Makes variable names legal
if(.R.) library(lattice)
xyplot(log.odds ~ age | sex, data=z, type='l')
# To add confidence bands we need to use the Hmisc xYplot function in
# place of xyplot
xYplot(Cbind(log.odds,lower,upper) ~ age | sex, data=z, 
       method='bands', type='l')
# If non-displayed variables were in the model, add a subtitle to show
# their settings using title(sub=paste('Adjusted to',w$adjust),adj=0)
# See predict.Design for an example using predict and xYplot without plot()



# Plots for a parametric survival model
n <- 1000
set.seed(731)
age <- 50 + 12*rnorm(n)
label(age) <- "Age"
sex <- factor(sample(c('Male','Female'), n, 
              rep=TRUE, prob=c(.6, .4)))
cens <- 15*runif(n)
h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
t <- -log(runif(n))/h
label(t) <- 'Follow-up Time'
e <- ifelse(t<=cens,1,0)
t <- pmin(t, cens)
units(t) <- "Year"
ddist <- datadist(age, sex)
Srv <- Surv(t,e)

# Fit log-normal survival model and plot median survival time vs. age
f <- psm(Surv(t, e) ~ rcs(age), dist=if(.R.)'lognormal' else 'gaussian')
med <- Quantile(f)       # Creates function to compute quantiles
                         # (median by default)
plot(f, age=NA, fun=function(x)med(lp=x), ylab="Median Survival Time")
# Note: This works because med() expects the linear predictor (X*beta)
#       as an argument.  Would not work if use 
#       plot(..., ref.zero=TRUE or adj.zero=TRUE)
# Also, confidence intervals from this method are approximate since
# they don't take into account estimation of scale parameter

# Fit an ols model to log(y) and plot the relationship between x1
# and the predicted mean(y) on the original scale without assuming
# normality of residuals; use the smearing estimator
set.seed(1)
x1 <- runif(300)
x2 <- runif(300)
ddist <- datadist(x1,x2)
y  <- exp(x1+x2-1+rnorm(300))
f <- ols(log(y) ~ pol(x1,2)+x2)
r <- resid(f)
smean <- function(yhat)smearingEst(yhat, exp, res, statistic='mean')
formals(smean) <- list(yhat=numeric(0), res=r[!is.na(r)])
#smean$res <- r[!is.na(r)]   # define default res argument to function
plot(f, x1=NA, fun=smean, ylab='Predicted Mean on y-scale')

options(datadist=NULL)

## Not run: 
##D # Example in which separate curves are shown for 4 income values
##D # For each curve the estimated percentage of voters voting for
##D # the democratic party is plotted against the percent of voters
##D # who graduated from college.  scat1d is used to indicate
##D # the income-interval-specific data density for college.  For
##D # this purpose show the distribution of percent in college for
##D # those having an income level within +/- the half-width of
##D # the income interval.  scat1d shows the rug plot superimposed
##D # on the estimated curve.  Data are county-level percents.
##D # This can't be done automatically using datadensity on the object
##D # returned by plot.Design, as the variable representing different
##D # curves (income) is a continuous variable.
##D 
##D incomes <- seq(22900, 32800, length=4)  
##D # equally spaced to outer quintiles
##D pl <- plot(f, college=NA, income=incomes, 
##D            conf.int=FALSE, xlim=c(0,35), ylim=c(30,55),
##D            lty=1, lwd=c(.25,1.5,3.5,6), col=c(1,1,2,2))
##D graph.points <- pl$x.xbeta
##D for(i in 1:4) {
##D   college.in.income.group <- college[abs(income-incomes[i]) < 1650]
##D   this.income <- graph.points[,'income']==incomes[i]
##D   scat1d(college.in.income.group,
##D          curve=list(x=graph.points[this.income,'college'],
##D            y=graph.points[this.income,'democrat']))
##D }
##D 
##D # Instead of showing a rug plot on each curve, erase end portions
##D # of each curve where there are fewer than 10 counties having
##D # % college graduates to the left of the x-coordinate being plotted,
##D # for the subset of counties having median family income with 1650
##D # of the target income for the curve
##D 
##D show.pts <- function(college.pts, income.pt) {
##D   s <- abs(income - income.pt) < 1650  #assumes income known to top frame
##D   x <- college[s]
##D   x <- sort(x[!is.na(x)])
##D   n <- length(x)
##D   low <- x[10]; high <- x[n-9]
##D   college.pts >= low & college.pts <= high
##D }
##D 
##D plot(f, college=NA, income=incomes,
##D      conf.int=FALSE, xlim=c(0,35), ylim=c(30,55),
##D      lty=1, lwd=c(.25,1.5,3.5,6), col=c(1,1,2,2),
##D      perim=show.pts)
## End(Not run)



