psm                  package:Design                  R Documentation

_P_a_r_a_m_e_t_r_i_c _S_u_r_v_i_v_a_l _M_o_d_e_l

_D_e_s_c_r_i_p_t_i_o_n:

     'psm' is a modification of Therneau's 'survreg' function for
     fitting the accelerated failure time family of parametric survival
     models.  'psm' uses the 'Design' class for automatic 'anova',
     'fastbw', 'calibrate', 'validate', and other functions. 
     'Hazard.psm', 'Survival.psm', 'Quantile.psm', and 'Mean.psm'
     create S functions that evaluate the hazard, survival, quantile,
     and mean (expected value) functions analytically, as functions of
     time or probabilities and the linear predictor values.

     The 'residuals.psm' function exists mainly to compute normalized
     (standardized) residuals and to censor them (i.e., return them as
     'Surv' objects) just as the original failure time variable was
     censored.  These residuals are useful for checking the underlying
     distributional assumption (see the examples).  To get these
     residuals, the fit must have specified 'y=TRUE'.  A 'lines' method
     for these residuals automatically draws a curve with the assumed
     standardized survival distribution.  A 'survplot' method runs the
     standardized censored residuals through 'survfit' to get
     Kaplan-Meier estimates, with optional stratification
     (automatically grouping a continuous variable into quantiles) and
     then through 'survplot.survfit' to plot them.  Then 'lines' is
     invoked to show the theoretical curve.  Other types of residuals
     are computed by 'residuals' using 'residuals.survreg'.

     Older versions of 'survreg' used by 'psm' (e.g., on S-Plus 2000)
     had the following additional arguments 'method, link, parms,
     fixed'.  See 'survreg' on such systems for details. 'psm' passes
     those arguments to 'survreg'.

_U_s_a_g_e:

     psm(formula=formula(data),
         data=if (.R.) parent.frame() else sys.parent(), weights,
         subset, na.action=na.delete, dist="weibull",
         init=NULL, scale=0, 
         control=if(!.R.) survReg.control() else survreg.control(),
         parms=NULL, 
         model=FALSE, x=FALSE, y=TRUE, time.inc, ...)
     # dist=c("extreme", "logistic", "gaussian", "exponential", 
     #        "rayleigh", "t")      for S-Plus before 5.0
     # dist=c("extreme", "logistic", "gaussian", "weibull",
     #        "exponential", "rayleigh", "lognormal",
     #        "loglogistic", "t")   for R, S-Plus 5,6
     # Older versions had arguments method, link, parms, fixed

     ## S3 method for class 'psm':
     print(x, correlation=FALSE, ...)

     Hazard(object, ...)
     ## S3 method for class 'psm':
     Hazard(object, ...)   # for psm fit
     # E.g. lambda <- Hazard(fit)

     Survival(object, ...)
     ## S3 method for class 'psm':
     Survival(object, ...) # for psm
     # E.g. survival <- Survival(fit)

     ## S3 method for class 'psm':
     Quantile(object, ...) # for psm
     # E.g. quantsurv <- Quantile(fit)

     ## S3 method for class 'psm':
     Mean(object, ...)     # for psm
     # E.g. meant   <- Mean(fit)

     # lambda(times, lp)   # get hazard function at t=times, xbeta=lp
     # survival(times, lp) # survival function at t=times, lp
     # quantsurv(q, lp)    # quantiles of survival time
     # meant(lp)           # mean survival time

     ## S3 method for class 'psm':
     residuals(object, type="censored.normalized", ...)

     ## S3 method for class 'residuals.psm.censored.normalized':
     survplot(fit, x, g=4, col, main, ...)

     ## S3 method for class 'residuals.psm.censored.normalized':
     lines(x, n=100, lty=1, xlim,
     lwd=3, ...)
     # for type="censored.normalized"

_A_r_g_u_m_e_n_t_s:

 formula: an S statistical model formula. Interactions up to third
          order are supported. The left hand side must be a 'Surv'
          object. 

  object: a fit created by 'psm'.  For 'survplot' with residuals from
          'psm', the first argument (named 'fit' in the usage above)
          is the result of 'residuals.psm'. 

     fit: a fit created by 'psm'

    data: 

  subset: 

 weights: 

    dist: 

   scale: 

    init: 

na.action: 

 control: see 'survreg' ('survReg' for S-Plus 5. or 6.). 'fixed' is
          used for S-Plus before 5., 'parms' is used for S-Plus 5, 6,
          and R.  See 'cph' for 'na.action'.  

   parms: a list of fixed parameters.  For the t-distribution this is
          the degrees of freedom; most of the distributions have no
          parameters.

   model: set to 'TRUE' to include the model frame in the returned
          object 

       x: set to 'TRUE' to include the design matrix in the object
          produced by 'psm'.  For the 'survplot' method, 'x' is an
          optional stratification variable (character, numeric, or
          categorical).  For 'lines.residuals.psm.censored.normalized',
          'x' is the result of 'residuals.psm'.  For 'print' it is the
          result of 'psm'. 

       y: set to 'TRUE' to include the 'Surv()' matrix 

time.inc: setting for default time spacing. Used in constructing time
          axis in 'survplot', and also in making confidence bars. Default
          is 30 if time variable has 'units="Day"', 1 otherwise, unless
          maximum follow-up time < 1. Then max time/10 is used as
          'time.inc'. If 'time.inc' is not given and max time/default
          'time.inc' is > 25, 'time.inc' is increased. 

correlation: set to 'TRUE' to print the correlation matrix for
          parameter estimates

     ...: other arguments to fitting routines, or to pass to 'survplot'
          from 
           'survplot.residuals.psm.censored.normalized'.  Ignored for
          'lines'.

   times: a scalar or vector of times for which to evaluate survival
          probability or hazard 

      lp: a scalar or vector of linear predictor values at which to
          evaluate survival probability or hazard.  If both 'times' and
          'lp' are vectors, they must be of the same length. 

       q: a scalar or vector of probabilities.  The default is .5, so
          just the median survival time is returned.  If 'q' and 'lp'
          are both vectors, a matrix of quantiles is returned, with
          rows corresponding to 'lp' and columns to 'q'. 

    type: type of residual desired.  Default is censored normalized
          residuals, defined as (link(Y) - linear.predictors)/scale
          parameter, where the link function was usually the log
          function.  See 'survreg' for other types ('survReg' for
          S-Plus 6). 

       n: number of points at which to evaluate the theoretical
          standardized survival function for
          'lines.residuals.psm.censored.normalized' 

     lty: line type for 'lines', default is 1 

    xlim: range of times (or transformed times) for which to evaluate
          the standardized survival function.  Default is range in
          normalized residuals. 

     lwd: line width for theoretical distribution, default is 3 

       g: number of quantile groups to use for stratifying continuous
          variables having more than 5 levels 

     col: vector of colors for 'survplot' method, corresponding to
          levels of 'x' (must be a scalar if there is no 'x') 

    main: main plot title for 'survplot'.  If omitted, is the name or
          label of 'x' if 'x' is given.  Use 'main=""' to suppress a
          title when you specify 'x'. 

_D_e_t_a_i_l_s:

     The object 'survreg.distributions' contains definitions of
     properties of the various survival distributions.  
      'psm' does not trap singularity errors due to the way
     'survreg.fit' does matrix inversion.  It will trap non-convergence
     (thus returning 'fit$fail=TRUE') if you give the argument
     'failure=2' inside the 'control' list which is passed to
     'survreg.fit'.  For example, use 'f <- psm(S ~ x,
     control=list(failure=2, maxiter=20))' to allow up to 20 iterations
     and to set 'f$fail=TRUE' in case of non-convergence. This is
     especially useful in simulation work.

_V_a_l_u_e:

     'psm' returns a fit object with all the information 'survreg'
     would store as well as what 'Design' stores and 'units' and
     'time.inc'. 'Hazard', 'Survival', 'Quantile', and 'Mean' return
     S-functions. 'residuals.psm' with 'type="censored.normalized"'
     returns a 'Surv' object which has a special attribute
     '"theoretical"' which is used by the 'lines' routine.  This is the
     assumed standardized survival function as a function of time or
     transformed time.

_A_u_t_h_o_r(_s):

     Frank Harrell
      Department of Biostatistics
      Vanderbilt University 
      f.harrell@vanderbilt.edu

_S_e_e _A_l_s_o:

     'Design', 'survreg', 'survReg', 'residuals.survreg',
     'survreg.object',  'survreg.distributions', 'pphsm', 'survplot',
     'survest', 'Surv', 'na.delete', 'na.detail.response', 'datadist',
     'latex.psm'

_E_x_a_m_p_l_e_s:

     # Simulate right-censored survival data that depend on age and sex,
     # fit several parametric survival models with psm, and examine them.
     n <- 400
     set.seed(1)               # make the simulated data reproducible
     age <- rnorm(n, 50, 12)
     sex <- factor(sample(c('Female','Male'),n,TRUE))
     # datadist is needed by the plot and survplot calls below
     dd <- datadist(age,sex)
     options(datadist='dd')
     # Population hazard function:
     h <- .02*exp(.06*(age-50)+.8*(sex=='Female'))
     # Exponential failure times with hazard h (inverse-CDF method)
     d.time <- -log(runif(n))/h
     # Censoring times, uniform on (0, 15)
     cens <- 15*runif(n)
     # death is 1 if the failure was observed, 0 if censored
     death <- ifelse(d.time <= cens,1,0)
     # Observed time is the earlier of the failure and censoring times
     d.time <- pmin(d.time, cens)

     f <- psm(Surv(d.time,death) ~ sex*pol(age,2), 
              dist=if(.R.)'lognormal' else 'gaussian')
     # Log-normal model is a bad fit for proportional hazards data

     anova(f)
     fastbw(f)  # if this deletes sex while keeping age*sex, ignore the result
     f <- update(f, x=TRUE,y=TRUE)       # store x, y so can validate, compute certain resids
     validate(f, dxy=TRUE, B=10)      # ordinarily use B=150 or more
     plot(f, age=NA, sex=NA)       # needs datadist since no explicit values for age, sex
     survplot(f, age=c(20,60))     # needs datadist since sex is not set here
     # latex(f)

     # Survival(f) returns a function of (times, lp)
     S <- Survival(f)
     plot(f$linear.predictors, S(6, f$linear.predictors),
          xlab=if(.R.)expression(X*hat(beta)) else 'X*Beta',
          ylab=if(.R.)expression(S(6,X*hat(beta))) else 'S(6|X*Beta)')
     # plots survival at time 6 as a function of linear predictor (X*Beta hat)

     times <- seq(0,24,by=.25)
     plot(times, S(times,0), type='l')   # plots survival curve at X*Beta hat=0
     lam <- Hazard(f)
     plot(times, lam(times,0), type='l') # similarly for hazard function

     med <- Quantile(f)        # new function defaults to computing median only
     lp <- seq(-3, 5, by=.1)
     plot(lp, med(lp=lp), ylab="Median Survival Time")
     med(c(.25,.5), f$linear.predictors)
                               # prints matrix with 2 columns (one per
                               # probability), one row per linear predictor

     # fit a model with no predictors
     f <- psm(Surv(d.time,death) ~ 1, dist=if(.R.)"weibull" else "extreme")
     f
     pphsm(f)          # print proportional hazards form
     # plot the survival estimates returned by survest against time
     g <- survest(f)
     plot(g$time, g$surv, xlab='Time', type='l',
          ylab=if(.R.)expression(S(t)) else 'S(t)')

     # y=TRUE is required to compute censored normalized residuals
     f <- psm(Surv(d.time,death) ~ age, 
              dist=if(.R.)"loglogistic" else "logistic", y=TRUE)
     r <- resid(f, 'cens') # 'cens' abbreviates 'censored.normalized'
     survplot(survfit(r), conf='none') 
                           # plot Kaplan-Meier estimate of 
                           # survival function of standardized residuals
     survplot(survfit(r ~ cut2(age, g=2)), conf='none')  
                           # both strata should follow the assumed
                           # standardized (here logistic) distribution
     lines(r)              # add theoretical survival function
     # More simply (stratifies on age in 2 quantile groups, adds the curve):
     survplot(r, age, g=2)

     # clear the datadist option set at the top of the examples
     options(datadist=NULL)

