R : Copyright 2005, The R Foundation for Statistical Computing
Version 2.1.1  (2005-06-20), ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for a HTML browser interface to help.
Type 'q()' to quit R.

> ### *
> ###
> attach(NULL, name = "CheckExEnv")
> assign(".CheckExEnv", as.environment(2), pos = length(search())) # base
> ## add some hooks to label plot pages for base and grid graphics
> setHook("plot.new", ".newplot.hook")
> setHook("persp", ".newplot.hook")
> setHook("grid.newpage", ".gridplot.hook")
> 
> assign("cleanEx",
+        function(env = .GlobalEnv) {
+            rm(list = ls(envir = env, all.names = TRUE), envir = env)
+            RNGkind("default", "default")
+            set.seed(1)
+            options(warn = 1)
+            delayedAssign("T", stop("T used instead of TRUE"),
+                          assign.env = .CheckExEnv)
+            delayedAssign("F", stop("F used instead of FALSE"),
+                          assign.env = .CheckExEnv)
+            sch <- search()
+            newitems <- sch[! sch %in% .oldSearch]
+            for(item in rev(newitems))
+                eval(substitute(detach(item), list(item = item)))
+            missitems <- .oldSearch[! .oldSearch %in% sch]
+            if(length(missitems))
+                warning("items ", paste(missitems, collapse = ", "),
+                        " have been removed from the search path")
+        },
+        env = .CheckExEnv)
> assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now
> assign("ptime", proc.time(), env = .CheckExEnv)
> grDevices::postscript("Design-Examples.ps")
> assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv)
> options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly"))
> options(warn = 1)
> library('Design')
Loading required package: Hmisc

Hmisc library by Frank E Harrell Jr

Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview
to see overall documentation.

NOTE: Hmisc no longer redefines [.factor to drop unused levels
when subsetting.  To get the old behavior of Hmisc type
dropUnusedLevels().


Attaching package: 'Hmisc'


        The following object(s) are masked from package:stats :

         ecdf 

Loading required package: survival
Loading required package: splines

Attaching package: 'survival'


        The following object(s) are masked from package:Hmisc :

         untangle.specials 


Design library by Frank E Harrell Jr

Type library(help='Design'), ?Overview, or ?Design.Overview
to see overall documentation.


Attaching package: 'Design'


        The following object(s) are masked from package:survival :

         Surv cox.zph survfit 


        The following object(s) are masked from package:Hmisc :

         .R. .SV4. .noGenerics 

> 
> assign(".oldSearch", search(), env = .CheckExEnv)
> assign(".oldNS", loadedNamespaces(), env = .CheckExEnv)
> cleanEx(); ..nameEx <- "Design.Misc"
> 
> ### * Design.Misc
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: Design.Misc
> ### Title: Miscellaneous Design Attributes and Utility Functions
> ### Aliases: Design.Misc Varcov.cph Varcov.glmD Varcov.glsD Varcov.lrm
> ###   Varcov.ols Varcov.psm oos.loglik oos.loglik.ols oos.loglik.lrm
> ###   oos.loglik.cph oos.loglik.psm oos.loglik.glmD num.intercepts Getlim
> ###   Getlimi related.predictors interactions.containing param.order
> ###   Penalty.matrix Penalty.setup lrtest univarLR Newlabels Newlevels
> ###   Newlabels.Design Newlevels.Design DesignFit print.Design
> ###   residuals.Design print.lrtest
> ### Keywords: models methods
> 
> ### ** Examples
> 
> ## Not run: 
> ##D f <- psm(S ~ x1 + x2 + sex + race, dist='gau')
> ##D g <- psm(S ~ x1 + sex + race, dist='gau',
> ##D          fixed=list(scale=exp(f$parms)))
> ##D lrtest(f, g)
> ##D 
> ##D g <- Newlabels(f, c(x2='Label for x2'))
> ##D g <- Newlevels(g, list(sex=c('Male','Female'), race=c('B','W')))
> ##D nomogram(g)
> ## End(Not run)
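> # A sketch, not part of the original example: lrtest() compares nested fits
> # via the difference in their likelihood-ratio chi-squares.  Assuming two
> # nested Design fits f (full) and g (reduced), with 'Model L.R.' and 'd.f.'
> # stats components as printed by the fits elsewhere in this file, its
> # p-value can be reproduced by hand:
> # lr <- f$stats['Model L.R.'] - g$stats['Model L.R.']
> # df <- f$stats['d.f.']      - g$stats['d.f.']
> # 1 - pchisq(lr, df)   # should agree with lrtest(f, g)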
> 
> 
> 
> cleanEx(); ..nameEx <- "Design"
> 
> ### * Design
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: Design
> ### Title: Design Methods and Generic Functions
> ### Aliases: Design
> ### Keywords: models regression survival math manip methods
> 
> ### ** Examples
> 
> ## Not run: 
> ##D library(Design, first=TRUE)  # omit first for R
> ##D dist <- datadist(data=2)  # can omit if not using summary, plot, survplot,
> ##D                           # or if specify all variable values to them.  Can
> ##D                           # also defer.  data=2: get distribution summaries
> ##D                           # for all variables in search position 2
> ##D                           # run datadist once, for all candidate variables
> ##D dist <- datadist(age, race, bp, sex, height)   # alternative
> ##D options(datadist="dist")
> ##D f <- cph(Surv(d.time, death) ~ rcs(age,4)*strat(race) +
> ##D          bp*strat(sex) + lsp(height,60), x=TRUE, y=TRUE)
> ##D anova(f)
> ##D anova(f, age, height)     # Joint test of 2 vars
> ##D fastbw(f)
> ##D summary(f, sex="female")  # Adjust sex to "female" when testing
> ##D                           # interacting factor bp
> ##D plot(f, age=NA, height=NA)      # 3-D plot
> ##D plot(f, age=10:70, height=60)
> ##D latex(f)                  # LaTeX representation of fit
> ##D 
> ##D f <- lm(y ~ x)   # Can use with any fitting function that
> ##D                  # calls model.frame.default, e.g. lm, glm
> ##D specs.Design(f)  # Use .Design since class(f)="lm"
> ##D anova(f)         # Works since Varcov(f) (= Varcov.lm(f)) works
> ##D fastbw(f)
> ##D options(datadist=NULL)
> ##D f <- ols(y ~ x1*x2)   # Saves enough information to do fastbw, anova
> ##D anova(f)              # Will not do plot.Design since distributions
> ##D fastbw(f)             # of predictors not saved
> ##D plot(f, x1=seq(100,300,by=.5), x2=.5)
> ##D                       # all values defined - don't need datadist
> ##D dist <- datadist(x1, x2)   # Equivalent to datadist(f)
> ##D options(datadist="dist")
> ##D plot(f, x1=NA, x2=.5)      # Now you can do plot, summary
> ##D nomogram(f, interact=list(x2=c(.2,.7)))
> ## End(Not run)
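> # A sketch, not part of the original example: the object returned by
> # datadist() holds the limits that plot() and summary() fall back on, in a
> # data frame whose row names match the specs() output shown later in this
> # file ('Low:effect', 'Adjust to', 'High:effect', 'Low:prediction',
> # 'High:prediction', 'Low', 'High').  With a hypothetical variable x:
> # x <- rnorm(100)
> # dd <- datadist(x)
> # dd$limits['Adjust to', 'x']   # median of x, the default adjust-to value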
> 
> 
> 
> cleanEx(); ..nameEx <- "Design.trans"
> 
> ### * Design.trans
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: Design.trans
> ### Title: Design Special Transformation Functions
> ### Aliases: Design.trans asis pol lsp rcs catg scored strat matrx \%ia\%
> ### Keywords: models regression math manip methods survival smooth
> 
> ### ** Examples
> 
> ## Not run: 
> ##D options(knots=4, poly.degree=2)
> ##D country <- factor(country.codes)
> ##D blood.pressure <- cbind(sbp=systolic.bp, dbp=diastolic.bp)
> ##D fit <- lrm(Y ~ sqrt(x1)*rcs(x2) + rcs(x3,c(5,10,15)) +
> ##D            lsp(x4,c(10,20)) + country + blood.pressure + poly(age,2))
> ##D # sqrt(x1) is an implicit asis variable, but limits of x1, not sqrt(x1),
> ##D # are used for later plotting and effect estimation
> ##D # x2 fitted with restricted cubic spline with 4 default knots
> ##D # x3 fitted with r.c.s. with 3 specified knots
> ##D # x4 fitted with linear spline with 2 specified knots
> ##D # country is an implied catg variable
> ##D # blood.pressure is an implied matrx variable
> ##D # since poly is not a Design function (pol is), it creates a
> ##D # matrx-type variable with no automatic linearity testing or plotting
> ##D f1 <- lrm(y ~ rcs(x1) + rcs(x2) + rcs(x1) %ia% rcs(x2))
> ##D # %ia% restricts interactions.  Here it removes terms nonlinear in
> ##D # both x1 and x2
> ##D f2 <- lrm(y ~ rcs(x1) + rcs(x2) + x1 %ia% rcs(x2))
> ##D # interaction linear in x1
> ##D f3 <- lrm(y ~ rcs(x1) + rcs(x2) + x1 %ia% x2)
> ##D # simple product interaction (doubly linear)
> ##D # Use x1 %ia% x2 instead of x1:x2 because x1 %ia% x2 triggers
> ##D # anova to pool the x1*x2 term into the x1 terms to test the total
> ##D # effect of x1
> ## End(Not run)
> 
> 
> 
> cleanEx(); ..nameEx <- "Function"
> 
> ### * Function
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: Function
> ### Title: Compose an S Function to Compute X beta from a Fit
> ### Aliases: Function.Design Function.cph sascode
> ### Keywords: regression methods interface models survival math
> 
> ### ** Examples
> 
> set.seed(1331)
> x1 <- exp(rnorm(100))
> x2 <- factor(sample(c('a','b'), 100, rep=TRUE))
> dd <- datadist(x1, x2)
> options(datadist='dd')
> y <- log(x1)^2 + log(x1)*(x2=='b') + rnorm(100)/4
> f <- ols(y ~ pol(log(x1),2)*x2)
> f$coef
   Intercept           x1         x1^2         x2=b    x1 * x2=b  x1^2 * x2=b 
 0.050809859  0.012335335  0.991077315 -0.049218199  1.001128354 -0.003440629 
> g <- Function(f, digits=5)
> g
function (x1 = 1.0906, x2 = "a") 
{
    x1 <- log(x1)
    0.05081 + 0.012335 * x1 + 0.99108 * x1^2 - 0.049218 * (x2 == 
        "b") + (x2 == "b") * (1.0011 * x1 - 0.0034406 * x1^2)
}
> sascode(g)
x1 = log(x1)
0.05081 + 0.012335 * x1 + 0.99108 * x1**2 - 0.049218 * (x2 = "b")
 + (x2 = "b") * (1.0011 * x1 - 0.0034406 * x1**2);
> g()
[1] 0.05933444
> g(x1=c(2,3), x2='b')   # x2 must be given here since 'a' is the default category
[1] 1.178566 2.306994
> predict(f, expand.grid(x1=c(2,3), x2='b'))
       1        2 
1.178584 2.307022 
> g8 <- Function(f)   # default is 8 sig. digits
> g8(x1=c(2,3), x2='b')
[1] 1.178584 2.307022
> options(datadist=NULL)
> 
> ## Not run: 
> ##D # Make self-contained functions for computing survival probabilities
> ##D # using a log-normal regression
> ##D f <- psm(Surv(d.time, death) ~ rcs(age,4)*sex, dist='gaussian')
> ##D g <- Function(f)
> ##D surv <- Survival(f)
> ##D # Compute 2- and 5-year survival estimates for a 50 year old male
> ##D surv(c(2,5), g(age=50, sex='male'))
> ## End(Not run)
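> # A sketch, not part of the original example: the function returned by
> # Function() is plain R/S code with no dependence on Design, so it can be
> # written out and source()d in another session or embedded in a report:
> # dump("g", file = "linear-predictor.R")   # then source("linear-predictor.R")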
> 
> 
> 
> cleanEx(); ..nameEx <- "Overview"
> 
> ### * Overview
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: Overview
> ### Title: Overview of Design Library
> ### Aliases: Overview Design.Overview
> ### Keywords: models
> 
> ### ** Examples
> 
> ######################
> # Detailed Example 1 #
> ######################
> # May want to first invoke the Hmisc store function
> # so that new variables will go into a temporary directory
> set.seed(17)   # So can repeat random number sequence
> n <- 500
> 
> sex <- factor(sample(c('female','male'), n, rep=TRUE))
> age <- rnorm(n, 50, 10)
> sys.bp <- rnorm(n, 120, 7)
> 
> # Use two population models, one with a systolic
> # blood pressure effect and one without
> 
> L <- ifelse(sex=='female', .1*(pmin(age,50)-50), .005*(age-50)^2)
> L.bp <- L + .4*(pmax(sys.bp,120)-120)
> 
> dz <- ifelse(runif(n) <= plogis(L), 1, 0)
> dz.bp <- ifelse(runif(n) <= plogis(L.bp), 1, 0)
> 
> # Use summary.formula in the Hmisc library to summarize the
> # data one predictor at a time
> 
> s <- summary(dz.bp ~ age + sex + sys.bp)
> options(digits=3)
> print(s)
dz.bp    N=500

+-------+-----------+---+-----+
|       |           |N  |dz.bp|
+-------+-----------+---+-----+
|age    |[15.1,43.6)|125|0.648|
|       |[43.6,50.3)|125|0.688|
|       |[50.3,56.7)|125|0.616|
|       |[56.7,89.4]|125|0.760|
+-------+-----------+---+-----+
|sex    |female     |240|0.596|
|       |male       |260|0.754|
+-------+-----------+---+-----+
|sys.bp |[102,115)  |125|0.512|
|       |[115,120)  |125|0.480|
|       |[120,125)  |125|0.744|
|       |[125,141]  |125|0.976|
+-------+-----------+---+-----+
|Overall|           |500|0.678|
+-------+-----------+---+-----+
> plot(s)
> 
> plsmo(age, dz, group=sex, fun=qlogis, ylim=c(-3,3))
> plsmo(age, L, group=sex, method='raw', add=TRUE, prefix='True', trim=0)
> title('Lowess-smoothed Estimates with True Regression Functions')
> 
> dd <- datadist(age, sex, sys.bp)
> options(datadist='dd')
> # can also do: dd <- datadist(dd, newvar)
> 
> f <- lrm(dz ~ rcs(age,5)*sex, x=TRUE, y=TRUE)
> f

Logistic Regression Model

lrm(formula = dz ~ rcs(age, 5) * sex, x = TRUE, y = TRUE)


Frequencies of Responses
  0   1 
235 265 

 Obs  Max Deriv Model L.R. d.f. P  C     Dxy   Gamma Tau-a R2    Brier
 500  1e-07     50.8       9    0  0.663 0.327 0.329 0.163 0.129 0.226

                  Coef    S.E.   Wald Z P     
Intercept         -7.9925 3.9390 -2.03  0.0425
age                0.1810 0.1029  1.76  0.0786
age'              -0.3458 0.4323 -0.80  0.4238
age''              1.5460 2.2145  0.70  0.4851
age'''            -2.6579 3.3658 -0.79  0.4297
sex=male          14.0546 4.6913  3.00  0.0027
age * sex=male    -0.3335 0.1228 -2.72  0.0066
age' * sex=male    0.8253 0.5303  1.56  0.1197
age'' * sex=male  -2.8522 2.8360 -1.01  0.3146
age''' * sex=male  3.3676 4.5642  0.74  0.4606

> # x=TRUE, y=TRUE for pentrace
> 
> fpred <- Function(f)
> fpred
function (age = 50.310462, sex = "male") 
{
    -7.9925102 + 0.18097811 * age - 0.00032804104 * pmax(age - 
        33.874461, 0)^3 + 0.0014667311 * pmax(age - 44.433245, 
        0)^3 - 0.0025216207 * pmax(age - 50.310462, 0)^3 + 0.0018029552 * 
        pmax(age - 55.835572, 0)^3 - 0.00042002458 * pmax(age - 
        66.340264, 0)^3 + 14.054577 * (sex == "male") + (sex == 
        "male") * (-0.33352947 * age + 0.00078298646 * pmax(age - 
        33.874461, 0)^3 - 0.0027059801 * pmax(age - 44.433245, 
        0)^3 + 0.0031949484 * pmax(age - 50.310462, 0)^3 - 0.0016520917 * 
        pmax(age - 55.835572, 0)^3 + 0.00038013702 * pmax(age - 
        66.340264, 0)^3)
}
> fpred(age=30, sex=levels(sex))
[1] -2.56  1.49
> 
> anova(f)
                Wald Statistics          Response: dz 

 Factor                                   Chi-Square d.f. P     
 age  (Factor+Higher Order Factors)       30.2       8    0.0002
  All Interactions                        16.6       4    0.0024
  Nonlinear (Factor+Higher Order Factors) 23.3       6    0.0007
 sex  (Factor+Higher Order Factors)       25.6       5    0.0001
  All Interactions                        16.6       4    0.0024
 age * sex  (Factor+Higher Order Factors) 16.6       4    0.0024
  Nonlinear                               16.0       3    0.0011
  Nonlinear Interaction : f(A,B) vs. AB   16.0       3    0.0011
 TOTAL NONLINEAR                          23.3       6    0.0007
 TOTAL NONLINEAR + INTERACTION            23.5       7    0.0014
 TOTAL                                    37.2       9    <.0001
> 
> p <- plot(f, age=NA, sex=NA, conf.int=FALSE, ylim=c(-3,3))
> datadensity(p, age, sex)
> scat1d(age)
> 
> plsmo(age, L, group=sex, method='raw', add=TRUE, prefix='True', trim=0)
> title('Spline Fits with True Regression Functions')
> 
> f.bp <- lrm(dz.bp ~ rcs(age,5)*sex + rcs(sys.bp,5))
> 
> for(method in c('persp','image'))
+     p <- plot(f.bp, age=NA, sys.bp=NA, method=method)
> # Legend(p)   # NOTE: Needs subplot - not in R
> 
> cat('Doing 25 bootstrap repetitions to validate model\n')
Doing 25 bootstrap repetitions to validate model
> validate(f, B=25)   # in practice try to use 150
Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 
          index.orig training    test optimism index.corrected  n
Dxy           0.3272    0.336 0.30141  0.03445         0.29272 25
R2            0.1290    0.142 0.11279  0.02953         0.09944 25
Intercept     0.0000    0.000 0.00886 -0.00886         0.00886 25
Slope         1.0000    1.000 0.86917  0.13083         0.86917 25
Emax          0.0000    0.000 0.03274  0.03274         0.03274 25
D             0.0996    0.111 0.08629  0.02470         0.07490 25
U            -0.0040   -0.004 0.00186 -0.00586         0.00186 25
Q             0.1036    0.115 0.08444  0.03056         0.07305 25
B             0.2258    0.223 0.22953 -0.00637         0.23221 25
> 
> cat('Doing 25 bootstrap reps to check model calibration\n')
Doing 25 bootstrap reps to check model calibration
> cal <- calibrate(f, B=25)   # use 150 in practice
Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 
> plot(cal)

n=500   Mean absolute error=0.0185   Mean squared error=0.000536
0.9 Quantile of absolute error=0.0319
> title('Calibration of Unpenalized Model')
> 
> p <- if(.R.) pentrace(f, penalty=c(.009,.009903,.02,.2,.5,1)) else
+      pentrace(f, penalty=1, method='optimize')
> 
> f <- update(f, penalty=p$penalty)
> f

Logistic Regression Model

lrm(formula = dz ~ rcs(age, 5) * sex, x = TRUE, y = TRUE, penalty = p$penalty)


Frequencies of Responses
  0   1 
235 265 

Penalty factors:

 simple nonlinear interaction nonlinear.interaction
 0.02   0.02      0.02        0.02

Final penalty on -2 log L: 1.71

 Obs  Max Deriv Model L.R. d.f. P  C     Dxy   Gamma Tau-a R2    Brier
 500  2e-05     49.8       7.95 0  0.664 0.327 0.329 0.163 0.122 0.226

                  Coef      S.E.    Wald Z P      Penalty Scale
Intercept         -4.940899 2.47309 -2.00  0.0457 0.0000
age                0.099822 0.06297  1.59  0.1129 1.4410
age'              -0.008624 0.24123 -0.04  0.9715 1.4759
age''             -0.049210 1.25333 -0.04  0.9687 0.4255
age'''            -0.520005 2.06692 -0.25  0.8014 0.1320
sex=male           9.646487 2.96111  3.26  0.0011 0.1000
age * sex=male    -0.216102 0.07559 -2.86  0.0042 3.6160
age' * sex=male    0.328440 0.30085  1.09  0.2750 1.0759
age'' * sex=male  -0.454827 1.68334 -0.27  0.7870 0.2820
age''' * sex=male  0.077775 3.01079  0.03  0.9794 0.0841

> specs(f, long=TRUE)
lrm(formula = dz ~ rcs(age, 5) * sex, x = TRUE, y = TRUE, penalty = p$penalty)

          Assumption  Parameters                       d.f.
age       rcspline    33.874 44.433 50.31 55.836 66.34 4   
sex       category    female male                      1   
age * sex interaction nonlinear x linear - f(A)B       4   

                age  sex   
Low:effect      43.6       
Adjust to       50.3 male  
High:effect     56.6       
Low:prediction  30.4 female
High:prediction 72.4 male  
Low             15.1 female
High            89.4 male  
> edf <- effective.df(f)

Original and Effective Degrees of Freedom

                         Original Penalized
All                         9        7.95
Simple Terms                2        1.90
Interaction or Nonlinear    7        6.05
Nonlinear                   6        5.12
Interaction                 4        3.53
Nonlinear Interaction       3        2.61
> 
> p <- plot(f, age=NA, sex=NA, conf.int=FALSE, ylim=c(-3,3))
> datadensity(p, age, sex)
> scat1d(age)
> 
> plsmo(age, L, group=sex, method='raw', add=TRUE, prefix='True', trim=0)
> title('Penalized Spline Fits with True Regression Functions')
> 
> options(digits=3)
> s <- summary(f)
> s
             Effects              Response : dz 

 Factor            Low  High Diff. Effect S.E. Lower 0.95 Upper 0.95
 age               43.6 56.6 13.0   0.80  0.35  0.12       1.49     
  Odds Ratio       43.6 56.6 13.0   2.23    NA  1.12       4.42     
 sex - female:male  2.0  1.0   NA  -0.07  0.31 -0.67       0.53     
  Odds Ratio        2.0  1.0   NA   0.93    NA  0.51       1.70     

Adjusted to: age=50.3 sex=male 
> plot(s)
> 
> s <- summary(f, sex='male')
> plot(s)
> 
> fpred <- Function(f)
> fpred
function (age = 50.310462, sex = "male") 
{
    -4.9408986 + 0.099821811 * age - 8.1818002e-06 * pmax(age - 
        33.874461, 0)^3 - 4.6687755e-05 * pmax(age - 44.433245, 
        0)^3 - 0.00049334968 * pmax(age - 50.310462, 0)^3 + 0.0008754865 * 
        pmax(age - 55.835572, 0)^3 - 0.00032726726 * pmax(age - 
        66.340264, 0)^3 + 9.6464867 * (sex == "male") + (sex == 
        "male") * (-0.21610185 * age + 0.00031160482 * pmax(age - 
        33.874461, 0)^3 - 0.00043151337 * pmax(age - 44.433245, 
        0)^3 + 7.3788465e-05 * pmax(age - 50.310462, 0)^3 - 0.00017574468 * 
        pmax(age - 55.835572, 0)^3 + 0.00022186476 * pmax(age - 
        66.340264, 0)^3)
}
> fpred(age=30, sex=levels(sex))
[1] -1.95  1.22
> sascode(fpred)
-4.9408986 + 0.099821811 * age - 8.1818002e-06 * max(age - 33.874461, 0)**3
 - 4.6687755e-05 * max(age - 44.433245, 0)**3 - 0.00049334968 * max(age
 - 50.310462, 0)**3 + 0.0008754865 * max(age - 55.835572, 0)**3
 - 0.00032726726 * max(age - 66.340264, 0)**3 + 9.6464867 * (sex = "male")
 + (sex = "male") * (-0.21610185 * age + 0.00031160482 * max(age
 - 33.874461, 0)**3 - 0.00043151337 * max(age - 44.433245, 0)**3
 + 7.3788465e-05 * max(age - 50.310462, 0)**3 - 0.00017574468 * max(age
 - 55.835572, 0)**3 + 0.00022186476 * max(age - 66.340264, 0)**3);
> 
> cat('Doing 40 bootstrap reps to validate penalized model\n')
Doing 40 bootstrap reps to validate penalized model
> validate(f, B=40)
Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 
          index.orig training    test optimism index.corrected  n
Dxy           0.3273    0.339 0.29923  0.03963         0.28764 40
R2            0.1224    0.139 0.11053  0.02883         0.09356 40
Intercept     0.0000    0.000 0.00409 -0.00409         0.00409 40
Slope         1.0000    1.000 0.89841  0.10159         0.89841 40
Emax          0.0000    0.000 0.02459  0.02459         0.02459 40
D             0.0976    0.108 0.08447  0.02401         0.07356 40
U            -0.0040   -0.004 0.00200 -0.00600         0.00200 40
Q             0.1016    0.112 0.08247  0.03001         0.07156 40
B             0.2263    0.222 0.23027 -0.00798         0.23425 40
> 
> cat('Doing 40 bootstrap reps to check penalized model calibration\n')
Doing 40 bootstrap reps to check penalized model calibration
> cal <- calibrate(f, B=40)
Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 
> plot(cal)

n=500   Mean absolute error=0.0170   Mean squared error=0.000442
0.9 Quantile of absolute error=0.0287
> title('Calibration of Penalized Model')
> 
> nomogram(f.bp, fun=plogis,
+          funlabel='Prob(dz)',
+          fun.at=c(.15,.2,.3,.4,.5,.6,.7,.8,.9,.95,.975),
+          fun.side=c(1,3,1,3,1,3,1,3,1,3,1))
> options(datadist=NULL)
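> # A sketch, not part of the original example: pentrace searches the supplied
> # penalties for the one giving the best corrected likelihood; the fit then
> # maximizes the log likelihood minus a quadratic penalty in the coefficients,
> # whose contribution is the 'Final penalty on -2 log L' printed above.  Only
> # p$penalty (used above) is relied on here; other components of the pentrace
> # object vary by version:
> # p$penalty         # chosen penalty factor (0.02 above)
> # effective.df(f)   # resulting penalized d.f., as printed above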
> 
> #####################
> # Detailed Example 2 #
> #####################
> # Simulate the data.
> n <- 1000                        # define sample size
> set.seed(17)                     # so can reproduce the results
> treat <- factor(sample(c('a','b','c'), n, TRUE))
> num.diseases <- sample(0:4, n, TRUE)
> age <- rnorm(n, 50, 10)
> cholesterol <- rnorm(n, 200, 25)
> weight <- rnorm(n, 150, 20)
> sex <- factor(sample(c('female','male'), n, TRUE))
> label(age) <- 'Age'              # label is in Hmisc
> label(num.diseases) <- 'Number of Comorbid Diseases'
> label(cholesterol) <- 'Total Cholesterol'
> label(weight) <- 'Weight, lbs.'
> label(sex) <- 'Sex'
> units(cholesterol) <- 'mg/dl'    # uses units.default in Hmisc
> 
> # Specify population model for log odds that Y=1
> L <- .1*(num.diseases-2) + .045*(age-50) +
+      (log(cholesterol - 10)-5.2)*(-2*(treat=='a') +
+      3.5*(treat=='b')+2*(treat=='c'))
> # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
> y <- ifelse(runif(n) < plogis(L), 1, 0)
> cholesterol[1:3] <- NA   # 3 missings, at random
> 
> ddist <- datadist(cholesterol, treat, num.diseases,
+                   age, weight, sex)
> # Could have used ddist <- datadist(data.frame.name)
> options(datadist="ddist")             # defines data dist. to Design
> cholesterol <- impute(cholesterol)    # see impute in Hmisc library
> # impute, describe, and several other basic functions are
> # distributed as part of the Hmisc library
> 
> fit <- lrm(y ~ treat*log(cholesterol - 10) +
+            scored(num.diseases) + rcs(age))
> 
> describe(y ~ treat + scored(num.diseases) + rcs(age))
y ~ treat + scored(num.diseases) + rcs(age)

 4  Variables      1000  Observations
---------------------------------------------------------------------------
y 
      n missing  unique     Sum    Mean 
   1000       0       2     522   0.522 
---------------------------------------------------------------------------
treat 
      n missing  unique 
   1000       0       3 

a (352, 35%), b (323, 32%), c (325, 32%) 
---------------------------------------------------------------------------
scored(num.diseases) : Number of Comorbid Diseases 
      n missing  unique 
   1000       0       5 

            0   1   2   3   4
Frequency 187 197 221 231 164
%          19  20  22  23  16
---------------------------------------------------------------------------
age : Age 
      n missing  unique    Mean     .05     .10     .25     .50     .75     .90 
   1000       0    1000   49.93   33.04   37.05   43.06   49.56   57.05   63.12 
    .95 
  65.92 

lowest : 21.43 22.64 23.60 23.89 25.13, highest: 74.64 74.90 77.43 77.74 80.32 
---------------------------------------------------------------------------
age' : Age 
      n missing  unique      Mean       .05       .10       .25       .50 
   1000       0     951     8.578 4.214e-12 5.953e-02 9.286e-01 4.169e+00 
      .75       .90       .95 
1.280e+01 2.415e+01 3.005e+01 

lowest : 0.000e+00 4.436e-12 1.637e-06 8.878e-06 1.872e-05
highest: 48.54 49.09 54.44 55.10 60.57 
---------------------------------------------------------------------------
age'' : Age 
      n missing  unique   Mean    .05    .10    .25    .50    .75    .90 
   1000       0     726  1.702 0.0000 0.0000 0.0000 0.1735 2.1325 5.9507 
   .95 
8.0651 

lowest : 0.000e+00 1.347e-11 2.236e-09 1.647e-08 6.044e-07
highest: 14.71 14.91 16.84 17.07 19.04 
---------------------------------------------------------------------------
age''' : Age 
      n missing  unique      Mean       .05       .10       .25       .50 
   1000       0     501    0.4724 0.000e+00 0.000e+00 0.000e+00 3.646e-10 
      .75       .90       .95 
3.877e-01 1.802e+00 2.644e+00 

lowest : 0.000e+00 7.291e-10 9.830e-07 1.677e-06 2.905e-06
highest: 5.304 5.384 6.154 6.248 7.035 
---------------------------------------------------------------------------
> # or use describe(formula(fit)) for all variables used in fit
> # the describe function (in Hmisc) gets simple statistics on variables
> # fit <- robcov(fit)   # Would make all statistics which follow
>                        # use a robust covariance matrix
>                        # would need x=TRUE, y=TRUE in lrm
> specs(fit)   # Describe the design characteristics
lrm(formula = y ~ treat * log(cholesterol - 10) + scored(num.diseases) + 
    rcs(age))

                    Units Label                       Transformation        
treat                     treat                                             
cholesterol         mg/dl Total Cholesterol           log(cholesterol - 10) 
num.diseases              Number of Comorbid Diseases                       
age                       Age                                               
treat * cholesterol       treat * cholesterol                               

                    Assumption  Parameters                        d.f.
treat               category    a b c                             2   
cholesterol         asis                                          1   
num.diseases        scored      0 1 2 3 4                         4   
age                 rcspline    33.043 43.838 49.562 56.28 65.924 4   
treat * cholesterol interaction linear x linear - AB              2   
> a <- anova(fit)
> print(a, which='subscripts')   # print which parameters are being tested
                Wald Statistics          Response: y 

 Factor                                          Chi-Square d.f. P      Tested     
 treat (Factor+Higher Order Factors)             15.86      4    0.0032 1-2,12-13  
  All Interactions                               10.77      2    0.0046 12-13      
 cholesterol (Factor+Higher Order Factors)       12.76      3    0.0052 3,12-13    
  All Interactions                               10.77      2    0.0046 12-13      
 num.diseases                                    14.92      4    0.0049 4-7        
  Nonlinear                                       0.73      3    0.8662 5-7        
 age                                             67.91      4    <.0001 8-11       
  Nonlinear                                       1.11      3    0.7736 9-11       
 treat * cholesterol (Factor+Higher Order        10.77      2    0.0046 12-13      
  Factors)
 TOTAL NONLINEAR                                  2.03      6    0.9167 5-7,9-11   
 TOTAL NONLINEAR + INTERACTION                   13.19      8    0.1056 5-7,9-13   
 TOTAL                                           90.62      13   <.0001 1-13       

Subscripts correspond to:
 [1] treat=b                treat=c                cholesterol           
 [4] num.diseases           num.diseases=2         num.diseases=3        
 [7] num.diseases=4         age                    age'                  
[10] age''                  age'''                 treat=b * cholesterol 
[13] treat=c * cholesterol 
> plot(anova(fit))                 # Depict Wald statistics graphically
> anova(fit, treat, cholesterol)   # Test these 2 by themselves
                Wald Statistics          Response: y 

 Factor                                    Chi-Square d.f. P     
 treat (Factor+Higher Order Factors)       15.9       4    0.0032
  All Interactions                         10.8       2    0.0046
 cholesterol (Factor+Higher Order Factors) 12.8       3    0.0052
  All Interactions                         10.8       2    0.0046
 TOTAL                                     17.7       5    0.0034
> summary(fit)   # Estimate effects using default ranges
             Effects              Response : y 

 Factor             Low   High Diff. Effect S.E. Lower 0.95 Upper 0.95
 cholesterol        183.8 217  32.9  -0.24  0.15 -0.54       0.06     
  Odds Ratio        183.8 217  32.9   0.79    NA  0.58       1.06     
 age                 43.1  57  14.0   0.84  0.18  0.48       1.20     
  Odds Ratio         43.1  57  14.0   2.31    NA  1.61       3.30     
 treat - b:a          1.0   2    NA   0.40  0.16  0.08       0.73     
  Odds Ratio          1.0   2    NA   1.50    NA  1.09       2.07     
 treat - c:a          1.0   3    NA   0.20  0.16 -0.11       0.52     
  Odds Ratio          1.0   3    NA   1.23    NA  0.89       1.69     
 num.diseases - 0:2   3.0   1    NA  -0.47  0.21 -0.89      -0.06     
  Odds Ratio          3.0   1    NA   0.62    NA  0.41       0.94     
 num.diseases - 1:2   3.0   2    NA  -0.35  0.21 -0.76       0.05     
  Odds Ratio          3.0   2    NA   0.70    NA  0.47       1.06     
 num.diseases - 3:2   3.0   4    NA   0.08  0.20 -0.31       0.47     
  Odds Ratio          3.0   4    NA   1.08    NA  0.73       1.60     
 num.diseases - 4:2   3.0   5    NA   0.24  0.22 -0.20       0.67     
  Odds Ratio          3.0   5    NA   1.27    NA  0.82       1.96     

Adjusted to: treat=a cholesterol=200 
> plot(summary(fit))   # Graphical display of effects with C.L.
> summary(fit, treat="b", age=60)
             Effects              Response : y 

 Factor             Low   High Diff. Effect S.E. Lower 0.95 Upper 0.95
 cholesterol        183.8 217  32.9   0.50  0.17  0.17       0.83     
  Odds Ratio        183.8 217  32.9   1.65    NA  1.18       2.30     
 age                 43.1  57  14.0   0.84  0.18  0.48       1.20     
  Odds Ratio         43.1  57  14.0   2.31    NA  1.61       3.30     
 treat - a:b          2.0   1    NA  -0.40  0.16 -0.73      -0.08     
  Odds Ratio          2.0   1    NA   0.67    NA  0.48       0.92     
 treat - c:b          2.0   3    NA  -0.20  0.17 -0.53       0.13     
  Odds Ratio          2.0   3    NA   0.82    NA  0.59       1.14     
 num.diseases - 0:2   3.0   1    NA  -0.47  0.21 -0.89      -0.06     
  Odds Ratio          3.0   1    NA   0.62    NA  0.41       0.94     
 num.diseases - 1:2   3.0   2    NA  -0.35  0.21 -0.76       0.05     
  Odds Ratio          3.0   2    NA   0.70    NA  0.47       1.06     
 num.diseases - 3:2   3.0   4    NA   0.08  0.20 -0.31       0.47     
  Odds Ratio          3.0   4    NA   1.08    NA  0.73       1.60     
 num.diseases - 4:2   3.0   5    NA   0.24  0.22 -0.20       0.67     
  Odds Ratio          3.0   5    NA   1.27    NA  0.82       1.96     

Adjusted to: treat=b cholesterol=200 
> # Specify reference cell and adjustment value
> 
> summary(fit, age=c(50,70))   # Estimate effect of increasing age from
>                              # 50 to 70
             Effects              Response : y 

 Factor             Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
 cholesterol        184  217  32.9 -0.24  0.15 -0.54       0.06     
  Odds Ratio        184  217  32.9  0.79    NA  0.58       1.06     
 age                 50   70  20.0  1.18  0.32  0.54       1.81     
  Odds Ratio         50   70  20.0  3.25    NA  1.72       6.13     
 treat - b:a          1    2    NA  0.40  0.16  0.08       0.73     
  Odds Ratio          1    2    NA  1.50    NA  1.09       2.07     
 treat - c:a          1    3    NA  0.20  0.16 -0.11       0.52     
  Odds Ratio          1    3    NA  1.23    NA  0.89       1.69     
 num.diseases - 0:2   3    1    NA -0.47  0.21 -0.89      -0.06     
  Odds Ratio          3    1    NA  0.62    NA  0.41       0.94     
 num.diseases - 1:2   3    2    NA -0.35  0.21 -0.76       0.05     
  Odds Ratio          3    2    NA  0.70    NA  0.47       1.06     
 num.diseases - 3:2   3    4    NA  0.08  0.20 -0.31       0.47     
  Odds Ratio          3    4    NA  1.08    NA  0.73       1.60     
 num.diseases - 4:2   3    5    NA  0.24  0.22 -0.20       0.67     
  Odds Ratio          3    5    NA  1.27    NA  0.82       1.96     

Adjusted to: treat=a cholesterol=200 
> summary(fit, age=c(50,60,70))   # Increase age from 50 to 70,
>                                 # adjust to 60 when estimating
>                                 # effects of other factors
             Effects              Response : y 

 Factor             Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
 cholesterol        184  217  32.9 -0.24  0.15 -0.54       0.06     
  Odds Ratio        184  217  32.9  0.79    NA  0.58       1.06     
 age                 50   70  20.0  1.18  0.32  0.54       1.81     
  Odds Ratio         50   70  20.0  3.25    NA  1.72       6.13     
 treat - b:a          1    2    NA  0.40  0.16  0.08       0.73     
  Odds Ratio          1    2    NA  1.50    NA  1.09       2.07     
 treat - c:a          1    3    NA  0.20  0.16 -0.11       0.52     
  Odds Ratio          1    3    NA  1.23    NA  0.89       1.69     
 num.diseases - 0:2   3    1    NA -0.47  0.21 -0.89      -0.06     
  Odds Ratio          3    1    NA  0.62    NA  0.41       0.94     
 num.diseases - 1:2   3    2    NA -0.35  0.21 -0.76       0.05     
  Odds Ratio          3    2    NA  0.70    NA  0.47       1.06     
 num.diseases - 3:2   3    4    NA  0.08  0.20 -0.31       0.47     
  Odds Ratio          3    4    NA  1.08    NA  0.73       1.60     
 num.diseases - 4:2   3    5    NA  0.24  0.22 -0.20       0.67     
  Odds Ratio          3    5    NA  1.27    NA  0.82       1.96     

Adjusted to: treat=a cholesterol=200 
> # If had not defined datadist, would have to define
> # ranges for all vars.
> 
> # Estimate and test treatment (b-a) effect averaged
> # over 3 cholesterols
> contrast(fit, list(treat='b', cholesterol=c(150,200,250)),
+          list(treat='a', cholesterol=c(150,200,250)),
+          type='average')
  Contrast  S.E.   Lower Upper    Z Pr(>|z|)
1    0.291 0.166 -0.0343 0.616 1.75   0.0796
> # Remove type='average' to get 3 separate contrasts for b-a
> 
> # Plot effects.  plot(fit) plots effects of all predictors,
> # showing values used for interacting factors as subtitles
> # The ref.zero parameter is helpful for showing effects of
> # predictors on a common scale for comparison of strength
> plot(fit, ref.zero=TRUE, ylim=c(-2,2))
> 
> plot(fit, age=seq(20,80,length=100), treat=NA, conf.int=FALSE)
> # Plots relationship between age and log
> # odds, separate curve for each treat, no C.I.
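> # A sketch, not part of the original example: the range effects printed by
> # summary(fit) are differences in predicted log odds, with unspecified
> # predictors held at their datadist adjust-to values, so they can be
> # recovered with gendata (values approximate):
> # lp <- predict(fit, gendata(fit, age=c(43.1, 57)))
> # diff(lp)        # ~0.84, the printed age effect
> # exp(diff(lp))   # ~2.31, the printed odds ratio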
> plot(fit, age=NA, cholesterol=NA)
> # 3-dimensional perspective plot for age, cholesterol, and
> # log odds using default ranges for both variables
> plot(fit, num.diseases=NA, fun=function(x) 1/(1+exp(-x)),   # or fun=plogis
+      ylab="Prob", conf.int=.9)
> # Plot estimated probabilities instead of log odds
> # Again, if no datadist were defined, would have to
> # tell plot all limits
> logit <- predict(fit, expand.grid(treat="b", num.diseases=1:3,
+                                   age=c(20,40,60),
+                                   cholesterol=seq(100,300,length=10)))
> # logit <- predict(fit, gendata(fit, nobs=12))
> # Interactively specify 12 predictor combinations using UNIX
> # For UNIX or Windows, generate 9 combinations with other variables
> # set to defaults, get predicted values
> logit <- predict(fit, gendata(fit, age=c(20,40,60),
+                               treat=c('a','b','c')))
> 
> # Since age doesn't interact with anything, we can quickly and
> # interactively try various transformations of age,
> # taking the spline function of age as the gold standard.  We are
> # seeking a linearizing transformation.  Here age is linear in the
> # population so this is not very productive.  Also, if we simplify the
> # model the total degrees of freedom will be too small and
> # confidence limits too narrow
> 
> ag <- 10:80
> logit <- predict(fit, expand.grid(treat="a",
+                                   num.diseases=0, age=ag,
+                                   cholesterol=median(cholesterol)),
+                  type="terms")[,"age"]
> # Note: if age interacted with anything, this would be the age
> # "main effect" ignoring interaction terms
> # Could also use
> # logit <- plot(f, age=ag, ...)$x.xbeta[,2]
> # which allows evaluation of the shape for any level
> # of interacting factors.  When age does not interact with
> # anything, the result from
> # predict(f, ..., type="terms") would equal the result from
> # plot if all other terms were ignored
> # Could also use
> # logit <- predict(fit, gendata(fit, age=ag, cholesterol=median...))
> 
> plot(ag^.5, logit)   # try square root vs. spline transform.
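> # A sketch, not part of the original example: a quick numeric screen of
> # candidate power transformations, scoring each by how linearly it tracks
> # the spline-estimated logit computed above:
> # powers <- c(.25, .5, 1, 1.5, 2)
> # sapply(powers, function(p) cor(ag^p, logit)^2)   # R^2 vs. the spline shape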
> plot(ag^1.5, logit)   # try 1.5 power
> 
> # w <- latex(fit)   # invokes latex.lrm, creates fit.tex
> # print(w)          # display or print model on screen
> 
> # Draw a nomogram for the model fit
> nomogram(fit, fun=plogis, funlabel="Prob[Y=1]")
> 
> # Compose S function to evaluate linear predictors from fit
> g <- Function(fit)
> g(treat='b', cholesterol=260, age=50)
[1] 1.30
> # Leave num.diseases at reference value
> 
> # Use the Hmisc dataRep function to summarize sample
> # sizes for subjects as cross-classified on 2 key
> # predictors
> drep <- dataRep(~ roundN(age,10) + num.diseases)
> print(drep, long=TRUE)
Data Representativeness    n=1000

dataRep(formula = ~roundN(age, 10) + num.diseases)

Specifications for Matching

             Type             Parameters
age          round to nearest 10        
num.diseases exact numeric              

Unique Combinations of Descriptor Variables

   age num.diseases Frequency
2   30            0        11
3   40            0        44
4   50            0        68
5   60            0        53
6   70            0        11
8   20            1         1
9   30            1        10
10  40            1        57
11  50            1        75
12  60            1        43
13  70            1        11
15  20            2         1
16  30            2        21
17  40            2        47
18  50            2        85
19  60            2        43
20  70            2        22
21  80            2         2
22  20            3         2
23  30            3        17
24  40            3        48
25  50            3        88
26  60            3        63
27  70            3        13
30  30            4         8
31  40            4        47
32  50            4        61
33  60            4        40
34  70            4         7
35  80            4         1
> 
> # Some approaches to making a plot showing how
> # predicted values vary with a continuous predictor
> # on the x-axis, with two other predictors varying
> 
> fit <- lrm(y ~ log(cholesterol - 10) +
+            num.diseases + rcs(age) + rcs(weight) + sex)
> 
> combos <- gendata(fit, age=10:100,
+                   cholesterol=c(170,200,230),
+                   weight=c(150,200,250))
> # num.diseases, sex not specified -> set to mode
> # can also use expand.grid
> 
> combos$pred <- predict(fit, combos)
> library(lattice)
> xyplot(pred ~ age | cholesterol*weight, data=combos)
> xYplot(pred ~ age | cholesterol, groups=weight,
+        data=combos, type='l')   # xYplot is in Hmisc
Loading required package: grid
> xYplot(pred ~ age, groups=interaction(cholesterol,weight),
+        data=combos, type='l')
> 
> # Can also do this with plot.Design, but a single
> # plot may be busy:
> ch <- c(170, 200, 230)
> plot(fit, age=NA, cholesterol=ch, weight=150,
+      conf.int=FALSE)
> plot(fit, age=NA, cholesterol=ch, weight=200,
+      conf.int=FALSE, add=TRUE)
> plot(fit, age=NA, cholesterol=ch, weight=250,
+      conf.int=FALSE, add=TRUE)
> 
> # Here we use plot.Design to make 9 separate plots, with CLs
> d <- expand.grid(cholesterol=c(170,200,230),
+                  weight=c(150,200,250))
> for(i in 1:nrow(d)) {
+     plot(fit, age=NA, cholesterol=d$cholesterol[i],
+          weight=d$weight[i])
+     title(paste('Chol=', format(d$cholesterol[i]), ' ',
+                 'Wt=', format(d$weight[i]), sep=''))
+ }
> options(datadist=NULL)
> 
> ######################
> # Detailed Example 3 #
> ######################
> n <- 2000
> set.seed(731)
> age <- 50 + 12*rnorm(n)
> label(age) <- "Age"
> sex <- factor(sample(c('Male','Female'), n,
+               rep=TRUE, prob=c(.6, .4)))
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
> t <- -log(runif(n))/h
> label(t) <- 'Follow-up Time'
> e <- ifelse(t<=cens,1,0)
> t <- pmin(t, cens)
> units(t) <- "Year"
> age.dec <- cut2(age, g=10, levels.mean=TRUE)
> dd <- datadist(age, sex, age.dec)
> options(datadist='dd')
> Srv <- Surv(t,e)
> 
> # Fit a model that doesn't assume anything except
> # that deciles are adequate representations of age
> f <- cph(Srv ~ strat(age.dec)+strat(sex), surv=TRUE)
> # surv=TRUE speeds up computations, and confidence limits when
> # there are no covariables are still accurate.
> 
> # Plot log(-log 3-year survival probability) vs. mean age
> # within age deciles and vs. sex
> plot(f, age.dec=NA, sex=NA, time=3,
+      loglog=TRUE, val.lev=TRUE, ylim=c(-5,-1))
> 
> # Fit a model assuming proportional hazards for age and
> # absence of age x sex interaction
> f <- cph(Srv ~ rcs(age,4)+strat(sex), surv=TRUE)
> survplot(f, sex=NA, n.risk=TRUE)
> # Add ,age=60 after sex=NA to tell survplot to use age=60
> # Validate measures of model performance using the bootstrap
> # First must add data (design matrix and Srv) to fit object
> f <- update(f, x=TRUE, y=TRUE)
> validate(f, B=10, dxy=TRUE, u=5)   # use t=5 for Dxy (only)
Iteration: 1 2 3 4 5 6 7 8 9 10 
      index.orig  training      test  optimism index.corrected  n
Dxy       0.3224  0.278818  0.271017  0.007801        0.314561 10
R2        0.0426  0.043477  0.059103 -0.015626        0.058246 10
Slope     1.0000  1.000000  1.048188 -0.048188        1.048188 10
D         0.0158  0.015902  0.013839  0.002063        0.013706 10
U        -0.0004 -0.000394  0.000248 -0.000643        0.000242 10
Q         0.0162  0.016297  0.013591  0.002706        0.013463 10
> # Use B=150 in practice
> # Validate model for accuracy of predicting survival at t=1
> # Get Kaplan-Meier estimates by dividing subjects into groups
> # of size 200 (for other values of u must put time.inc=u in
> # call to cph)
> cal <- calibrate(f, B=10, u=1, m=200)   # B=150 in practice
Using Cox survival estimates at  1 Years

Averaging  1  repetitions of B= 10 
Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) :
         one interval had < 2 observations
           x   n events     KM std.err
 [1,] 0.9353 200     85 0.9324  0.2775
 [2,] 0.9525 200     62 0.9541  0.3335
 [3,] 0.9621 200     49 0.9690  0.4086
 [4,] 0.9689 200     43 0.9842  0.5774
 [5,] 0.9734 200     38 0.9743  0.4474
 [6,] 0.9770 200     30 0.9585  0.3538
 [7,] 0.9800 200     36 0.9897  0.7072
 [8,] 0.9829 200     26 0.9845  0.5776
 [9,] 0.9865 200     21 0.9742  0.4474
[10,] 0.9923 200     11 0.9895  0.7072
Iteration: 1 2 3 4 5 6 7 8 9 10 
[two identical groupkm warnings per bootstrap iteration omitted:
 one interval had < 2 observations]

Mean over  1  overall replications

      mean.optimism mean.corrected  n
 [1,]      0.024385      -0.027285 10
 [2,]      0.002499      -0.000925 10
 [3,]     -0.000549       0.007481 10
 [4,]      0.006833       0.008510 10
 [5,]      0.001451      -0.000509 10
 [6,]     -0.001259      -0.017206 10
 [7,]     -0.000195       0.009937 10
 [8,]      0.003631      -0.002014 10
 [9,]      0.002904      -0.015251 10
[10,]      0.001350      -0.004211 10
> plot(cal)
> # Check proportional hazards assumption for age terms
> z <- cox.zph(f, 'identity')
> print(z); plot(z)
           rho chisq      p
age     0.1014  5.36 0.0206
age'   -0.0862  3.78 0.0518
age''   0.0679  2.33 0.1266
GLOBAL      NA  7.74 0.0518
> 
> # Re-fit this model without storing underlying survival
> # curves for reference groups, but storing raw data with
> # the fit (could also use f <- update(f, surv=FALSE, x=TRUE, y=TRUE))
> f <- cph(Srv ~ rcs(age,4)+strat(sex), x=TRUE, y=TRUE)
> # Get accurate C.L. for any age
> # Note: for evaluating shape of regression, we would not ordinarily
> # bother to get 3-year survival probabilities - would just use X * beta
> # We do so here to use same scale as nonparametric estimates
> f

Cox Proportional Hazards Model

cph(formula = Srv ~ rcs(age, 4) + strat(sex), x = TRUE, y = TRUE)

 Obs   Events Model L.R. d.f. P  Score  Score P  R2   
 2000  401    79.8       3    0  76.3   0        0.043

Status Stratum
            No Event Event
 sex=Female      557   226
 sex=Male       1042   175

        coef se(coef)      z       p
age   0.0705   0.0236  2.984 0.00285
age' -0.0587   0.0546 -1.075 0.28220
age'' 0.1526   0.2039  0.748 0.45421

> anova(f)
                Wald Statistics          Response: Srv 

 Factor     Chi-Square d.f. P     
 age        68.56      3    <.0001
  Nonlinear  3.56      2    0.168 
 TOTAL      68.56      3    <.0001
> ages <- seq(20, 80, by=4)   # Evaluate at fewer points.  Default is 100.
>                             # For exact C.L. formula n=100 -> much memory
> plot(f, age=ages, sex=NA, time=3, loglog=TRUE, ylim=c(-5,-1))
> 
> # Fit a model assuming proportional hazards for age but
> # allowing for general interaction between age and sex
> f <- cph(Srv ~ rcs(age,4)*strat(sex), x=TRUE, y=TRUE)
> anova(f)
                Wald Statistics          Response: Srv 

 Factor                                   Chi-Square d.f. P     
 age  (Factor+Higher Order Factors)       68.52      6    <.0001
  All Interactions                         1.94      3    0.585 
  Nonlinear (Factor+Higher Order Factors)  4.92      4    0.296 
 age * sex  (Factor+Higher Order Factors)  1.94      3    0.585 
  Nonlinear                                1.90      2    0.386 
  Nonlinear Interaction : f(A,B) vs. AB    1.90      2    0.386 
 TOTAL NONLINEAR                           4.92      4    0.296 
 TOTAL NONLINEAR + INTERACTION             4.92      5    0.426 
 TOTAL                                    68.52      6    <.0001
> ages <- seq(20, 80, by=6)
> # Still fewer points - more parameters in model
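> # A sketch, not part of the original example: the Wald interaction test
> # above (chi-square 1.94, 3 d.f.) can be cross-checked with a likelihood
> # ratio test of the two nested fits, using lrtest from Design:
> # f.add <- cph(Srv ~ rcs(age,4)+strat(sex))
> # f.ia  <- cph(Srv ~ rcs(age,4)*strat(sex))
> # lrtest(f.ia, f.add)   # 3 d.f. test of the age x sex terms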
> 
> # Plot 3-year survival probability (log-log and untransformed)
> # vs. age and sex, obtaining accurate confidence limits
> plot(f, age=ages, sex=NA, time=3, loglog=TRUE, ylim=c(-5,-1))
> plot(f, age=ages, sex=NA, time=3)
> # Having x=TRUE, y=TRUE in fit also allows computation of influence stats
> r <- resid(f, "dfbetas")
> which.influence(f)
$age
[1] "177"  "350"  "734"  "918"  "1011" "1906"

$"age * sex"
[1] "352"  "710"  "918"  "1113" "1387"

> # Use survest to estimate 3-year survival probability and
> # confidence limits for selected subjects
> survest(f, expand.grid(age=c(20,40,60), sex=c('Female','Male')),
+         times=c(2,4,6), conf.int=.95)
$time
[1] 2 4 6

$surv
      2     4     6
1 0.975 0.954 0.925
2 0.942 0.895 0.832
3 0.879 0.786 0.672
4 0.996 0.993 0.990
5 0.968 0.943 0.919
6 0.937 0.890 0.846

$std.err
       2      4      6
1 0.5006 0.4891 0.4737
2 0.1119 0.1060 0.0977
3 0.0661 0.0587 0.0504
4 0.7669 0.7641 0.7608
5 0.1195 0.1160 0.1128
6 0.0887 0.0838 0.0792

$lower
      2     4     6
1 0.933 0.879 0.808
2 0.928 0.869 0.793
3 0.862 0.757 0.631
4 0.983 0.969 0.956
5 0.959 0.928 0.899
6 0.925 0.869 0.818

$upper
      2     4     6
1 0.991 0.983 0.972
2 0.954 0.916 0.864
3 0.895 0.813 0.709
4 0.999 0.998 0.998
5 0.975 0.955 0.936
6 0.947 0.908 0.870

$strata
[1] 1 1 1 2 2 2
attr(,"levels")
[1] "sex=Female" "sex=Male"  

> 
> # Create an S function srv that computes fitted
> # survival probabilities on demand, for non-interaction model
> f <- cph(Srv ~ rcs(age,4)+strat(sex), surv=TRUE)
> srv <- Survival(f)
> # Define functions to compute 3-year estimates as a function of
> # the linear predictors (X*Beta)
> surv.f <- function(lp) srv(3, lp, stratum="sex=Female")
> surv.m <- function(lp) srv(3, lp, stratum="sex=Male")
> # Create a function that computes quantiles of survival time
> # on demand
> quant <- Quantile(f)
> # Define functions to compute median survival time
> med.f <- function(lp) quant(.5, lp, stratum="sex=Female")
> med.m <- function(lp) quant(.5, lp, stratum="sex=Male")
> # Draw a nomogram to compute several types of predicted values
> nomogram(f, fun=list(surv.m, surv.f, med.m, med.f),
+          funlabel=c("S(3 | Male)","S(3 | Female)",
+                     "Median (Male)","Median (Female)"),
+          fun.at=list(c(.8,.9,.95,.98,.99), c(.1,.3,.5,.7,.8,.9,.95,.98),
+                      c(8,12), c(1,2,4,8,12)))
Warning in approx(fu[s], xseq[s], fat) : collapsing to unique 'x' values
Warning in approx(fu[s], xseq[s], fat) : collapsing to unique 'x' values
> options(datadist=NULL)
> 
> #########################################################
> # Simple examples using small datasets for checking     #
> # calculations across different systems in which random #
> # number generators cannot be synchronized.             #
> #########################################################
> 
> x1 <- 1:20
> x2 <- abs(x1-10)
> x3 <- factor(rep(0:2, length.out=20))
> y <- c(rep(0:1,8), 1,1,1,1)
> dd <- datadist(x1, x2, x3)
> options(datadist='dd')
> f <- lrm(y ~ rcs(x1,3) + x2 + x3)
> f

Logistic Regression Model

lrm(formula = y ~ rcs(x1, 3) + x2 + x3)


Frequencies of Responses
 0  1 
 8 12 

 Obs  Max Deriv Model L.R. d.f. P      C     Dxy   Gamma Tau-a R2    Brier
 20   4e-06     5.14       5    0.399  0.74  0.479 0.479 0.242 0.306 0.192

          Coef     S.E.   Wald Z P    
Intercept  12.5719 14.387  0.87  0.382
x1         -1.3999  1.537 -0.91  0.362
x1'         2.2669  2.267  1.00  0.317
x2         -1.2581  1.412 -0.89  0.373
x3=1        0.8049  1.276  0.63  0.528
x3=2       -0.1263  1.283 -0.10  0.922

> specs(f, TRUE)
lrm(formula = y ~ rcs(x1, 3) + x2 + x3)

   Assumption Parameters d.f.
x1 rcspline   5 10.5 16  2   
x2 asis                  1   
x3 category   0 1 2      2   

                   x1    x2 x3
Low:effect       5.75  2.75   
Adjust to       10.50  5.00  0
High:effect     15.25  7.25   
Low:prediction   1.95  0.95  0
High:prediction 19.05  9.05  2
Low              1.00  0.00  0
High            20.00 10.00  2
> anova(f)
                Wald Statistics          Response: y 

 Factor     Chi-Square d.f. P    
 x1         2.32       2    0.314
  Nonlinear 1.00       1    0.317
 x2         0.79       1    0.373
 x3         0.62       2    0.735
 TOTAL      2.74       5    0.740
> anova(f, x1, x2)
                Wald Statistics          Response: y 

 Factor     Chi-Square d.f. P    
 x1         2.32       2    0.314
  Nonlinear 1.00       1    0.317
 x2         0.79       1    0.373
 TOTAL      2.37       3    0.499
> plot(anova(f))
> s <- summary(f)
> s
             Effects              Response : y 

 Factor      Low  High  Diff. Effect S.E. Lower 0.95 Upper 0.95
 x1          5.75 15.25 9.5    2.85  1.93  -0.94       6.64    
  Odds Ratio 5.75 15.25 9.5   17.34    NA   0.39     767.23    
 x2          2.75  7.25 4.5   -5.66  6.35 -18.11       6.79    
  Odds Ratio 2.75  7.25 4.5    0.00    NA   0.00     887.93    
 x3 - 1:0    1.00  2.00  NA    0.80  1.28  -1.70       3.31    
  Odds Ratio 1.00  2.00  NA    2.24    NA   0.18      27.29    
 x3 - 2:0    1.00  3.00  NA   -0.13  1.28  -2.64       2.39    
  Odds Ratio 1.00  3.00  NA    0.88    NA   0.07      10.89    
> plot(s, log=TRUE)
> par(mfrow=c(2,2))
> plot(f)
> par(mfrow=c(1,1))
> nomogram(f)
> g <- Function(f)
> g(11,7,'1')
[1] -6.79
> contrast(f, list(x1=11,x2=7,x3='1'), list(x1=10,x2=6,x3='2'))
  Contrast S.E. Lower Upper     Z Pr(>|z|)
1  -0.0266 1.80 -3.55  3.50 -0.01    0.988
> fastbw(f)

 Deleted Chi-Sq d.f. P      Residual d.f. P      AIC  
 x3      0.62   2    0.735  0.62     2    0.735  -3.38
 x1      2.07   2    0.355  2.69     4    0.611  -5.31
 x2      0.05   1    0.818  2.74     5    0.740  -7.26

Approximate Estimates after Deleting Factors

       Coef   S.E. Wald Z      P
[1,] 0.1387 0.5163 0.2685 0.7883

Factors in Final Model

None
> gendata(f, x1=1:5)
  x1 x2 x3
1  1  5  0
2  2  5  0
3  3  5  0
4  4  5  0
5  5  5  0
> # w <- latex(f)
> 
> f <- update(f, x=TRUE, y=TRUE)
> which.influence(f)
$Intercept
[1] "6"  "9"  "10" "11" "12" "13" "14" "15"

$x1
[1] "6"  "9"  "10" "11" "12" "13" "14" "15"

$x2
[1] "4"  "6"  "9"  "10" "11" "12" "13" "14" "15"

$x3
 [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"

> residuals(f, 'gof')
Sum of squared errors     Expected value|H0                    SD 
                3.838                 3.751                 0.142 
                    Z                     P 
                0.614                 0.539 
> robcov(f)$var
          Intercept      x1    x1'     x2   x3=1   x3=2
Intercept    120.23 -12.790 18.536 -11.869  3.084 -1.239
x1           -12.79   1.373 -1.993   1.262 -0.372  0.081
x1'           18.54  -1.993  2.912  -1.840  0.561 -0.114
x2           -11.87   1.262 -1.840   1.193 -0.388  0.055
x3=1           3.08  -0.372  0.561  -0.388  1.401  0.529
x3=2          -1.24   0.081 -0.114   0.055  0.529  1.612
> validate(f, B=10)
Iteration: 1 2 3 4 5 6 7 8 9 10 

Divergence or singularity in 1 samples
          index.orig training   test optimism index.corrected n
Dxy            0.479    0.670  0.394   0.2766          0.2026 9
R2             0.306    0.491  0.158   0.3335         -0.0271 9
Intercept      0.000    0.000  0.374  -0.3737          0.3737 9
Slope          1.000    1.000  0.411   0.5888          0.4112 9
Emax           0.000    0.000  0.271   0.2709          0.2709 9
D              0.207    0.449  0.076   0.3727         -0.1656 9
U             -0.100   -0.100  5.324  -5.4244          5.3244 9
Q              0.307    0.549 -5.248   5.7970         -5.4900 9
B              0.192    0.145  0.238  -0.0926          0.2845 9
> cal <- calibrate(f, B=10)
Iteration: 1 2 3 4 5 6 7 8 9 10 
> plot(cal)

n=20   Mean absolute error=0.103   Mean squared error=0.0157
0.9 Quantile of absolute error=0.218
> 
> f <- ols(y ~ rcs(x1,3) + x2 + x3, x=TRUE, y=TRUE)
> anova(f)
                Analysis of Variance          Response: y 

 Factor     d.f. Partial SS MS     F    P    
 x1          2   0.6080     0.3040 1.10 0.359
  Nonlinear  1   0.1139     0.1139 0.41 0.530
 x2          1   0.0845     0.0845 0.31 0.588
 x3          2   0.0980     0.0490 0.18 0.839
 REGRESSION  5   0.9454     0.1891 0.69 0.641
 ERROR      14   3.8546     0.2753          
> anova(f, x1, x2)
                Analysis of Variance          Response: y 

 Factor     d.f. Partial SS MS     F    P    
 x1          2   0.6080     0.3040 1.10 0.359
  Nonlinear  1   0.1139     0.1139 0.41 0.530
 x2          1   0.0845     0.0845 0.31 0.588
 REGRESSION  3   0.7883     0.2628 0.95 0.441
 ERROR      14   3.8546     0.2753          
> plot(anova(f))
> s <- summary(f)
> s
             Effects              Response : y 

 Factor    Low  High  Diff. Effect S.E. Lower 0.95 Upper 0.95
 x1        5.75 15.25 9.5    0.40  0.29 -0.16       0.96     
 x2        2.75  7.25 4.5   -0.74  1.33 -3.35       1.88     
 x3 - 1:0  1.00  2.00  NA    0.12  0.28 -0.43       0.68     
 x3 - 2:0  1.00  3.00  NA   -0.04  0.30 -0.62       0.54     
> plot(s, log=TRUE)
> par(mfrow=c(2,2))
> plot(f)
> par(mfrow=c(1,1))
> nomogram(f)
> g <- Function(f)
> g(11,7,'1')
[1] -0.341
> contrast(f, list(x1=11,x2=7,x3='1'), list(x1=10,x2=6,x3='2'))
  Contrast  S.E.  Lower Upper   t Pr(>|t|)
1   0.0431 0.416 -0.849 0.935 0.1    0.919

Error d.f.= 14 
> fastbw(f)

 Deleted Chi-Sq d.f. P      Residual d.f. P      AIC   R2   
 x3      0.36   2    0.837  0.36     2    0.837  -3.64 0.177
 x1      2.31   2    0.315  2.66     4    0.615  -5.34 0.044
 x2      0.77   1    0.381  3.43     5    0.633  -6.57 0.000

Approximate Estimates after Deleting Factors

     Coef   S.E. Wald Z         P
[1,]  0.6 0.1173  5.114 3.158e-07

Factors in Final Model

None
> gendata(f, x1=1:5)
  x1 x2 x3
1  1  5  0
2  2  5  0
3  3  5  0
4  4  5  0
5  5  5  0
> # w <- latex(f)
> 
> f <- update(f, x=TRUE, y=TRUE)
> which.influence(f)
$Intercept
[1]  9 10 11 12 13 14 15

$x1
[1]  9 10 12 13 14 15 20

$x2
[1]  9 10 11 13 14 15

$x3
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16

> residuals(f, 'dfbetas')
         [,1]    [,2]     [,3]     [,4]     [,5]     [,6]
 [1,] -0.0936  0.0919 -0.05658 -0.01012  0.32639  0.27234
 [2,]  0.0653 -0.0882  0.05533 -0.00718  0.37994  0.05626
 [3,] -0.0578  0.0742 -0.05248  0.01802 -0.02299 -0.34424
 [4,] -0.0509  0.0782 -0.10175  0.12113 -0.36849 -0.35476
 [5,]  0.0670 -0.0672  0.08629 -0.08764 -0.34359  0.00142
 [6,] -0.0642  0.0668 -0.08449  0.08093  0.01397  0.45502
 [7,]  0.0777 -0.1116  0.11752 -0.09713  0.24649  0.26189
 [8,]  0.0329 -0.0145  0.00881 -0.04448  0.30022 -0.01865
 [9,] -0.3838  0.3611 -0.36027  0.40431 -0.03441 -0.40693
[10,]  1.2161 -1.1185  1.12853 -1.22468 -0.42513 -0.39218
[11,] -0.2090  0.1707 -0.17653  0.25217 -0.46282  0.02633
[12,] -0.2109  0.2320 -0.22419  0.18175 -0.01904  0.35785
[13,]  0.4742 -0.5273  0.51571 -0.47613  0.40619  0.44833
[14,] -0.3388  0.3478 -0.34018  0.32129  0.22391 -0.06502
[15,]  0.3526 -0.3519  0.33667 -0.33594  0.04203 -0.36204
[16,] -0.1551  0.1730 -0.16172  0.16361 -0.23284 -0.22553
[17,] -0.0156  0.0139 -0.01075  0.01373  0.05527 -0.00131
[18,]  0.0396 -0.0477  0.05557 -0.04031 -0.00693  0.15061
[19,]  0.0133 -0.0128  0.01473 -0.01136 -0.02656 -0.02050
[20,] -0.1792  0.1954 -0.20883  0.17801 -0.17176 -0.04267
> robcov(f)$var
          Intercept       x1      x1'       x2    x3=1     x3=2
Intercept    5.1283 -0.53514  0.75372 -0.50713  0.0810  0.02854
x1          -0.5351  0.05642 -0.07939  0.05284 -0.0102 -0.00534
x1'          0.7537 -0.07939  0.11220 -0.07502  0.0143  0.00774
x2          -0.5071  0.05284 -0.07502  0.05121 -0.0107 -0.00542
x3=1         0.0810 -0.01022  0.01429 -0.01072  0.0506  0.02366
x3=2         0.0285 -0.00534  0.00774 -0.00542  0.0237  0.06305
> validate(f, B=10)
Iteration: 1 2 3 4 5 6 7 8 9 10 
          index.orig training   test optimism index.corrected  n
R-square       0.197    0.415 -0.276    0.691          -0.494 10
MSE            0.193    0.137  0.306   -0.170           0.362 10
Intercept      0.000    0.000  0.269   -0.269           0.269 10
Slope          1.000    1.000  0.529    0.471           0.529 10
> cal <- calibrate(f, B=10)
Iteration: 1 2 3 4 5 6 7 8 9 10 
> plot(cal)

n=20   Mean absolute error=0.056   Mean squared error=0.00416
0.9 Quantile of absolute error=0.110
> 
> S <- Surv(c(1,4,2,3,5,8,6,7,20,18,19,9,12,10,11,13,16,14,15,17))
> survplot(survfit(S ~ x3))
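> # A sketch, not part of the original example: the validated Slope for the
> # ols fit above is a shrinkage factor, so a shrunken linear predictor in
> # the style of van Houwelingen and le Cessie can be formed from it:
> # v <- validate(f, B=10)
> # g.hat <- v['Slope', 'index.corrected']
> # lp <- predict(f)
> # lp.shrunk <- g.hat*(lp - mean(lp)) + mean(lp)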
> f <- psm(S ~ rcs(x1,3)+x2+x3, x=TRUE, y=TRUE)
> f

Parametric Survival Model: Weibull Distribution

psm(formula = S ~ rcs(x1, 3) + x2 + x3, x = TRUE, y = TRUE)

 Obs  Events Model L.R. d.f. P  R2  
 20   20     39.7       5    0  0.86

             Value Std. Error     z        p
(Intercept)  5.595      1.406  3.98 6.95e-05
x1          -0.315      0.144 -2.18 2.90e-02
x1'          0.595      0.204  2.91 3.57e-03
x2          -0.509      0.135 -3.76 1.70e-04
x3=1         0.248      0.133  1.86 6.22e-02
x3=2         0.226      0.134  1.69 9.07e-02
Log(scale)  -1.548      0.184 -8.42 3.77e-17

Scale= 0.213 

> # NOTE: LR chi-sq of 39.67 disagrees with that from old survreg
> # and old psm (77.65); suspect they were also testing sigma=1
> 
> for(w in c('survival','hazard'))
+     print(survest(f, data.frame(x1=7, x2=3, x3='1'),
+                   times=c(5,7), conf.int=.95, what=w))
N: 20  Events: 20 

  Time survival Lower Upper    SE
1    5    0.925 0.722 0.981 0.729
2    7    0.684 0.323 0.880 0.556
Warning in survest.psm(f, data.frame(x1 = 7, x2 = 3, x3 = "1"), times = c(5,  :
         conf.int ignored for what="hazard"
N: 20  Events: 20 

  Time hazard
1    5 0.0734
2    7 0.2550
> # S-Plus 2000 using old survival library:
> # S(t): .925 .684   SE: 0.729 0.556   Hazard: 0.0734 0.255
> 
> plot(f, x1=NA, time=5)
> f$var
            (Intercept)        x1       x1'       x2     x3=1      x3=2
(Intercept)     1.97814 -0.201945  0.285110 -0.18836 -0.05242 -0.057829
x1             -0.20194  0.020788 -0.029378  0.01923  0.00485  0.004839
x1'             0.28511 -0.029378  0.041700 -0.02735 -0.00611 -0.006385
x2             -0.18836  0.019231 -0.027353  0.01831  0.00334  0.004776
x3=1           -0.05242  0.004851 -0.006114  0.00334  0.01762  0.008531
x3=2           -0.05783  0.004839 -0.006385  0.00478  0.00853  0.017852
Log(scale)     -0.00138 -0.000078  0.000575 -0.00067  0.00161 -0.000767
            Log(scale)
(Intercept)  -0.001379
x1           -0.000078
x1'           0.000575
x2           -0.000670
x3=1          0.001613
x3=2         -0.000767
Log(scale)    0.033807
> set.seed(3)
> # robcov(f)$var when score residuals implemented
> bootcov(f, B=30)$var
Warning in survreg.fit2(x, y, dist = dist, parms = parms, fixed = fixed, :
         Ran out of iterations and did not converge
Warning in bootcov(f, B = 30) :
         fit failure in 1 resamples.  Might try increasing maxit
            (Intercept)       x1      x1'        x2     x3=1     x3=2 log scale
(Intercept)     4.44053 -0.45064  0.64240 -0.430944 -0.07468 -0.11140  0.009966
x1             -0.45064  0.04620 -0.06563  0.043312  0.00676  0.01074 -0.002065
x1'             0.64240 -0.06563  0.09402 -0.062694 -0.01098 -0.01478  0.002747
x2             -0.43094  0.04331 -0.06269  0.043460  0.00623  0.00768 -0.000885
x3=1           -0.07468  0.00676 -0.01098  0.006231  0.02989  0.01552  0.010113
x3=2           -0.11140  0.01074 -0.01478  0.007676  0.01552  0.03733  0.017960
log scale       0.00997 -0.00207  0.00275 -0.000885  0.01011  0.01796  0.081652
> validate(f, B=10)
Iteration: 1 2 3 4 5 6 7 8 9 10 
          index.orig training   test  optimism index.corrected  n
R2            0.8640   0.8922 0.8086  0.083569          0.7805 10
Intercept     0.0000   0.0000 0.0237 -0.023697          0.0237 10
Slope         1.0000   1.0000 1.0010 -0.000957          1.0010 10
D             0.3069   0.3618 0.2587  0.103170          0.2038 10
U            -0.0159  -0.0163 0.0409 -0.057284          0.0414 10
Q             0.3228   0.3782 0.2177  0.160454          0.1624 10
> cal <- calibrate(f, u=5, B=10, m=10)

Averaging  1  repetitions of B= 10 
Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) :
         one interval had < 2 observations
          x  n events  KM std.err
[1,] 0.5069 10     10 0.5  0.4562
[2,] 0.9945 10     10 1.0  0.0000
Iteration: 1 2 3 4 5 6 7 8 9 10 
[two identical groupkm warnings per bootstrap iteration omitted:
 one interval had < 2 observations]
     index.orig training     test  optimism index.corrected  n
[1,]   -0.00691 -0.01774 -0.01821  4.72e-04        -0.00739  9
[2,]    0.00548  0.00304  0.00307 -3.56e-05         0.00552 10

Mean over  1  overall replications

     mean.optimism mean.corrected  n
[1,]      4.72e-04       -0.00739  9
[2,]     -3.56e-05        0.00552 10
> plot(cal)
> r <- resid(f)
> survplot(r)
> 
> f <- cph(S ~ rcs(x1,3)+x2+x3, x=TRUE, y=TRUE, surv=TRUE, time.inc=5)
> f

Cox Proportional Hazards Model

cph(formula = S ~ rcs(x1, 3) + x2 + x3, x = TRUE, y = TRUE, surv = TRUE, 
    time.inc = 5)

 Obs  Events Model L.R. d.f. P  Score  Score P  R2   
 20   20     35.4       5    0  37.8   0        0.842

      coef se(coef)      z       p
x1    2.41    1.174  2.06 0.03972
x1'  -4.13    1.707 -2.42 0.01548
x2    3.35    1.209  2.77 0.00562
x3=1 -1.07    0.687 -1.56 0.11802
x3=2 -1.10    0.767 -1.44 0.15108

> plot(f, x1=NA, time=5)
> robcov(f)$var
          x1     x1'     x2    x3=1   x3=2
x1    1.3954 -2.0182  1.439 -0.0672  0.434
x1'  -2.0182  2.9262 -2.092  0.0986 -0.611
x2    1.4391 -2.0919  1.505 -0.0901  0.409
x3=1 -0.0672  0.0986 -0.090  0.3518  0.123
x3=2  0.4339 -0.6106  0.409  0.1235  0.621
> bootcov(f, B=10)

Cox Proportional Hazards Model

cph(formula = S ~ rcs(x1, 3) + x2 + x3, x = TRUE, y = TRUE, surv = TRUE, 
    time.inc = 5)

 Obs  Events Model L.R. d.f. P  Score  Score P  R2   
 20   20     35.4       5    0  37.8   0        0.842

      coef se(coef)      z     p
x1    2.41     2.94  0.821 0.412
x1'  -4.13     6.11 -0.676 0.499
x2    3.35     5.30  0.632 0.528
x3=1 -1.07     2.45 -0.438 0.661
x3=2 -1.10     7.58 -0.145 0.885

> validate(f, B=10)
Iteration: 1 2 3 4 5 6 7 8 9 10 
      index.orig training    test optimism index.corrected  n
R2        0.8417   0.8629  0.7572    0.106          0.7361 10
Slope     1.0000   1.0000  0.5942    0.406          0.5942 10
D         0.4061   0.4590  0.3213    0.138          0.2684 10
U        -0.0236  -0.0236  0.3382   -0.362          0.3382 10
Q         0.4297   0.4826 -0.0169    0.499         -0.0698 10
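> # A sketch, not part of the original example: as in Detailed Example 3,
> # adding dxy=TRUE (with u set to the evaluation time) makes validate also
> # bootstrap Somers' Dxy for this Cox fit:
> # validate(f, B=10, dxy=TRUE, u=5)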
> validate(f, B=10)
Iteration: 1 2 3 4 5 6 7 8 9 10
      index.orig training    test optimism index.corrected  n
R2        0.8417   0.8629  0.7572    0.106          0.7361 10
Slope     1.0000   1.0000  0.5942    0.406          0.5942 10
D         0.4061   0.4590  0.3213    0.138          0.2684 10
U        -0.0236  -0.0236  0.3382   -0.362          0.3382 10
Q         0.4297   0.4826 -0.0169    0.499         -0.0698 10
> cal <- calibrate(f, u=5, B=10, m=10)
Using Cox survival estimates at 5 Days
Averaging 1 repetitions of B= 10
Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) :
  one interval had < 2 observations
          x  n events  KM std.err
[1,] 0.5051 10     10 0.5  0.4562
[2,] 0.9941 10     10 1.0  0.0000
Iteration: 1 2 3 4 5 6 7 8 9 10
[the groupkm warning repeated twice at each iteration; iteration 3 also warned:
 Ran out of iterations and did not converge]

Mean over 1 overall replications

     mean.optimism mean.corrected  n
[1,]      -0.00411      -0.000971  9
[2,]       0.00542       0.000431 10
> survplot(f, x1=c(2,19))
> options(datadist=NULL)
>
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "anova.Design"
>
> ### * anova.Design
>
> flush(stderr()); flush(stdout())
>
> ### Name: anova.Design
> ### Title: Analysis of Variance (Wald and F Statistics)
> ### Aliases: anova.Design print.anova.Design text.anova.Design
> ###   plot.anova.Design latex.anova.Design
> ### Keywords: models regression htest aplot
>
> ### ** Examples
>
> n <- 1000    # define sample size
> set.seed(17) # so can reproduce the results
> treat <- factor(sample(c('a','b','c'), n,TRUE))
> num.diseases <- sample(0:4, n,TRUE)
> age <- rnorm(n, 50, 10)
> cholesterol <- rnorm(n, 200, 25)
> weight <- rnorm(n, 150, 20)
> sex <- factor(sample(c('female','male'), n,TRUE))
> label(age) <- 'Age'   # label is in Hmisc
> label(num.diseases) <- 'Number of Comorbid Diseases'
> label(cholesterol) <- 'Total Cholesterol'
> label(weight) <- 'Weight, lbs.'
> label(sex) <- 'Sex'
> units(cholesterol) <- 'mg/dl'   # uses units.default in Hmisc
>
> # Specify population model for log odds that Y=1
> L <- .1*(num.diseases-2) + .045*(age-50) +
+      (log(cholesterol - 10)-5.2)*(-2*(treat=='a') +
+      3.5*(treat=='b')+2*(treat=='c'))
> # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
> y <- ifelse(runif(n) < plogis(L), 1, 0)
>
> fit <- lrm(y ~ treat + scored(num.diseases) + rcs(age) +
+            log(cholesterol+10) + treat:log(cholesterol+10))
> anova(fit)   # Test all factors
                Wald Statistics          Response: y

Factor                                            Chi-Square d.f. P
treat (Factor+Higher Order Factors)                15.88      4   0.0032
 All Interactions                                  10.79      2   0.0045
num.diseases                                       14.91      4   0.0049
 Nonlinear                                          0.73      3   0.8660
age                                                67.97      4   <.0001
 Nonlinear                                          1.11      3   0.7738
cholesterol (Factor+Higher Order Factors)          12.99      3   0.0047
 All Interactions                                  10.79      2   0.0045
treat * cholesterol (Factor+Higher Order Factors)  10.79      2   0.0045
TOTAL NONLINEAR                                     2.03      6   0.9168
TOTAL NONLINEAR + INTERACTION                      13.20      8   0.1051
TOTAL                                              90.80     13   <.0001
> anova(fit, treat, cholesterol)   # Test these 2 by themselves
                Wald Statistics          Response: y

Factor                                    Chi-Square d.f. P
treat (Factor+Higher Order Factors)        15.9      4    0.0032
 All Interactions                          10.8      2    0.0045
cholesterol (Factor+Higher Order Factors)  13.0      3    0.0047
 All Interactions                          10.8      2    0.0045
TOTAL                                      17.9      5    0.0031
> # to get their pooled effects
> g <- lrm(y ~ treat*rcs(age))
> dd <- datadist(treat, num.diseases, age, cholesterol)
> options(datadist='dd')
> plot(g, age=NA, treat="b")
> s <- anova(g)
> print(s)
                Wald Statistics          Response: y

Factor                                     Chi-Square d.f. P
treat (Factor+Higher Order Factors)         5.62      10   0.846
 All Interactions                           1.30       8   0.996
age (Factor+Higher Order Factors)          65.99      12   <.0001
 All Interactions                           1.30       8   0.996
 Nonlinear (Factor+Higher Order Factors)    2.23       9   0.987
treat * age (Factor+Higher Order Factors)   1.30       8   0.996
 Nonlinear                                  0.99       6   0.986
 Nonlinear Interaction : f(A,B) vs. AB      0.99       6   0.986
TOTAL NONLINEAR                             2.23       9   0.987
TOTAL NONLINEAR + INTERACTION               2.57      11   0.995
TOTAL                                      69.06      14   <.0001
> #p <- locator(1)    # click mouse at upper left corner of table
> p <- list(x=32,y=2.1)
> text(s, at=p)       # add anova table to regression plot
> plot(s)             # new plot - dot chart of chisq-d.f.
> # latex(s)          # nice printout - creates anova.g.tex
> options(datadist=NULL)
>
> # Simulate data from a given model, and display exactly which
> # hypotheses are being tested
>
> set.seed(123)
> age <- rnorm(500, 50, 15)
> treat <- factor(sample(c('a','b','c'), 500,TRUE))
> bp <- rnorm(500, 120, 10)
> y <- ifelse(treat=='a', (age-50)*.05, abs(age-50)*.08) + 3*(treat=='c') +
+      pmax(bp, 100)*.09 + rnorm(500)
> f <- ols(y ~ treat*lsp(age,50) + rcs(bp,4))
> print(names(coef(f)), quote=FALSE)
 [1] Intercept      treat=b        treat=c        age            age'
 [6] bp             bp'            bp''           treat=b * age  treat=c * age
[11] treat=b * age' treat=c * age'
> specs(f)
ols(formula = y ~ treat * lsp(age, 50) + rcs(bp, 4))

            Assumption  Parameters                  d.f.
treat       category    a b c                       2
age         lspline     50                          2
bp          rcspline    103.28 116.6 123.63 137.53  3
treat * age interaction linear x nonlinear - Ag(B)  4
> anova(f)
                Analysis of Variance          Response: y

Factor                                    d.f. Partial SS      MS      F P
treat (Factor+Higher Order Factors)         6    1421.70 236.950 241.73 <.0001
 All Interactions                           4      61.55  15.387  15.70 <.0001
age (Factor+Higher Order Factors)           6     222.01  37.001  37.75 <.0001
 All Interactions                           4      61.55  15.387  15.70 <.0001
 Nonlinear (Factor+Higher Order Factors)    3     156.88  52.294  53.35 <.0001
bp                                          3     344.33 114.778 117.09 <.0001
 Nonlinear                                  2       1.41   0.706   0.72 0.487
treat * age (Factor+Higher Order Factors)   4      61.55  15.387  15.70 <.0001
 Nonlinear                                  2      22.87  11.436  11.67 <.0001
 Nonlinear Interaction : f(A,B) vs. AB      2      22.87  11.436  11.67 <.0001
TOTAL NONLINEAR                             5     157.75  31.550  32.19 <.0001
TOTAL NONLINEAR + INTERACTION               7     194.53  27.790  28.35 <.0001
REGRESSION                                 11    1861.11 169.192 172.61 <.0001
ERROR                                     488     478.35   0.980
> an <- anova(f)
> options(digits=3)
> print(an, 'subscripts')
[the same Analysis of Variance table, with a Tested column giving the
 parameter subscripts involved in each test:]
Factor                                     Tested
treat (Factor+Higher Order Factors)        1-2,8-11
 All Interactions                          8-11
age (Factor+Higher Order Factors)          3-4,8-11
 All Interactions                          8-11
 Nonlinear (Factor+Higher Order Factors)   4,10-11
bp                                         5-7
 Nonlinear                                 6-7
treat * age (Factor+Higher Order Factors)  8-11
 Nonlinear                                 10-11
 Nonlinear Interaction : f(A,B) vs. AB     10-11
TOTAL NONLINEAR                            4,6-7,10-11
TOTAL NONLINEAR + INTERACTION              4,6-11
REGRESSION                                 1-11

Subscripts correspond to:
 [1] treat=b        treat=c        age            age'           bp
 [6] bp'            bp''           treat=b * age  treat=c * age  treat=b * age'
[11] treat=c * age'
> print(an, 'dots')
[the same Analysis of Variance table, with the Tested column drawn as dots
 marking the parameters involved in each test:]
Factor                                     Tested
treat (Factor+Higher Order Factors)        .. ....
 All Interactions                          ....
age (Factor+Higher Order Factors)          .. ....
 All Interactions                          ....
 Nonlinear (Factor+Higher Order Factors)   . ..
bp                                         ...
 Nonlinear                                 ..
treat * age (Factor+Higher Order Factors)  ....
 Nonlinear                                 ..
 Nonlinear Interaction : f(A,B) vs. AB     ..
TOTAL NONLINEAR                            . .. ..
TOTAL NONLINEAR + INTERACTION              . ......
REGRESSION                                 ...........

Subscripts correspond to:
 [1] treat=b        treat=c        age            age'           bp
 [6] bp'            bp''           treat=b * age  treat=c * age  treat=b * age'
[11] treat=c * age'
>
> an <- anova(f, test='Chisq', ss=FALSE)
> plot(0:1)                       # make some plot
> text(an, at=list(x=1.5,y=.6))   # add anova table to plot
> plot(an)                        # new plot - dot chart of chisq-d.f.
> # latex(an)                     # nice printout - creates anova.f.tex
>
> # Suppose that a researcher wants to make a big deal about a variable
> # because it has the highest adjusted chi-square.  We use the
> # bootstrap to derive 0.95 confidence intervals for the ranks of all
> # the effects in the model.  We use the plot method for anova, with
> # pl=FALSE to suppress actual plotting of chi-square - d.f. for each
> # bootstrap repetition.  We rank the negative of the adjusted
> # chi-squares so that a rank of 1 is assigned to the highest.
> # It is important to tell plot.anova.Design not to sort the results,
> # or every bootstrap replication would have ranks of 1,2,3 for the stats.
>
> mydata <- data.frame(x1=runif(200), x2=runif(200),
+            sex=factor(sample(c('female','male'),200,TRUE)))
> set.seed(9)   # so can reproduce example
> mydata$y <- ifelse(runif(200)<=plogis(mydata$x1-.5 + .5*(mydata$x2-.5) +
+            .5*(mydata$sex=='male')),1,0)
>
> if(.R.) {
+   library(boot)
+   b <- boot(mydata, function(data, i, ...) rank(-plot(anova(
+        lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,data,subset=i)),
+        sort='none', pl=FALSE)),
+        R=25)   # should really do R=500 but will take a while
+   Rank <- b$t0
+   lim <- t(apply(b$t, 2, quantile, probs=c(.025,.975)))
+ } else {
+   b <- bootstrap(mydata, rank(-plot(anova(
+        lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,mydata)), sort='none', pl=FALSE)),
+        B=25)   # should really do B=500 but will take a while
+   Rank <- b$observed
+   lim <- limits.emp(b)[,c(1,4)]   # get 0.025 and 0.975 quantiles
+ }
Attaching package: 'boot'

        The following object(s) are masked from package:survival :

         aml
>
> # Use the Hmisc Dotplot function to display ranks and their confidence
> # intervals.  Sort the categories by descending adj. chi-square, for ranks
> original.chisq <- plot(anova(lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,data=mydata)),
+                        sort='none', pl=FALSE)
> predictor <- as.factor(names(original.chisq))
> predictor <- reorder.factor(predictor, -original.chisq)
>
> Dotplot(predictor ~ Cbind(Rank, lim), pch=3, xlab='Rank',
+         main=if(.R.) expression(paste(
+ 'Ranks and 0.95 Confidence Limits for ',chi^2,' - d.f.')) else
+ 'Ranks and 0.95 Confidence Limits for Chi-square - d.f.')
Loading required package: grid
Loading required package: lattice

Attaching package: 'lattice'

        The following object(s) are masked from package:boot :

         melanoma
>
> cleanEx(); ..nameEx <- "bj"
>
> ### * bj
>
> flush(stderr()); flush(stdout())
>
> ### Name: bj
> ### Title: Buckley-James Multiple Regression Model
> ### Aliases: bj bj.fit residuals.bj print.bj validate.bj bjplot
> ### Keywords: models survival
>
> ### ** Examples
>
> set.seed(1)
> ftime <- 10*rexp(200)
> stroke <- ifelse(ftime > 10, 0, 1)
> ftime <- pmin(ftime, 10)
> units(ftime) <- "Month"
> age <- rnorm(200, 70, 10)
> hospital <- factor(sample(c('a','b'),200,TRUE))
> dd <- datadist(age, hospital)
> options(datadist="dd")
>
> f <- bj(Surv(ftime, stroke) ~ rcs(age,5) + hospital, x=TRUE, y=TRUE)
> # add link="identity" to use a censored normal regression model instead
> # of a lognormal one
> anova(f)
                Wald Statistics          Response: Surv(ftime, stroke)

Factor     Chi-Square d.f. P
age         9.13      4    0.0580
 Nonlinear  9.10      3    0.0279
hospital    1.43      1    0.2325
TOTAL      11.53      5    0.0418
> fastbw(f)

 Deleted  Chi-Sq d.f. P     Residual d.f. P     AIC
 hospital 1.43   1    0.233 1.43     1    0.233 -0.57

Approximate Estimates after Deleting Factors

              Coef    S.E.  Wald Z        P
Intercept -1.51928 1.95897 -0.7756 0.438013
age        0.06533 0.03491  1.8715 0.061270
age'      -0.49754 0.17772 -2.7995 0.005117
age''      4.97246 1.66575  2.9851 0.002835
age'''    -6.99327 2.38615 -2.9308 0.003381

Factors in Final Model

[1] age
> validate(f, B=15)
Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
[after iterations 2, 3, 6, 11, and 14:
 No convergence in 45 steps
 Warning in fit(x[xtrain, , drop = FALSE], y[train, , drop = FALSE], iter = i, :
   bj.fit failed]
Divergence or singularity in 5 samples
    index.orig training  test optimism index.corrected  n
Dxy      0.151    0.204 0.141   0.0627          0.0881 10
> plot(f, age=NA, hospital=NA)   # needs datadist since no explicit age,hosp.
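With 5 of the 15 resamples failing, the corrected Dxy is averaged over the n=10 successful ones. The apparent (index.orig) Dxy can be cross-checked with Hmisc's rcorr.cens, which computes Somers' Dxy rank correlation between a predictor and a censored response; a sketch (the sign convention may need flipping, since higher predicted values here mean longer survival):

rcorr.cens(predict(f), Surv(ftime, stroke))['Dxy']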
> coef(f)   # look at regression coefficients
Intercept       age      age'     age''    age''' hospital=b
  -1.0590    0.0554   -0.4562    4.6885   -6.6733     0.2024
> coef(psm(Surv(ftime, stroke) ~ rcs(age,5) + hospital, dist='lognormal'))
(Intercept)       age      age'     age''    age''' hospital=b
    -0.7259    0.0531   -0.4681    4.9181   -7.0953     0.2237
> # compare with coefficients from likelihood-based
> # log-normal regression model
> # use dist='gau' not under R
>
> r <- resid(f, 'censored.normalized')
> survplot(survfit(r), conf='none')
> # plot Kaplan-Meier estimate of
> # survival function of standardized residuals
> survplot(survfit(r ~ cut2(age, g=2)), conf='none')
> # may desire both strata to be N(0,1)
> options(datadist=NULL)
>
> cleanEx(); ..nameEx <- "bootcov"
>
> ### * bootcov
>
> flush(stderr()); flush(stdout())
>
> ### Name: bootcov
> ### Title: Bootstrap Covariance and Distribution for Regression
> ###   Coefficients
> ### Aliases: bootcov bootplot bootplot.bootcov confplot confplot.bootcov
> ###   histdensity
> ### Keywords: models regression htest methods hplot
>
> ### ** Examples
>
> set.seed(191)
> x <- exp(rnorm(200))
> logit <- 1 + x/2
> y <- ifelse(runif(200) <= plogis(logit), 1, 0)
> f <- lrm(y ~ pol(x,2), x=TRUE, y=TRUE)
> g <- bootcov(f, B=50, pr=TRUE, coef.reps=TRUE)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
> anova(g)   # using bootstrap covariance estimates
                Wald Statistics          Response: y

Factor     Chi-Square d.f. P
x          1.96       2    0.376
 Nonlinear 0.06       1    0.805
TOTAL      1.96       2    0.376
> fastbw(g)  # using bootstrap covariance estimates

 Deleted Chi-Sq d.f. P     Residual d.f. P     AIC
 x       1.96   2    0.376 1.96     2    0.376 -2.04

Approximate Estimates after Deleting Factors

      Coef   S.E. Wald Z         P
[1,] 1.305 0.1981  6.588 4.465e-11

Factors in Final Model

None
> beta <- g$boot.Coef[,1]
> hist(beta, nclass=15)   # look at normality of parameter estimates
> qqnorm(beta)
> # bootplot would be better than these last two commands
>
> # A dataset contains a variable number of observations per subject,
> # and all observations are laid out in separate rows. The responses
> # represent whether or not a given segment of the coronary arteries
> # is occluded. Segments of arteries may not operate independently
> # in the same patient. We assume a "working independence model" to
> # get estimates of the coefficients, i.e., that estimates assuming
> # independence are reasonably efficient. The job is then to get
> # unbiased estimates of variances and covariances of these estimates.
>
> set.seed(1)
> n.subjects <- 30
> ages <- rnorm(n.subjects, 50, 15)
> sexes <- factor(sample(c('female','male'), n.subjects, TRUE))
> logit <- (ages-50)/5
> prob <- plogis(logit)   # true prob not related to sex
> id <- sample(1:n.subjects, 300, TRUE)   # subjects sampled multiple times
> table(table(id))   # frequencies of number of obs/subject

 2  6  7  8  9 10 11 12 13 15 16
 1  1  1  5  9  1  3  3  3  2  1
> age <- ages[id]
> sex <- sexes[id]
> # In truth, observations within subject are independent:
> y <- ifelse(runif(300) <= prob[id], 1, 0)
> f <- lrm(y ~ lsp(age,50)*sex, x=TRUE, y=TRUE)
> g <- bootcov(f, id, B=50)   # usually do B=200 or more
singular information matrix in lrm.fit (rank= 5 ).  Offending variable(s):
age' * sex=male
Warning in bootcov(f, id, B = 50) : fit failure in 1 resamples.
Might try increasing maxit
> diag(g$var)/diag(f$var)
      Intercept             age            age'        sex=male  age * sex=male
           3.75            3.61            2.34          180.90          167.93
age' * sex=male
          97.59
> # add ,group=w to re-sample from within each level of w
> anova(g)   # cluster-adjusted Wald statistics
                Wald Statistics          Response: y

Factor                                   Chi-Square d.f. P
age (Factor+Higher Order Factors)        46.50      4    <.0001
 All Interactions                         0.05      2    0.974
 Nonlinear (Factor+Higher Order Factors)  0.28      2    0.871
sex (Factor+Higher Order Factors)         0.14      3    0.986
 All Interactions                         0.05      2    0.974
age * sex (Factor+Higher Order Factors)   0.05      2    0.974
 Nonlinear                                0.00      1    0.997
 Nonlinear Interaction : f(A,B) vs. AB    0.00      1    0.997
TOTAL NONLINEAR                           0.28      2    0.871
TOTAL NONLINEAR + INTERACTION             0.28      3    0.964
TOTAL                                    46.54      5    <.0001
> # fastbw(g)   # cluster-adjusted backward elimination
> plot(g, age=30:70, sex='female')   # cluster-adjusted confidence bands
>
> # Get design effects based on inflation of the variances when compared
> # with bootstrap estimates which ignore clustering
> g2 <- bootcov(f, B=50)
> diag(g$var)/diag(g2$var)
      Intercept             age            age'        sex=male  age * sex=male
        0.00843         0.00955         0.01887         3.53230         3.56082
age' * sex=male
        3.60403
>
> # Get design effects based on pooled tests of factors in model
> anova(g2)[,1] / anova(g)[,1]
age (Factor+Higher Order Factors)        0.657
 All Interactions                        0.882
 Nonlinear (Factor+Higher Order Factors) 0.499
sex (Factor+Higher Order Factors)        0.718
 All Interactions                        0.882
age * sex (Factor+Higher Order Factors)  0.882
 Nonlinear                               3.604
 Nonlinear Interaction : f(A,B) vs. AB   3.604
TOTAL NONLINEAR                          0.499
TOTAL NONLINEAR + INTERACTION            1.111
TOTAL                                    0.656
>
> # Simulate binary data where there is a strong
> # age x sex interaction with linear age effects
> # for both sexes, but where not knowing that
> # we fit a quadratic model.  Use the bootstrap
> # to get bootstrap distributions of various
> # effects, and to get pointwise and simultaneous
> # confidence limits
>
> set.seed(71)
> n <- 500
> age <- rnorm(n, 50, 10)
> sex <- factor(sample(c('female','male'), n, rep=TRUE))
> L <- ifelse(sex=='male', 0, .1*(age-50))
> y <- ifelse(runif(n)<=plogis(L), 1, 0)
>
> f <- lrm(y ~ sex*pol(age,2), x=TRUE, y=TRUE)
> b <- bootcov(f, B=50, coef.reps=TRUE, pr=TRUE)   # better: B=500
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
>
> par(mfrow=c(2,3))
> # Assess normality of regression estimates
> bootplot(b, which=1:6, what='qq')
> # They appear somewhat non-normal
>
> # Plot histograms and estimated densities
> # for 6 coefficients
> w <- bootplot(b, which=1:6)
> # Print bootstrap quantiles
> w$quantiles
                                     0.050     0.025     0.005     0.950     0.975     0.995
Intercept                        -20.45330 -21.24902 -22.28109 -6.477199 -4.653642 -3.97e+00
Coefficient of sex=male           -1.03600  -2.74698  -4.20281 15.876238 16.409298  1.82e+01
Coefficient of age                 0.14623   0.09910   0.07863  0.676707  0.685469  8.04e-01
Coefficient of age^2              -0.00522  -0.00545  -0.00713 -0.000411 -0.000109 -2.15e-05
Coefficient of sex=male * age     -0.51609  -0.54544  -0.68573  0.091802  0.187578  2.35e-01
Coefficient of sex=male * age^2   -0.00152  -0.00249  -0.00293  0.004102  0.004486  6.31e-03
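Because coef.reps=TRUE saved the 50 bootstrap coefficient vectors in b$boot.Coef, the same tail quantiles can be extracted directly; a sketch:

t(apply(b$boot.Coef, 2, quantile, probs=c(.025, .975)))  # 0.95 percentile limits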
>
> # Estimate regression function for females
> # for a sequence of ages
> ages <- seq(25, 75, length=100)
> label(ages) <- 'Age'
>
> # Plot fitted function and pointwise normal-
> # theory confidence bands
> par(mfrow=c(1,1))
> p <- plot(f, age=ages, sex='female')
> w <- p$x.xbeta
> # Save curve coordinates for later automatic
> # labeling using labcurve in the Hmisc library
> curves <- vector('list',8)
> curves[[1]] <- list(x=w[,1],y=w[,3])
> curves[[2]] <- list(x=w[,1],y=w[,4])
>
> # Add pointwise normal-distribution confidence
> # bands using unconditional variance-covariance
> # matrix from the 500 bootstrap reps
> p <- plot(b, age=ages, sex='female', add=TRUE, lty=3)
> w <- p$x.xbeta
> curves[[3]] <- list(x=w[,1],y=w[,3])
> curves[[4]] <- list(x=w[,1],y=w[,4])
>
> dframe <- expand.grid(sex='female', age=ages)
> X <- predict(f, dframe, type='x')   # Full design matrix
>
> # Add pointwise bootstrap nonparametric
> # confidence limits
> p <- confplot(b, X=X, against=ages, method='pointwise',
+               add=TRUE, lty.conf=4)
> curves[[5]] <- list(x=ages, y=p$lower)
> curves[[6]] <- list(x=ages, y=p$upper)
>
> # Add simultaneous bootstrap confidence band
> p <- confplot(b, X=X, against=ages, add=TRUE, lty.conf=5)
> curves[[7]] <- list(x=ages, y=p$lower)
> curves[[8]] <- list(x=ages, y=p$upper)
> lab <- c('a','a','b','b','c','c','d','d')
> labcurve(curves, lab)
>
> # Now get bootstrap simultaneous confidence set for
> # female:male odds ratios for a variety of ages
>
> dframe <- expand.grid(age=ages, sex=c('female','male'))
> X <- predict(f, dframe, type='x')   # design matrix
> f.minus.m <- X[1:100,] - X[101:200,]
> # First 100 rows are for females. By subtracting
> # design matrices are able to get Xf*Beta - Xm*Beta
> # = (Xf - Xm)*Beta
>
> confplot(b, X=f.minus.m, against=ages,
+          method='pointwise', ylab='F:M Log Odds Ratio')
> confplot(b, X=f.minus.m, against=ages,
+          lty.conf=3, add=TRUE)
>
> # contrast.Design makes it easier to compute the design matrix for use
> # in bootstrapping contrasts:
>
> f.minus.m <- contrast(f, list(sex='female',age=ages),
+                       list(sex='male',  age=ages))$X
> confplot(b, X=f.minus.m)
$fitted
        [,1]
1    -2.3810
2    -2.3232
3    -2.2657
[... 94 further values, rising monotonically ...]
98    1.4970
99    1.5188
100   1.5402

$upper
      1       2       3
0.33841 0.29209 0.24728
[... 94 further values ...]
     98      99     100
2.94855 3.05022 3.15342

$lower
        1         2         3
-5.915345 -5.752203 -5.590991
[... 94 further values ...]
       98        99       100
-0.508339 -0.571964 -0.637924
>
> # For a quadratic binary logistic regression model use bootstrap
> # bumping to estimate coefficients under a monotonicity constraint
> set.seed(177)
> n <- 400
> x <- runif(n)
> logit <- 3*(x^2-1)
> y <- rbinom(n, size=1, prob=plogis(logit))
> f <- lrm(y ~ pol(x,2), x=TRUE, y=TRUE)
> k <- coef(f)
> k
Intercept         x       x^2
    -3.78      1.41      2.49
> vertex <- -k[2]/(2*k[3])
> vertex
     x
-0.283
>
> # Outside [0,1] so fit satisfies monotonicity constraint within
> # x in [0,1], i.e., original fit is the constrained MLE
>
> g <- bootcov(f, B=50, coef.reps=TRUE)
> bootcoef <- g$boot.Coef   # 50x3 matrix (one row per resample)
> vertex <- -bootcoef[,2]/(2*bootcoef[,3])
> table(cut2(vertex, c(0,1)))

[-6.12, 0.00) [ 0.00, 1.00) [ 1.00, 5.58]
           29            14             7
> mono <- !(vertex >= 0 & vertex <= 1)
> mean(mono)   # estimate of Prob{monotonicity in [0,1]}
[1] 0.72
>
> var(bootcoef)   # var-cov matrix for unconstrained estimates
          Intercept     x   x^2
Intercept     0.948 -2.81  1.90
x            -2.808  9.78 -7.32
x^2           1.900 -7.32  5.83
> var(bootcoef[mono,])   # for constrained estimates
          Intercept     x   x^2
Intercept     0.963 -2.60  1.64
x            -2.600  8.38 -5.97
x^2           1.637 -5.97  4.62
>
> # Find second-best vector of coefficient estimates, i.e., best
> # from among bootstrap estimates
> g$boot.Coef[order(g$boot.loglik[-length(g$boot.loglik)])[1],]
Intercept         x       x^2
   -3.565     0.827     2.990
> # Note closeness to MLE
>
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "calibrate"
>
> ### * calibrate
>
> flush(stderr()); flush(stdout())
>
> ### Name: calibrate
> ### Title: Resampling Model Calibration
> ### Aliases: calibrate calibrate.default calibrate.cph calibrate.psm
> ###   print.calibrate print.calibrate.default plot.calibrate
> ###   plot.calibrate.default
> ### Keywords: methods models regression survival hplot
>
> ### ** Examples
>
> set.seed(1)
> d.time <- rexp(200)
> x1 <- runif(200)
> x2 <- factor(sample(c('a','b','c'),200,TRUE))
> f <- cph(Surv(d.time) ~ pol(x1,2)*x2, x=TRUE, y=TRUE, surv=TRUE, time.inc=2)
> # or f <- psm(S ~ ...)
> cal <- calibrate(f, u=2, m=50, B=20)   # usually B=200 or 300
Using Cox survival estimates at 2 Days
Averaging 2 repetitions of B= 10
Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) :
  one interval had < 2 observations
           x  n events   KM std.err
[1,] 0.06433 50     50 0.06  0.1990
[2,] 0.11124 50     50 0.16  0.1768
[3,] 0.14900 50     50 0.10  0.1843
[4,] 0.20614 50     50 0.20  0.1757
Iteration: 1 2 3 4 5 6 7 8 9 10
[the groupkm warning repeated twice at each iteration]
           x  n events   KM std.err
[1,] 0.06433 50     50 0.06  0.1990
[2,] 0.11124 50     50 0.16  0.1768
[3,] 0.14900 50     50 0.10  0.1843
[4,] 0.20614 50     50 0.20  0.1757
Iteration: 1 2 3 4 5 6 7 8 9 10
[the groupkm warning again repeated twice at each iteration]

Mean over 2 overall replications

     mean.optimism mean.corrected  n
[1,]      -0.01526         0.0109 20
[2,]       0.00094         0.0478 20
[3,]      -0.00463        -0.0444 20
[4,]       0.04720        -0.0533 20
> plot(cal)
>
> y <- sample(0:2, 200, TRUE)
> x1 <- runif(200)
> x2 <- runif(200)
> x3 <- runif(200)
> x4 <- runif(200)
> f <- lrm(y ~ x1+x2+x3*x4, x=TRUE, y=TRUE)
> cal <- calibrate(f, kint=2, predy=seq(.2,.8,length=60),
+                  group=y)
Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
> # group= does k-sample validation: make resamples have same
> # numbers of subjects in each level of y as original sample
>
> plot(cal)

n=200   Mean absolute error=0.0330   Mean squared error=0.00212
0.9 Quantile of absolute error=0.0446
> # See the example for the validate function for a method of validating
> # continuation ratio ordinal logistic models.  You can do the same
> # thing for calibrate
>
> cleanEx(); ..nameEx <- "contrast"
>
> ### * contrast
>
> flush(stderr()); flush(stdout())
>
> ### Name: contrast.Design
> ### Title: General Contrasts of Regression Coefficients
> ### Aliases: contrast contrast.Design print.contrast.Design
> ### Keywords: htest models regression
>
> ### ** Examples
>
> set.seed(1)
> age <- rnorm(200,40,12)
> sex <- factor(sample(c('female','male'),200,TRUE))
> logit <- (sex=='male') + (age-40)/5
> y <- ifelse(runif(200) <= plogis(logit), 1, 0)
> f <- lrm(y ~ pol(age,2)*sex)
> # Compare a 30 year old female to a 40 year old male
> # (with or without age x sex interaction in the model)
> contrast(f, list(sex='female', age=30), list(sex='male', age=40))
  Contrast  S.E. Lower Upper     Z Pr(>|z|)
1    -3.96 0.854 -5.63 -2.28 -4.64        0
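The contrast itself is just the difference in linear predictors at the two covariate settings, which can be verified by hand; a sketch (contrast() is still needed for the standard error, which requires the full covariance matrix of the coefficients):

predict(f, data.frame(sex='female', age=30), type='lp') -
  predict(f, data.frame(sex='male',   age=40), type='lp')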
>
> # For a model containing two treatments, centers, and treatment
> # x center interaction, get 0.95 confidence intervals separately
> # by center
> center <- factor(sample(letters[1:8],500,TRUE))
> treat <- factor(sample(c('a','b'), 500,TRUE))
> y <- 8*(treat=='b') + rnorm(500,100,20)
> f <- ols(y ~ treat*center)
>
> lc <- levels(center)
> contrast(f, list(treat='b', center=lc),
+          list(treat='a', center=lc))
  center Contrast S.E.   Lower Upper     t Pr(>|t|)
1 a         -2.73 4.70 -11.974  6.51 -0.58   0.5616
2 b          4.29 5.28  -6.091 14.67  0.81   0.4173
3 c         11.45 4.86   1.901 21.01  2.36   0.0189
4 d         10.52 4.87   0.948 20.09  2.16   0.0313
5 e         24.08 6.10  12.097 36.07  3.95   0.0001
6 f          9.09 5.14  -1.012 19.18  1.77   0.0777
7 g         11.91 5.39   1.325 22.50  2.21   0.0275
8 h         -4.47 4.86 -14.032  5.08 -0.92   0.3580

Error d.f.= 484
>
> # Get 'Type III' contrast: average b - a treatment effect over
> # centers, weighting centers equally (which is almost always
> # an unreasonable thing to do)
> contrast(f, list(treat='b', center=lc),
+          list(treat='a', center=lc),
+          type='average')
  Contrast S.E. Lower Upper    t Pr(>|t|)
1     8.02 1.83  4.43  11.6 4.39        0

Error d.f.= 484
>
> # Get 'Type II' contrast, weighting centers by the number of
> # subjects per center.  Print the design contrast matrix used.
> k <- contrast(f, list(treat='b', center=lc),
+               list(treat='a', center=lc),
+               type='average', weights=table(center))
> print(k, X=TRUE)
  Contrast S.E. Lower Upper    t Pr(>|t|)
1      7.1 1.81  3.55  10.6 3.93    1e-04

Error d.f.= 484

Design Matrix for Contrasts

  Intercept treat=b center=b center=c center=d center=e center=f center=g
1         0       1        0        0        0        0        0        0
  center=h treat=b * center=b treat=b * center=c treat=b * center=d
1        0               0.12              0.136              0.136
  treat=b * center=e treat=b * center=f treat=b * center=g treat=b * center=h
1               0.09              0.122              0.112              0.136
> # Note: If other variables had interacted with either treat
> # or center, we may want to list settings for these variables
> # inside the list()'s, so as to not use default settings
>
> # For a 4-treatment study, get all comparisons with treatment 'a'
> treat <- factor(sample(c('a','b','c','d'), 500,TRUE))
> y <- 8*(treat=='b') + rnorm(500,100,20)
> dd <- datadist(treat,center); options(datadist='dd')
> f <- ols(y ~ treat*center)
> lt <- levels(treat)
> contrast(f, list(treat=lt[-1]),
+          list(treat=lt[ 1]),
+          cnames=paste(lt[-1],lt[1],sep=':'), conf.int=1-.05/3)
    Contrast S.E. Lower Upper     t Pr(>|t|)
b:a     7.44 7.57 -10.8 25.62  0.98    0.326
c:a    -7.19 7.10 -24.3  9.87 -1.01    0.312
d:a    -5.67 6.51 -21.3  9.96 -0.87    0.384

Error d.f.= 468
>
> # Compare each treatment with average of all others
> for(i in 1:length(lt)) {
+   cat('Comparing with',lt[i],'\n\n')
+   print(contrast(f, list(treat=lt[-i]),
+                  list(treat=lt[ i]), type='average'))
+ }
Comparing with a

  Contrast S.E. Lower Upper     t Pr(>|t|)
1    -1.81 5.52 -12.7  9.04 -0.33    0.743

Error d.f.= 468
Comparing with b

  Contrast S.E. Lower Upper     t Pr(>|t|)
1    -11.7 6.68 -24.8  1.40 -1.76   0.0799

Error d.f.= 468
Comparing with c

  Contrast S.E. Lower Upper    t Pr(>|t|)
1     7.78  6.2 -4.42  20.0 1.25    0.211

Error d.f.= 468
Comparing with d

  Contrast S.E. Lower Upper    t Pr(>|t|)
1     5.75  5.6 -5.24  16.7 1.03    0.304

Error d.f.= 468
> options(datadist=NULL)
>
> # Six ways to get the same thing, for a variable that
> # appears linearly in a model and does not interact with
> # any other variables.  We estimate the change in y per
> # unit change in a predictor x1.  Methods 4, 5 also
> # provide confidence limits.  Method 6 computes nonparametric
> # bootstrap confidence limits.  Methods 2-6 can work
> # for models that are nonlinear or non-additive in x1.
> # For that case more care is needed in choice of settings
> # for x1 and the variables that interact with x1.
>
> ## Not run:
> ##D coef(fit)['x1']                            # method 1
> ##D diff(predict(fit, gendata(x1=c(0,1))))     # method 2
> ##D g <- Function(fit)                         # method 3
> ##D g(x1=1) - g(x1=0)
> ##D summary(fit, x1=c(0,1))                    # method 4
> ##D k <- contrast(fit, list(x1=1), list(x1=0)) # method 5
> ##D print(k, X=TRUE)
> ##D fit <- update(fit, x=TRUE, y=TRUE)         # method 6
> ##D b <- bootcov(fit, B=500, coef.reps=TRUE)
> ##D bootplot(b, X=k$X)   # bootstrap distribution and CL
> ##D
> ##D # In a model containing age, race, and sex,
> ##D # compute an estimate of the mean response for a
> ##D # 50 year old male, averaged over the races using
> ##D # observed frequencies for the races as weights
> ##D
> ##D f <- ols(y ~ age + race + sex)
> ##D contrast(f, list(age=50, sex='male', race=levels(race)),
> ##D          type='average', weights=table(race))
> ## End(Not run)
>
> # Plot the treatment effect (drug - placebo) as a function of age
> # and sex in a model in which age nonlinearly interacts with treatment
> # for females only
> set.seed(1)
> n <- 800
> treat <- factor(sample(c('drug','placebo'), n,TRUE))
> sex <- factor(sample(c('female','male'), n,TRUE))
> age <- rnorm(n, 50, 10)
> y <- .05*age + (sex=='female')*(treat=='drug')*.05*abs(age-50) + rnorm(n)
> f <- ols(y ~ rcs(age,4)*treat*sex)
> d <- datadist(age, treat, sex); options(datadist='d')
> # show separate estimates by treatment and sex
> plot(f, age=NA, treat=NA, sex='female')
> plot(f, age=NA, treat=NA, sex='male')
> ages <- seq(35,65,by=5); sexes <- c('female','male')
> w <- contrast(f, list(treat='drug',    age=ages, sex=sexes),
+               list(treat='placebo', age=ages, sex=sexes))
> xYplot(Cbind(Contrast, Lower, Upper) ~ age | sex, data=w,
+        ylab='Drug - Placebo')
Loading required package: grid
Loading required package: lattice
> xYplot(Cbind(Contrast, Lower, Upper) ~ age, groups=sex, data=w,
+        ylab='Drug - Placebo', method='alt bars')
> options(datadist=NULL)
>
> cleanEx(); ..nameEx <- "cph"
>
> ### * cph
>
> flush(stderr()); flush(stdout())
>
> ### Name: cph
> ### Title: Cox Proportional Hazards Model and Extensions
> ### Aliases: cph Survival.cph Quantile.cph Mean.cph
> ### Keywords: survival models nonparametric
>
> ### ** Examples
>
> # Simulate data from a population model in which the log hazard
> # function is linear in age and there is no age x sex interaction
> n <- 1000
> set.seed(731)
> age <- 50 + 12*rnorm(n)
> label(age) <- "Age"
> sex <- factor(sample(c('Male','Female'), n,
+               rep=TRUE, prob=c(.6, .4)))
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
> dt <- -log(runif(n))/h
> label(dt) <- 'Follow-up Time'
> e <- ifelse(dt <= cens,1,0)
> dt <- pmin(dt, cens)
> units(dt) <- "Year"
> dd <- datadist(age, sex)
> options(datadist='dd')
> Srv <- Surv(dt,e)
>
> f <- cph(Srv ~ rcs(age,4) + sex, x=TRUE, y=TRUE)
> cox.zph(f, "rank")   # tests of PH
             rho chisq      p
age       0.0374 0.177 0.6741
age'     -0.0351 0.171 0.6792
age''     0.0268 0.101 0.7504
sex=Male  0.1486 3.977 0.0461
GLOBAL        NA 4.867 0.3013
> anova(f)
                Wald Statistics          Response: Srv

Factor     Chi-Square d.f. P
age        57.75      3    <.0001
 Nonlinear  8.17      2    0.0168
sex        18.75      1    <.0001
TOTAL      75.63      4    <.0001
> plot(f, age=NA, sex=NA)   # plot age effect, 2 curves for 2 sexes
> survplot(f, sex=NA)       # time on x-axis, curves for sex
> res <- resid(f, "scaledsch")
> time <- as.numeric(dimnames(res)[[1]])
> z <- loess(res[,4] ~ time, span=0.50)   # residuals for sex
> if(.R.) plot(time, fitted(z)) else
+   plot(z, coverage=0.95, confidence=7, xlab="t",
+        ylab="Scaled Schoenfeld Residual", ylim=c(-3,5))
> lines(supsmu(time, res[,4]), lty=2)
> plot(cox.zph(f,"identity"))   # Easier approach for last 6 lines
> # latex(f)
>
> f <- cph(Srv ~ age + strat(sex), surv=TRUE)
> g <- Survival(f)   # g is a function
> g(seq(.1,1,by=.1), stratum="sex=Male", type="poly")   # could use stratum=2
 [1] 0.999 0.996 0.994 0.993 0.992 0.992 0.992 0.992 0.992 0.991
> med <- Quantile(f)
> plot(f, age=NA, fun=function(x) med(lp=x))   # plot median survival
>
> # g <- cph(Surv(hospital.charges) ~ age, surv=TRUE)
> # Cox model very useful for analyzing highly skewed data, censored or not
> # m <- Mean(g)
> # m(0)   # Predicted mean charge for reference age
>
> # Fit a time-dependent covariable representing the instantaneous effect
> # of an intervening non-fatal event
> rm(age)
> set.seed(121)
> dframe <- data.frame(failure.time=1:10, event=rep(0:1,5),
+                      ie.time=c(NA,1.5,2.5,NA,3,4,NA,5,5,5),
+                      age=sample(40:80,10,rep=TRUE))
> z <- ie.setup(dframe$failure.time, dframe$event, dframe$ie.time)
> S <- z$S
> ie.status <- z$ie.status
> attach(dframe[z$subs,])   # replicates all variables
>
> f <- cph(S ~ age + ie.status, x=TRUE, y=TRUE)
> # Must use x=TRUE,y=TRUE to get survival curves with time-dep. covariables
>
> # Get estimated survival curve for a 50-year old who has an intervening
> # non-fatal event at 5 days
> new <- data.frame(S=Surv(c(0,5), c(5,999), c(FALSE,FALSE)), age=rep(50,2),
+                   ie.status=c(0,1))
> g <- survfit(f, new)
> plot(c(0,g$time), c(1,g$surv[,2]), type='s',
+      xlab='Days', ylab='Survival Prob.')
> # Not certain about what columns represent in g$surv for survival5
> # but appears to be for different ie.status
> # or:
> # g <- survest(f, new)
> # plot(g$time, g$surv, type='s', xlab='Days', ylab='Survival Prob.')
>
> # Compare with estimates when there is no intervening event
> new2 <- data.frame(S=Surv(c(0,5), c(5, 999), c(FALSE,FALSE)), age=rep(50,2),
+                    ie.status=c(0,0))
> g2 <- survfit(f, new2)
> lines(c(0,g2$time), c(1,g2$surv[,2]), type='s', lty=2)
> # or:
> # g2 <- survest(f, new2)
> # lines(g2$time, g2$surv, type='s', lty=2)
> detach("dframe[z$subs, ]")
> options(datadist=NULL)
>
> cleanEx(); ..nameEx <- "cr.setup"
>
> ### * cr.setup
>
> flush(stderr()); flush(stdout())
>
> ### Name: cr.setup
> ### Title: Continuation Ratio Ordinal Logistic Setup
> ### Aliases: cr.setup
> ### Keywords: category models regression
>
> ### ** Examples
>
> y <- c(NA, 10, 21, 32, 32)
> cr.setup(y)
$y
[1] NA  1  0  1  0  0  0  0

$cohort
[1] all   all   all   y>=21 all   y>=21 all   y>=21
Levels: all y>=21

$subs
[1] 1 2 3 3 4 4 5 5

$reps
[1] 1 1 2 2 2
>
> set.seed(171)
> y <- sample(0:2, 100, rep=TRUE)
> sex <- sample(c("f","m"),100,rep=TRUE)
> sex <- factor(sex)
> table(sex, y)
   y
sex  0  1  2
  f 23 15 13
  m 12 23 14
> options(digits=5)
> tapply(y==0, sex, mean)
      f       m
0.45098 0.24490
> tapply(y==1, sex, mean)
      f       m
0.29412 0.46939
> tapply(y==2, sex, mean)
      f       m
0.25490 0.28571
> cohort <- y>=1
> tapply(y[cohort]==1, sex[cohort], mean)
      f       m
0.53571 0.62162
>
> u <- cr.setup(y)
> Y <- u$y
> cohort <- u$cohort
> sex <- sex[u$subs]
>
> lrm(Y ~ cohort + sex)

Logistic Regression Model

lrm(formula = Y ~ cohort + sex)
Frequencies of Responses
  0  1
 92 73

 Obs Max Deriv Model L.R. d.f.      P     C   Dxy Gamma Tau-a    R2 Brier
 165     5e-08      10.32    2 0.0057 0.645 0.291 0.382 0.144 0.081 0.232

               Coef   S.E. Wald Z      P
Intercept   -0.4299 0.2586  -1.66 0.0964
cohort=y>=1  1.0018 0.3317   3.02 0.0025
sex=m       -0.3983 0.3263  -1.22 0.2223

> f <- lrm(Y ~ cohort*sex)   # saturated model - has to fit all data cells
> f

Logistic Regression Model

lrm(formula = Y ~ cohort * sex)

Frequencies of Responses
  0  1
 92 73

 Obs Max Deriv Model L.R. d.f.      P     C   Dxy Gamma Tau-a    R2 Brier
 165     3e-12      14.03    3 0.0029 0.659 0.317 0.417 0.157 0.109 0.226

                       Coef   S.E. Wald Z      P
Intercept           -0.1967 0.2814  -0.70 0.4845
cohort=y>=1          0.3398 0.4720   0.72 0.4716
sex=m               -0.9293 0.4354  -2.13 0.0328
cohort=y>=1 * sex=m  1.2826 0.6694   1.92 0.0553

> # In S-Plus:
> # Prob(y=0|female):
> #   plogis(-.50078)
> # Prob(y=0|male):
> #   plogis(-.50078+.11301)
> # Prob(y=1|y>=1, female):
> plogis(-.50078+.31845)
[1] 0.45454
> # Prob(y=1|y>=1, male):
> plogis(-.50078+.31845+.11301-.07379)
[1] 0.46428
>
> combinations <- expand.grid(cohort=levels(cohort), sex=levels(sex))
> combinations
  cohort sex
1 all    f
2 y>=1   f
3 all    m
4 y>=1   m
> p <- predict(f, combinations, type="fitted")
> p
      1       2       3       4
0.45098 0.53571 0.24490 0.62162
> p0 <- p[c(1,3)]
> p1 <- p[c(2,4)]
> p1.unconditional <- (1 - p0) * p1
> p1.unconditional
      1       3
0.29412 0.46939
> p2.unconditional <- 1 - p0 - p1.unconditional
> p2.unconditional
      1       3
0.25490 0.28571
>
> ## Not run:
> ##D dd <- datadist(inputdata)   # do this on non-replicated data
> ##D options(datadist='dd')
> ##D pain.severity <- inputdata$pain.severity
> ##D u <- cr.setup(pain.severity)
> ##D # inputdata frame has age, sex with pain.severity
> ##D attach(inputdata[u$subs,])   # replicate age, sex
> ##D # If age, sex already available, could do age <- age[u$subs] etc., or
> ##D # age <- rep(age, u$reps), etc.
> ##D y <- u$y
> ##D cohort <- u$cohort
> ##D dd <- datadist(dd, cohort)   # add to dd
> ##D f <- lrm(y ~ cohort + age*sex)   # ordinary cont. ratio model
> ##D g <- lrm(y ~ cohort*sex + age, x=TRUE,y=TRUE)   # allow unequal slopes for
> ##D                                                 # sex across cutoffs
> ##D cal <- calibrate(g, cluster=u$subs, subset=cohort=='all')
> ##D # subs makes bootstrap sample the correct units, subset causes
> ##D # Predicted Prob(pain.severity=0) to be checked for calibration
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "datadist"
>
> ### * datadist
>
> flush(stderr()); flush(stdout())
>
> ### Name: datadist
> ### Title: Distribution Summaries for Predictor Variables
> ### Aliases: datadist print.datadist
> ### Keywords: models nonparametric regression
>
> ### ** Examples
>
> ## Not run:
> ##D d <- datadist(data=1)   # use all variables in search pos. 1
> ##D d <- datadist(x1, x2, x3)
> ##D page(d)   # if your options(pager) leaves up a pop-up
> ##D           # window, this is a useful guide in analyses
> ##D d <- datadist(data=2)   # all variables in search pos. 2
> ##D d <- datadist(data=my.data.frame)
> ##D d <- datadist(my.data.frame)   # same as previous. Run for all potential vars.
> ##D d <- datadist(x2, x3, data=my.data.frame)   # combine variables
> ##D d <- datadist(x2, x3, q.effect=c(.1,.9), q.display=c(0,1))
> ##D # uses inter-decile range odds ratios,
> ##D # total range of variables for regression function plots
> ##D d <- datadist(d, z)   # add a new variable to an existing datadist
> ##D options(datadist="d") # often a good idea, to store info with fit
> ##D f <- ols(y ~ x1*x2*x3)
> ##D
> ##D options(datadist=NULL)   # default at start of session
> ##D f <- ols(y ~ x1*x2)
> ##D d <- datadist(f)   # info not stored in `f'
> ##D d$limits["Adjust to","x1"] <- .5   # reset adjustment level to .5
> ##D options(datadist="d")
> ##D
> ##D f <- lrm(y ~ x1*x2, data=mydata)
> ##D d <- datadist(f, data=mydata)
> ##D options(datadist="d")
> ##D
> ##D f <- lrm(y ~ x1*x2)   # datadist not used - specify all values for
> ##D summary(f, x1=c(200,500,800), x2=c(1,3,5))   # obtaining predictions
> ##D plot(f, x1=200:800, x2=3)
> ##D
> ##D # Change reference value to get a relative odds plot for a logistic model
> ##D d$limits$age[2] <- 30   # make 30 the reference value for age
> ##D # Could also do: d$limits["Adjust to","age"] <- 30
> ##D fit <- update(fit)   # make new reference value take effect
> ##D plot(fit, age=NA, ref.zero=TRUE, fun=exp, ylab='Age=x:Age=30 Odds Ratio')
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "fastbw"
>
> ### * fastbw
>
> flush(stderr()); flush(stdout())
>
> ### Name: fastbw
> ### Title: Fast Backward Variable Selection
> ### Aliases: fastbw print.fastbw
> ### Keywords: models regression htest
>
> ### ** Examples
>
> ## Not run:
> ##D fastbw(fit, optional.arguments)   # print results
> ##D z <- fastbw(fit, optional.args)   # typically used in simulations
> ##D lm.fit(X[,z$parms.kept], Y)       # least squares fit of reduced model
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "gendata"
>
> ### * gendata
>
> flush(stderr()); flush(stdout())
>
> ### Name: gendata
> ### Title: Generate Data Frame with Predictor Combinations
> ### Aliases: gendata gendata.Design gendata.default
> ### Keywords: methods models regression manip
>
> ### ** Examples
>
> set.seed(1)
> age <- rnorm(200, 50, 10)
> sex <- factor(sample(c('female','male'),200,TRUE))
> race <- factor(sample(c('a','b','c','d'),200,TRUE))
> y <- sample(0:1, 200, TRUE)
> dd <- datadist(age,sex,race)
> options(datadist="dd")
> f <- lrm(y ~ age*sex + race)
> gendata(f)
     age  sex race
1 49.506 male    d
> gendata(f, age=50)
  age  sex race
1  50 male    d
> d <- gendata(f, age=50, sex="female")   # leave race=reference category
> d <- gendata(f, age=c(50,60), race=c("b","a"))   # 4 obs.
> d$Predicted <- predict(f, d, type="fitted")
> d   # Predicted column prints at the far right
  age  sex race Predicted
1  50 male    b   0.54418
2  60 male    b   0.49680
3  50 male    a   0.45405
4  60 male    a   0.40751
> options(datadist=NULL)
> ## Not run:
> ##D d <- gendata(f, nobs=5, view=TRUE)   # 5 interactively defined obs.
> ##D d[,attr(d,"names.subset")]   # print variables which varied
> ##D predict(f, d)
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "glmD"
>
> ### * glmD
>
> flush(stderr()); flush(stdout())
>
> ### Name: glmD
> ### Title: Design Version of glm
> ### Aliases: glmD print.glmD
> ### Keywords: models regression
>
> ### ** Examples
>
> ## Dobson (1990) Page 93: Randomized Controlled Trial :
> counts <- c(18,17,15,20,10,20,25,13,12)
> outcome <- gl(3,1,9)
> treatment <- gl(3,3)
> f <- glm(counts ~ outcome + treatment, family=poisson())
> f

Call:  glm(formula = counts ~ outcome + treatment, family = poisson())

Coefficients:
(Intercept)     outcome2     outcome3   treatment2   treatment3
   3.04e+00    -4.54e-01    -2.93e-01    -2.26e-16    -1.25e-16

Degrees of Freedom: 8 Total (i.e. Null);  4 Residual
Null Deviance:      10.6
Residual Deviance: 5.13         AIC: 56.8
> anova(f)
Analysis of Deviance Table

Model: poisson, link: log

Response: counts

Terms added sequentially (first to last)

          Df Deviance Resid. Df Resid. Dev
NULL                          8      10.58
outcome    2     5.45         6       5.13
treatment  2     0.00         4       5.13
> summary(f)

Call:
glm(formula = counts ~ outcome + treatment, family = poisson())

Deviance Residuals:
      1        2        3        4        5        6        7        8        9
-0.6712   0.9627  -0.1696  -0.2200  -0.9555   1.0494   0.8472  -0.0917  -0.9666

Coefficients:
             Estimate Std. Error  z value Pr(>|z|)
(Intercept)  3.04e+00   1.71e-01    17.81   <2e-16 ***
outcome2    -4.54e-01   2.02e-01    -2.25    0.025 *
outcome3    -2.93e-01   1.93e-01    -1.52    0.128
treatment2  -2.26e-16   2.00e-01 -1.1e-15    1.000
treatment3  -1.25e-16   2.00e-01 -6.3e-16    1.000
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 10.5814  on 8  degrees of freedom
Residual deviance:  5.1291  on 4  degrees of freedom
AIC: 56.76

Number of Fisher Scoring iterations: 4

> f <- glmD(counts ~ outcome + treatment, family=poisson())
> # could have had rcs( ) etc. if there were continuous predictors
> f

General Linear Model

glmD(formula = counts ~ outcome + treatment, family = poisson())

Model L.R. d.f.      P
       5.5    4 0.2440

                 Coef   S.E.   Wald Z       P
Intercept   3.045e+00 0.1709  1.8e+01 0.00000
outcome2   -4.543e-01 0.2022 -2.2e+00 0.02465
outcome3   -2.930e-01 0.1927 -1.5e+00 0.12849
treatment2 -2.263e-16 0.2000 -1.1e-15 1.00000
treatment3 -1.251e-16 0.2000 -6.3e-16 1.00000

> anova(f)
                Wald Statistics          Response: counts

Factor    Chi-Square d.f. P
outcome   5.49       2    0.0643
treatment 0.00       2    1.0000
TOTAL     5.49       4    0.2409
> summary(f, outcome=c('1','2','3'), treatment=c('1','2','3'))
             Effects              Response : counts

Factor          Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
outcome - 1:2    2   1   NA    0.45   0.20  0.06       0.85
outcome - 3:2    2   3   NA    0.16   0.22 -0.26       0.58
treatment - 1:2  2   1   NA    0.00   0.20 -0.39       0.39
treatment - 3:2  2   3   NA    0.00   0.20 -0.39       0.39
>
> cleanEx(); ..nameEx <- "glsD"
>
> ### * glsD
>
> flush(stderr()); flush(stdout())
>
> ### Name: glsD
> ### Title: Fit Linear Model Using Generalized Least Squares
> ### Aliases: glsD print.glsD
> ### Keywords: models
>
> ### ** Examples
>
> ## Not run:
> ##D ns <- 20   # no. subjects
> ##D nt <- 10   # no. time points/subject
> ##D B <- 10    # no. bootstrap resamples
> ##D # usually do 100 for variances, 1000 for nonparametric CLs
> ##D rho <- .5   # AR(1) correlation parameter
> ##D V <- matrix(0, nrow=nt, ncol=nt)
> ##D V <- rho^abs(row(V)-col(V))   # per-subject correlation/covariance matrix
> ##D
> ##D d <- expand.grid(tim=1:nt, id=1:ns)
> ##D d$trt <- factor(ifelse(d$id <= ns/2, 'a', 'b'))
> ##D true.beta <- c(Intercept=0,tim=.1,'tim^2'=0,'trt=b'=1)
> ##D d$ey <- true.beta['Intercept'] + true.beta['tim']*d$tim +
> ##D         true.beta['tim^2']*(d$tim^2) + true.beta['trt=b']*(d$trt=='b')
> ##D set.seed(13)
> ##D library(MASS)   # needed for mvrnorm
> ##D d$y <- d$ey + as.vector(t(mvrnorm(n=ns, mu=rep(0,nt), Sigma=V)))
> ##D
> ##D dd <- datadist(d); options(datadist='dd')
> ##D # library(nlme)   # S-Plus: library(nlme3) or later
> ##D f <- glsD(y ~ pol(tim,2) + trt, correlation=corCAR1(form= ~tim | id),
> ##D           data=d, B=B)
> ##D f
> ##D f$var       # bootstrap variances
> ##D f$varBeta   # original variances
> ##D summary(f)
> ##D anova(f)
> ##D plot(f, tim=NA, trt=NA)
> ##D # v <- Variogram(f, form=~tim|id, data=d)
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "groupkm"
>
> ### * groupkm
>
> flush(stderr()); flush(stdout())
>
> ### Name: groupkm
> ### Title: Kaplan-Meier Estimates vs. a Continuous Variable
> ### Aliases: groupkm
> ### Keywords: survival nonparametric
>
> ### ** Examples
>
> n <- 1000
> set.seed(731)
> age <- 50 + 12*rnorm(n)
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50))
> d.time <- -log(runif(n))/h
> label(d.time) <- 'Follow-up Time'
> e <- ifelse(d.time <= cens,1,0)
> d.time <- pmin(d.time, cens)
> units(d.time) <- "Year"
> groupkm(age, Surv(d.time, e), g=10, u=5, pl=TRUE)
Warning in groupkm(age, Surv(d.time, e), g = 10, u = 5, pl = TRUE) :
  one interval had < 2 observations
           x   n events      KM std.err
 [1,] 27.917 100      4 0.94651 0.50440
 [2,] 36.560 100      6 0.95640 0.50297
 [3,] 40.794 100     13 0.89594 0.33580
 [4,] 44.157 100     11 0.91388 0.37883
 [5,] 47.342 100     18 0.86132 0.30512
 [6,] 50.884 100     10 0.92282 0.37965
 [7,] 53.931 100     18 0.87666 0.31838
 [8,] 57.650 100     18 0.86837 0.31996
 [9,] 62.185 100     12 0.93819 0.45157
[10,] 70.899 100     29 0.72407 0.21722
> # Plot 5-year K-M survival estimates and 0.95 confidence bars by
> # decile of age.  If omit g=10, will have >= 50 obs./group.
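Roughly the same estimates can be built by hand: cut age into deciles with Hmisc's cut2 and fit stratified Kaplan-Meier curves; groupkm packages this up and evaluates the curves at u for plotting. A sketch:

km <- survfit(Surv(d.time, e) ~ cut2(age, g=10))
summary(km, times=5)   # K-M survival at 5 years, one stratum per age decile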
>
> cleanEx(); ..nameEx <- "hazard.ratio.plot"
>
> ### * hazard.ratio.plot
>
> flush(stderr()); flush(stdout())
>
> ### Name: hazard.ratio.plot
> ### Title: Hazard Ratio Plot
> ### Aliases: hazard.ratio.plot
> ### Keywords: survival
>
> ### ** Examples
>
> n <- 500
> set.seed(1)
> age <- 50 + 12*rnorm(n)
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50))
> d.time <- -log(runif(n))/h
> label(d.time) <- 'Follow-up Time'
> e <- ifelse(d.time <= cens,1,0)
> d.time <- pmin(d.time, cens)
> units(d.time) <- "Year"
> hazard.ratio.plot(age, Surv(d.time,e), e=20, legendloc='ll')
$time
[1]  0.72199  2.99413  5.74525 10.85871

$log.hazard.ratio
       [,1]      [,2]     [,3]     [,4]
x1 0.018155 0.0064516 0.063675 0.017749

$se
         [,1]    [,2]     [,3]     [,4]
[1,] 0.017740 0.01836 0.019553 0.018855
>
> cleanEx(); ..nameEx <- "ie.setup"
>
> ### * ie.setup
>
> flush(stderr()); flush(stdout())
>
> ### Name: ie.setup
> ### Title: Intervening Event Setup
> ### Aliases: ie.setup
> ### Keywords: survival
>
> ### ** Examples
>
> failure.time <- c(1 , 2, 3)
> event <- c(1 , 1, 0)
> ie.time <- c(NA, 1.5, 2.5)
>
> z <- ie.setup(failure.time, event, ie.time)
> S <- z$S
> S
[1] (0.0,1.0 ] (0.0,1.5+] (1.5,2.0 ] (0.0,2.5+] (2.5,3.0+]
> ie.status <- z$ie.status
> ie.status
[1] 0 0 1 0 1
> z$subs
[1] 1 2 2 3 3
> z$reps
[1] 1 2 2
> ## Not run:
> ##D attach(input.data.frame[z$subs,])   # replicates all variables
> ##D f <- cph(S ~ age + sex + ie.status)
> ##D # Instead of duplicating rows of data frame, could do this:
> ##D attach(input.data.frame)
> ##D z <- ie.setup(failure.time, event, ie.time)
> ##D s <- z$subs
> ##D age <- age[s]
> ##D sex <- sex[s]
> ##D f <- cph(S ~ age + sex + ie.status)
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "latex.Design"
>
> ### * latex.Design
>
> flush(stderr()); flush(stdout())
>
> ### Name: latex.Design
> ### Title: LaTeX Representation of a Fitted Model
> ### Aliases: latexDesign latex.Design latex.bj latex.glmD latex.glsD latex
> ### Keywords: models regression character methods interface
>
> ### ** Examples
>
> ## Not run:
> ##D f <- lrm(death ~ rcs(age)+sex)
> ##D w <- latex(f)
> ##D w   # displays, using e.g. xdvi
> ##D latex(f, file="")   # send LaTeX code to screen
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "latex.cph"
>
> ### * latex.cph
>
> flush(stderr()); flush(stdout())
>
> ### Name: latex.cph
> ### Title: LaTeX Representation of a Fitted Cox Model
> ### Aliases: latex.cph latex.lrm latex.ols latex.pphsm latex.psm
> ### Keywords: regression character survival interface models
>
> ### ** Examples
>
> ## Not run:
> ##D units(ftime) <- "Day"
> ##D f <- cph(Surv(ftime, death) ~ rcs(age)+sex, surv=TRUE, time.inc=60)
> ##D w <- latex(f)   # Interprets fitted model and makes table of S0(t)
> ##D                 # for t=0,60,120,180,...  Creates file f.tex
> ##D w   # displays image, if viewer installed
> ##D latex(f,file="")   # send LaTeX code to the screen
> ## End(Not run)
>
> cleanEx(); ..nameEx <- "lrm"
>
> ### * lrm
>
> flush(stderr()); flush(stdout())
>
> ### Name: lrm
> ### Title: Logistic Regression Model
> ### Aliases: lrm
> ### Keywords: category models
>
> ### ** Examples
>
> # Fit a logistic model containing predictors age, blood.pressure, sex
> # and cholesterol, with age fitted with a smooth 5-knot restricted cubic
> # spline function and a different shape of the age relationship for males
> # and females.
> #
> n <- 1000    # define sample size
> set.seed(17) # so can reproduce the results
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> cholesterol <- rnorm(n, 200, 25)
> sex <- factor(sample(c('female','male'), n,TRUE))
> label(age) <- 'Age'   # label is in Hmisc
> label(cholesterol) <- 'Total Cholesterol'
> label(blood.pressure) <- 'Systolic Blood Pressure'
> label(sex) <- 'Sex'
> units(cholesterol) <- 'mg/dl'   # uses units.default in Hmisc
> units(blood.pressure) <- 'mmHg'
> 
> # Specify population model for log odds that Y=1
> L <- .4*(sex=='male') + .045*(age-50) +
+   (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
> # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
> y <- ifelse(runif(n) < plogis(L), 1, 0)
> cholesterol[1:3] <- NA   # 3 missings, at random
> 
> ddist <- datadist(age, blood.pressure, cholesterol, sex)
> options(datadist='ddist')
> 
> fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)),
+            x=TRUE, y=TRUE)
> # x=TRUE, y=TRUE allows use of resid(), which.influence below
> # could define d <- datadist(fit) after lrm(), but data distribution
> # summary would not be stored with fit, so later uses of plot.Design
> # or summary.Design would require access to the original dataset or
> # d or specifying all variable values to summary, plot, nomogram
> anova(fit)
                Wald Statistics          Response: y

 Factor                                            Chi-Square d.f. P     
 blood.pressure                                      0.23      1   0.6315
 sex  (Factor+Higher Order Factors)                 38.17      5   <.0001
  All Interactions                                  26.25      4   <.0001
 age  (Factor+Higher Order Factors)                 30.48      2   <.0001
  All Interactions                                   3.68      1   0.0552
 cholesterol  (Factor+Higher Order Factors)         24.15      6   0.0005
  All Interactions                                  22.74      3   <.0001
  Nonlinear (Factor+Higher Order Factors)            5.11      4   0.2759
 sex * age  (Factor+Higher Order Factors)            3.68      1   0.0552
 sex * cholesterol  (Factor+Higher Order Factors)   22.74      3   <.0001
  Nonlinear                                          4.54      2   0.1031
  Nonlinear Interaction : f(A,B) vs. AB              4.54      2   0.1031
 TOTAL NONLINEAR                                     5.11      4   0.2759
 TOTAL INTERACTION                                  26.25      4   <.0001
 TOTAL NONLINEAR + INTERACTION                      26.98      6   0.0001
 TOTAL                                              62.10     10   <.0001
> plot(fit, age=NA, sex=NA)
> plot(fit, age=20:70, sex="male")   # needed if datadist not used
> print(cbind(resid(fit,"dfbetas"), resid(fit,"dffits"))[1:20,])
     Intercept blood.pressure   sex=male        age cholesterol cholesterol'
1           NA             NA         NA         NA          NA           NA
2           NA             NA         NA         NA          NA           NA
3           NA             NA         NA         NA          NA           NA
4   0.00110585     -0.0050650  0.0183063 1.8010e-04 -2.2335e-06  -1.9131e-05
5  -0.00185943      0.0085165 -0.0166674 -3.0282e-04  3.7555e-06   3.2167e-05
 [rows 6-20 and the remaining columns (cholesterol'', the sex=male
  interaction terms, and dffits) omitted from this listing; rows 1-3
  are NA because of the 3 missing cholesterol values]
> which.influence(fit, .3)
$Intercept
[1] "173"

$cholesterol
[1] "143" "173"

> # latex(fit)   # print nice statement of fitted model
> #
> #Repeat this fit using penalized MLE, penalizing complex terms
> #(for nonlinear or interaction effects)
> #
> fitp <- update(fit, penalty=list(simple=0,nonlinear=10), x=TRUE, y=TRUE)
> effective.df(fitp)

Original and Effective Degrees of Freedom

                         Original Penalized
All                            10      6.76
Simple Terms                    4      4.00
Interaction or Nonlinear        6      2.76
Nonlinear                       4      2.22
Interaction                     4      1.72
Nonlinear Interaction           2      1.17
> # or lrm(y ~ ..., penalty=...)
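> # (Sketch, for intuition; it mirrors the not-run Verweij & Van Houwelingen
> # AIC study later in this file.)  The effective d.f. printed above is
> # essentially a trace formula: refit with no penalty for one iteration,
> # starting at the penalized estimates, then take
> # f.nopen <- lrm.fit(fitp$x, fitp$y, initial=fitp$coef, maxit=1)
> # info.unpen <- solve(f.nopen$var)        # unpenalized information matrix
> # sum(diag(info.unpen %*% fitp$var)) - 1  # effective d.f., excluding intercept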
> 
> #Get fits for a variety of penalties and assess predictive accuracy
> #in a new data set.  Program efficiently so that complex design
> #matrices are only created once.
> 
> set.seed(201)
> x1 <- rnorm(500)
> x2 <- rnorm(500)
> x3 <- sample(0:1,500,rep=TRUE)
> L <- x1+abs(x2)+x3
> y <- ifelse(runif(500)<=plogis(L), 1, 0)
> new.data <- data.frame(x1,x2,x3,y)[301:500,]
> #
> for(penlty in seq(0,.15,by=.005)) {
+   if(penlty==0) {
+     f <- lrm(y ~ rcs(x1,4)+rcs(x2,6)*x3, subset=1:300, x=TRUE, y=TRUE)
+     # True model is linear in x1 and has no interaction
+     X <- f$x   # saves time for future runs - don't have to use rcs etc.
+     Y <- f$y   # this also deletes rows with NAs (if there were any)
+     penalty.matrix <- diag(diag(var(X)))
+     Xnew <- predict(f, new.data, type="x", incl.non.slopes=FALSE)
+     # expand design matrix for new data
+     Ynew <- new.data$y
+   } else f <- lrm.fit(X,Y, penalty.matrix=penlty*penalty.matrix)
+   #
+   cat("\nPenalty :",penlty,"\n")
+   pred.logit <- f$coef[1] + (Xnew %*% f$coef[-1])
+   pred <- plogis(pred.logit)
+   C.index <- somers2(pred, Ynew)["C"]
+   Brier <- mean((pred-Ynew)^2)
+   Deviance<- -2*sum( Ynew*log(pred) + (1-Ynew)*log(1-pred) )
+   cat("ROC area:",format(C.index)," Brier score:",format(Brier),
+       " -2 Log L:",format(Deviance),"\n")
+ }

Penalty : 0       ROC area: 0.71308   Brier score: 0.16241   -2 Log L: 197.97
Penalty : 0.005   ROC area: 0.71723   Brier score: 0.16078   -2 Log L: 196.82
Penalty : 0.01    ROC area: 0.71841   Brier score: 0.16034   -2 Log L: 196.18
Penalty : 0.015   ROC area: 0.71826   Brier score: 0.16001   -2 Log L: 195.66
Penalty : 0.02    ROC area: 0.71856   Brier score: 0.15971   -2 Log L: 195.21
Penalty : 0.025   ROC area: 0.719     Brier score: 0.15944   -2 Log L: 194.80
Penalty : 0.03    ROC area: 0.71856   Brier score: 0.15919   -2 Log L: 194.44
Penalty : 0.035   ROC area: 0.71826   Brier score: 0.15895   -2 Log L: 194.10
Penalty : 0.04    ROC area: 0.719     Brier score: 0.15874   -2 Log L: 193.79
Penalty : 0.045   ROC area: 0.71945   Brier score: 0.15853   -2 Log L: 193.50
Penalty : 0.05    ROC area: 0.7199    Brier score: 0.15833   -2 Log L: 193.23
Penalty : 0.055   ROC area: 0.7199    Brier score: 0.15815   -2 Log L: 192.98
Penalty : 0.06    ROC area: 0.72004   Brier score: 0.15797   -2 Log L: 192.74
Penalty : 0.065   ROC area: 0.72034   Brier score: 0.15781   -2 Log L: 192.52
Penalty : 0.07    ROC area: 0.72063   Brier score: 0.15765   -2 Log L: 192.32
Penalty : 0.075   ROC area: 0.72123   Brier score: 0.1575    -2 Log L: 192.12
Penalty : 0.08    ROC area: 0.72078   Brier score: 0.15736   -2 Log L: 191.94
Penalty : 0.085   ROC area: 0.72093   Brier score: 0.15722   -2 Log L: 191.76
Penalty : 0.09    ROC area: 0.72078   Brier score: 0.15709   -2 Log L: 191.6
Penalty : 0.095   ROC area: 0.72108   Brier score: 0.15697   -2 Log L: 191.44
Penalty : 0.1     ROC area: 0.72123   Brier score: 0.15685   -2 Log L: 191.29
Penalty : 0.105   ROC area: 0.72123   Brier score: 0.15674   -2 Log L: 191.15
Penalty : 0.11    ROC area: 0.72152   Brier score: 0.15663   -2 Log L: 191.02
Penalty : 0.115   ROC area: 0.72152   Brier score: 0.15652   -2 Log L: 190.89
Penalty : 0.12    ROC area: 0.72167   Brier score: 0.15642   -2 Log L: 190.77
Penalty : 0.125   ROC area: 0.72167   Brier score: 0.15633   -2 Log L: 190.66
Penalty : 0.13    ROC area: 0.72197   Brier score: 0.15624   -2 Log L: 190.55
Penalty : 0.135   ROC area: 0.72197   Brier score: 0.15615   -2 Log L: 190.44
Penalty : 0.14    ROC area: 0.72152   Brier score: 0.15606   -2 Log L: 190.34
Penalty : 0.145   ROC area: 0.72197   Brier score: 0.15598   -2 Log L: 190.25
Penalty : 0.15    ROC area: 0.72226   Brier score: 0.15590   -2 Log L: 190.16
> #penalty=0.045 gave lowest -2 Log L, Brier, ROC in test sample for S+
> #
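> # (Sketch, not run; reuses X, Y, Xnew, Ynew and penalty.matrix from above.)
> # Instead of reading the best penalty off the printed trace, the test-sample
> # deviances could be accumulated and the minimizer selected programmatically:
> # penalties <- seq(0, .15, by=.005)
> # dev <- numeric(length(penalties))
> # for(i in seq(along=penalties)) {
> #   g <- lrm.fit(X, Y, penalty.matrix=penalties[i]*penalty.matrix)
> #   pr <- plogis(g$coef[1] + Xnew %*% g$coef[-1])
> #   dev[i] <- -2*sum(Ynew*log(pr) + (1-Ynew)*log(1-pr))
> # }
> # penalties[which.min(dev)]   # penalty minimizing test-sample deviance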
#Use bootstrap validation to estimate predictive accuracy of > #logistic models with various penalties > #To see how noisy cross-validation estimates can be, change the > #validate(f, ...) to validate(f, method="cross", B=10) for example. > #You will see tremendous variation in accuracy with minute changes in > #the penalty. This comes from the error inherent in using 10-fold > #cross validation but also because we are not fixing the splits. > #20-fold cross validation was even worse for some > #indexes because of the small test sample size. Stability would be > #obtained by using the same sample splits for all penalty values > #(see above), but then we wouldn't be sure that the choice of the > #best penalty is not specific to how the sample was split. This > #problem is addressed in the last example. > # > penalties <- seq(0,.7,by=.1) # really use by=.02 > index <- matrix(NA, nrow=length(penalties), ncol=9, + dimnames=list(format(penalties), + c("Dxy","R2","Intercept","Slope","Emax","D","U","Q","B"))) > i <- 0 > for(penlty in penalties) { + cat(penlty, "") + i <- i+1 + if(penlty==0) { + f <- lrm(y ~ rcs(x1,4)+rcs(x2,6)*x3, x=TRUE, y=TRUE) # fit whole sample + X <- f$x + Y <- f$y + penalty.matrix <- diag(diag(var(X))) # save time - only do once + } else f <- lrm(Y ~ X, penalty=penlty, + penalty.matrix=penalty.matrix, x=TRUE,y=TRUE) + val <- validate(f, method="boot", B=20) # use larger B in practice + index[i,] <- val[,"index.corrected"] + } 0 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.1 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.2 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.3 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.4 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.5 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.6 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.7 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 > par(mfrow=c(3,3)) > for(i in 1:9) { + plot(penalties, index[,i], + xlab="Penalty", ylab=dimnames(index)[[2]][i]) + lines(lowess(penalties, index[,i])) + } > options(datadist=NULL) > > # Example of weighted analysis > x <- 1:5 > y <- c(0,1,0,1,0) > reps <- c(1,2,3,2,1) > lrm(y ~ x, weights=reps) Warning in lrm(y ~ x, weights = reps) : currently weights are ignored in model validation and bootstrapping lrm fits Logistic Regression Model lrm(formula = y ~ x, weights = reps) Frequencies of Responses 0 1 3 2 Sum of Weights by Response Category 0 1 5 4 Obs Max Deriv Model L.R. d.f. P 5 1e-16 0 1 1 C Dxy Gamma Tau-a R2 0.5 0 0 0 0 Brier Sum of Weights 0.247 9 Coef S.E. Wald Z P Intercept -2.231e-01 1.867 -0.12 0.9049 x -1.499e-16 0.581 0.00 1.0000 > x <- rep(x, reps) > y <- rep(y, reps) > lrm(y ~ x) # same as above Logistic Regression Model lrm(formula = y ~ x) Frequencies of Responses 0 1 5 4 Obs Max Deriv Model L.R. d.f. P C Dxy 9 1e-16 0 1 1 0.5 0 Gamma Tau-a R2 Brier 0 0 0 0.247 Coef S.E. Wald Z P Intercept -2.231e-01 1.867 -0.12 0.9049 x -1.499e-16 0.581 0.00 1.0000 > > # > #Study performance of a modified AIC which uses the effective d.f. > #See Verweij and Van Houwelingen (1994) Eq. (6). Here AIC=chisq-2*df. > #Also try as effective d.f. equation (4) of the previous reference. > #Also study performance of Shao's cross-validation technique (which was > #designed to pick the "right" set of variables, and uses a much smaller > #training sample than most methods). Compare cross-validated deviance > #vs. 
penalty to the gold standard accuracy on a 7500 observation dataset. > #Note that if you only want to get AIC or Schwarz Bayesian information > #criterion, all you need is to invoke the pentrace function. > #NOTE: the effective.df( ) function is used in practice > # > ## Not run: > ##D for(seed in c(339,777,22,111,3)){ > ##D # study performance for several datasets > ##D set.seed(seed) > ##D n <- 175; p <- 8 > ##D X <- matrix(rnorm(n*p), ncol=p) # p normal(0,1) predictors > ##D Coef <- c(-.1,.2,-.3,.4,-.5,.6,-.65,.7) # true population coefficients > ##D L <- X %*% Coef # intercept is zero > ##D Y <- ifelse(runif(n)<=plogis(L), 1, 0) > ##D pm <- diag(diag(var(X))) > ##D #Generate a large validation sample to use as a gold standard > ##D n.val <- 7500 > ##D X.val <- matrix(rnorm(n.val*p), ncol=p) > ##D L.val <- X.val %*% Coef > ##D Y.val <- ifelse(runif(n.val)<=plogis(L.val), 1, 0) > ##D # > ##D Penalty <- seq(0,30,by=1) > ##D reps <- length(Penalty) > ##D effective.df <- effective.df2 <- aic <- aic2 <- deviance.val <- > ##D Lpenalty <- single(reps) > ##D n.t <- round(n^.75) > ##D ncv <- c(10,20,30,40) # try various no. of reps in cross-val. > ##D deviance <- matrix(NA,nrow=reps,ncol=length(ncv)) > ##D #If model were complex, could have started things off by getting X, Y > ##D #penalty.matrix from an initial lrm fit to save time > ##D # > ##D for(i in 1:reps) { > ##D pen <- Penalty[i] > ##D cat(format(pen),"") > ##D f.full <- lrm.fit(X, Y, penalty.matrix=pen*pm) > ##D Lpenalty[i] <- pen* t(f.full$coef[-1]) %*% pm %*% f.full$coef[-1] > ##D f.full.nopenalty <- lrm.fit(X, Y, initial=f.full$coef, maxit=1) > ##D info.matrix.unpenalized <- solve(f.full.nopenalty$var) > ##D effective.df[i] <- sum(diag(info.matrix.unpenalized %*% f.full$var)) - 1 > ##D lrchisq <- f.full.nopenalty$stats["Model L.R."] > ##D # lrm does all this penalty adjustment automatically (for var, d.f., > ##D # chi-square) > ##D aic[i] <- lrchisq - 2*effective.df[i] > ##D # > ##D pred <- plogis(f.full$linear.predictors) > ##D score.matrix <- cbind(1,X) * (Y - pred) > ##D sum.u.uprime <- t(score.matrix) %*% score.matrix > ##D effective.df2[i] <- sum(diag(f.full$var %*% sum.u.uprime)) > ##D aic2[i] <- lrchisq - 2*effective.df2[i] > ##D # > ##D #Shao suggested averaging 2*n cross-validations, but let's do only 40 > ##D #and stop along the way to see if fewer is OK > ##D dev <- 0 > ##D for(j in 1:max(ncv)) { > ##D s <- sample(1:n, n.t) > ##D cof <- lrm.fit(X[s,],Y[s], > ##D penalty.matrix=pen*pm)$coef > ##D pred <- cof[1] + (X[-s,] %*% cof[-1]) > ##D dev <- dev -2*sum(Y[-s]*pred + log(1-plogis(pred))) > ##D for(k in 1:length(ncv)) if(j==ncv[k]) deviance[i,k] <- dev/j > ##D } > ##D # > ##D pred.val <- f.full$coef[1] + (X.val %*% f.full$coef[-1]) > ##D prob.val <- plogis(pred.val) > ##D deviance.val[i] <- -2*sum(Y.val*pred.val + log(1-prob.val)) > ##D } > ##D postscript(hor=TRUE) # along with graphics.off() below, allow plots > ##D par(mfrow=c(2,4)) # to be printed as they are finished > ##D plot(Penalty, effective.df, type="l") > ##D lines(Penalty, effective.df2, lty=2) > ##D plot(Penalty, Lpenalty, type="l") > ##D title("Penalty on -2 log L") > ##D plot(Penalty, aic, type="l") > ##D lines(Penalty, aic2, lty=2) > ##D for(k in 1:length(ncv)) { > ##D plot(Penalty, deviance[,k], ylab="deviance") > ##D title(paste(ncv[k],"reps")) > ##D lines(supsmu(Penalty, deviance[,k])) > ##D } > ##D plot(Penalty, deviance.val, type="l") > ##D title("Gold Standard (n=7500)") > ##D title(sub=format(seed),adj=1,cex=.5) > ##D graphics.off() > ##D } > ## 
End(Not run)
> #The results showed that to obtain a clear picture of the penalty-
> #accuracy relationship one needs 30 or 40 reps in the cross-validation.
> #For 4 of 5 samples, though, the super smoother was able to detect
> #an accurate penalty giving the best (lowest) deviance using 10-fold
> #cross-validation.  Cross-validation would have worked better had
> #the same splits been used for all penalties.
> #The AIC methods worked just as well and are much quicker to compute.
> #The first AIC, based on the effective d.f. in Gray's Eq. 2.9
> #(Verweij and Van Houwelingen (1994) Eq. 5 (note typo)), worked best.
> 
> 
> 
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "lrm.fit"
> 
> ### * lrm.fit
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: lrm.fit
> ### Title: Logistic Model Fitter
> ### Aliases: lrm.fit
> ### Keywords: models regression
> 
> ### ** Examples
> 
> #Fit an additive logistic model containing numeric predictors age,
> #blood.pressure, and sex, assumed to be already properly coded and
> #transformed
> #
> # fit <- lrm.fit(cbind(age,blood.pressure,sex), death)
> 
> 
> 
> cleanEx(); ..nameEx <- "matinv"
> 
> ### * matinv
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: matinv
> ### Title: Total and Partial Matrix Inversion using Gauss-Jordan Sweep
> ###   Operator
> ### Aliases: matinv
> ### Keywords: array
> 
> ### ** Examples
> 
> a <- diag(1:3)
> a.inv1 <- matinv(a, 1, negate=FALSE)   #Invert with respect to a[1,1]
> a.inv1
     [,1] [,2] [,3]
[1,]   -1    0    0
[2,]    0    2    0
[3,]    0    0    3
attr(,"rank")
[1] 1
attr(,"swept")
[1]  TRUE FALSE FALSE
> a.inv <- -matinv(a.inv1, 2:3, negate=FALSE)   #Finish the job
> a.inv
     [,1] [,2]    [,3]
[1,]    1  0.0 0.00000
[2,]    0  0.5 0.00000
[3,]    0  0.0 0.33333
attr(,"rank")
[1] 2
attr(,"swept")
[1] TRUE TRUE TRUE
> solve(a)
     [,1] [,2]    [,3]
[1,]    1  0.0 0.00000
[2,]    0  0.5 0.00000
[3,]    0  0.0 0.33333
> 
> 
> 
> cleanEx(); ..nameEx <- "nomogram"
> 
> ### * nomogram
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: nomogram
> ### Title: Draw a Nomogram
> ### Aliases: nomogram nomogram.Design print.nomogram legend.nomabbrev
> ### Keywords: models regression hplot
> 
> ### ** Examples
> 
> n <- 1000    # define sample size
> set.seed(17) # so can reproduce the results
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> cholesterol <- rnorm(n, 200, 25)
> sex <- factor(sample(c('female','male'), n,TRUE))
> 
> # Specify population model for log odds that Y=1
> L <- .4*(sex=='male') + .045*(age-50) +
+   (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
> # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
> y <- ifelse(runif(n) < plogis(L), 1, 0)
> 
> ddist <- datadist(age, blood.pressure, cholesterol, sex)
> options(datadist='ddist')
> 
> f <- lrm(y ~ lsp(age,50)+sex*rcs(cholesterol,4)+blood.pressure)
> nomogram(f, fun=function(x)1/(1+exp(-x)),   # or fun=plogis
+          fun.at=c(.001,.01,.05,seq(.1,.9,by=.1),.95,.99,.999),
+          funlabel="Risk of Death", xfrac=.45)
> #Instead of fun.at, could have specified fun.lp.at=logit of
> #sequence above - faster and slightly more accurate
> nomogram(f, age=seq(10,90,by=10), xfrac=.45)
> g <- lrm(y ~ sex + rcs(age,3)*rcs(cholesterol,3))
> nomogram(g, interact=list(age=c(20,40,60)),
+          conf.int=c(.7,.9,.95), col.conf=c(1,.5,.2))
> 
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50)+.8*(sex=='female'))   # 'female' matches the levels of sex
> d.time <- -log(runif(n))/h
> death <- ifelse(d.time <= cens,1,0)
> d.time <- pmin(d.time, cens)
> 
> f <- psm(Surv(d.time,death) ~ sex*age, dist=if(.R.)'lognormal' else 'gaussian')
> med <- Quantile(f)
> surv <-
Survival(f) # This would also work if f was from cph > nomogram(f, fun=function(x) med(lp=x), funlabel="Median Survival Time") > nomogram(f, fun=list(function(x) surv(3, x), function(x) surv(6, x)), + funlabel=c("3-Month Survival Probability", + "6-month Survival Probability"), xfrac=.5) > > ## Not run: > ##D nom <- nomogram(fit.with.categorical.predictors, abbrev=TRUE, minlength=1) > ##D nom$x1$points # print points assigned to each level of x1 for its axis > ##D #Add legend for abbreviations for category levels > ##D abb <- nom$abbrev$treatment > ##D legend(locator(1), abb$full, pch=paste(abb$abbrev,collapse=''), > ##D ncol=2, bty='n') # this only works for 1-letter abbreviations > ##D #Or use the legend.nomabbrev function: > ##D legend.nomabbrev(nom, 'treatment', locator(1), ncol=2, bty='n') > ## End(Not run) > > #Make a nomogram with axes predicting probabilities Y>=j for all j=1-3 > #in an ordinal logistic model, where Y=0,1,2,3 > Y <- ifelse(y==0, 0, sample(1:3, length(y), TRUE)) > g <- lrm(Y ~ age+rcs(cholesterol,4)*sex) > fun2 <- function(x) plogis(x-g$coef[1]+g$coef[2]) > fun3 <- function(x) plogis(x-g$coef[1]+g$coef[3]) > f <- Newlabels(g, c(age='Age in Years')) > #see Design.Misc, which also has Newlevels to change > #labels for levels of categorical variables > nomogram(f, fun=list('Prob Y>=1'=plogis, 'Prob Y>=2'=fun2, + 'Prob Y=3'=fun3), + fun.at=c(.01,.05,seq(.1,.9,by=.1),.95,.99), + lmgp=.2, cex.axis=.6) > options(datadist=NULL) > > > > cleanEx(); ..nameEx <- "ols" > > ### * ols > > flush(stderr()); flush(stdout()) > > ### Name: ols > ### Title: Linear Model Estimation Using Ordinary Least Squares > ### Aliases: ols > ### Keywords: models regression > > ### ** Examples > > set.seed(1) > x1 <- runif(200) > x2 <- sample(0:3, 200, TRUE) > distance <- (x1 + x2/3 + rnorm(200))^2 > d <- datadist(x1,x2) > options(datadist="d") # No d -> no summary, plot without giving all details > > f <- ols(sqrt(distance) ~ rcs(x1,4) + scored(x2), x=TRUE) > # could use d <- datadist(f); options(datadist="d") at this point, > # but predictor summaries would not be stored in the fit object for > # use with plot.Design, summary.Design. In that case, the original > # dataset or d would need to be accessed later, or all variable values > # would have to be specified to summary, plot > anova(f) Analysis of Variance Response: sqrt(distance) Factor d.f. Partial SS MS F P x1 3 8.16622 2.722074 4.37 0.0053 Nonlinear 2 0.13824 0.069121 0.11 0.8951 x2 3 18.42329 6.141096 9.85 <.0001 Nonlinear 2 4.60425 2.302124 3.69 0.0266 TOTAL NONLINEAR 4 4.60525 1.151312 1.85 0.1214 REGRESSION 6 30.28234 5.047056 8.10 <.0001 ERROR 193 120.27885 0.623206 > which.influence(f) $Intercept [1] 116 193 $x1 [1] 74 116 193 $x2 [1] 6 74 103 180 > summary(f) Effects Response : sqrt(distance) Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 x1 0.2937 0.74248 0.44879 0.33 0.16 0.01 0.64 x2 - 0:1 2.0000 1.00000 NA -0.06 0.15 -0.35 0.22 x2 - 2:1 2.0000 3.00000 NA 0.67 0.17 0.34 1.00 x2 - 3:1 2.0000 4.00000 NA 0.52 0.16 0.21 0.82 > summary.lm(f) # will only work if penalty and penalty.matrix not used Call: ols(formula = sqrt(distance) ~ rcs(x1, 4) + scored(x2), x = TRUE) Residuals: Min 1Q Median 3Q Max -1.463 -0.588 -0.101 0.524 2.132 Coefficients: Estimate Std. Error t value Pr(>|t|) Intercept 0.5050 0.2418 2.09 0.038 * x1 1.0738 0.9831 1.09 0.276 x1' -0.5649 2.7791 -0.20 0.839 x1'' 1.0240 9.0616 0.11 0.910 x2 0.0637 0.1461 0.44 0.663 x2=2 0.6089 0.2624 2.32 0.021 * x2=3 0.3897 0.3860 1.01 0.314 --- Signif. 
codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.789 on 193 degrees of freedom
Multiple R-Squared: 0.201,  Adjusted R-squared: 0.176
F-statistic: 8.1 on 6 and 193 DF,  p-value: 8.18e-08

> 
> # Fit a complex model and approximate it with a simple one
> x1 <- runif(200)
> x2 <- runif(200)
> x3 <- runif(200)
> x4 <- runif(200)
> y <- x1 + x2 + rnorm(200)
> f <- ols(y ~ rcs(x1,4) + x2 + x3 + x4)
> pred <- fitted(f)   # or predict(f) or f$linear.predictors
> f2 <- ols(pred ~ rcs(x1,4) + x2 + x3 + x4, sigma=1)
> # sigma=1 prevents numerical problems resulting from R2=1
> fastbw(f2, aics=100000)

 Deleted Chi-Sq d.f. P      Residual d.f. P      AIC   R2   
 x4       1.16  1    0.2815  1.16    1    0.2815 -0.84 0.981
 x3       3.54  1    0.0599  4.70    2    0.0954  0.70 0.922
 x2      14.08  1    0.0002 18.78    3    0.0003 12.78 0.687
 x1      41.27  3    0.0000 60.05    6    0.0000 48.05 0.000

Approximate Estimates after Deleting Factors

       Coef    S.E. Wald Z P
[1,] 0.9716 0.07071  13.74 0

Factors in Final Model

None
> # This will find the best 1-variable model, best 2-variable model, etc.
> # in predicting the predicted values from the original model
> options(datadist=NULL)
> 
> 
> 
> cleanEx(); ..nameEx <- "pentrace"
> 
> ### * pentrace
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: pentrace
> ### Title: Trace AIC and BIC vs. Penalty
> ### Aliases: pentrace plot.pentrace print.pentrace effective.df
> ### Keywords: models regression
> 
> ### ** Examples
> 
> n <- 1000    # define sample size
> set.seed(17) # so can reproduce the results
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> cholesterol <- rnorm(n, 200, 25)
> sex <- factor(sample(c('female','male'), n,TRUE))
> # Specify population model for log odds that Y=1
> L <- .4*(sex=='male') + .045*(age-50) +
+   (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
> # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
> y <- ifelse(runif(n) < plogis(L), 1, 0)
> 
> f <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)),
+          x=TRUE, y=TRUE)
> p <- pentrace(f, seq(.2,1,by=.05))
> plot(p)
> p$diag   # may learn something about fractional effective d.f.
[1] 1
> # for each original parameter
> pentrace(f, list(simple=c(0,.2,.4), nonlinear=c(0,.2,.4,.8,1)))

Best penalty:

 simple nonlinear df
      0         0 10

   simple nonlinear      df    aic     bic  aic.c
1     0.0       0.0 10.0000 49.389 0.31126 49.166
4     0.0       0.2  9.2803 48.315 2.76959 48.122
5     0.2       0.2  9.0625 47.286 2.81000 47.102
7     0.0       0.4  8.9605 47.187 3.21069 47.006
8     0.2       0.4  8.8176 46.673 3.39800 46.498
9     0.4       0.4  8.7354 46.355 3.48327 46.183
10    0.0       0.8  8.5771 46.212 4.11762 46.046
11    0.2       0.8  8.4834 46.092 4.45760 45.930
12    0.4       0.8  8.4204 46.009 4.68377 45.849
13    0.0       1.0  8.4391 45.991 4.57354 45.830
14    0.2       1.0  8.3568 45.949 4.93563 45.791
15    0.4       1.0  8.2989 45.918 5.18894 45.762
> 
> # Bootstrap pentrace 5 times, overlaying 5 bootstrap-resample traces of
> # corrected AIC on the trace for the original sample
> n <- nrow(f$x)
> plot(pentrace(f, seq(.2,1,by=.05)), which='aic.c',
+      col=1, ylim=c(30,120))   # original in black
> for(j in 1:5)
+   plot(pentrace(f, seq(.2,1,by=.05), subset=sample(n,n,TRUE)),
+        which='aic.c', col=j+1, add=TRUE)
> 
> # Find penalty giving optimum corrected AIC.  Initial guess is 1.0
> if(!.R.) pentrace(f, 1, method='optimize')
> 
> # Find penalty reducing total regression d.f. effectively to 5
> if(!.R.)
pentrace(f, 1, target.df=5) > > # Re-fit with penalty giving best aic.c without differential penalization > f <- update(f, penalty=p$penalty) > effective.df(f) Original and Effective Degrees of Freedom Original Penalized All 10 10 Simple Terms 4 4 Interaction or Nonlinear 6 6 Nonlinear 4 4 Interaction 4 4 Nonlinear Interaction 2 2 > > > > cleanEx(); ..nameEx <- "plot.Design" > > ### * plot.Design > > flush(stderr()); flush(stdout()) > > ### Name: plot.Design > ### Title: Plot Effects of Variables > ### Aliases: plot.Design print.plot.Design perimeter lines.perimeter Legend > ### Legend.default Legend.plot.Design datadensity datadensity.plot.Design > ### Keywords: models hplot htest > > ### ** Examples > > n <- 1000 # define sample size > set.seed(17) # so can reproduce the results > age <- rnorm(n, 50, 10) > blood.pressure <- rnorm(n, 120, 15) > cholesterol <- rnorm(n, 200, 25) > sex <- factor(sample(c('female','male'), n,TRUE)) > label(age) <- 'Age' # label is in Hmisc > label(cholesterol) <- 'Total Cholesterol' > label(blood.pressure) <- 'Systolic Blood Pressure' > label(sex) <- 'Sex' > units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc > units(blood.pressure) <- 'mmHg' > > # Specify population model for log odds that Y=1 > L <- .4*(sex=='male') + .045*(age-50) + + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) > # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] > y <- ifelse(runif(n) < plogis(L), 1, 0) > > ddist <- datadist(age, blood.pressure, cholesterol, sex) > options(datadist='ddist') > > fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)), + x=TRUE, y=TRUE) > > par(mfrow=c(2,2)) > plot(fit) # Plot effects of all 4 predictors > par(mfrow=c(1,2)) > plot(fit, name=c('age','cholesterol')) # Make 2 plots > par(mfrow=c(1,1)) > plot(fit, age=seq(20,80,length=100), sex=NA, conf.int=FALSE) > # Plot relationship between age and log > # odds, separate curve for each sex, > # no C.I. > z <- plot(fit, age=NA, sex=NA, label.curves=FALSE) > # use label.curves=list(keys=c('a','b'))' > # to use 1-letter abbreviations > datadensity(z, age, sex) # rug plots (1-dimensional scatterplots) > # on each treatment curve, with treatment- > # specific density of age > plot(fit, age=seq(20,80,length=100), sex='male') # works if datadist not used > plot(fit, age=NA, cholesterol=NA)# 3-dimensional perspective plot for age, > # cholesterol, and log odds using default > # ranges for both variables > boundaries <- perimeter(age, cholesterol, lowess=TRUE) > plot(age, cholesterol) # show bivariate data density > lines(boundaries) # and perimeter that will be used for 3-D plot > z <- plot(fit, age=NA, cholesterol=NA, perim=boundaries, method='image') > # draws image() plot > # don't show estimates where data are sparse > # doesn't make sense here since vars don't interact > if(!.R.)Legend(z, fun=plogis, at=qlogis(c(.01,.05,.1,.2,.3,.4,.5)), + zlab='Probability') # gray scale or color legend for prob. 
> plot(fit, age=NA, fun=function(x) 1/(1+exp(-x)) , # or fun=plogis + ylab="Prob", conf.int=.9) # Plot estimated probabilities instead of > # log odds > > # Plot the age effect as an odds ratio > # comparing the age shown on the x-axis to age=30 years > > ddist$limits$age[2] <- 30 # make 30 the reference value for age > # Could also do: ddist$limits["Adjust to","age"] <- 30 > fit <- update(fit) # make new reference value take effect > plot(fit, age=NA, ref.zero=TRUE, fun=exp, ylab='Age=x:Age=30 Odds Ratio') > abline(h=1, lty=2, col=2); abline(v=30, lty=2, col=2) > > # Make two curves, and plot the predicted curves as two trellis panels > w <- plot(fit, age=NA, sex=NA) # Would be nice if a pl=FALSE option was avail. > z <- data.frame(w$x.xbeta) # Makes variable names legal > if(.R.) library(lattice) > xyplot(log.odds ~ age | sex, data=z, type='l') > # To add confidence bands we need to use the Hmisc xYplot function in > # place of xyplot > xYplot(Cbind(log.odds,lower,upper) ~ age | sex, data=z, + method='bands', type='l') Loading required package: grid > # If non-displayed variables were in the model, add a subtitle to show > # their settings using title(sub=paste('Adjusted to',w$adjust),adj=0) > # See predict.Design for an example using predict and xYplot without plot() > > > > # Plots for a parametric survival model > n <- 1000 > set.seed(731) > age <- 50 + 12*rnorm(n) > label(age) <- "Age" > sex <- factor(sample(c('Male','Female'), n, + rep=TRUE, prob=c(.6, .4))) > cens <- 15*runif(n) > h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) > t <- -log(runif(n))/h > label(t) <- 'Follow-up Time' > e <- ifelse(t<=cens,1,0) > t <- pmin(t, cens) > units(t) <- "Year" > ddist <- datadist(age, sex) > Srv <- Surv(t,e) > > # Fit log-normal survival model and plot median survival time vs. age > f <- psm(Surv(t, e) ~ rcs(age), dist=if(.R.)'lognormal' else 'gaussian') > med <- Quantile(f) # Creates function to compute quantiles > # (median by default) > plot(f, age=NA, fun=function(x)med(lp=x), ylab="Median Survival Time") > # Note: This works because med() expects the linear predictor (X*beta) > # as an argument. Would not work if use > # plot(..., ref.zero=TRUE or adj.zero=TRUE) > # Also, confidence intervals from this method are approximate since > # they don't take into account estimation of scale parameter > > # Fit an ols model to log(y) and plot the relationship between x1 > # and the predicted mean(y) on the original scale without assuming > # normality of residuals; use the smearing estimator > set.seed(1) > x1 <- runif(300) > x2 <- runif(300) > ddist <- datadist(x1,x2) > y <- exp(x1+x2-1+rnorm(300)) > f <- ols(log(y) ~ pol(x1,2)+x2) > r <- resid(f) > smean <- function(yhat)smearingEst(yhat, exp, res, statistic='mean') > formals(smean) <- list(yhat=numeric(0), res=r[!is.na(r)]) > #smean$res <- r[!is.na(r)] # define default res argument to function > plot(f, x1=NA, fun=smean, ylab='Predicted Mean on y-scale') > > options(datadist=NULL) > > ## Not run: > ##D # Example in which separate curves are shown for 4 income values > ##D # For each curve the estimated percentage of voters voting for > ##D # the democratic party is plotted against the percent of voters > ##D # who graduated from college. scat1d is used to indicate > ##D # the income-interval-specific data density for college. For > ##D # this purpose show the distribution of percent in college for > ##D # those having an income level within +/- the half-width of > ##D # the income interval. 
scat1d shows the rug plot superimposed
> ##D # on the estimated curve.  Data are county-level percents.
> ##D # This can't be done automatically using datadensity on the object
> ##D # returned by plot.Design, as the variable representing different
> ##D # curves (income) is a continuous variable.
> ##D 
> ##D incomes <- seq(22900, 32800, length=4)
> ##D # equally spaced to outer quintiles
> ##D pl <- plot(f, college=NA, income=incomes,
> ##D            conf.int=FALSE, xlim=c(0,35), ylim=c(30,55),
> ##D            lty=1, lwd=c(.25,1.5,3.5,6), col=c(1,1,2,2))
> ##D graph.points <- pl$x.xbeta
> ##D for(i in 1:4) {
> ##D   college.in.income.group <- college[abs(income-incomes[i]) < 1650]
> ##D   this.income <- graph.points[,'income']==incomes[i]
> ##D   scat1d(college.in.income.group,
> ##D          curve=list(x=graph.points[this.income,'college'],
> ##D                     y=graph.points[this.income,'democrat']))
> ##D }
> ##D 
> ##D # Instead of showing a rug plot on each curve, erase end portions
> ##D # of each curve where there are fewer than 10 counties having
> ##D # % college graduates to the left of the x-coordinate being plotted,
> ##D # for the subset of counties having median family income within 1650
> ##D # of the target income for the curve
> ##D 
> ##D show.pts <- function(college.pts, income.pt) {
> ##D   s <- abs(income - income.pt) < 1650   #assumes income known to top frame
> ##D   x <- college[s]
> ##D   x <- sort(x[!is.na(x)])
> ##D   n <- length(x)
> ##D   low <- x[10]; high <- x[n-9]
> ##D   college.pts >= low & college.pts <= high
> ##D }
> ##D 
> ##D plot(f, college=NA, income=incomes,
> ##D      conf.int=FALSE, xlim=c(0,35), ylim=c(30,55),
> ##D      lty=1, lwd=c(.25,1.5,3.5,6), col=c(1,1,2,2),
> ##D      perim=show.pts)
> ## End(Not run)
> 
> 
> 
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "plot.xmean.ordinaly"
> 
> ### * plot.xmean.ordinaly
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: plot.xmean.ordinaly
> ### Title: Plot Mean X vs. Ordinal Y
> ### Aliases: plot.xmean.ordinaly
> ### Keywords: category models regression hplot
> 
> ### ** Examples
> 
> # Simulate data from a population proportional odds model
> set.seed(1)
> n <- 400
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> L <- .2*(age-50) + .1*(blood.pressure-120)
> p12 <- plogis(L)    # Pr(Y>=1)
> p2 <- plogis(L-1)   # Pr(Y=2)
> p <- cbind(1-p12, p12-p2, p2)   # individual class probabilities
> # Cumulative probabilities:
> cp <- matrix(cumsum(t(p)) - rep(0:(n-1), rep(3,n)), byrow=TRUE, ncol=3)
> y <- (cp < runif(n)) %*% rep(1,3)
> # Thanks to Dave Krantz for this trick
> 
> par(mfrow=c(1,2))
> plot.xmean.ordinaly(y ~ age + blood.pressure, cr=TRUE)
> par(mfrow=c(1,1))
> 
> 
> 
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "pphsm"
> 
> ### * pphsm
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: pphsm
> ### Title: Parametric Proportional Hazards form of AFT Models
> ### Aliases: pphsm print.pphsm
> ### Keywords: models survival regression
> 
> ### ** Examples
> 
> set.seed(1)
> S <- Surv(runif(100))
> x <- runif(100)
> dd <- datadist(x); options(datadist='dd')
> f <- psm(S ~ x, dist="exponential")
> summary(f)   # effects on log(T) scale
              Effects              Response : S

 Factor               Low    High    Diff.   Effect S.E. Lower 0.95 Upper 0.95
 x                    0.2844 0.72993 0.44553 0.01   0.15 -0.29      0.32      
  Survival Time Ratio 0.2844 0.72993 0.44553 1.01   NA    0.75      1.37      
> f.ph <- pphsm(f)
Warning in pphsm(f) : at present, pphsm does not return the correct covariance matrix
> summary(f.ph)   # effects on hazard ratio scale
              Effects              Response : S

 Factor Low High Diff.
Effect S.E. Lower 0.95 Upper 0.95 x 0.2844 0.72993 0.44553 -0.01 0.15 -0.32 0.29 Hazard Ratio 0.2844 0.72993 0.44553 0.99 NA 0.73 1.34 > options(datadist=NULL) > > > > cleanEx(); ..nameEx <- "predab.resample" > > ### * predab.resample > > flush(stderr()); flush(stdout()) > > ### Name: predab.resample > ### Title: Predictive Ability using Resampling > ### Aliases: predab.resample > ### Keywords: models > > ### ** Examples > > # See the code for validate.ols for an example of the use of > # predab.resample > > > > cleanEx(); ..nameEx <- "predict.Design" > > ### * predict.Design > > flush(stderr()); flush(stdout()) > > ### Name: predict.Design > ### Title: Predicted Values from Model Fit > ### Aliases: predict.Design predictDesign predict.bj predict.cph > ### predict.glmD predict.glsD predict.ols predict.psm > ### Keywords: models regression > > ### ** Examples > > n <- 1000 # define sample size > set.seed(17) # so can reproduce the results > age <- rnorm(n, 50, 10) > blood.pressure <- rnorm(n, 120, 15) > cholesterol <- rnorm(n, 200, 25) > sex <- factor(sample(c('female','male'), n,TRUE)) > treat <- factor(sample(c('a','b','c'), n,TRUE)) > > # Specify population model for log odds that Y=1 > L <- .4*(sex=='male') + .045*(age-50) + + (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male')) + + .3*sqrt(blood.pressure-60)-2.3 + 1*(treat=='b') > # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] > y <- ifelse(runif(n) < plogis(L), 1, 0) > > ddist <- datadist(age, blood.pressure, cholesterol, sex, treat) > options(datadist='ddist') > > fit <- lrm(y ~ rcs(blood.pressure,4) + + sex * (age + rcs(cholesterol,4)) + sex*treat*age) > > # Use xYplot to display predictions in 9 panels, with error bars, > # with superposition of two treatments > > dat <- expand.grid(treat=levels(treat),sex=levels(sex), + age=c(20,40,60),blood.pressure=120, + cholesterol=seq(100,300,length=10)) > # Add variables linear.predictors and se.fit to dat > dat <- cbind(dat, predict(fit, dat, se.fit=TRUE)) > # xYplot in Hmisc extends xyplot to allow error bars > xYplot(Cbind(linear.predictors,linear.predictors-1.96*se.fit, + linear.predictors+1.96*se.fit) ~ cholesterol | sex*age, + groups=treat, data=dat, type='b') Loading required package: grid Loading required package: lattice > > > > # Since blood.pressure doesn't interact with anything, we can quickly and > # interactively try various transformations of blood.pressure, taking > # the fitted spline function as the gold standard. We are seeking a > # linearizing transformation even though this may lead to falsely > # narrow confidence intervals if we use this data-dredging-based transformation > > bp <- 70:160 > logit <- predict(fit, expand.grid(treat="a", sex='male', age=median(age), + cholesterol=median(cholesterol), + blood.pressure=bp), type="terms")[,"blood.pressure"] > #Note: if age interacted with anything, this would be the age > # "main effect" ignoring interaction terms > #Could also use > # logit <- plot(f, age=ag, ...)$x.xbeta[,2] > #which allows evaluation of the shape for any level of interacting > #factors. When age does not interact with anything, the result from > #predict(f, ..., type="terms") would equal the result from > #plot if all other terms were ignored > > plot(bp^.5, logit) # try square root vs. spline transform. 
> plot(bp^1.5, logit)      # try 1.5 power
> plot(sqrt(bp-60), logit)
> 
> #Some approaches to making a plot showing how predicted values
> #vary with a continuous predictor on the x-axis, with two other
> #predictors varying
> 
> combos <- gendata(fit, age=seq(10,100,by=10), cholesterol=c(170,200,230),
+                   blood.pressure=c(80,120,160))
> #treat, sex not specified -> set to mode
> #can also use expand.grid
> 
> combos$pred <- predict(fit, combos)
> xyplot(pred ~ age | cholesterol*blood.pressure, data=combos, type='l')
> xYplot(pred ~ age | cholesterol, groups=blood.pressure, data=combos, type='l')
> Key()   # Key created by xYplot
> xYplot(pred ~ age, groups=interaction(cholesterol,blood.pressure),
+        data=combos, type='l', lty=1:9)
> Key()
> 
> #Add upper and lower 0.95 confidence limits for individuals
> combos <- cbind(combos, predict(fit, combos, conf.int=.95))
> xYplot(Cbind(linear.predictors, lower, upper) ~ age | cholesterol,
+        groups=blood.pressure, data=combos, type='b')
> Key()
> 
> # Plot effects of treatments (all pairwise comparisons) vs.
> # levels of interacting factors (age, sex)
> 
> d <- gendata(fit, treat=levels(treat), sex=levels(sex), age=seq(30,80,by=10))
> x <- predict(fit, d, type="x")
> betas <- fit$coef
> cov <- fit$var
> 
> i <- d$treat=="a"; xa <- x[i,]; Sex <- d$sex[i]; Age <- d$age[i]
> i <- d$treat=="b"; xb <- x[i,]
> i <- d$treat=="c"; xc <- x[i,]
> 
> doit <- function(xd, lab) {
+   xb <- xd%*%betas
+   se <- apply((xd %*% cov) * xd, 1, sum)^.5
+   q <- qnorm(1-.01/2)   # 0.99 confidence limits
+   lower <- xb - q * se; upper <- xb + q * se
+   #Get odds ratios instead of linear effects
+   xb <- exp(xb); lower <- exp(lower); upper <- exp(upper)
+   #First elements of these agree with
+   #summary(fit, age=30, sex='female', conf.int=.99)
+   for(sx in levels(Sex)) {
+     j <- Sex==sx
+     errbar(Age[j], xb[j], upper[j], lower[j], xlab="Age",
+            ylab=paste(lab,"Odds Ratio"), ylim=c(.1,20), log='y')
+     title(paste("Sex:",sx))
+     abline(h=1, lty=2)
+   }
+ }
> 
> par(mfrow=c(3,2), oma=c(3,0,3,0))
> doit(xb - xa, "b:a")
> doit(xc - xa, "c:a")
> doit(xc - xb, "c:b")   # difference in design vectors for the c vs. b contrast
> 
> # NOTE: This is much easier to do using contrast.Design
> 
> ## Not run:
> ##D #A variable state.code has levels "1", "5","13"
> ##D #Get predictions with or without converting variable in newdata to factor
> ##D predict(fit, data.frame(state.code=c(5,13)))
> ##D predict(fit, data.frame(state.code=factor(c(5,13))))
> ##D 
> ##D #Use gendata function (gendata.Design) for interactive specification of
> ##D #predictor variable settings (for 10 observations)
> ##D df <- gendata(fit, nobs=10, viewvals=TRUE)
> ##D df$predicted <- predict(fit, df)   # add variable to data frame
> ##D df
> ##D 
> ##D df <- gendata(fit, age=c(10,20,30))   # leave other variables at ref. vals.
> ##D predict(fit, df, type="fitted") > ##D > ##D # See reShape (in Hmisc) for an example where predictions corresponding to > ##D # values of one of the varying predictors are reformatted into multiple > ##D # columns of a matrix > ## End(Not run) > options(datadist=NULL) > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "predict.lrm" > > ### * predict.lrm > > flush(stderr()); flush(stdout()) > > ### Name: predict.lrm > ### Title: Predicted Values for Binary and Ordinal Logistic Models > ### Aliases: predict.lrm > ### Keywords: models regression > > ### ** Examples > > # See help for predict.Design for several binary logistic > # regression examples > > # Examples of predictions from ordinal models > set.seed(1) > y <- factor(sample(1:3, 400, TRUE), 1:3, c('good','better','best')) > x1 <- runif(400) > x2 <- runif(400) > f <- lrm(y ~ rcs(x1,4)*x2) > predict(f, type="fitted.ind")[1:10,] #gets Prob(better) and all others y=good y=better y=best 1 0.31247 0.36315 0.32438 2 0.36761 0.35947 0.27292 3 0.21983 0.34374 0.43643 4 0.30635 0.36297 0.33069 5 0.51713 0.31361 0.16926 6 0.30501 0.36291 0.33208 7 0.35325 0.36129 0.28546 8 0.29339 0.36212 0.34449 9 0.30686 0.36299 0.33015 10 0.62147 0.26122 0.11731 > d <- data.frame(x1=.5,x2=.5) > predict(f, d, type="fitted") # Prob(Y>=j) for new observation y>=better y>=best 0.69066 0.32758 > predict(f, d, type="fitted.ind") # Prob(Y=j) y=good y=better y=best 0.30934 0.36307 0.32758 > predict(f, d, type='mean', codes=TRUE) # predicts mean(y) using codes 1,2,3 1 2.0182 > > > > cleanEx(); ..nameEx <- "psm" > > ### * psm > > flush(stderr()); flush(stdout()) > > ### Name: psm > ### Title: Parametric Survival Model > ### Aliases: psm print.psm Hazard Survival Hazard.psm Mean.psm Quantile.psm > ### Survival.psm residuals.psm lines.residuals.psm.censored.normalized > ### survplot.residuals.psm.censored.normalized > ### Keywords: models survival > > ### ** Examples > > n <- 400 > set.seed(1) > age <- rnorm(n, 50, 12) > sex <- factor(sample(c('Female','Male'),n,TRUE)) > dd <- datadist(age,sex) > options(datadist='dd') > # Population hazard function: > h <- .02*exp(.06*(age-50)+.8*(sex=='Female')) > d.time <- -log(runif(n))/h > cens <- 15*runif(n) > death <- ifelse(d.time <= cens,1,0) > d.time <- pmin(d.time, cens) > > f <- psm(Surv(d.time,death) ~ sex*pol(age,2), + dist=if(.R.)'lognormal' else 'gaussian') > # Log-normal model is a bad fit for proportional hazards data > > anova(f) Wald Statistics Response: Surv(d.time, death) Factor Chi-Square d.f. P sex (Factor+Higher Order Factors) 28.12 3 <.0001 All Interactions 5.44 2 0.0660 age (Factor+Higher Order Factors) 24.49 4 0.0001 All Interactions 5.44 2 0.0660 Nonlinear (Factor+Higher Order Factors) 1.82 2 0.4029 sex * age (Factor+Higher Order Factors) 5.44 2 0.0660 Nonlinear 1.25 1 0.2640 Nonlinear Interaction : f(A,B) vs. AB 1.25 1 0.2640 TOTAL NONLINEAR 1.82 2 0.4029 TOTAL NONLINEAR + INTERACTION 5.63 3 0.1311 TOTAL 47.62 5 <.0001 > fastbw(f) # if deletes sex while keeping age*sex ignore the result Deleted Chi-Sq d.f. P Residual d.f. P AIC sex 1.88 1 0.1706 1.88 1 0.1706 -0.12 Approximate Estimates after Deleting Factors Coef S.E. 
    Wald Z         P
(Intercept)      6.8467444 1.9553675  3.5015 0.0004626
age             -0.1010926 0.0727404 -1.3898 0.1645981
age^2            0.0003765 0.0006781  0.5552 0.5787508
sex=Male * age   0.0093736 0.0258809  0.3622 0.7172149
sex=Male * age^2 0.0002656 0.0004448  0.5970 0.5504872

Factors in Final Model

[1] age  sex * age
> f <- update(f, x=TRUE,y=TRUE)   # so can validate, compute certain resids
> validate(f, dxy=TRUE, B=10)     # ordinarily use B=150 or more
Iteration: 1 2 3 4 5 6 7 8 9 10 
          index.orig   training     test   optimism index.corrected  n
Dxy        0.4628235  0.4581348 0.451455  0.0066802        0.456143 10
R2         0.1541043  0.1583258 0.145177  0.0131493        0.140955 10
Intercept  0.0000000  0.0000000 0.201796 -0.2017962        0.201796 10
Slope      1.0000000  1.0000000 0.929452  0.0705477        0.929452 10
D          0.0721576  0.0748675 0.067615  0.0072522        0.064905 10
U         -0.0026054 -0.0026142 0.012756 -0.0153703        0.012765 10
Q          0.0747630  0.0774816 0.054859  0.0226225        0.052140 10
> plot(f, age=NA, sex=NA)     # needs datadist since no explicit age, sex values given
> survplot(f, age=c(20,60))   # needs datadist since sex not set here
> # latex(f)
> 
> S <- Survival(f)
> plot(f$linear.predictors, S(6, f$linear.predictors),
+      xlab=if(.R.)expression(X*hat(beta)) else 'X*Beta',
+      ylab=if(.R.)expression(S(6,X*hat(beta))) else 'S(6|X*Beta)')
> # plots 6-month survival as a function of linear predictor (X*Beta hat)
> 
> times <- seq(0,24,by=.25)
> plot(times, S(times,0), type='l')     # plots survival curve at X*Beta hat=0
> lam <- Hazard(f)
> plot(times, lam(times,0), type='l')   # similarly for hazard function
> 
> med <- Quantile(f)   # new function defaults to computing median only
> lp <- seq(-3, 5, by=.1)
> plot(lp, med(lp=lp), ylab="Median Survival Time")
> med(c(.25,.5), f$linear.predictors)
        0.25    0.50
[1,] 20.1888 61.0487
[2,]  4.1789 12.6365
[3,] 20.6695 62.5022
[4,] 10.5559 31.9199
[5,]  3.7222 11.2556
 [400 rows in all; remaining rows omitted from this listing]
> # prints matrix with 2 columns (0.25 and 0.50 quantiles of survival time)
> 
> # fit a model with no predictors
> f <- psm(Surv(d.time,death) ~ 1, dist=if(.R.)"weibull" else "extreme")
> f

Parametric Survival Model: Weibull Distribution

psm(formula = Surv(d.time, death) ~ 1, dist = if (.R.) "weibull" else "extreme")

 Obs Events Model L.R. d.f.  P R2
 400     89         0    0 NA  0

             Value Std. Error      z        p
(Intercept) 3.4186      0.167 20.456 5.34e-93
Log(scale)  0.0657      0.092  0.715 4.75e-01

Scale= 1.07 

> pphsm(f)   # print proportional hazards form
Warning in pphsm(f) : at present, pphsm does not return the correct covariance matrix

Parametric Survival Model Converted to PH Form

Parametric Survival Model: Weibull Distribution

psm(formula = Surv(d.time, death) ~ 1, dist = if (.R.) "weibull" else "extreme")

 Obs Events Model L.R. d.f.  P R2
 400     89         0    0 NA  0

              Value Std. Error       z        p
(Intercept) -3.2011      0.167 -19.155 8.85e-82
Log(scale)   0.0657      0.092   0.715 4.75e-01

Scale= 1.07 

Correlation of Coefficients:
           (Intercept)
Log(scale) 0.736      

> g <- survest(f)
> plot(g$time, g$surv, xlab='Time', type='l',
+      ylab=if(.R.)expression(S(t)) else 'S(t)')
> 
> f <- psm(Surv(d.time,death) ~ age,
+          dist=if(.R.)"loglogistic" else "logistic", y=TRUE)
> r <- resid(f, 'cens')   # note abbreviation
> survplot(survfit(r), conf='none')
> # plot Kaplan-Meier estimate of
> # survival function of standardized residuals
> survplot(survfit(r ~ cut2(age, g=2)), conf='none')
> # both strata should estimate the same standardized distribution
> lines(r)   # add theoretical survival function
> #More simply:
> survplot(r, age, g=2)
> 
> options(datadist=NULL)
> 
> 
> 
> cleanEx(); ..nameEx <- "residuals.cph"
> 
> ### * residuals.cph
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: residuals.cph
> ### Title: Residuals for a cph Fit
> ### Aliases: residuals.cph
> ### Keywords: survival
> 
> ### ** Examples
> 
> # fit <- cph(Surv(start, stop, event) ~ (age + surgery)* transplant,
> #            data=jasa1)
> # mresid <- resid(fit, collapse=jasa1$id)
> 
> # Get unadjusted relationships for several variables
> # Pick one variable that's not missing too much, for fit
> 
> n <- 1000    # define sample size
> set.seed(17) # so can reproduce the results
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> cholesterol <- rnorm(n, 200, 25)
> sex <- factor(sample(c('female','male'), n,TRUE))
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50)+.8*(sex=='female'))   # 'female' matches the levels of sex
> d.time <- -log(runif(n))/h
> death <- ifelse(d.time <= cens,1,0)
> d.time <- pmin(d.time, cens)
> 
> f <- cph(Surv(d.time, death) ~ age + blood.pressure + cholesterol, iter.max=0)
> res <- resid(f)   # This re-inserts rows for NAs, unlike f$resid
> yl <- quantile(res, c(10/length(res),1-10/length(res)), na.rm=TRUE)
> # Scale all plots from 10th smallest to 10th largest residual
> par(mfrow=c(2,2), oma=c(3,0,3,0))
> p <- function(x) {
+   s <- !is.na(x+res)
+   plot(lowess(x[s], res[s], iter=0), xlab=label(x), ylab="Residual",
+        ylim=yl, type="l")
+ }
> p(age); p(blood.pressure); p(cholesterol)
> mtext("Smoothed Martingale Residuals", outer=TRUE)
> 
> # Assess PH by estimating log relative hazard over time
> f <- cph(Surv(d.time,death) ~ age + sex + blood.pressure, x=TRUE, y=TRUE)
> r <- resid(f, "scaledsch")
> tt <- as.numeric(dimnames(r)[[1]])
> par(mfrow=c(3,2))
> for(i in 1:3) {
+   g <- areg.boot(I(r[,i]) ~ tt, B=20)
+   plot(g, boot=FALSE)   # boot=FALSE suppresses the bootstrap CI bands
+ }   # Focus on 3 graphs on right
Loading required package: acepack
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 
> # Easier approach:
> plot(cox.zph(f))   # invokes plot.cox.zph
> par(mfrow=c(1,1))
> 
> 
> 
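> # (Sketch, not run in this session.)  cox.zph also provides a formal PH
> # test: printing the object gives, for each predictor, the correlation
> # (rho) of the scaled Schoenfeld residuals with transformed time, a
> # chi-square statistic, and a p-value, plus a GLOBAL test, e.g.
> # z <- cox.zph(f, transform='identity')
> # print(z)     # rho, chisq, p per predictor and GLOBAL
> # plot(z[1])   # plot smoothed residuals for the first predictor only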
graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "residuals.lrm" > > ### * residuals.lrm > > flush(stderr()); flush(stdout()) > > ### Name: residuals.lrm > ### Title: Residuals from a Logistic Regression Model Fit > ### Aliases: residuals.lrm plot.lrm.partial > ### Keywords: models regression > > ### ** Examples > > set.seed(1) > x1 <- runif(200, -1, 1) > x2 <- runif(200, -1, 1) > L <- x1^2 - .5 + x2 > y <- ifelse(runif(200) <= plogis(L), 1, 0) > f <- lrm(y ~ x1 + x2, x=TRUE, y=TRUE) > resid(f) #add rows for NAs back to data 1 2 3 4 5 6 7 8 -0.27089 0.74578 -0.39686 -0.29167 -0.23480 -0.40992 0.56567 -0.22803 9 10 11 12 13 14 15 16 0.72253 -0.48120 -0.61272 -0.20496 -0.52921 -0.61258 -0.56126 -0.29560 17 18 19 20 21 22 23 24 0.52923 0.36266 -0.61530 -0.31901 0.71029 -0.22920 -0.30676 0.62293 25 26 27 28 29 30 31 32 0.40382 -0.38703 0.74078 -0.19210 -0.35885 -0.56314 -0.31239 0.77279 33 34 35 36 37 38 39 40 -0.32522 0.55915 0.65595 -0.48987 -0.49463 0.60125 -0.35932 -0.35933 41 42 43 44 45 46 47 48 0.69467 0.56898 0.39149 -0.23007 0.65454 -0.26354 0.66572 -0.22435 49 50 51 52 53 54 55 56 0.62550 0.37859 -0.52045 0.37740 -0.36903 0.57129 0.63723 -0.20598 57 58 59 60 61 62 63 64 -0.26429 -0.38571 -0.33023 0.39322 0.58678 -0.29288 0.71871 0.47177 65 66 67 68 69 70 71 72 0.50408 -0.23030 -0.20042 0.47218 0.56873 -0.24951 -0.19623 -0.22548 73 74 75 76 77 78 79 80 -0.32355 -0.23400 0.70929 0.74097 -0.28519 -0.24011 -0.38467 0.45630 81 82 83 84 85 86 87 88 -0.18731 0.59261 -0.57895 -0.31886 -0.20179 -0.21914 0.69157 0.77716 89 90 91 92 93 94 95 96 -0.21798 0.75141 0.74660 -0.21280 0.36151 0.68357 0.60043 0.50943 97 98 99 100 101 102 103 104 -0.21176 -0.21756 -0.20399 0.38862 -0.48098 -0.20763 -0.37370 -0.38474 105 106 107 108 109 110 111 112 -0.33034 -0.62767 -0.23098 -0.54737 -0.21313 -0.34096 -0.24135 -0.25472 113 114 115 116 117 118 119 120 0.44278 -0.49658 -0.26719 0.63443 -0.21287 -0.30284 0.37462 -0.45479 121 122 123 124 125 126 127 128 -0.48928 -0.29737 -0.33945 0.37132 0.42055 -0.61792 -0.54805 -0.52058 129 130 131 132 133 134 135 136 -0.26992 -0.52512 0.36162 0.72402 0.67891 0.44698 0.78366 0.67620 137 138 139 140 141 142 143 144 -0.35978 -0.23461 0.55407 -0.62808 0.35622 -0.24430 -0.39719 0.67827 145 146 147 148 149 150 151 152 0.51484 0.72258 -0.35888 -0.24666 -0.30974 -0.51626 -0.38309 -0.20216 153 154 155 156 157 158 159 160 0.47348 -0.34433 -0.63313 -0.27470 0.43280 0.80360 -0.57747 -0.36153 161 162 163 164 165 166 167 168 -0.21082 0.68008 0.44140 -0.29099 -0.44443 -0.55698 0.80752 -0.48385 169 170 171 172 173 174 175 176 0.50316 -0.36214 0.64048 0.56995 0.37978 0.74047 -0.25573 -0.36091 177 178 179 180 181 182 183 184 -0.31162 0.41609 -0.24469 -0.39733 0.65585 -0.40660 -0.48075 0.36797 185 186 187 188 189 190 191 192 -0.27499 -0.25768 0.45050 0.43374 -0.22998 -0.62616 -0.23625 -0.22596 193 194 195 196 197 198 199 200 -0.59059 -0.40515 -0.22722 -0.31651 0.54731 -0.30836 -0.30889 -0.23844 attr(,"levels") [1] "0" "1" > resid(f, "score") #also adds back rows x1 x2 1 -0.27089 0.12704416 0.1259608 2 0.74578 -0.19073432 -0.4196562 3 -0.39686 -0.05782553 -0.0133321 4 -0.29167 -0.23812395 0.1347804 5 -0.23480 0.14008732 0.1497203 6 -0.40992 -0.32661939 -0.0152296 7 0.56567 0.50307771 0.0710287 8 -0.22803 -0.07333487 0.1691300 9 0.72253 0.18657807 -0.3520644 10 -0.48120 0.42173382 -0.2097394 11 -0.61272 0.36030834 -0.5654268 12 -0.20496 0.13258434 0.1639084 13 -0.52921 -0.19794931 -0.2786010 14 -0.61258 0.14199039 -0.5488262 15 -0.56126 -0.30290185 -0.3576732 
16 -0.29560 0.00136020 0.1133367 17 0.52923 0.23034196 0.1583249 18 0.36266 0.35678993 0.3288283 19 -0.61530 0.14762808 -0.5583610 20 -0.31901 -0.17701423 0.1020957 21 0.71029 0.61753163 -0.3374235 22 -0.22920 0.13195467 0.1533568 23 -0.30676 -0.09305630 0.1091051 24 0.62293 -0.46650835 0.0126147 25 0.40382 -0.18800140 0.3424130 26 -0.38703 0.08815462 -0.0084835 27 0.74078 -0.72093880 -0.3590973 28 -0.19210 0.04518613 0.1742481 29 -0.35885 -0.26532459 0.0589540 30 -0.56314 0.17981135 -0.3987040 31 -0.31239 0.01119610 0.0954482 32 0.77279 0.15388745 -0.5696372 33 -0.32522 0.00420094 0.0816378 34 0.55915 -0.35090458 0.1469680 35 0.65595 0.42948313 -0.1442061 36 -0.48987 -0.16505198 -0.1857842 37 -0.49463 -0.29108142 -0.1873802 38 0.60125 -0.47144577 0.0660177 39 -0.35932 -0.16076845 0.0505750 40 -0.35933 0.06376309 0.0339780 41 0.69467 0.44590458 -0.2689168 42 0.56898 0.16734828 0.0891635 43 0.39149 0.22152872 0.3213089 44 -0.23007 -0.02440412 0.1644521 45 0.65454 0.03890531 -0.1112095 46 -0.26354 -0.15251216 0.1523635 47 0.66572 -0.63465347 -0.0948642 48 -0.22435 0.01021668 0.1648090 49 0.62550 0.29062268 -0.0499190 50 0.37859 0.14593238 0.3353980 51 -0.52045 0.02329574 -0.2726886 52 0.37740 0.27264368 0.3267637 53 -0.36903 0.04568795 0.0216410 54 0.57129 -0.29158912 0.1183575 55 0.63723 -0.54715074 -0.0191300 56 -0.20598 0.16500104 0.1611533 57 -0.26429 0.09711564 0.1333473 58 -0.38571 -0.01437477 0.0011459 59 -0.33023 -0.10699733 0.0839660 60 0.39322 -0.07327305 0.3418614 61 0.58678 0.48453284 0.0281490 62 -0.29288 0.12089915 0.1071096 63 0.71871 -0.05883974 -0.3191561 64 0.47177 -0.15814382 0.2713085 65 0.50408 0.15210059 0.2041133 66 -0.23030 0.11145903 0.1542904 67 -0.20042 0.00859980 0.1745804 68 0.47218 0.25149354 0.2405342 69 0.56873 -0.47290640 0.1369627 70 -0.24951 -0.18729599 0.1648906 71 -0.19623 0.06315588 0.1718092 72 -0.22548 -0.15307511 0.1763134 73 -0.32355 0.09921075 0.0765411 74 -0.23400 0.07779470 0.1547652 75 0.70929 -0.03354747 -0.2856260 76 0.74097 0.58121602 -0.4561283 77 -0.28519 -0.20781334 0.1385063 78 -0.24011 0.05282982 0.1530806 79 -0.38467 -0.21335333 0.0174534 80 0.45630 0.42036056 0.2470750 81 -0.18731 0.02447841 0.1769043 82 0.59261 0.25187818 0.0323695 83 -0.57895 0.11579684 -0.4403727 84 -0.31886 0.11137729 0.0809507 85 -0.20179 -0.10375776 0.1824391 86 -0.21914 0.13030643 0.1583849 87 0.69157 0.29201114 -0.2469022 88 0.77716 -0.58801138 -0.5365017 89 -0.21798 0.11095780 0.1603352 90 0.75141 -0.53605009 -0.4188276 91 0.74660 -0.38878279 -0.4085655 92 -0.21280 0.18771693 0.1568686 93 0.36151 0.10287635 0.3481770 94 0.68357 0.51441453 -0.2364973 95 0.60043 0.33494024 0.0083334 96 0.50943 0.30291867 0.1848661 97 -0.21176 0.01894195 0.1697580 98 -0.21756 0.03912511 0.1658269 99 -0.20399 -0.12682775 0.1834100 100 0.38862 0.08155850 0.3336339 101 -0.48098 -0.14883785 -0.1671038 102 -0.20763 0.06096135 0.1682395 103 -0.37370 0.17170658 0.0055336 104 -0.38474 -0.37911395 0.0295854 105 -0.33034 -0.08819670 0.0824423 106 -0.62767 0.36002386 -0.6165009 107 -0.23098 0.17121607 0.1495138 108 -0.54737 0.02395515 -0.3431313 109 -0.21313 -0.18076907 0.1839571 110 -0.34096 -0.06734742 0.0678857 111 -0.24135 -0.22984752 0.1732196 112 -0.25472 -0.11808238 0.1562376 113 0.44278 -0.12687795 0.3022899 114 -0.49658 0.06805698 -0.2184078 115 -0.26719 0.18798840 0.1243970 116 0.63443 -0.61783385 -0.0063422 117 -0.21287 -0.09177669 0.1774882 118 -0.30284 0.24034569 0.0885002 119 0.37462 -0.04024610 0.3515516 120 -0.45479 -0.12743205 -0.1134366 121 -0.48928 -0.48129179 -0.1610882 
122 -0.29737 0.00262067 0.1115196 123 -0.33945 0.01062499 0.0640266 124 0.37132 -0.24251755 0.3684111 125 0.42055 0.21432839 0.2986577 126 -0.61792 0.05697749 -0.5605102 127 -0.54805 -0.01224331 -0.3423206 128 -0.52058 0.30449432 -0.2937986 129 -0.26992 0.14647997 0.1253076 130 -0.52512 -0.10052000 -0.2753205 131 0.36162 0.05415053 0.3517197 132 0.72402 -0.61243166 -0.2988694 133 0.67891 -0.63065072 -0.1366627 134 0.44698 0.12765316 0.2790325 135 0.78366 0.67177984 -0.6627413 136 0.67620 0.13265936 -0.1843353 137 -0.35978 -0.04382129 0.0413077 138 -0.23461 -0.01221273 0.1610767 139 0.55407 0.53754976 0.0910943 140 -0.62808 -0.00959928 -0.5905946 141 0.35622 0.13022674 0.3487425 142 -0.24430 -0.04961249 0.1580838 143 -0.39719 0.20743926 -0.0334678 144 0.67827 -0.32805577 -0.1569455 145 0.51484 0.23611701 0.1813937 146 0.72258 -0.06854288 -0.3334081 147 -0.35888 0.23318387 0.0220707 148 -0.24666 -0.12170156 0.1619081 149 -0.30974 0.24469874 0.0810333 150 -0.51626 -0.37640206 -0.2327368 151 -0.38309 -0.08783794 0.0106122 152 -0.20216 -0.02311052 0.1763617 153 0.47348 -0.16214194 0.2694554 154 -0.34433 0.03227630 0.0562483 155 -0.63313 -0.00055838 -0.6090927 156 -0.27470 0.17532919 0.1192816 157 0.43280 0.02564823 0.3011265 158 0.80360 -0.68261465 -0.6714230 159 -0.57747 0.25667844 -0.4463365 160 -0.36153 0.20773340 0.0202956 161 -0.21082 0.09074300 0.1648224 162 0.68008 0.53739349 -0.2267696 163 0.44140 -0.04746299 0.2978684 164 -0.29099 -0.16294397 0.1298676 165 -0.44443 -0.33832030 -0.0773628 166 -0.55698 0.09677645 -0.3751074 167 0.80752 -0.70447081 -0.6926074 168 -0.48385 0.15920012 -0.1962307 169 0.50316 0.22514000 0.2000812 170 -0.36214 0.11761136 0.0261012 171 0.64048 0.16705477 -0.0807885 172 0.56995 0.38826362 0.0708748 173 0.37978 0.27050163 0.3254566 174 0.74047 -0.16088957 -0.3991610 175 -0.25573 0.06112350 0.1422832 176 -0.36091 -0.28544032 0.0575897 177 -0.31162 -0.08994217 0.1037551 178 0.41609 0.20062261 0.3035882 179 -0.24469 -0.05153411 0.1579767 180 -0.39733 -0.32030909 0.0053093 181 0.65585 -0.27056616 -0.0921957 182 -0.40660 0.25106474 -0.0522588 183 -0.48075 -0.37157538 -0.1501512 184 0.36797 0.00245766 0.3521872 185 -0.27499 -0.20737753 0.1473084 186 -0.25768 0.16017964 0.1335774 187 0.45050 0.23255147 0.2674500 188 0.43374 0.19474607 0.2877153 189 -0.22998 -0.20409910 0.1777740 190 -0.62616 -0.05966894 -0.5802165 191 -0.23625 -0.10004807 0.1666384 192 -0.22596 0.05020667 0.1610542 193 -0.59059 0.47144035 -0.5022727 194 -0.40515 -0.34624559 -0.0057010 195 -0.22722 0.09850832 0.1568503 196 -0.31651 -0.05733453 0.0960277 197 0.54731 -0.42650998 0.1749445 198 -0.30836 -0.20999472 0.1160822 199 -0.30889 0.11245885 0.0916954 200 -0.23844 -0.13488895 0.1679388 > r <- resid(f, "partial") #for checking transformations of X's > par(mfrow=c(1,2)) > for(i in 1:2) { + xx <- if(i==1)x1 else x2 + if(.R.) { + plot(xx, r[,i], xlab=c('x1','x2')[i]) + lines(lowess(xx,r[,i])) + } else { + g <- loess(r[,i] ~ xx) + plot(g, coverage=0.95, confidence=7) + points(xx, r[,i]) + } + } > resid(f, "partial", pl="loess") #same as last 3 lines > resid(f, "partial", pl=TRUE) #plots for all columns of X using supsmu Loading required package: modreg Warning: package 'modreg' has been merged into 'stats' > resid(f, "gof") #global test of goodness of fit Sum of squared errors Expected value|H0 SD 43.41604 43.32085 0.09529 Z P 0.99898 0.31780 > lp1 <- resid(f, "lp1") #approx. 
leave-out-1 linear predictors
> -2*sum(y*lp1 + log(1-plogis(lp1)))   # approx leave-out-1 deviance
[1] 255.76
> # formula assumes y is binary
>
> # Simulate data from a population proportional odds model
> set.seed(1)
> n <- 400
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> L <- .05*(age-50) + .03*(blood.pressure-120)
> p12 <- plogis(L)    # Pr(Y>=1)
> p2  <- plogis(L-1)  # Pr(Y=2)
> p <- cbind(1-p12, p12-p2, p2)   # individual class probabilities
> # Cumulative probabilities:
> cp <- matrix(cumsum(t(p)) - rep(0:(n-1), rep(3,n)), byrow=TRUE, ncol=3)
> # simulate multinomial with varying probs:
> y <- (cp < runif(n)) %*% rep(1,3)
> # Thanks to Dave Krantz for this trick: row i of cp holds the cumulative
> # class probabilities for observation i, so counting how many of them fall
> # below a single uniform draw gives a multinomial draw from that row
> f <- lrm(y ~ age + blood.pressure, x=TRUE, y=TRUE)
> par(mfrow=c(2,2))
> resid(f, 'score.binary', pl=TRUE)   # plot score residuals
> resid(f, 'partial', pl=TRUE)        # plot partial residuals
Loading required package: modreg
Warning: package 'modreg' has been merged into 'stats'
> resid(f, 'gof')   # test GOF for each level separately
     Sum of squared errors Expected value|H0      SD       Z         P
y>=1                91.739            91.939 0.13870 -1.4398 0.1499349
y>=2                73.581            74.435 0.30202 -2.8301 0.0046529
>
>
>
> # Make a series of binary fits and draw 2 partial residual plots
> #
> f1 <- lrm(y>=1 ~ age + blood.pressure, x=TRUE, y=TRUE)
> f2 <- update(f1, y==2 ~ .)
> par(mfrow=c(2,1))
> plot.lrm.partial(f1, f2)
>
>
>
> # Simulate data from both a proportional odds and a non-proportional
> # odds population model.  Check how 3 kinds of residuals detect
> # non-prop. odds
> set.seed(71)
> n <- 400
> x <- rnorm(n)
>
> par(mfrow=c(2,3))
> for(j in 1:2) {   # 1: prop. odds  2: non-prop. odds
+   if(j==1)
+     L <- matrix(c(1.4,.4,-.1,-.5,-.9),nrow=n,ncol=5,byrow=TRUE) + x/2 else {
+     # Slopes and intercepts for cutoffs of 1:5 :
+     slopes <- c(.7,.5,.3,.3,0)
+     ints <- c(2.5,1.2,0,-1.2,-2.5)
+     L <- matrix(ints,nrow=n,ncol=5,byrow=TRUE)+
+          matrix(slopes,nrow=n,ncol=5,byrow=TRUE)*x
+   }
+   p <- plogis(L)
+   if(!.R.) dim(p) <- dim(L)
+   # Cell probabilities
+   p <- cbind(1-p[,1],p[,1]-p[,2],p[,2]-p[,3],p[,3]-p[,4],p[,4]-p[,5],p[,5])
+   # Cumulative probabilities from left to right
+   cp <- matrix(cumsum(t(p)) - rep(0:(n-1), rep(6,n)), byrow=TRUE, ncol=6)
+   y <- (cp < runif(n)) %*% rep(1,6)
+
+   f <- lrm(y ~ x, x=TRUE, y=TRUE)
+   for(cutoff in 1:5) print(lrm(y>=cutoff ~ x)$coef)
+
+   print(resid(f,'gof'))
+   resid(f, 'score', pl=TRUE)
+   # Note that full ordinal model score residuals exhibit a
+   # U-shaped pattern even under prop.
odds + ti <- if(j==2) 'Non-Proportional Odds\nSlopes=.7 .5 .3 .3 0' else + 'True Proportional Odds\nOrdinal Model Score Residuals' + title(ti) + resid(f, 'score.binary', pl=TRUE) + if(j==1) ti <- 'True Proportional Odds\nBinary Score Residuals' + title(ti) + resid(f, 'partial', pl=TRUE) + if(j==1) ti <- 'True Proportional Odds\nPartial Residuals' + title(ti) + } Intercept x 1.35503 0.50743 Intercept x 0.36942 0.49149 Intercept x -0.078609 0.497561 Intercept x -0.55546 0.51133 Intercept x -0.99113 0.54094 Sum of squared errors Expected value|H0 SD Z P y>=1 63.843 63.676 0.231052 0.72198 4.7030e-01 y>=2 91.488 90.862 0.142683 4.38723 1.1480e-05 y>=3 94.598 94.020 0.082224 7.02790 2.0965e-12 y>=4 89.053 88.933 0.153922 0.78060 4.3504e-01 y>=5 77.708 78.238 0.215632 -2.45833 1.3958e-02 Loading required package: modreg Warning: package 'modreg' has been merged into 'stats' Intercept x 2.2722 0.8468 Intercept x 1.11546 0.56997 Intercept x 0.05387 0.39202 Intercept x -1.16278 0.44113 Intercept x -2.30605 0.16273 Sum of squared errors Expected value|H0 SD Z P y>=1 37.251 38.922 0.12992 -12.8687 0.00000000 y>=2 71.611 73.673 0.19490 -10.5772 0.00000000 y>=3 96.582 95.048 0.06574 23.3420 0.00000000 y>=4 72.637 71.900 0.19250 3.8262 0.00013014 y>=5 33.689 32.535 0.11121 10.3767 0.00000000 Loading required package: modreg Warning: package 'modreg' has been merged into 'stats' > par(mfrow=c(1,1)) > > # Get data used in Hosmer et al. paper and reproduce their calculations > if(FALSE && .R.) { + v <- Cs(id, low, age, lwt, race, smoke, ptl, ht, ui, ftv, bwt) + d <- read.table("http://www-unix.oit.umass.edu/~statdata/data/lowbwt.dat", + skip=6, col.names=v) + d <- upData(d, race=factor(race,1:3,c('white','black','other'))) + f <- lrm(low ~ age + lwt + race + smoke, data=d, x=TRUE,y=TRUE) + f + resid(f, 'gof') + # Their Table 7 Line 2 found sum of squared errors=36.91, expected + # value under H0=36.45, variance=.065, P=.071 + # We got 36.90, 36.45, SD=.26055 (var=.068), P=.085 + # Note that two logistic regression coefficients differed a bit + # from their Table 1 + } > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "residuals.ols" > > ### * residuals.ols > > flush(stderr()); flush(stdout()) > > ### Name: residuals.ols > ### Title: Residuals for ols > ### Aliases: residuals.ols > ### Keywords: models regression > > ### ** Examples > > set.seed(1) > x1 <- rnorm(100) > x2 <- rnorm(100) > x1[1] <- 100 > y <- x1 + x2 + rnorm(100) > f <- ols(y ~ x1 + x2, x=TRUE, y=TRUE) > resid(f, "dfbetas") [,1] [,2] [,3] [1,] 0.0214792 -1.9890e+00 -1.0515e-04 [2,] 0.1624541 -1.4264e-02 1.2633e-02 [3,] 0.1462485 -3.7509e-02 -1.3812e-01 [4,] -0.0339635 -2.0799e-03 -7.1306e-03 [5,] -0.2235785 2.7109e-02 1.4892e-01 [6,] 0.2777151 -1.9924e-02 4.8653e-01 [7,] 0.0678307 -9.2997e-04 5.2181e-02 [8,] 0.0565733 1.3008e-03 5.4613e-02 [9,] -0.0019196 5.0703e-05 -8.3376e-04 [10,] 0.0613358 -1.7820e-03 1.0336e-01 [11,] -0.0212894 -2.2512e-05 1.3764e-02 [12,] 0.0356295 -3.6231e-03 -1.6252e-02 [13,] -0.0367406 2.7298e-03 -5.3060e-02 [14,] -0.1393879 5.1702e-02 9.1889e-02 [15,] 0.0908260 -9.2193e-04 -1.6392e-02 [16,] 0.1423171 -1.9981e-02 -5.4473e-02 [17,] -0.0336663 4.4589e-03 1.0264e-02 [18,] -0.1238286 4.1232e-03 3.1962e-02 [19,] 0.0631806 3.1190e-04 3.4708e-02 [20,] -0.0076647 4.7331e-04 1.1557e-03 [21,] -0.1698389 8.6843e-03 8.5555e-02 [22,] 0.0045717 2.4591e-04 6.3459e-03 [23,] -0.0643622 7.4738e-03 1.2393e-02 [24,] -0.0363234 1.1402e-02 5.9780e-03 [25,] -0.1146350 6.1869e-03 7.8968e-03 [26,] 
0.1826480 -1.2198e-02 1.3837e-01 [27,] -0.0348783 4.5052e-03 1.5764e-03 [28,] -0.1609446 4.0907e-02 2.4965e-03 [29,] 0.0135238 -2.7601e-03 -9.3940e-03 [30,] 0.0212780 -1.9021e-03 -6.5581e-03 [31,] -0.0973792 -3.0184e-03 -1.0237e-02 [32,] -0.2824841 4.5205e-02 1.6785e-01 [33,] -0.0633405 2.2403e-03 -3.6857e-02 [34,] 0.0445482 -9.9541e-03 -7.2920e-02 [35,] -0.0062671 1.3835e-03 -2.1039e-03 [36,] -0.0189379 4.9565e-03 3.1311e-02 [37,] 0.0507747 -8.5563e-03 -1.4511e-02 [38,] -0.1188548 1.7992e-02 6.2819e-02 [39,] 0.0979612 -4.1690e-03 -6.5065e-02 [40,] -0.0033479 1.2290e-04 7.4749e-05 [41,] 0.0557827 -1.4806e-02 -1.1717e-01 [42,] 0.1107792 -6.1841e-03 1.3381e-01 [43,] 0.0105525 -1.6531e-03 -1.9190e-02 [44,] -0.0887037 7.5396e-03 4.0623e-02 [45,] 0.1039668 -2.6747e-02 -1.2192e-01 [46,] -0.1984536 4.6020e-02 1.5271e-01 [47,] -0.0507382 -2.9532e-03 -1.0528e-01 [48,] -0.0272119 8.5049e-04 -1.5223e-03 [49,] -0.0242391 5.1369e-03 3.3196e-02 [50,] 0.0853583 -1.1568e-02 -1.5299e-01 [51,] 0.0132589 -5.2537e-04 6.6247e-03 [52,] 0.0377658 -6.4202e-03 3.5531e-04 [53,] -0.0106551 1.0300e-03 3.2156e-03 [54,] -0.0303713 8.7133e-03 2.9261e-02 [55,] 0.0550681 -3.6408e-03 -8.9150e-02 [56,] 0.0985653 2.0332e-03 -1.1280e-01 [57,] -0.2432451 1.8969e-02 -2.5180e-01 [58,] 0.0505521 -1.2915e-02 -3.1782e-02 [59,] 0.0262210 -3.9109e-03 -3.9039e-02 [60,] -0.0381933 6.3239e-05 -7.1130e-02 [61,] 0.0908574 1.4537e-02 4.5159e-02 [62,] -0.0411369 5.3097e-03 8.9885e-03 [63,] -0.0260654 -7.1553e-04 -2.8943e-02 [64,] 0.0891181 -4.2275e-03 8.2930e-02 [65,] 0.1611301 -3.6378e-02 -1.0098e-01 [66,] 0.0397327 1.9345e-03 8.6544e-02 [67,] -0.0446331 1.3477e-02 1.0808e-02 [68,] -0.1203459 7.0020e-03 1.8597e-01 [69,] -0.0351308 3.6356e-03 4.1454e-03 [70,] -0.0922236 -1.1373e-02 -2.4633e-02 [71,] -0.0185521 -1.5107e-03 -4.2265e-02 [72,] 0.0373744 -6.3851e-03 5.1265e-03 [73,] -0.0842834 1.5384e-03 -4.2901e-02 [74,] 0.2580059 -5.2989e-02 -1.3752e-02 [75,] 0.0117449 -2.9923e-03 -3.7953e-03 [76,] 0.1071639 -8.8209e-03 -1.9946e-04 [77,] -0.2301992 2.2811e-02 -1.9019e-01 [78,] 0.0904545 2.1199e-03 1.8583e-01 [79,] -0.1315078 4.4692e-03 -1.4062e-01 [80,] 0.0997969 -8.5477e-03 1.2292e-01 [81,] 0.0299898 -7.6159e-03 -3.9083e-02 [82,] -0.0388383 2.2083e-03 -3.9745e-02 [83,] 0.1269136 2.9205e-03 3.4386e-02 [84,] -0.0766894 2.8191e-02 1.1971e-01 [85,] -0.0573739 9.0558e-04 -3.2923e-02 [86,] -0.0999405 8.6539e-03 1.3222e-02 [87,] -0.0644883 -5.6921e-03 -9.7412e-02 [88,] 0.0847220 -1.6343e-02 -6.6687e-02 [89,] 0.0370739 -3.7632e-03 -1.5653e-02 [90,] 0.0886617 -1.3024e-02 -8.5650e-02 [91,] -0.0410437 7.1553e-03 6.3896e-03 [92,] 0.0360625 1.3550e-03 1.6573e-02 [93,] 0.0168777 -7.0522e-04 -1.2701e-02 [94,] -0.1415199 -2.0211e-03 -1.2534e-01 [95,] 0.1573141 -4.7429e-03 -2.0366e-01 [96,] 0.0052202 -6.5717e-04 -5.7649e-03 [97,] 0.0871631 -1.1744e-02 1.2543e-01 [98,] 0.0844836 -2.0124e-02 -8.9702e-02 [99,] -0.0048801 9.7082e-04 -2.1626e-03 [100,] -0.0336388 6.1219e-03 1.2484e-02 > which.influence(f) $Intercept [1] 5 6 32 57 74 77 $x1 [1] 1 $x2 [1] 6 57 95 > > > > cleanEx(); ..nameEx <- "rm.impute" > > ### * rm.impute > > flush(stderr()); flush(stdout()) > > ### Name: rm.impute > ### Title: Imputation of Repeated Measures > ### Aliases: rm.impute pbind > ### Keywords: regression htest multivariate array > > ### ** Examples > > ## Not run: > ##D # Generate multiple imputes of the response matrix for later use > ##D Y <- rm.impute(~treatment + pol(age,2)*sex, responses, > ##D last=lastvisit, data=mydata)$Y > ##D # Do some analysis for each imputation > ##D fits 
<- vector('list',10)
> ##D for(i in 1:10) {
> ##D   y <- Y[,,i]
> ##D   fits[[i]] <- my.analysis(X,y)
> ##D }
> ##D
> ##D # Function to generate a 4-variate equal correlation pattern response
> ##D # with missing-at-random responses; missingness is a function of x and
> ##D # previous responses.
> ##D #
> ##D # pna is a function that computes the probability that a subject
> ##D # drops out at the current visit.  For visit 1 pna is a function
> ##D # of treatment and baseline covariable x.  For visits > 1 pna is
> ##D # a function of the matrix of responses for all previous visits.
> ##D #
> ##D # If second=TRUE we also generate a second response variable having
> ##D # NAs in the same positions as this first one.  y2 is generated
> ##D # so that its NAs are completely unrelated to any y2 values if
> ##D # y2.treat.effect=0, as the pna function is only given the first
> ##D # response variable.
> ##D # y2 is N(0,1) for treat='A' and N(y2.treat.effect,1) for treat='B'.
> ##D
> ##D testdf <- function(n=1500, seed=7, pna, second=FALSE, y2.treat.effect=0) {
> ##D
> ##D set.seed(seed)
> ##D treat <- sample(c('A','B'),n,TRUE)
> ##D x <- runif(n)
> ##D nt <- 4
> ##D
> ##D mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) {
> ##D   Z <- matrix(rnorm(n * p), p, n)
> ##D   t(u + t(chol(S)) %*% Z)
> ##D }
> ##D
> ##D # Generate multivariate normal errors for n subjects at nt times
> ##D # Assume equal correlations of rho=.5, independent subjects
> ##D
> ##D rho <- .5
> ##D y <- mvrnorm(n, p=nt, S=diag(rep(1-rho,nt))+rho)
> ##D
> ##D y[treat=='B',] <- y[treat=='B',] + 1
> ##D
> ##D cat('\n\nTreatment-specific means for last period in response variable 1 before generating NAs:\n')
> ##D print(tapply(y[,4], treat, mean, na.rm=TRUE))
> ##D
> ##D y[runif(n) < pna(treat, x), 1] <- NA
> ##D y[is.na(y[,1]) | runif(n) < pna(treat, x, y[,1]),   2] <- NA
> ##D y[is.na(y[,2]) | runif(n) < pna(treat, x, y[,1:2]), 3] <- NA
> ##D y[is.na(y[,3]) | runif(n) < pna(treat, x, y[,1:3]), 4] <- NA
> ##D
> ##D last <- rep(4, n)
> ##D last[is.na(y[,4])] <- 3
> ##D last[is.na(y[,3])] <- 2
> ##D last[is.na(y[,2])] <- 1
> ##D last[is.na(y[,1])] <- 0
> ##D
> ##D cat('\nNumber of NAs for each time period:\n')
> ##D print(apply(y, 2, function(x)sum(is.na(x))))
> ##D cat('\n\nTreatment-specific means for last period in response variable 1 after excluding NAs:\n')
> ##D print(tapply(y[,4], treat, mean, na.rm=TRUE))
> ##D cat('\n\nNaive complete-case analysis:\n\n')
> ##D prn(ols(y[,4] ~ pol(x,2) + treat))
> ##D
> ##D if(second) {
> ##D   y2 <- matrix(rnorm(n*4),ncol=4)
> ##D   y2[treat=='B',] <- y2[treat=='B',] + y2.treat.effect
> ##D   cat('\n\nTreatment-specific means for last period in response variable 2 before generating NAs:\n')
> ##D   print(tapply(y2[,4], treat, mean, na.rm=TRUE))
> ##D
> ##D   y2[is.na(y[,1]),1] <- NA
> ##D   y2[is.na(y[,2]),2] <- NA
> ##D   y2[is.na(y[,3]),3] <- NA
> ##D   y2[is.na(y[,4]),4] <- NA
> ##D   cat('\n\nTreatment-specific means for last period in response variable 2 after excluding NAs:\n')
> ##D   print(tapply(y2[,4], treat, mean, na.rm=TRUE))
> ##D
> ##D   y <- pbind(y1=y, y2=y2)
> ##D }
> ##D
> ##D list(x=x, treat=treat, y=y, last=last)
> ##D }
> ##D
> ##D pna <- function(treat, x, yprev) {
> ##D   # In this model for the probability of dropout just before the
> ##D   # current visit, the probability does not depend on the baseline
> ##D   # covariable x.  For treat='B' the probability of dropout is a
> ##D   # constant 0.1.  For treat='A' it is a curtailed quadratic
> ##D   # function of the previous visit's response.
> ##D   #
> ##D   # If no previous responses available, we are at first follow-up visit
> ##D
> ##D   if(missing(yprev)) 0 else {
> ##D     if(is.matrix(yprev)) yprev <- yprev[,ncol(yprev)]
> ##D     ifelse(treat=='B', .1,
> ##D            pmax(0, pmin(1, .124 +.0835*yprev + .020868*yprev^2)))
> ##D   }
> ##D }
> ##D
> ##D df <- testdf(pna = pna, second=TRUE)
> ##D
> ##D g <- rm.impute(~ pol(x,2) + treat, df$y, last=df$last,
> ##D                rformula=~ pol(x,2) + treat,
> ##D                n.impute=10, g=4, nk=3,
> ##D                rinteraction='treat', rint.with='all',
> ##D                pr=TRUE, pra=TRUE, data=df, keep.prop=TRUE, keep.pfits=TRUE)
> ##D # Base propensity model is in.study ~ pol(x,2) + treat
> ##D # for visits 2,3,4, filled-in y's from previous visits will also be
> ##D # used as predictors, and these interact with treat.
> ##D # Restricted cubic spline with 3 knots is assumed for the propensity models
> ##D # To fit the multiply-imputed last (4th) response an additive model
> ##D # in quadratic x and treat is used
> ##D
> ##D g$fit[[1]]   # shows response fit for first response variable
> ##D              # (y1), with variances adj. for imputation
> ##D page(g$Y)    # show all 10 imputations for both responses x 4 periods
> ##D
> ##D # Check for the first imputation how well propensity matching achieved
> ##D # balance in baseline and period 3 filled-in responses for
> ##D # dropouts and non-dropouts.  For continuous variables show ECDFs
> ##D # using the Hmisc ecdf function, for first 4 imputations.  Do this
> ##D # with and without stratifying on quintiles of propensity, and also
> ##D # show the estimated 3rd period response vs. propensity stratified
> ##D # by dropout status.  Use only first response (y1) for all of this.
> ##D
> ##D for(imp in 1:4) {
> ##D   y3 <- g$Y[,3,1,imp]
> ##D   prop3 <- g$propensity[,3,imp]
> ##D   prop3g <- cut2(prop3,g=5)
> ##D   ti <- paste('Imputation',imp)
> ##D   print(ecdf(~ y3, groups=df$last >= 3, subset=unclass(prop3g)<5))
> ##D   title(ti)
> ##D   print(ecdf(~ y3 | prop3g, groups=df$last >= 3,
> ##D              subset=unclass(prop3g)<5))
> ##D   # Not enough dropouts in highest quintile of propensity completing
> ##D   # visit 3
> ##D   title(ti)
> ##D   plsmo(prop3, y3, group=df$last >= 3, datadensity=TRUE, col=1:2)
> ##D   title(ti)
> ##D }
> ##D
> ##D # Examine propensity fit for sixth imputation, 4th response
> ##D f <- g$pfits[4,6][[1]]
> ##D dfr <- as.data.frame(df)
> ##D # Edit names of dfr so that responses called y.1, y.2, etc.
> ##D # For this example, these are already OK
> ##D dd <- datadist(dfr)
> ##D options(datadist='dd')
> ##D # datadist makes plot below work without specifying variable settings
> ##D plot(f, y.3=NA, treat=NA, conf.int=FALSE)
> ##D
> ##D # Analyze multiple response variables.  Both systolic.bp and
> ##D # diastolic.bp are matrices (columns = time periods)
> ##D
> ##D f <- rm.impute(~treatment + pol(age,2)*sex,
> ##D                pbind(systolic.bp, diastolic.bp),
> ##D                last=lastvisit, data=mydata)
> ##D
> ##D # To deal with a continuous and a binary endpoint you can specify
> ##D # pbind(systolic.bp, stroke), fitter=list(ols, lrm)
> ## End(Not run)
>
>
>
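## A small illustrative sketch (the pool.rubin helper is hypothetical, not
## part of Design) of Rubin's rules, the pooling that rm.impute's variance
## adjustment across imputations is based on:
pool.rubin <- function(coefs, vars) {
  # coefs: k x p matrix of coefficient estimates, one row per imputation
  # vars : k x p matrix of their squared standard errors
  k    <- nrow(coefs)
  qbar <- colMeans(coefs)        # pooled point estimate
  ubar <- colMeans(vars)         # average within-imputation variance
  b    <- apply(coefs, 2, var)   # between-imputation variance
  list(estimate=qbar, se=sqrt(ubar + (1 + 1/k)*b))
}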
> cleanEx(); ..nameEx <- "robcov"
>
> ### * robcov
>
> flush(stderr()); flush(stdout())
>
> ### Name: robcov
> ### Title: Robust Covariance Matrix Estimates
> ### Aliases: robcov
> ### Keywords: models regression robust
>
> ### ** Examples
>
> # A dataset contains a variable number of observations per subject,
> # and all observations are laid out in separate rows.  The responses
> # represent whether or not a given segment of the coronary arteries
> # is occluded.  Segments of arteries may not operate independently
> # in the same patient.  We assume a "working independence model" to
> # get estimates of the coefficients, i.e., that estimates assuming
> # independence are reasonably efficient.  The job is then to get
> # unbiased estimates of variances and covariances of these estimates.
>
> set.seed(1)
> n.subjects <- 30
> ages <- rnorm(n.subjects, 50, 15)
> sexes <- factor(sample(c('female','male'), n.subjects, TRUE))
> logit <- (ages-50)/5
> prob <- plogis(logit)   # true prob not related to sex
> id <- sample(1:n.subjects, 300, TRUE)   # subjects sampled multiple times
> table(table(id))   # frequencies of number of obs/subject

 2  6  7  8  9 10 11 12 13 15 16
 1  1  1  5  9  1  3  3  3  2  1
> age <- ages[id]
> sex <- sexes[id]
> # In truth, observations within subject are independent:
> y <- ifelse(runif(300) <= prob[id], 1, 0)
> f <- lrm(y ~ lsp(age,50)*sex, x=TRUE, y=TRUE)
> g <- robcov(f, id)
> diag(g$var)/diag(f$var)
      Intercept             age            age'        sex=male  age * sex=male
        1.34312         1.27831         0.85441         0.60827         0.60212
age' * sex=male
        0.56557
> # add ,group=w to re-sample from within each level of w
> anova(g)   # cluster-adjusted Wald statistics

                Wald Statistics          Response: y

 Factor                                   Chi-Square d.f. P
 age  (Factor+Higher Order Factors)       92.40      4    <.0001
  All Interactions                         0.22      2    0.8953
  Nonlinear (Factor+Higher Order Factors)  1.19      2    0.5504
 sex  (Factor+Higher Order Factors)        0.69      3    0.8765
  All Interactions                         0.22      2    0.8953
 age * sex  (Factor+Higher Order Factors)  0.22      2    0.8953
  Nonlinear                                0.00      1    0.9657
  Nonlinear Interaction : f(A,B) vs. AB    0.00      1    0.9657
 TOTAL NONLINEAR                           1.19      2    0.5504
 TOTAL NONLINEAR + INTERACTION             1.24      3    0.7426
 TOTAL                                    93.87      5    <.0001
> # fastbw(g)   # cluster-adjusted backward elimination
> plot(g, age=30:70, sex='female')   # cluster-adjusted confidence bands
>
> # Get design effects based on inflation of the variances when compared
> # with bootstrap estimates which ignore clustering
> g2 <- robcov(f)
> diag(g$var)/diag(g2$var)
      Intercept             age            age'        sex=male  age * sex=male
        0.81276         0.78156         0.66574         0.68080         0.66941
age' * sex=male
        0.61280
>
> # Get design effects based on pooled tests of factors in model
> anova(g2)[,1] / anova(g)[,1]
 age  (Factor+Higher Order Factors)        0.81273
  All Interactions                         0.75763
  Nonlinear (Factor+Higher Order Factors)  0.63801
 sex  (Factor+Higher Order Factors)        0.59953
  All Interactions                         0.75763
 age * sex  (Factor+Higher Order Factors)  0.75763
  Nonlinear                                0.61280
  Nonlinear Interaction : f(A,B) vs. AB    0.61280
 TOTAL NONLINEAR                           0.63801
 TOTAL NONLINEAR + INTERACTION             0.63941
 TOTAL                                     0.80672
>
>
>
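## For intuition, a sketch of the cluster sandwich computation that robcov
## performs (assuming, as holds for lrm fits such as f above, that
## resid(f,'score') returns the n x p matrix of per-observation score residuals):
sc   <- resid(f, 'score')                    # score residual matrix
scc  <- rowsum(sc, id)                       # sum scores within each subject
Vrob <- f$var %*% crossprod(scc) %*% f$var   # information^-1 B information^-1
## Vrob should essentially reproduce g$var from robcov(f, id) above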
> # A dataset contains one observation per subject, but there may be
> # heteroscedasticity or other model misspecification.  Obtain
> # the robust sandwich estimator of the covariance matrix.
>
> # f <- ols(y ~ pol(age,3), x=TRUE, y=TRUE)
> # f.adj <- robcov(f)
>
>
>
> cleanEx(); ..nameEx <- "sensuc"
>
> ### * sensuc
>
> flush(stderr()); flush(stdout())
>
> ### Name: sensuc
> ### Title: Sensitivity to Unmeasured Covariables
> ### Aliases: sensuc plot.sensuc
> ### Keywords: regression htest models survival
>
> ### ** Examples
>
> set.seed(17)
> x <- sample(0:1, 500,TRUE)
> y <- sample(0:1, 500,TRUE)
> y[1:100] <- x[1:100]   # induce an association between x and y
> x2 <- rnorm(500)
>
> f <- lrm(y ~ x + x2, x=TRUE, y=TRUE)
>
> # Note: in the absence of U, the odds ratio for x is exp(2nd coefficient)
>
> g <- sensuc(f, c(1,3))
Current odds ratio for x:u=1 3
>
> # Note: if the generated sample of U was typical, this shows what the
> # odds ratio for x would have been had U been known (and adjusted for),
> # where U had an odds ratio with x of 3 and an odds ratio with y of 3
>
> plot(g)
>
> # Fit a Cox model and check sensitivity to an unmeasured confounder
>
> # f <- cph(Surv(d.time,death) ~ treatment + pol(age,2)*sex, x=TRUE, y=TRUE)
> # sensuc(f, event=function(y) y[,2] & y[,1] < 365.25)
> # Event = failed, with event time before 1 year
> # Note: Analysis uses f$y which is a 2-column Surv object
>
>
>
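## To make the sensitivity analysis concrete, a deliberately crude sketch of
## the idea (sensuc generates U much more carefully, targeting the requested
## odds ratios; this U generator is purely illustrative):
or.xu <- 3; or.yu <- 3
u <- rbinom(500, 1, plogis(-1 + log(or.xu)*x + log(or.yu)*y))
h <- lrm(y ~ x + x2 + u)
exp(h$coefficients['x'])   # odds ratio for x after adjusting for this U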
> cleanEx(); ..nameEx <- "specs.Design"
>
> ### * specs.Design
>
> flush(stderr()); flush(stdout())
>
> ### Name: specs.Design
> ### Title: Design Specifications for Models
> ### Aliases: specs.Design specs print.specs.Design
> ### Keywords: models regression methods
>
> ### ** Examples
>
> set.seed(1)
> blood.pressure <- rnorm(200, 120, 15)
> dd <- datadist(blood.pressure)
> options(datadist='dd')
> L <- .03*(blood.pressure-120)
> sick <- ifelse(runif(200) <= plogis(L), 1, 0)
> f <- lrm(sick ~ rcs(blood.pressure,5))
> specs(f)   # find out where 5 knots are placed
lrm(formula = sick ~ rcs(blood.pressure, 5))

               Assumption Parameters                         d.f.
blood.pressure rcspline   97.926 111.75 119.26 128.42 143.99 4

> g <- glmD(sick ~ rcs(blood.pressure,5), family=binomial)
> specs(g, long=TRUE)
glmD(formula = sick ~ rcs(blood.pressure, 5), family = binomial)

               Assumption Parameters                         d.f.
blood.pressure rcspline   97.926 111.75 119.26 128.42 143.99 4

                blood.pressure
Low:effect             110.793
Adjust to              119.259
High:effect            129.195
Low:prediction          97.926
High:prediction        143.994
Low                     86.780
High                   156.024
> options(datadist=NULL)
>
>
>
> cleanEx(); ..nameEx <- "summary.Design"
>
> ### * summary.Design
>
> flush(stderr()); flush(stdout())
>
> ### Name: summary.Design
> ### Title: Summary of Effects in Model
> ### Aliases: summary.Design print.summary.Design latex.summary.Design
> ###   plot.summary.Design
> ### Keywords: models regression htest survival hplot interface
>
> ### ** Examples
>
> n <- 1000    # define sample size
> set.seed(17) # so can reproduce the results
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> cholesterol <- rnorm(n, 200, 25)
> sex <- factor(sample(c('female','male'), n,TRUE))
> label(age) <- 'Age'   # label is in Hmisc
> label(cholesterol) <- 'Total Cholesterol'
> label(blood.pressure) <- 'Systolic Blood Pressure'
> label(sex) <- 'Sex'
> units(cholesterol) <- 'mg/dl'   # uses units.default in Hmisc
> units(blood.pressure) <- 'mmHg'
>
> # Specify population model for log odds that Y=1
> L <- .4*(sex=='male') + .045*(age-50) +
+      (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
> # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
> y <- ifelse(runif(n) < plogis(L), 1, 0)
>
> ddist <- datadist(age, blood.pressure, cholesterol, sex)
> options(datadist='ddist')
>
> fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)))
>
> s <- summary(fit)   # Estimate effects using default ranges
> # Gets odds ratio for age=3rd quartile
> # compared to 1st quartile
> ## Not run:
> ##D latex(s)           # Use LaTeX to print nice version
> ##D latex(s, file="")  # Just write LaTeX code to screen
> ## End(Not run)
> summary(fit, sex='male', age=60)   # Specify ref. cell and adjustment val

             Effects              Response : y

 Factor            Low     High    Diff.  Effect S.E. Lower 0.95 Upper 0.95
 blood.pressure    109.580 130.580 20.991 -0.05  0.09 -0.23       0.13
  Odds Ratio       109.580 130.580 20.991  0.95    NA  0.80       1.14
 age                43.531  56.677 13.146  0.28  0.13  0.03       0.52
  Odds Ratio        43.531  56.677 13.146  1.32    NA  1.03       1.69
 cholesterol       183.730 216.530 32.796  0.24  0.26 -0.27       0.76
  Odds Ratio       183.730 216.530 32.796  1.27    NA  0.76       2.13
 sex - female:male   2.000   1.000     NA -0.26  0.22 -0.69       0.16
  Odds Ratio         2.000   1.000     NA  0.77    NA  0.50       1.18

Adjusted to: sex=male age=60 cholesterol=200.48

> summary(fit, age=c(50,70))   # Estimate effect of increasing age from

             Effects              Response : y

 Factor            Low     High    Diff.  Effect S.E.
Lower 0.95 Upper 0.95 blood.pressure 109.58 130.58 20.991 -0.05 0.09 -0.23 0.13 Odds Ratio 109.58 130.58 20.991 0.95 NA 0.80 1.14 age 50.00 70.00 20.000 0.95 0.19 0.58 1.32 Odds Ratio 50.00 70.00 20.000 2.58 NA 1.79 3.73 cholesterol 183.73 216.53 32.796 0.12 0.24 -0.35 0.59 Odds Ratio 183.73 216.53 32.796 1.13 NA 0.70 1.80 sex - male:female 1.00 2.00 NA 0.52 0.17 0.18 0.86 Odds Ratio 1.00 2.00 NA 1.68 NA 1.20 2.37 Adjusted to: sex=female age=50.25 cholesterol=200.48 > # 50 to 70 > s <- summary(fit, age=c(50,60,70)) > # Increase age from 50 to 70, adjust to > # 60 when estimating effects of other factors > #Could have omitted datadist if specified 3 values for all non-categorical > #variables (1 value for categorical ones - adjustment level) > plot(s, log=TRUE, at=c(.1,.5,1,1.5,2,4,8)) > > options(datadist=NULL) > > > > cleanEx(); ..nameEx <- "summary.survfit" > > ### * summary.survfit > > flush(stderr()); flush(stdout()) > > ### Name: summary.survfit > ### Title: Design version of survival Package summary.survfit > ### Aliases: summary.survfit > ### Keywords: survival > > ### ** Examples > > > > > cleanEx(); ..nameEx <- "survest.cph" > > ### * survest.cph > > flush(stderr()); flush(stdout()) > > ### Name: survest.cph > ### Title: Cox Survival Estimates > ### Aliases: survest survest.cph > ### Keywords: models survival regression > > ### ** Examples > > # Simulate data from a population model in which the log hazard > # function is linear in age and there is no age x sex interaction > # Proportional hazards holds for both variables but we > # unnecessarily stratify on sex to see what happens > n <- 1000 > set.seed(731) > age <- 50 + 12*rnorm(n) > label(age) <- "Age" > sex <- factor(sample(c('Male','Female'), n, TRUE)) > cens <- 15*runif(n) > h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) > dt <- -log(runif(n))/h > label(dt) <- 'Follow-up Time' > e <- ifelse(dt <= cens,1,0) > dt <- pmin(dt, cens) > units(dt) <- "Year" > dd <- datadist(age, sex) > options(datadist='dd') > Srv <- Surv(dt,e) > > f <- cph(Srv ~ age*strat(sex), x=TRUE, y=TRUE) #or surv=T > survest(f, expand.grid(age=c(20,40,60),sex=c("Male","Female")), + times=c(2,4,6), conf.int=.9) $time [1] 2 4 6 $surv 2 4 6 1 0.99611 0.98548 0.97154 2 0.98939 0.96073 0.92397 3 0.97121 0.89610 0.80530 4 0.97220 0.94848 0.92200 5 0.94072 0.89170 0.83862 6 0.87595 0.78003 0.68288 $std.err 2 4 6 1 0.339918 0.334439 0.327647 2 0.151366 0.145178 0.137663 3 0.034251 0.033284 0.031620 4 0.238897 0.231790 0.225017 5 0.101497 0.095004 0.089075 6 0.026229 0.024410 0.021593 $lower 2 4 6 1 0.99320 0.97476 0.95096 2 0.98638 0.94993 0.90390 3 0.96952 0.88993 0.79375 4 0.95864 0.92399 0.88575 5 0.92962 0.87234 0.81091 6 0.87011 0.76987 0.66911 $upper 2 4 6 1 0.99778 0.99166 0.98355 2 0.99174 0.96924 0.93999 3 0.97281 0.90195 0.81628 4 0.98135 0.96524 0.94709 5 0.95011 0.90828 0.86261 6 0.88154 0.78982 0.69621 $strata [1] 1 1 1 2 2 2 attr(,"levels") [1] "sex=Male" "sex=Female" > f <- update(f, surv=TRUE) > lp <- c(0, .5, 1) > f$strata # check strata names [1] "sex=Male" "sex=Female" > attr(lp,'strata') <- rep(1,3) # or rep('sex=Female',3) > survest(f, linear.predictors=lp, times=c(2,4,6)) Warning in survest.cph(f, linear.predictors = lp, times = c(2, 4, 6)) : S.E. and confidence intervals are approximate except at predictor means. Use cph(...,x=T,y=T) (and don't use linear.predictors=) for better estimates. 
$time [1] 2 4 6 $surv 2 4 6 [1,] 0.98714 0.95256 0.90853 [2,] 0.97889 0.92300 0.85372 [3,] 0.96544 0.87625 0.77047 $lower 2 4 6 [1,] 0.96890 0.90462 0.82628 [2,] 0.94925 0.84766 0.73007 [3,] 0.91771 0.76148 0.59528 $upper 2 4 6 [1,] 0.99471 0.97672 0.95292 [2,] 0.99130 0.96190 0.92357 [3,] 0.98570 0.93797 0.87715 $std.err 2 4 6 [1,] 0.45542 0.36941 0.35091 [2,] 0.45542 0.36941 0.35091 [3,] 0.45542 0.36941 0.35091 $requested.strata [1] 1 1 1 > options(datadist=NULL) > > > > cleanEx(); ..nameEx <- "survest.psm" > > ### * survest.psm > > flush(stderr()); flush(stdout()) > > ### Name: survest.psm > ### Title: Parametric Survival Estimates > ### Aliases: survest.psm print.survest.psm > ### Keywords: survival regression models > > ### ** Examples > > # Simulate data from a proportional hazards population model > n <- 1000 > set.seed(731) > age <- 50 + 12*rnorm(n) > label(age) <- "Age" > cens <- 15*runif(n) > h <- .02*exp(.04*(age-50)) > dt <- -log(runif(n))/h > label(dt) <- 'Follow-up Time' > e <- ifelse(dt <= cens,1,0) > dt <- pmin(dt, cens) > units(dt) <- "Year" > S <- Surv(dt,e) > > f <- psm(S ~ lsp(age,c(40,70))) > survest(f, data.frame(age=seq(20,80,by=5)), times=2) N: 1000 Events: 139 Time:2 Years LinearPredictor survival Lower Upper SE 1 6.2065 0.99663 0.97663 0.99952 0.99277 2 5.6785 0.99419 0.97554 0.99863 0.73817 3 5.1506 0.99000 0.97406 0.99617 0.49051 4 4.6226 0.98282 0.97103 0.98984 0.26965 5 4.0947 0.97056 0.95649 0.98013 0.20306 6 3.9544 0.96606 0.95288 0.97559 0.17083 7 3.8141 0.96087 0.94798 0.97062 0.14875 8 3.6739 0.95492 0.94093 0.96566 0.14164 9 3.5336 0.94808 0.93075 0.96117 0.15163 10 3.3934 0.94024 0.91670 0.95728 0.17583 11 3.2531 0.93126 0.89821 0.95385 0.20936 12 2.9313 0.90550 0.86250 0.93555 0.20351 13 2.6095 0.87078 0.78557 0.92373 0.28383 > > #Get predicted survival curve for 40 year old > survest(f, data.frame(age=40)) N: 1000 Events: 139 Time survival Lower Upper SE 1 0.00000 1.00000 1.00000 1.00000 Inf 2 0.07537 0.99899 0.99778 0.99954 0.40040 3 0.15074 0.99793 0.99587 0.99896 0.35318 4 0.22611 0.99685 0.99405 0.99834 0.32635 5 0.30148 0.99577 0.99228 0.99768 0.30778 6 0.37685 0.99468 0.99055 0.99700 0.29370 7 0.45222 0.99358 0.98886 0.99630 0.28243 8 0.52759 0.99248 0.98718 0.99559 0.27309 9 0.60295 0.99137 0.98553 0.99486 0.26515 10 0.67832 0.99026 0.98389 0.99412 0.25828 11 0.75369 0.98915 0.98227 0.99337 0.25225 12 0.82906 0.98803 0.98065 0.99261 0.24689 13 0.90443 0.98691 0.97905 0.99184 0.24209 14 0.97980 0.98579 0.97746 0.99106 0.23774 15 1.05517 0.98467 0.97587 0.99028 0.23379 16 1.13054 0.98355 0.97430 0.98949 0.23018 17 1.20591 0.98243 0.97273 0.98870 0.22686 18 1.28128 0.98131 0.97116 0.98790 0.22379 19 1.35665 0.98018 0.96961 0.98710 0.22094 20 1.43202 0.97906 0.96805 0.98630 0.21830 21 1.50739 0.97793 0.96650 0.98549 0.21584 22 1.58276 0.97680 0.96496 0.98467 0.21353 23 1.65813 0.97568 0.96342 0.98386 0.21137 24 1.73350 0.97455 0.96189 0.98304 0.20934 25 1.80886 0.97342 0.96036 0.98222 0.20743 26 1.88423 0.97229 0.95883 0.98140 0.20563 27 1.95960 0.97117 0.95730 0.98057 0.20393 28 2.03497 0.97004 0.95578 0.97975 0.20233 29 2.11034 0.96891 0.95426 0.97892 0.20081 30 2.18571 0.96778 0.95275 0.97809 0.19936 31 2.26108 0.96665 0.95124 0.97726 0.19799 32 2.33645 0.96553 0.94973 0.97642 0.19669 33 2.41182 0.96440 0.94822 0.97559 0.19546 34 2.48719 0.96327 0.94671 0.97475 0.19428 35 2.56256 0.96214 0.94521 0.97392 0.19315 36 2.63793 0.96102 0.94371 0.97308 0.19208 37 2.71330 0.95989 0.94221 0.97224 0.19106 38 2.78867 0.95877 0.94071 0.97140 0.19008 39 
2.86404 0.95764 0.93922 0.97057 0.18915 40 2.93940 0.95651 0.93772 0.96973 0.18826 41 3.01477 0.95539 0.93623 0.96889 0.18741 42 3.09014 0.95426 0.93474 0.96805 0.18659 43 3.16551 0.95314 0.93325 0.96720 0.18581 44 3.24088 0.95202 0.93177 0.96636 0.18506 45 3.31625 0.95089 0.93028 0.96552 0.18434 46 3.39162 0.94977 0.92880 0.96468 0.18365 47 3.46699 0.94865 0.92731 0.96384 0.18299 48 3.54236 0.94753 0.92583 0.96300 0.18236 49 3.61773 0.94640 0.92436 0.96216 0.18175 50 3.69310 0.94528 0.92288 0.96132 0.18117 51 3.76847 0.94416 0.92140 0.96047 0.18061 52 3.84384 0.94304 0.91993 0.95963 0.18007 53 3.91921 0.94192 0.91845 0.95879 0.17955 54 3.99458 0.94081 0.91698 0.95795 0.17905 55 4.06994 0.93969 0.91551 0.95711 0.17858 56 4.14531 0.93857 0.91404 0.95627 0.17812 57 4.22068 0.93746 0.91257 0.95543 0.17768 58 4.29605 0.93634 0.91110 0.95459 0.17725 59 4.37142 0.93523 0.90964 0.95375 0.17685 60 4.44679 0.93411 0.90817 0.95291 0.17645 61 4.52216 0.93300 0.90671 0.95208 0.17608 62 4.59753 0.93189 0.90525 0.95124 0.17571 63 4.67290 0.93077 0.90378 0.95040 0.17537 64 4.74827 0.92966 0.90232 0.94956 0.17503 65 4.82364 0.92855 0.90087 0.94873 0.17471 66 4.89901 0.92744 0.89941 0.94789 0.17440 67 4.97438 0.92633 0.89795 0.94706 0.17410 68 5.04975 0.92523 0.89649 0.94622 0.17382 69 5.12512 0.92412 0.89504 0.94539 0.17354 70 5.20049 0.92301 0.89359 0.94455 0.17328 71 5.27585 0.92191 0.89213 0.94372 0.17303 72 5.35122 0.92080 0.89068 0.94289 0.17278 73 5.42659 0.91970 0.88923 0.94206 0.17255 74 5.50196 0.91859 0.88778 0.94123 0.17233 75 5.57733 0.91749 0.88634 0.94040 0.17211 76 5.65270 0.91639 0.88489 0.93957 0.17191 77 5.72807 0.91529 0.88344 0.93874 0.17171 78 5.80344 0.91419 0.88200 0.93791 0.17152 79 5.87881 0.91309 0.88056 0.93708 0.17134 80 5.95418 0.91199 0.87911 0.93625 0.17116 81 6.02955 0.91090 0.87767 0.93543 0.17100 82 6.10492 0.90980 0.87623 0.93460 0.17084 83 6.18029 0.90870 0.87479 0.93378 0.17068 84 6.25566 0.90761 0.87335 0.93296 0.17054 85 6.33103 0.90652 0.87192 0.93213 0.17040 86 6.40639 0.90542 0.87048 0.93131 0.17027 87 6.48176 0.90433 0.86905 0.93049 0.17014 88 6.55713 0.90324 0.86761 0.92967 0.17002 89 6.63250 0.90215 0.86618 0.92885 0.16990 90 6.70787 0.90106 0.86475 0.92803 0.16979 91 6.78324 0.89997 0.86332 0.92721 0.16969 92 6.85861 0.89889 0.86189 0.92640 0.16959 93 6.93398 0.89780 0.86046 0.92558 0.16950 94 7.00935 0.89672 0.85903 0.92477 0.16941 95 7.08472 0.89563 0.85761 0.92395 0.16933 96 7.16009 0.89455 0.85618 0.92314 0.16925 97 7.23546 0.89347 0.85476 0.92232 0.16917 98 7.31083 0.89238 0.85334 0.92151 0.16910 99 7.38620 0.89130 0.85191 0.92070 0.16904 100 7.46157 0.89022 0.85049 0.91989 0.16898 101 7.53693 0.88914 0.84908 0.91908 0.16892 102 7.61230 0.88807 0.84766 0.91827 0.16886 103 7.68767 0.88699 0.84624 0.91747 0.16882 104 7.76304 0.88591 0.84482 0.91666 0.16877 105 7.83841 0.88484 0.84341 0.91585 0.16873 106 7.91378 0.88377 0.84200 0.91505 0.16869 107 7.98915 0.88269 0.84058 0.91425 0.16865 108 8.06452 0.88162 0.83917 0.91344 0.16862 109 8.13989 0.88055 0.83776 0.91264 0.16859 110 8.21526 0.87948 0.83635 0.91184 0.16857 111 8.29063 0.87841 0.83495 0.91104 0.16855 112 8.36600 0.87734 0.83354 0.91024 0.16853 113 8.44137 0.87628 0.83213 0.90944 0.16851 114 8.51674 0.87521 0.83073 0.90864 0.16850 115 8.59211 0.87415 0.82933 0.90785 0.16849 116 8.66748 0.87308 0.82793 0.90705 0.16848 117 8.74284 0.87202 0.82653 0.90626 0.16848 118 8.81821 0.87096 0.82513 0.90546 0.16847 119 8.89358 0.86990 0.82373 0.90467 0.16847 120 8.96895 0.86884 0.82233 0.90388 0.16848 121 
9.04432 0.86778 0.82094 0.90309 0.16848 122 9.11969 0.86672 0.81954 0.90230 0.16849 123 9.19506 0.86566 0.81815 0.90151 0.16850 124 9.27043 0.86461 0.81676 0.90072 0.16851 125 9.34580 0.86355 0.81536 0.89993 0.16852 126 9.42117 0.86250 0.81398 0.89915 0.16854 127 9.49654 0.86144 0.81259 0.89836 0.16856 128 9.57191 0.86039 0.81120 0.89758 0.16858 129 9.64728 0.85934 0.80981 0.89679 0.16860 130 9.72265 0.85829 0.80843 0.89601 0.16863 131 9.79802 0.85724 0.80705 0.89523 0.16865 132 9.87338 0.85620 0.80566 0.89445 0.16868 133 9.94875 0.85515 0.80428 0.89367 0.16871 134 10.02412 0.85410 0.80290 0.89289 0.16874 135 10.09949 0.85306 0.80153 0.89211 0.16877 136 10.17486 0.85201 0.80015 0.89133 0.16881 137 10.25023 0.85097 0.79877 0.89056 0.16884 138 10.32560 0.84993 0.79740 0.88978 0.16888 139 10.40097 0.84889 0.79603 0.88901 0.16892 140 10.47634 0.84785 0.79465 0.88823 0.16896 141 10.55171 0.84681 0.79328 0.88746 0.16900 142 10.62708 0.84577 0.79191 0.88669 0.16905 143 10.70245 0.84474 0.79055 0.88592 0.16909 144 10.77782 0.84370 0.78918 0.88515 0.16914 145 10.85319 0.84267 0.78782 0.88438 0.16919 146 10.92856 0.84163 0.78645 0.88361 0.16924 147 11.00393 0.84060 0.78509 0.88284 0.16929 148 11.07929 0.83957 0.78373 0.88208 0.16934 149 11.15466 0.83854 0.78237 0.88131 0.16939 150 11.23003 0.83751 0.78101 0.88055 0.16945 151 11.30540 0.83648 0.77965 0.87978 0.16950 152 11.38077 0.83546 0.77830 0.87902 0.16956 153 11.45614 0.83443 0.77694 0.87826 0.16962 154 11.53151 0.83341 0.77559 0.87750 0.16968 155 11.60688 0.83238 0.77424 0.87674 0.16973 156 11.68225 0.83136 0.77289 0.87598 0.16980 157 11.75762 0.83034 0.77154 0.87522 0.16986 158 11.83299 0.82932 0.77019 0.87447 0.16992 159 11.90836 0.82830 0.76884 0.87371 0.16998 160 11.98373 0.82728 0.76750 0.87295 0.17005 161 12.05910 0.82626 0.76616 0.87220 0.17011 162 12.13447 0.82524 0.76481 0.87145 0.17018 163 12.20983 0.82423 0.76347 0.87069 0.17025 164 12.28520 0.82321 0.76213 0.86994 0.17032 165 12.36057 0.82220 0.76080 0.86919 0.17038 166 12.43594 0.82119 0.75946 0.86844 0.17045 167 12.51131 0.82018 0.75812 0.86769 0.17053 168 12.58668 0.81917 0.75679 0.86695 0.17060 169 12.66205 0.81816 0.75546 0.86620 0.17067 170 12.73742 0.81715 0.75413 0.86545 0.17074 171 12.81279 0.81614 0.75280 0.86471 0.17082 172 12.88816 0.81513 0.75147 0.86396 0.17089 173 12.96353 0.81413 0.75014 0.86322 0.17096 174 13.03890 0.81313 0.74882 0.86248 0.17104 175 13.11427 0.81212 0.74749 0.86173 0.17112 176 13.18964 0.81112 0.74617 0.86099 0.17119 177 13.26501 0.81012 0.74485 0.86025 0.17127 178 13.34037 0.80912 0.74353 0.85951 0.17135 179 13.41574 0.80812 0.74221 0.85878 0.17143 180 13.49111 0.80712 0.74090 0.85804 0.17151 181 13.56648 0.80613 0.73958 0.85730 0.17159 182 13.64185 0.80513 0.73827 0.85657 0.17167 183 13.71722 0.80414 0.73696 0.85583 0.17175 184 13.79259 0.80314 0.73565 0.85510 0.17183 185 13.86796 0.80215 0.73434 0.85437 0.17191 186 13.94333 0.80116 0.73303 0.85363 0.17200 187 14.01870 0.80017 0.73172 0.85290 0.17208 188 14.09407 0.79918 0.73042 0.85217 0.17216 189 14.16944 0.79819 0.72911 0.85144 0.17225 190 14.24481 0.79720 0.72781 0.85071 0.17233 191 14.32018 0.79622 0.72651 0.84999 0.17242 192 14.39555 0.79523 0.72521 0.84926 0.17250 193 14.47092 0.79425 0.72392 0.84853 0.17259 194 14.54628 0.79327 0.72262 0.84781 0.17267 195 14.62165 0.79228 0.72132 0.84708 0.17276 196 14.69702 0.79130 0.72003 0.84636 0.17285 197 14.77239 0.79032 0.71874 0.84564 0.17294 198 14.84776 0.78934 0.71745 0.84492 0.17302 199 14.92313 0.78837 0.71616 0.84419 0.17311 200 14.99850 
0.78739 0.71487 0.84347 0.17320
>
> # Get hazard function for 40 year old
> survest(f, data.frame(age=40), what="hazard")$surv   # still called surv
Warning in survest.psm(f, data.frame(age = 40), what = "hazard") :
  conf.int ignored for what="hazard"
  [1] 0.015080 0.013881 0.014192 0.014378 0.014511 0.014616 0.014701 0.014774
  [9] 0.014838 0.014894 0.014944 0.014990 0.015032 0.015070 0.015106 0.015140
 [17] 0.015171 0.015200 0.015228 0.015255 0.015280 0.015304 0.015327 0.015349
 [25] 0.015369 0.015390 0.015409 0.015428 0.015446 0.015463 0.015480 0.015496
 [33] 0.015512 0.015527 0.015542 0.015557 0.015571 0.015584 0.015598 0.015611
 [41] 0.015623 0.015636 0.015648 0.015660 0.015671 0.015682 0.015693 0.015704
 [49] 0.015715 0.015725 0.015735 0.015745 0.015755 0.015765 0.015774 0.015784
 [57] 0.015793 0.015802 0.015810 0.015819 0.015828 0.015836 0.015844 0.015852
 [65] 0.015860 0.015868 0.015876 0.015884 0.015891 0.015899 0.015906 0.015913
 [73] 0.015920 0.015928 0.015934 0.015941 0.015948 0.015955 0.015961 0.015968
 [81] 0.015974 0.015981 0.015987 0.015993 0.015999 0.016005 0.016011 0.016017
 [89] 0.016023 0.016029 0.016035 0.016040 0.016046 0.016052 0.016057 0.016063
 [97] 0.016068 0.016073 0.016079 0.016084 0.016089 0.016094 0.016099 0.016104
[105] 0.016109 0.016114 0.016119 0.016124 0.016129 0.016134 0.016138 0.016143
[113] 0.016148 0.016152 0.016157 0.016161 0.016166 0.016170 0.016175 0.016179
[121] 0.016183 0.016188 0.016192 0.016196 0.016200 0.016205 0.016209 0.016213
[129] 0.016217 0.016221 0.016225 0.016229 0.016233 0.016237 0.016241 0.016245
[137] 0.016248 0.016252 0.016256 0.016260 0.016264 0.016267 0.016271 0.016275
[145] 0.016278 0.016282 0.016285 0.016289 0.016293 0.016296 0.016300 0.016303
[153] 0.016306 0.016310 0.016313 0.016317 0.016320 0.016323 0.016327 0.016330
[161] 0.016333 0.016337 0.016340 0.016343 0.016346 0.016349 0.016353 0.016356
[169] 0.016359 0.016362 0.016365 0.016368 0.016371 0.016374 0.016377 0.016380
[177] 0.016383 0.016386 0.016389 0.016392 0.016395 0.016398 0.016401 0.016404
[185] 0.016407 0.016410 0.016412 0.016415 0.016418 0.016421 0.016424 0.016426
[193] 0.016429 0.016432 0.016435 0.016437 0.016440 0.016443 0.016445 0.016448
>
>
>
> cleanEx(); ..nameEx <- "survfit"
>
> ### * survfit
>
> flush(stderr()); flush(stdout())
>
> ### Name: survfit
> ### Title: Modified Version of survival Package survfit Function
> ### Aliases: survfit
> ### Keywords: survival
>
> ### ** Examples
>
> ## Not run:
> ##D # fit a Kaplan-Meier and print the results
> ##D data(aml)
> ##D survfit(Surv(time, status) ~ x, data=aml)
> ## End(Not run)
>
>
>
> cleanEx(); ..nameEx <- "survplot"
>
> ### * survplot
>
> flush(stderr()); flush(stdout())
>
> ### Name: survplot
> ### Title: Plot Survival Curves and Hazard Functions
> ### Aliases: survplot survplot.Design survplot.survfit
> ### Keywords: survival hplot nonparametric models
>
> ### ** Examples
>
> # Simulate data from a population model in which the log hazard
> # function is linear in age and there is no age x sex interaction
> n <- 1000
> set.seed(731)
> age <- 50 + 12*rnorm(n)
> label(age) <- "Age"
> sex <- factor(sample(c('male','female'), n, TRUE))
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50)+.8*(sex=='female'))  # note: levels are lower case
> dt <- -log(runif(n))/h
> label(dt) <- 'Follow-up Time'
> e <- ifelse(dt <= cens,1,0)
> dt <- pmin(dt, cens)
> units(dt) <- "Year"
> dd <- datadist(age, sex)
> options(datadist='dd')
> S <- Surv(dt,e)
>
> # Plot stratified survival curves by sex, adj for quadratic age effect
> # with age x sex interaction (2 d.f. interaction)
>
> f <- cph(S ~ pol(age,2)*strat(sex), surv=TRUE)
> # or f <- psm(S ~ pol(age,2)*sex)
>
> survplot(f, sex=NA, n.risk=TRUE)   # Adjust age to median
> survplot(f, sex=NA, logt=TRUE, loglog=TRUE)   # Check for Weibull-ness (linearity)
> survplot(f, sex=c("male","female"), age=50)
> # Would have worked without datadist
> # or with an incomplete datadist
> survplot(f, sex=NA, label.curves=list(keys=c(2,0), point.inc=2))
> # Identify curves with symbols
>
> survplot(f, sex=NA, label.curves=list(keys=c('m','f')))
> # Identify curves with single letters
>
> # Plots by quintiles of age, adjusting sex to male
> options(digits=3)
> survplot(f, age=quantile(age,seq(0,1,by=.2)), sex="male")
>
> # Plot Kaplan-Meier survival estimates for males
> f <- survfit(S, subset=sex=="male")
> survplot(f)
>
> # Plot survival for both sexes
> f <- survfit(S ~ sex)
> survplot(f)
> # Check for log-normal and log-logistic fits
> survplot(f, fun=qnorm, ylab="Inverse Normal Transform")
> survplot(f, fun=function(y)log(y/(1-y)), ylab="Logit S(t)")
>
> options(datadist=NULL)
>
>
>
> cleanEx(); ..nameEx <- "val.prob"
>
> ### * val.prob
>
> flush(stderr()); flush(stdout())
>
> ### Name: val.prob
> ### Title: Validate Predicted Probabilities
> ### Aliases: val.prob val.surv print.val.prob plot.val.prob plot.val.surv
> ### Keywords: models regression htest smooth survival
>
> ### ** Examples
>
> # Fit logistic model on 100 observations simulated from the actual
> # model given by Prob(Y=1 given X1, X2, X3) = 1/(1+exp[-(-1 + 2X1)]),
> # where X1 is a random uniform [0,1] variable.  Hence X2 and X3 are
> # irrelevant.  After fitting a linear additive model in X1, X2,
> # and X3, the coefficients are used to predict Prob(Y=1) on a
> # separate sample of 100 observations.
>
> set.seed(1)
> n <- 200
> x1 <- runif(n)
> x2 <- runif(n)
> x3 <- runif(n)
> logit <- 2*(x1-.5)
> P <- 1/(1+exp(-logit))
> y <- ifelse(runif(n)<=P, 1, 0)
> d <- data.frame(x1,x2,x3,y)
> f <- lrm(y ~ x1 + x2 + x3, subset=1:100)
> pred.logit <- predict(f, d[101:200,])
> phat <- 1/(1+exp(-pred.logit))
> val.prob(phat, y[101:200], m=20, cex=.5)   # subgroups of 20 obs.
Dxy C (ROC) R2 D D:Chi-sq D:p U U:Chi-sq 0.32053 0.66026 0.09499 0.06390 7.39010 0.00656 -0.01913 0.08715 U:p Q Brier Intercept Slope Emax Eavg 0.95736 0.08303 0.23190 0.05229 0.95652 0.01925 0.02584 > > # Validate predictions more stringently by stratifying on whether > # x1 is above or below the median > > v <- val.prob(phat, y[101:200], group=x1[101:200], g.group=2) > v n Pavg Obs ChiSq ChiSq2 Eavg Eavg/P90 Med OR C B [0.0131,0.526) 50 0.364 0.36 0.004 0.149 0.049 0.178 1.23 0.637 0.220 [0.5260,0.993] 50 0.590 0.62 0.195 1.902 0.056 0.207 1.08 0.514 0.244 Overall 100 0.477 0.49 0.073 0.088 0.026 0.065 1.09 0.660 0.232 B ChiSq B cal [0.0131,0.526) 0.073 0.211 [0.5260,0.993] 0.327 0.230 Overall 0.021 0.230 Quantiles of Predicted Probabilities 0.01 0.025 0.05 0.1 0.25 0.5 0.75 0.9 0.95 0.975 [0.0131,0.526) 0.213 0.222 0.242 0.270 0.301 0.364 0.407 0.471 0.516 0.531 [0.5260,0.993] 0.397 0.411 0.444 0.488 0.535 0.601 0.650 0.668 0.715 0.748 Overall 0.216 0.243 0.271 0.292 0.365 0.479 0.598 0.654 0.668 0.714 0.99 [0.0131,0.526) 0.545 [0.5260,0.993] 0.764 Overall 0.750 > plot(v) Group [0.0131,0.526) [0.5260,0.993] Overall n 50 50 100 Pavg 0.364 0.590 0.477 Obs 0.36 0.62 0.49 ChiSq 0.0 0.2 0.1 ChiSq2 0.1 1.9 0.1 Eavg 0.049 0.056 0.026 Eavg/P90 0.178 0.207 0.065 Med OR 1.23 1.08 1.09 C 0.637 0.514 0.660 B 0.220 0.244 0.232 B ChiSq 0.1 0.3 0.0 B cal 0.211 0.230 0.230 > plot(v, flag=function(stats) ifelse( + stats[,'ChiSq2'] > qchisq(.95,2) | + stats[,'B ChiSq'] > qchisq(.95,1), '*', ' ') ) Group [0.0131,0.526) [0.5260,0.993] Overall n 50 50 100 Pavg 0.364 0.590 0.477 Obs 0.36 0.62 0.49 ChiSq 0.0 0.2 0.1 ChiSq2 0.1 1.9 0.1 Eavg 0.049 0.056 0.026 Eavg/P90 0.178 0.207 0.065 Med OR 1.23 1.08 1.09 C 0.637 0.514 0.660 B 0.220 0.244 0.232 B ChiSq 0.1 0.3 0.0 B cal 0.211 0.230 0.230 > # Stars rows of statistics in plot corresponding to significant > # mis-calibration at the 0.05 level instead of the default, 0.01 > > plot(val.prob(phat, y[101:200], group=x1[101:200], g.group=2), + col=1:3) # 3 colors (1 for overall) Group [0.0131,0.526) [0.5260,0.993] Overall n 50 50 100 Pavg 0.364 0.590 0.477 Obs 0.36 0.62 0.49 ChiSq 0.0 0.2 0.1 ChiSq2 0.1 1.9 0.1 Eavg 0.049 0.056 0.026 Eavg/P90 0.178 0.207 0.065 Med OR 1.23 1.08 1.09 C 0.637 0.514 0.660 B 0.220 0.244 0.232 B ChiSq 0.1 0.3 0.0 B cal 0.211 0.230 0.230 > > # Weighted calibration curves > # plot(val.prob(pred, y, group=age, weights=freqs)) > > # Survival analysis examples > # Generate failure times from an exponential distribution > set.seed(123) # so can reproduce results > n <- 2000 > age <- 50 + 12*rnorm(n) > sex <- factor(sample(c('Male','Female'), n, rep=TRUE, prob=c(.6, .4))) > cens <- 15*runif(n) > h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) > t <- -log(runif(n))/h > label(t) <- 'Time to Event' > ev <- ifelse(t <= cens, 1, 0) > t <- pmin(t, cens) > S <- Surv(t, ev) > > # First validate true model used to generate data > w <- val.surv(est.surv=exp(-h*t), S=S) > plot(w) > plot(w, group=sex) # stratify by sex > > # Now fit an exponential model and validate > # Note this is not really a validation as we're using the > # training data here > f <- psm(S ~ age + sex, dist='exponential', y=TRUE) > w <- val.surv(f) > plot(w, group=sex) > > # We know the censoring time on every subject, so we can > # compare the predicted Pr[T <= observed T | T>c, X] to > # its expectation 0.5 Pr[T <= C | X] where C = censoring time > # We plot a ratio that should equal one > w <- val.surv(f, censor=cens) > plot(w) Mean F(T|T > plot(w, group=age, g=3) # stratify by tertile 
> 
> 
> cleanEx(); ..nameEx <- "validate"
> 
> ### * validate
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: validate
> ### Title: Resampling Validation of a Fitted Model's Indexes of Fit
> ### Aliases: validate
> ### Keywords: models regression methods survival
> 
> ### ** Examples
> 
> # See examples for validate.cph, validate.lrm, validate.ols
> # Example of validating a parametric survival model:
> 
> n <- 1000
> set.seed(731)
> age <- 50 + 12*rnorm(n)
> label(age) <- "Age"
> sex <- factor(sample(c('Male','Female'), n, TRUE))
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
> dt <- -log(runif(n))/h
> e <- ifelse(dt <= cens, 1, 0)
> dt <- pmin(dt, cens)
> units(dt) <- "Year"
> S <- Surv(dt, e)
> 
> f <- psm(S ~ age*sex, x=TRUE, y=TRUE)    # Weibull model
> # Validate full model fit
> validate(f, B=10)    # usually B=150
Iteration:  1 2 3 4 5 6 7 8 9 10 
          index.orig training   test optimism index.corrected  n
R2           0.09179  0.10325  0.088  0.01517          0.0766 10
Intercept    0.00000  0.00000  0.347 -0.34675          0.3467 10
Slope        1.00000  1.00000  0.901  0.09852          0.9015 10
D            0.04491  0.05034  0.043  0.00734          0.0376 10
U           -0.00116 -0.00114  0.159 -0.16033          0.1592 10
Q            0.04607  0.05148 -0.116  0.16767         -0.1216 10
> 
> # Validate stepwise model with typical (not so good) stopping rule
> # bw=TRUE does not preserve hierarchy of terms at present
> validate(f, B=10, bw=TRUE, rule="p", sls=.1, type="individual")

		Backwards Step-down - Original Model

 Deleted   Chi-Sq d.f. P      Residual d.f. P      AIC  
 age * sex 0.98   1    0.3217 0.98     1    0.3217 -1.02

Approximate Estimates after Deleting Factors

                Coef    S.E. Wald Z         P
(Intercept)  5.40012 0.37320 14.470 0.000e+00
age         -0.04254 0.00598 -7.114 1.127e-12
sex=Male     0.58686 0.14850  3.952 7.750e-05

Factors in Final Model

[1] age sex

Iteration:  1 2 3 4 5 6 7 8 9 10 

		Factors Retained in Backwards Elimination

 age sex age * sex
 * * * * * * * * * * * * * * * * * * * * * *

Frequencies of Numbers of Factors Retained

2 3 
8 2 

          index.orig training    test optimism index.corrected  n
R2           0.09068  0.09811  0.0894  0.00868          0.0820 10
Intercept    0.00000  0.00000  0.0759 -0.07594          0.0759 10
Slope        1.00000  1.00000  0.9799  0.02012          0.9799 10
D            0.04433  0.04811  0.0437  0.00441          0.0399 10
U           -0.00116 -0.00115  0.1598 -0.16092          0.1598 10
Q            0.04549  0.04926 -0.1161  0.16533         -0.1198 10
> 
> 
> cleanEx(); ..nameEx <- "validate.cph"
> 
> ### * validate.cph
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: validate.cph
> ### Title: Validation of a Fitted Cox or Parametric Survival Model's
> ###   Indexes of Fit
> ### Aliases: validate.cph validate.psm
> ### Keywords: models regression survival
> 
> ### ** Examples
> 
> n <- 1000
> set.seed(731)
> age <- 50 + 12*rnorm(n)
> label(age) <- "Age"
> sex <- factor(sample(c('Male','Female'), n, TRUE))
> cens <- 15*runif(n)
> h <- .02*exp(.04*(age-50)+.8*(sex=='Female'))
> dt <- -log(runif(n))/h
> e <- ifelse(dt <= cens, 1, 0)
> dt <- pmin(dt, cens)
> units(dt) <- "Year"
> S <- Surv(dt, e)
> 
> f <- cph(S ~ age*sex, x=TRUE, y=TRUE)
> # Validate full model fit
> validate(f, B=10)    # normally B=150
Iteration:  1 2 3 4 5 6 7 8 9 10 
      index.orig  training      test optimism index.corrected  n
R2      0.081137  0.090315  0.077805  0.01251         0.06863 10
Slope   1.000000  1.000000  0.913037  0.08696         0.91304 10
D       0.031161  0.034184  0.029818  0.00437         0.02679 10
U      -0.000821 -0.000799  0.000532 -0.00133         0.00051 10
Q       0.031982  0.034983  0.029286  0.00570         0.02628 10
> 
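> # In the tables above the columns are related by
> # optimism = training - test and index.corrected = index.orig - optimism.
> # A tiny arithmetic check using the R2 row of the last table
> # (values copied from above):
> ## Not run: 
> ##D orig <- 0.081137; train <- 0.090315; test <- 0.077805
> ##D opt  <- train - test    # 0.01251, the reported optimism
> ##D orig - opt              # 0.06863, the reported index.corrected
> ## End(Not run)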
> # Validate a model with stratification.  Dxy is the only
> # discrimination measure for such models, but Dxy requires
> # one to choose a single time at which to predict S(t|X)
> f <- cph(S ~ rcs(age)*strat(sex),
+          x=TRUE, y=TRUE, surv=TRUE, time.inc=2)
> validate(f, dxy=TRUE, u=2, B=10)    # normally B=150
Iteration:  1 2 3 4 5 6 7 8 9 10 
      index.orig  training    test optimism index.corrected  n
Dxy     0.349137  0.297372  0.2790   0.0183         0.33079 10
R2      0.075921  0.085910  0.0468   0.0391         0.03682 10
Slope   1.000000  1.000000  0.2726   0.7274         0.27259 10
D       0.031721  0.036055  0.0115   0.0246         0.00713 10
U      -0.000926 -0.000923  0.0982  -0.0992         0.09824 10
Q       0.032646  0.036978 -0.0868   0.1238        -0.09110 10
> # Note u=time.inc
> 
> 
> cleanEx(); ..nameEx <- "validate.lrm"
> 
> ### * validate.lrm
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: validate.lrm
> ### Title: Resampling Validation of a Logistic Model
> ### Aliases: validate.lrm
> ### Keywords: models regression
> 
> ### ** Examples
> 
> n <- 1000    # define sample size
> age <- rnorm(n, 50, 10)
> blood.pressure <- rnorm(n, 120, 15)
> cholesterol <- rnorm(n, 200, 25)
> sex <- factor(sample(c('female','male'), n, TRUE))
> 
> # Specify population model for log odds that Y=1
> L <- .4*(sex=='male') + .045*(age-50) +
+      (log(cholesterol - 10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
> # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
> y <- ifelse(runif(n) < plogis(L), 1, 0)
> 
> f <- lrm(y ~ sex*rcs(cholesterol)+pol(age,2)+blood.pressure, x=TRUE, y=TRUE)
> # Validate full model fit
> validate(f, B=10)    # normally B=150
Iteration:  1 2 3 4 5 6 7 8 9 10 
          index.orig training    test optimism index.corrected  n
Dxy            0.318    0.348 0.29910  0.04908         0.26903 10
R2             0.107    0.129 0.09506  0.03411         0.07279 10
Intercept      0.000    0.000 0.02723 -0.02723         0.02723 10
Slope          1.000    1.000 0.83796  0.16204         0.83796 10
Emax           0.000    0.000 0.04392  0.04392         0.04392 10
D              0.082    0.100 0.07247  0.02775         0.05425 10
U             -0.002   -0.002 0.00267 -0.00467         0.00267 10
Q              0.084    0.102 0.06981  0.03242         0.05159 10
B              0.226    0.221 0.22922 -0.00787         0.23400 10
> validate(f, B=10, group=y)
Iteration:  1 2 3 4 5 6 7 8 9 10 
          index.orig training     test optimism index.corrected  n
Dxy            0.318   0.3459 0.300509  0.04536        0.272757 10
R2             0.107   0.1230 0.095017  0.02800        0.078901 10
Intercept      0.000   0.0000 0.028682 -0.02868        0.028682 10
Slope          1.000   1.0000 0.865645  0.13436        0.865645 10
Emax           0.000   0.0000 0.036888  0.03689        0.036888 10
D              0.082   0.0953 0.072442  0.02285        0.059164 10
U             -0.002  -0.0020 0.000459 -0.00246        0.000459 10
Q              0.084   0.0973 0.071983  0.02530        0.058705 10
B              0.226   0.2230 0.228748 -0.00575        0.231877 10
> # two-sample validation: make resamples have same numbers of
> # successes and failures as original sample
> 
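> # group=y requests a stratified bootstrap: indices are resampled within
> # each outcome level, so every resample keeps the original 0/1 split.
> # A minimal base-R sketch of the idea, assuming each outcome level has
> # more than one observation (an illustration, not the package's
> # internal code):
> ## Not run: 
> ##D boot.index <- unlist(lapply(split(seq_along(y), y),
> ##D                             function(i) sample(i, replace=TRUE)))
> ##D table(y[boot.index])    # identical frequencies to table(y)
> ## End(Not run)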
> # Validate stepwise model with typical (not so good) stopping rule
> validate(f, B=10, bw=TRUE, rule="p", sls=.1, type="individual")

		Backwards Step-down - Original Model

 Deleted        Chi-Sq d.f. P      Residual d.f. P      AIC  
 blood.pressure 0.27   1    0.6027 0.27     1    0.6027 -1.73
 sex            1.61   1    0.2051 1.88     2    0.3913 -2.12
 cholesterol    5.38   4    0.2506 7.26     6    0.2978 -4.74

Approximate Estimates after Deleting Factors

                                Coef      S.E.   Wald Z      P
Intercept                 -0.5577350 1.1672495 -0.47782 0.6328
age                       -0.0177280 0.0481276 -0.36835 0.7126
age^2                      0.0005364 0.0004883  1.09831 0.2721
sex=male * cholesterol     0.0017124 0.0012274  1.39512 0.1630
sex=male * cholesterol'    0.0178045 0.0422760  0.42115 0.6736
sex=male * cholesterol''   0.0221005 0.3890092  0.05681 0.9547
sex=male * cholesterol''' -0.0968406 0.7308578 -0.13250 0.8946

Factors in Final Model

[1] age               sex * cholesterol

Iteration:  1 2 3 4 5 6 7 8 9 10 

		Factors Retained in Backwards Elimination

 sex cholesterol age blood.pressure sex * cholesterol
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

Frequencies of Numbers of Factors Retained

2 3 4 
4 3 3 

          index.orig training    test optimism index.corrected  n
Dxy            0.304   0.3471 0.29832  0.04881         0.25567 10
R2             0.097   0.1256 0.09454  0.03107         0.06586 10
Intercept      0.000   0.0000 0.01535 -0.01535         0.01535 10
Slope          1.000   1.0000 0.86423  0.13577         0.86423 10
Emax           0.000   0.0000 0.03506  0.03506         0.03506 10
D              0.074   0.0973 0.07206  0.02529         0.04869 10
U             -0.002  -0.0020 0.00206 -0.00406         0.00206 10
Q              0.076   0.0993 0.07000  0.02935         0.04662 10
B              0.228   0.2221 0.22923 -0.00715         0.23508 10
> 
> ## Not run: 
> ##D # Fit a continuation ratio model and validate it for the predicted
> ##D # probability that y=0
> ##D u <- cr.setup(y)
> ##D Y <- u$y
> ##D cohort <- u$cohort
> ##D attach(mydataframe[u$subs,])
> ##D f <- lrm(Y ~ cohort+rcs(age,4)*sex, penalty=list(interaction=2))
> ##D validate(f, cluster=u$subs, subset=cohort=='all')
> ##D # see predab.resample for cluster and subset
> ## End(Not run)
> 
> 
> 
> cleanEx(); ..nameEx <- "validate.ols"
> 
> ### * validate.ols
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: validate.ols
> ### Title: Validation of an Ordinary Linear Model
> ### Aliases: validate.ols
> ### Keywords: models regression
> 
> ### ** Examples
> 
> set.seed(1)
> x1 <- runif(200)
> x2 <- sample(0:3, 200, TRUE)
> x3 <- rnorm(200)
> distance <- (x1 + x2/3 + rnorm(200))^2
> 
> f <- ols(sqrt(distance) ~ rcs(x1,4) + scored(x2) + x3, x=TRUE, y=TRUE)
> 
> # Validate full model fit (from all observations) but for x1 < .75
> validate(f, B=20, subset=x1 < .75)    # normally B=150
Iteration:  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 
          index.orig training   test optimism index.corrected  n
R-square      0.0939    0.122 0.0592   0.0625          0.0313 20
MSE           0.6047    0.593 0.6278  -0.0348          0.6395 20
Intercept     0.0863    0.065 0.2358  -0.1708          0.2571 20
Slope         0.9016    0.919 0.7651   0.1535          0.7481 20
> 
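> # The bias-corrected Slope above (0.7481) can be read as a shrinkage
> # factor: shrinking predictions toward their mean by this factor
> # should improve calibration on new data.  A hedged sketch of that
> # recalibration step (gamma is copied from the table; this step is
> # not part of the package example and assumes the fit stores fitted
> # values):
> ## Not run: 
> ##D gamma <- 0.7481                  # index.corrected Slope from above
> ##D lp <- fitted(f)                  # predictions from the ols fit
> ##D lp.shrunk <- mean(lp) + gamma*(lp - mean(lp))
> ## End(Not run)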
> # Validate stepwise model with typical (not so good) stopping rule
> validate(f, B=20, bw=TRUE, rule="p", sls=.1, type="individual")

		Backwards Step-down - Original Model

 Deleted Chi-Sq d.f. P      Residual d.f. P      AIC   R2   
 x3      0.99   1    0.3204 0.99     1    0.3204 -1.01 0.128

Approximate Estimates after Deleting Factors

              Coef   S.E.  Wald Z         P
Intercept  0.94530 0.2548  3.7100 0.0002072
x1        -0.65558 1.0361 -0.6327 0.5269109
x1'        3.11974 2.9290  1.0651 0.2868256
x1''      -8.11867 9.5505 -0.8501 0.3952801
x2         0.05955 0.1540  0.3868 0.6989377
x2=2       0.27098 0.2766  0.9797 0.3272444
x2=3       0.36983 0.4068  0.9091 0.3632803

Factors in Final Model

[1] x1 x2

Iteration:  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 

		Factors Retained in Backwards Elimination

 x1 x2 x3
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

Frequencies of Numbers of Factors Retained

 1  2  3 
 3 10  7 

          index.orig training   test optimism index.corrected  n
R-square       0.128    0.150 0.0818   0.0679          0.0602 20
MSE            0.668    0.631 0.7034  -0.0727          0.7407 20
Intercept      0.000    0.000 0.1973  -0.1973          0.1973 20
Slope          1.000    1.000 0.8627   0.1373          0.8627 20
> 
> 
> cleanEx(); ..nameEx <- "validate.tree"
> 
> ### * validate.tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: validate.tree
> ### Title: Dxy and Mean Squared Error by Cross-validating a Tree Sequence
> ### Aliases: validate.tree validate.rpart print.validate.tree
> ###   plot.validate.tree
> ### Keywords: models tree category
> 
> ### ** Examples
> 
> ## Not run: 
> ##D n <- 100
> ##D set.seed(1)
> ##D x1 <- runif(n)
> ##D x2 <- runif(n)
> ##D x3 <- runif(n)
> ##D y <- 1*(x1+x2+rnorm(n) > 1)
> ##D table(y)
> ##D library(rpart)
> ##D f <- rpart(y ~ x1 + x2 + x3, model=TRUE)
> ##D v <- validate(f)
> ##D v    # note the poor validation
> ##D par(mfrow=c(1,2))
> ##D plot(v, legendloc=c(.2,.5))
> ##D par(mfrow=c(1,1))
> ## End(Not run)
> 
> 
> 
> cleanEx(); ..nameEx <- "vif"
> 
> ### * vif
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: vif
> ### Title: Variance Inflation Factors
> ### Aliases: vif
> ### Keywords: models regression
> 
> ### ** Examples
> 
> set.seed(1)
> x1 <- rnorm(100)
> x2 <- x1 + .1*rnorm(100)
> y <- sample(0:1, 100, TRUE)
> f <- lrm(y ~ x1 + x2)
> vif(f)
x1 x2 
89 89 
> 
> 
> 
> cleanEx(); ..nameEx <- "which.influence"
> 
> ### * which.influence
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: which.influence
> ### Title: Which Observations are Influential
> ### Aliases: which.influence show.influence
> ### Keywords: models regression survival
> 
> ### ** Examples
> 
> # print observations in data frame that are influential,
> # separately for each factor in the model
> x1 <- 1:20
> x2 <- abs(x1-10)
> x3 <- factor(rep(0:2, length.out=20))
> y <- c(rep(0:1, 8), 1, 1, 1, 1)
> f <- lrm(y ~ rcs(x1,3) + x2 + x3, x=TRUE, y=TRUE)
> w <- which.influence(f, .55)
> nam <- names(w)
> d <- data.frame(x1, x2, x3, y)
> for(i in 1:length(nam)) {
+   print(paste("Influential observations for effect of ", nam[i]), quote=FALSE)
+   print(d[w[[i]],])
+ }
[1] Influential observations for effect of  Intercept
   x1 x2 x3 y
10 10  0  0 1
[1] Influential observations for effect of  x1
   x1 x2 x3 y
10 10  0  0 1
13 13  3  0 0
[1] Influential observations for effect of  x2
   x1 x2 x3 y
10 10  0  0 1
13 13  3  0 0
[1] Influential observations for effect of  x3
   x1 x2 x3 y
6   6  4  2 1
11 11  1  1 0
> 
> show.influence(w, d)    # better way to show results
   Count  x1 x2 x3
6      1   6  4 *2
10     3 *10 *0  0
11     1  11  1 *1
13     2 *13 *3  0
> 
> 
> 
> ### *