R : Copyright 2005, The R Foundation for Statistical Computing
Version 2.1.1  (2005-06-20), ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for a HTML browser interface to help.
Type 'q()' to quit R.

> ### *
> ###
> attach(NULL, name = "CheckExEnv")
> assign(".CheckExEnv", as.environment(2), pos = length(search())) # base
> ## add some hooks to label plot pages for base and grid graphics
> setHook("plot.new", ".newplot.hook")
> setHook("persp", ".newplot.hook")
> setHook("grid.newpage", ".gridplot.hook")
>
> assign("cleanEx",
+        function(env = .GlobalEnv) {
+            rm(list = ls(envir = env, all.names = TRUE), envir = env)
+            RNGkind("default", "default")
+            set.seed(1)
+            options(warn = 1)
+            delayedAssign("T", stop("T used instead of TRUE"),
+                          assign.env = .CheckExEnv)
+            delayedAssign("F", stop("F used instead of FALSE"),
+                          assign.env = .CheckExEnv)
+            sch <- search()
+            newitems <- sch[! sch %in% .oldSearch]
+            for(item in rev(newitems))
+                eval(substitute(detach(item), list(item=item)))
+            missitems <- .oldSearch[! .oldSearch %in% sch]
+            if(length(missitems))
+                warning("items ", paste(missitems, collapse=", "),
+                        " have been removed from the search path")
+        },
+        env = .CheckExEnv)
> assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now
> assign("ptime", proc.time(), env = .CheckExEnv)
> grDevices::postscript("Hmisc-Examples.ps")
> assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv)
> options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly"))
> options(warn = 1)
> library('Hmisc')
Hmisc library by Frank E Harrell Jr

Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview
to see overall documentation.

NOTE: Hmisc no longer redefines [.factor to drop unused levels when
subsetting.  To get the old behavior of Hmisc type dropUnusedLevels().

Attaching package: 'Hmisc'

	The following object(s) are masked from package:stats :

	 ecdf

>
> assign(".oldSearch", search(), env = .CheckExEnv)
> assign(".oldNS", loadedNamespaces(), env = .CheckExEnv)
> cleanEx(); ..nameEx <- ".in."
>
> ### * .in.
>
> flush(stderr()); flush(stdout())
>
> ### Name: .in.
> ### Title: Find Matching (or Non-Matching) Elements
> ### Aliases: \%nin\%
> ### Keywords: manip character
>
> ### ** Examples
>
> c('a','b','c')
[1] "a" "b" "c"
>
>
>
> cleanEx(); ..nameEx <- "Cs"
>
> ### * Cs
>
> flush(stderr()); flush(stdout())
>
> ### Name: Cs
> ### Title: Character strings from unquoted names
> ### Aliases: Cs
> ### Keywords: character utilities
>
> ### ** Examples
>
> Cs(a,cat,dog)
[1] "a"   "cat" "dog"
> # subset.data.frame <- dataframe[,Cs(age,sex,race,bloodpressure,height)]
>
>
>
> cleanEx(); ..nameEx <- "Lag"
>
> ### * Lag
>
> flush(stderr()); flush(stdout())
>
> ### Name: Lag
> ### Title: Lag a Numeric, Character, or Factor Vector
> ### Aliases: Lag
> ### Keywords: manip
>
> ### ** Examples
>
> Lag(1:5,2)
[1] NA NA  1  2  3
> Lag(letters[1:4],2)
[1] ""  ""  "a" "b"
> Lag(factor(letters[1:4]),2)
[1] ""  ""  "a" "b"
> # Find which observations are the first for a given subject
> id <- c('a','a','b','b','b','c')
> id != Lag(id)
[1]  TRUE FALSE  TRUE FALSE FALSE  TRUE
> !duplicated(id)
[1]  TRUE FALSE  TRUE FALSE FALSE  TRUE
>
>
>
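## Editor's sketch (not from the original Hmisc examples): the Lag()
## idiom above extends to within-subject change scores.  'value' is a
## hypothetical measurement vector.
id    <- c('a','a','b','b','b','c')
value <- c(10, 12, 5, 7, 8, 20)
first <- id != Lag(id)        # first row for each subject
delta <- value - Lag(value)   # change from the previous row
delta[first] <- NA            # previous row belongs to another subject
delta                         # NA 2 NA 2 1 NA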
> cleanEx(); ..nameEx <- "Misc"
>
> ### * Misc
>
> flush(stderr()); flush(stdout())
>
> ### Name: Misc
> ### Title: Miscellaneous Functions
> ### Aliases: confbar james.stein km.quick lm.fit.qr.bare matxv nomiss
> ###   outerText sepUnitsTrans trap.rule trellis.strip.blank under.unix .R.
> ###   .SV4. unPaste whichClosest whichClosePW xless
> ### Keywords: programming utilities
>
> ### ** Examples
>
> trap.rule(1:100,1:100)
[1] 4999.5
>
> unPaste(c('a;b or c','ab;d','qr;s'), ';')
[[1]]
[1] "a"  "ab" "qr"

[[2]]
[1] "b or c" "d"      "s"

>
> sepUnitsTrans(c('3 days','4 months','2 years','7'))
[1]   3.00 121.75 730.50   7.00
attr(,"units")
[1] "day"
>
> set.seed(1)
> whichClosest(1:100, 3:5)
[1] 3 4 5
> whichClosest(1:100, rep(3,20))
 [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
>
> whichClosePW(1:100, rep(3,20))
 [1]  3  3  5  8  2  8  9  6  6  1  2  2  6  4  7  5  6 11  4  7
> whichClosePW(1:100, rep(3,20), f=.05)
 [1] 4 2 3 2 2 3 1 3 4 3 3 3 3 2 4 3 4 2 4 3
> whichClosePW(1:100, rep(3,20), f=1e-10)
 [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
>
>
>
> cleanEx(); ..nameEx <- "Save"
>
> ### * Save
>
> flush(stderr()); flush(stdout())
>
> ### Name: Save
> ### Title: Facilitate Use of save and load to Remote Directories
> ### Aliases: Save Load
> ### Keywords: data file utilities
>
> ### ** Examples
>
> ## Not run:
> ##D d <- data.frame(x=1:3, y=11:13)
> ##D options(LoadPath='../data/rda')
> ##D Save(d)   # creates ../data/rda/d.rda
> ##D Load(d)   # reads  ../data/rda/d.rda
> ## End(Not run)
>
>
>
> cleanEx(); ..nameEx <- "abs.error.pred"
>
> ### * abs.error.pred
>
> flush(stderr()); flush(stdout())
>
> ### Name: abs.error.pred
> ### Title: Indexes of Absolute Prediction Error for Linear Models
> ### Aliases: abs.error.pred print.abs.error.pred
> ### Keywords: robust regression models
>
> ### ** Examples
>
> set.seed(1)   # so can regenerate results
> x1 <- rnorm(100)
> x2 <- rnorm(100)
> y  <- exp(x1+x2+rnorm(100))
> f <- lm(log(y) ~ x1 + poly(x2,3), y=TRUE)
> abs.error.pred(lp=exp(fitted(f)), y=y)

Mean/Median |Differences|

                             Mean    Median
|Yi hat - median(Y hat)| 1.983447 0.8651185
|Yi hat - Yi|            2.184563 0.5436367
|Yi - median(Y)|         2.976277 1.0091661

Ratios of Mean/Median |Differences|

                                              Mean    Median
|Yi hat - median(Y hat)|/|Yi - median(Y)| 0.666419 0.8572607
|Yi hat - Yi|/|Yi - median(Y)|            0.733992 0.5386989
> rm(x1,x2,y,f)
>
>
>
> cleanEx(); ..nameEx <- "all.is.numeric"
>
> ### * all.is.numeric
>
> flush(stderr()); flush(stdout())
>
> ### Name: all.is.numeric
> ### Title: Check if All Elements in Character Vector are Numeric
> ### Aliases: all.is.numeric
> ### Keywords: character
>
> ### ** Examples
>
> all.is.numeric(c('1','1.2','3'))
[1] TRUE
> all.is.numeric(c('1','1.2','3a'))
[1] FALSE
> all.is.numeric(c('1','1.2','3'),'vector')
[1] 1.0 1.2 3.0
> all.is.numeric(c('1','1.2','3a'),'vector')
[1] "1"   "1.2" "3a"
>
>
>
> cleanEx(); ..nameEx <- "approxExtrap"
>
> ### * approxExtrap
>
> flush(stderr()); flush(stdout())
>
> ### Name: approxExtrap
> ### Title: Linear Extrapolation
> ### Aliases: approxExtrap
> ### Keywords: arith dplot
>
> ### ** Examples
>
> approxExtrap(1:3,1:3,xout=c(0,4))
$x
[1] 0 4

$y
[1] 0 4

>
>
>
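## Editor's sketch (not from the original examples): base approx()
## refuses to extrapolate by default, returning NA outside the data
## range; approxExtrap() extends the line through the outermost points.
approx(1:3, 1:3, xout=c(0,4))$y        # NA NA
approxExtrap(1:3, 1:3, xout=c(0,4))$y  # 0 4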
> cleanEx(); ..nameEx <- "aregImpute"
>
> ### * aregImpute
>
> flush(stderr()); flush(stdout())
>
> ### Name: aregImpute
> ### Title: Multiple Imputation using Additive Regression, Bootstrapping,
> ###   and Predictive Mean Matching
> ### Aliases: aregImpute print.aregImpute plot.aregImpute
> ### Keywords: smooth regression multivariate methods models
>
> ### ** Examples
>
> # Multiple imputation and estimation of variances and covariances of
> # regression coefficient estimates accounting for imputation
>
> # Example 1: large sample size, much missing data, no overlap in
> # NAs across variables
> set.seed(3)
> x1 <- factor(sample(c('a','b','c'),1000,TRUE))
> x2 <- (x1=='b') + 3*(x1=='c') + rnorm(1000,0,2)
> x3 <- rnorm(1000)
> y  <- x2 + 1*(x1=='c') + .2*x3 + rnorm(1000,0,2)
> orig.x1 <- x1[1:250]
> orig.x2 <- x2[251:350]
> x1[1:250]   <- NA
> x2[251:350] <- NA
> d <- data.frame(x1,x2,x3,y)
>
> # Use 100 imputations to better check against individual true values
> f <- aregImpute(~y + x1 + x2 + x3, n.impute=100, data=d)
Loading required package: acepack
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
> f

Multiple Imputation using Bootstrap and PMM

aregImpute(formula = ~y + x1 + x2 + x3, data = d, n.impute = 100)

Method: ace

n= 1000 	p= 4 	Imputations: 100

Number of NAs:
  y  x1  x2  x3
  0 250 100   0

Categorical: x1

R-squares for Predicting Non-Missing Values for Each Variable
Using Last Imputations of Predictors
   x1    x2
0.332 0.678
> par(mfrow=c(2,1))
> plot(f)
> modecat <- function(u) {
+  tab <- table(u)
+  as.numeric(names(tab)[tab==max(tab)][1])
+ }
> table(orig.x1,apply(f$imputed$x1, 1, modecat))

orig.x1  1  2  3
      a 53 25 12
      b 31 26 21
      c 12 13 57
> par(mfrow=c(1,1))
> plot(orig.x2, apply(f$imputed$x2, 1, mean))
> fmi <- fit.mult.impute(y ~ x1 + x2 + x3, lm, f,
+                        data=d)
Warning in fit.mult.impute(y ~ x1 + x2 + x3, lm, f, data = d) :
	 Not using a Design fitting function; summary(fit) will use
standard errors, t, P from last imputation only.  Use Varcov(fit) to get
the correct covariance matrix, sqrt(diag(Varcov(fit))) to get s.e.


Variance Inflation Factors Due to Imputation:

(Intercept)         x1b         x1c          x2          x3
       1.26        1.35        1.39        1.14        1.08

Rate of Missing Information:

(Intercept)         x1b         x1c          x2          x3
       0.21        0.26        0.28        0.12        0.08

d.f. for t-distribution for Tests of Single Coefficients:

(Intercept)         x1b         x1c          x2          x3
    2317.72     1468.62     1248.68     6648.66    16315.33
> sqrt(diag(Varcov(fmi)))
(Intercept)         x1b         x1c          x2          x3
 0.11294254  0.16173469  0.18968650  0.03316643  0.06456491
> fcc <- lm(y ~ x1 + x2 + x3)
> summary(fcc)   # SEs are larger than from mult. imputation

Call:
lm(formula = y ~ x1 + x2 + x3)

Residuals:
    Min      1Q  Median      3Q     Max
-6.8047 -1.4500  0.1035  1.4456  6.5216

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.30886    0.14131  -2.186   0.0292 *
x1b          0.20485    0.20329   1.008   0.3140
x1c          1.21049    0.23866   5.072 5.15e-07 ***
x2           1.02848    0.04079  25.214  < 2e-16 ***
x3           0.23251    0.08176   2.844   0.0046 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.06 on 645 degrees of freedom
Multiple R-Squared: 0.6526,	Adjusted R-squared: 0.6504
F-statistic: 302.9 on 4 and 645 DF,  p-value: < 2.2e-16

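## Editor's sketch (not from the original examples): place the two sets
## of standard errors side by side.  Varcov() is the accessor the
## warning above recommends; vcov() is the base-R equivalent for the
## complete-case lm fit, which discards every record containing an NA.
se.mi <- sqrt(diag(Varcov(fmi)))   # corrected for imputation
se.cc <- sqrt(diag(vcov(fcc)))     # complete-case analysis
round(cbind(mult.imp=se.mi, complete.case=se.cc), 4)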
>
> # Example 2: Very discriminating imputation models,
> # x1 and x2 have some NAs on the same rows, smaller n
> set.seed(5)
> x1 <- factor(sample(c('a','b','c'),100,TRUE))
> x2 <- (x1=='b') + 3*(x1=='c') + rnorm(100,0,.4)
> x3 <- rnorm(100)
> y <- x2 + 1*(x1=='c') + .2*x3 + rnorm(100,0,.4)
> orig.x1 <- x1[1:20]
> orig.x2 <- x2[18:23]
> x1[1:20]  <- NA
> x2[18:23] <- NA
> #x2[21:25] <- NA
> d <- data.frame(x1,x2,x3,y)
> n <- naclus(d)
> plot(n); naplot(n)   # Show patterns of NAs
> # 100 imputations to study them; normally use 5 or 10
> f <- aregImpute(~y + x1 + x2 + x3, n.impute=100, defaultLinear=TRUE, data=d)
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
> par(mfrow=c(2,3))
> plot(f, diagnostics=TRUE, maxn=2)
22
> # Note: diagnostics=TRUE makes graphs similar to those made by:
> # r <- range(f$imputed$x2, orig.x2)
> # for(i in 1:6) {   # use 1:2 to mimic maxn=2
> #   plot(1:100, f$imputed$x2[i,], ylim=r,
> #        ylab=paste("Imputations for Obs.",i))
> #   abline(h=orig.x2[i],lty=2)
> # }
>
> table(orig.x1,apply(f$imputed$x1, 1, modecat))

orig.x1 1 2 3
      a 8 0 0
      b 2 3 0
      c 0 0 7
> par(mfrow=c(1,1))
> plot(orig.x2, apply(f$imputed$x2, 1, mean))
>
> fmi <- fit.mult.impute(y ~ x1 + x2, lm, f,
+                        data=d)
Warning in fit.mult.impute(y ~ x1 + x2, lm, f, data = d) :
	 Not using a Design fitting function; summary(fit) will use
standard errors, t, P from last imputation only.  Use Varcov(fit) to get
the correct covariance matrix, sqrt(diag(Varcov(fit))) to get s.e.


Variance Inflation Factors Due to Imputation:

(Intercept)         x1b         x1c          x2
       1.06        1.14        1.12        1.10

Rate of Missing Information:

(Intercept)         x1b         x1c          x2
       0.06        0.12        0.11        0.09

d.f. for t-distribution for Tests of Single Coefficients:

(Intercept)         x1b         x1c          x2
   30313.48     6446.40     8718.67    12375.88
> sqrt(diag(Varcov(fmi)))
(Intercept)         x1b         x1c          x2
 0.07555815  0.16834057  0.37539993  0.11840490
> fcc <- lm(y ~ x1 + x2)
> summary(fcc)   # SEs are larger than from mult. imputation

Call:
lm(formula = y ~ x1 + x2)

Residuals:
     Min       1Q   Median       3Q      Max
-1.01525 -0.30693 -0.02795  0.31447  0.98023

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.001573   0.090484  -0.017    0.986
x1b         -0.116310   0.188707  -0.616    0.540
x1c          0.557255   0.418248   1.332    0.187
x2           1.136660   0.130960   8.679 7.44e-13 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4613 on 73 degrees of freedom
Multiple R-Squared: 0.9417,	Adjusted R-squared: 0.9393
F-statistic: 393.2 on 3 and 73 DF,  p-value: < 2.2e-16

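## Editor's sketch (not from the original examples): the modecat()
## helper defined in Example 1 returns the first modal category;
## which.max() gives an equivalent one-liner.
modecat2 <- function(u) as.numeric(names(which.max(table(u))))
table(orig.x1, apply(f$imputed$x1, 1, modecat2))   # same table as above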
>
> # Study relationship between smoothing parameter for weighting function
> # (multiplier of mean absolute distance of transformed predicted
> # values, used in tricube weighting function) and standard deviation
> # of multiple imputations.  SDs are computed from average variances
> # across subjects.  match="closest" same as match="weighted" with
> # small value of fweighted.
> # This example also shows problems with predicted mean
> # matching almost always giving the same imputed values when there is
> # only one predictor (regression coefficients change over multiple
> # imputations but predicted values are virtually 1-1 functions of each
> # other)
>
> set.seed(23)
> x <- runif(200)
> y <- x + runif(200, -.05, .05)
> r <- resid(lsfit(x,y))
> rmse <- sqrt(sum(r^2)/(200-2))   # sqrt of residual MSE
>
> y[1:20] <- NA
> d <- data.frame(x,y)
> f <- aregImpute(~ x + y, n.impute=10, match='closest', data=d)
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
> # As an aside here is how to create a completed dataset for imputation
> # number 3 as fit.mult.impute would do automatically.  In this degenerate
> # case changing 3 to 1-2,4-10 will not alter the results.
> completed <- d
> imputed <- impute.transcan(f, imputation=3, data=d, list.out=TRUE,
+                            pr=FALSE, check=FALSE)
> completed[names(imputed)] <- imputed
> completed
              x            y
1   0.576603660  0.614083867
2   0.223072856  0.244435900
3   0.331896589  0.355249503
4   0.710724552  0.675477281
5   0.819448956  0.828212875
6   0.423720561  0.430384882
7   0.963544549  0.985824002
8   0.978130409  0.949088005
9   0.840521879  0.793234407
10  0.996611237  1.030297530
11  0.865959035  0.854976946
12  0.701421698  0.724431317
13  0.390473063  0.407829640
14  0.314769702  0.298663630
15  0.845947289  0.862478397
16  0.139278505  0.092535506
17  0.518120574  0.503713940
18  0.593550828  0.606602406
19  0.942461697  0.915459991
20  0.628019636  0.636226690
21  0.586397816  0.582927987
22  0.274740962  0.297672227
23  0.147656962  0.188676702
24  0.801410323  0.762234175
25  0.386409824  0.431897320
26  0.820450694  0.828212875
27  0.684937274  0.709136579
28  0.883389266  0.857999147
29  0.111920776  0.126209338
30  0.778834007  0.757872403
31  0.621010916  0.608900942
32  0.274151464  0.295342440
33  0.301469650  0.321126873
34  0.349043808  0.322076852
35  0.329131072  0.297507272
36  0.809503932  0.837448109
37  0.274482146  0.282238285
38  0.939094889  0.906092141
39  0.902267053  0.896015924
40  0.177053087  0.136052631
41  0.798251278  0.759169578
42  0.082798084  0.104000046
43  0.285691755  0.304049820
44  0.661315982  0.664954104
45  0.784774271  0.783219526
46  0.776576873  0.763818475
47  0.121821508  0.113077018
48  0.372319733  0.378427168
49  0.297771757  0.294985491
50  0.752706347  0.713377656
51  0.499577621  0.462479927
52  0.817803537  0.815089077
53  0.304128739  0.352660531
54  0.824903802  0.862868631
55  0.893041672  0.894176899
56  0.019715635 -0.005362821
57  0.254439932  0.293923185
58  0.951945325  0.956585835
59  0.566167130  0.576283573
60  0.897581946  0.881421648
61  0.290061444  0.281850628
62  0.401326056  0.439458938
63  0.539105884  0.564832214
64  0.364780191  0.403813290
65  0.125759875  0.151696734
66  0.394052590  0.434963086
67  0.105663054  0.072751435
68  0.954039707  0.979653804
69  0.419914707  0.427518889
70  0.992387490  1.030297530
71  0.570442292  0.614083867
72  0.783489245  0.761345930
73  0.904503524  0.935024409
74  0.508582721  0.463503668
75  0.089646446  0.123578649
76  0.240984488  0.195176180
77  0.805403850  0.756070757
78  0.223567783  0.244435900
79  0.592070762  0.606602406
80  0.929119053  0.892531613
81  0.851353750  0.867547269
82  0.100520349  0.144351160
83  0.908092219  0.940668079
84  0.837831206  0.799869315
85  0.827914989  0.838095527
86  0.100423461  0.116199780
87  0.162158453  0.189971496
88  0.940109388  0.915459991
89  0.112017832  0.093027680
90  0.262136229  0.213534806
91  0.051634719  0.074034012
92  0.188658662  0.226836499
93  0.536776609  0.527419140
94  0.568323547  0.573837652
95  0.491177148  0.478349009
96  0.330573055  0.367013375
97  0.696191472  0.665870260
98  0.529709426  0.487193232
99  0.409219319  0.452456648
100 0.134911791  0.170629707
101 0.896438480  0.936244695
102 0.490295642  0.526777931
103 0.405771617  0.358623043
104 0.141742043  0.092535506
105 0.022868485 -0.006460331
106 0.767151259  0.751735119
107 0.758372191  0.754843059
108 0.771456471  0.731139583
109 0.888922727  0.930469784
110 0.323340758  0.275927507
111 0.197405802  0.195648269
112 0.563112823  0.573294373
113 0.286510551  0.253949951
114 0.529166437  0.556768047
115 0.609199925  0.580387599
116 0.690374682  0.694505035
117 0.834155862  0.784952252
118 0.362381048  0.378180868
119 0.829611428  0.791900851
120 0.627461779  0.636226690
121 0.345700932  0.320148759
122 0.837500706  0.838609380
123 0.499124135  0.507737458
124 0.289511152  0.270072867
125 0.356837630  0.320339231
126 0.697845101  0.715260385
127 0.515911654  0.503713940
128 0.342042893  0.323246523
129 0.966432105  0.992783078
130 0.067530551  0.021497022
131 0.134513722  0.112133784
132 0.299903116  0.330007097
133 0.165903823  0.180650843
134 0.633417881  0.678764132
135 0.091216465  0.065878727
136 0.086471487  0.129504903
137 0.772470206  0.738699421
138 0.865551194  0.854976946
139 0.112492566  0.096936396
140 0.267977125  0.280900385
141 0.766082917  0.778843033
142 0.461314346  0.486985223
143 0.333625180  0.295756718
144 0.549243461  0.576806521
145 0.447264475  0.463587647
146 0.026166226  0.019120439
147 0.848845804  0.862478397
148 0.322629672  0.365093910
149 0.502781160  0.516439485
150 0.153306864  0.175781188
151 0.667641181  0.703650560
152 0.650329886  0.635760862
153 0.901017198  0.903510358
154 0.841869273  0.793234407
155 0.007797369 -0.019722492
156 0.698544700  0.727672794
157 0.084930372  0.069309632
158 0.738808032  0.723635388
159 0.315827052  0.298663630
160 0.838312863  0.838293426
161 0.504393668  0.470275900
162 0.426998171  0.430384882
163 0.735241703  0.743247940
164 0.352213026  0.353404709
165 0.975926195  0.984943640
166 0.858195372  0.851668930
167 0.677800911  0.686023409
168 0.853173245  0.809621200
169 0.700561504  0.678142475
170 0.404468291  0.376668011
171 0.020173278  0.040183645
172 0.598623241  0.640481825
173 0.290873878  0.292620222
174 0.722049999  0.729298126
175 0.281201833  0.322926910
176 0.037379872  0.086038396
177 0.701300299  0.724431317
178 0.272952283  0.244242792
179 0.444427293  0.471935587
180 0.751319571  0.728704793
181 0.938940207  0.896740411
182 0.910099400  0.877257970
183 0.879749776  0.868990143
184 0.953670035  0.934100299
185 0.738894403  0.777725926
186 0.736350211  0.723689340
187 0.480651564  0.455805035
188 0.156731554  0.205557866
189 0.332019051  0.291771770
190 0.347893705  0.328075861
191 0.388851710  0.407829640
192 0.332005674  0.355249503
193 0.909833391  0.923537606
194 0.189029912  0.224147156
195 0.852367321  0.885531305
196 0.962318775  0.985824002
197 0.773136182  0.785892289
198 0.979254503  0.949088005
199 0.718367246  0.675477281
200 0.662174258  0.662126423
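## Editor's sketch (not from the original examples): repeat the
## completed-dataset construction above for every imputation, which is
## essentially what fit.mult.impute() does internally.
all.completed <- lapply(1:10, function(i) {
  comp <- d
  imp  <- impute.transcan(f, imputation=i, data=d, list.out=TRUE,
                          pr=FALSE, check=FALSE)
  comp[names(imp)] <- imp
  comp
})
sapply(all.completed, function(z) mean(z$y))   # varies a little across imputations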
> sd <- sqrt(mean(apply(f$imputed$y, 1, var)))
>
> ss <- c(0, .01, .02, seq(.05, 1, length=20))
> sds <- ss; sds[1] <- sd
>
> for(i in 2:length(ss)) {
+   f <- aregImpute(~ x + y, n.impute=10, fweighted=ss[i])
+   sds[i] <- sqrt(mean(apply(f$imputed$y, 1, var)))
+ }
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
Iteration:1 2 3 4 5 6 7 8 9 10 11 12 13
>
> plot(ss, sds, xlab='Smoothing Parameter', ylab='SD of Imputed Values',
+      type='b')
> abline(v=.2, lty=2)    # default value of fweighted
> abline(h=rmse, lty=2)  # root MSE of residuals from linear regression
>
> ## Not run:
> ##D # Do a similar experiment for the Titanic dataset
> ##D getHdata(titanic3)
> ##D h <- lm(age ~ sex + pclass + survived, data=titanic3)
> ##D rmse <- summary(h)$sigma
> ##D set.seed(21)
> ##D f <- aregImpute(~ age + sex + pclass + survived, n.impute=10,
> ##D                 data=titanic3, match='closest')
> ##D sd <- sqrt(mean(apply(f$imputed$age, 1, var)))
> ##D
> ##D ss <- c(0, .01, .02, seq(.05, 1, length=20))
> ##D sds <- ss; sds[1] <- sd
> ##D
> ##D for(i in 2:length(ss)) {
> ##D   f <- aregImpute(~ age + sex + pclass + survived, data=titanic3,
> ##D                   n.impute=10, fweighted=ss[i])
> ##D   sds[i] <- sqrt(mean(apply(f$imputed$age, 1, var)))
> ##D }
> ##D
> ##D plot(ss, sds, xlab='Smoothing Parameter', ylab='SD of Imputed Values',
> ##D      type='b')
> ##D abline(v=.2, lty=2)    # default value of fweighted
> ##D abline(h=rmse, lty=2)  # root MSE of residuals from linear regression
> ## End(Not run)
>
>
>
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "binconf"
>
> ### * binconf
>
> flush(stderr()); flush(stdout())
>
> ### Name: binconf
> ### Title: Confidence Intervals for Binomial Probabilities
> ### Aliases: binconf
> ### Keywords: category htest
>
> ### ** Examples
>
> binconf(0:10,10,include.x=TRUE,include.n=TRUE)
  X  N PointEst       Lower     Upper
  0 10      0.0 0.000000000 0.2775328
  1 10      0.1 0.005129329 0.4041500
  2 10      0.2 0.056682151 0.5098375
  3 10      0.3 0.107791267 0.6032219
  4 10      0.4 0.168180330 0.6873262
  5 10      0.5 0.236593091 0.7634069
  6 10      0.6 0.312673770 0.8318197
  7 10      0.7 0.396778147 0.8922087
  8 10      0.8 0.490162472 0.9433178
  9 10      0.9 0.595849973 0.9948707
 10 10      1.0 0.722467200 1.0000000
> binconf(46,50,method="all")
           PointEst     Lower     Upper
Exact          0.92 0.8076572 0.9777720
Wilson         0.92 0.8116175 0.9684505
Asymptotic     0.92 0.8448027 0.9951973
>
>
>
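## Editor's sketch (not from the original examples): the Wilson row
## above can be reproduced by hand from its closed form.
x <- 46; n <- 50; p <- x/n; z <- qnorm(.975)
(p + z^2/(2*n) + c(-1,1)*z*sqrt(p*(1-p)/n + z^2/(4*n^2))) / (1 + z^2/n)
# 0.8116175 0.9684505, matching binconf's Wilson interval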
> cleanEx(); ..nameEx <- "bootkm"
>
> ### * bootkm
>
> flush(stderr()); flush(stdout())
>
> ### Name: bootkm
> ### Title: Bootstrap Kaplan-Meier Estimates
> ### Aliases: bootkm
> ### Keywords: survival nonparametric
>
> ### ** Examples
>
> # Compute 0.95 nonparametric confidence interval for the difference in
> # median survival time between females and males (two-sample problem)
> set.seed(1)
> library(survival)
Loading required package: splines

Attaching package: 'survival'

	The following object(s) are masked from package:Hmisc :

	 untangle.specials

> S <- Surv(runif(200))   # no censoring
> sex <- c(rep('female',100),rep('male',100))
> med.female <- bootkm(S[sex=='female',], B=100)   # normally B=500
10 20 30 40 50 60 70 80 90 100
> med.male <- bootkm(S[sex=='male',], B=100)
10 20 30 40 50 60 70 80 90 100
> describe(med.female-med.male)
med.female - med.male
      n missing  unique     Mean      .05      .10      .25      .50
    100       0      90 -0.01788 -0.11908 -0.09729 -0.07220 -0.02804
    .75      .90      .95
0.02392  0.08634  0.10556

lowest : -0.1913 -0.1648 -0.1202 -0.1195 -0.1195
highest:  0.1258  0.1448  0.1524  0.1728  0.1960
> quantile(med.female-med.male, c(.025,.975), na.rm=TRUE)
      2.5%      97.5%
-0.1198982  0.1487534
> # na.rm needed because some bootstrap estimates of median survival
> # time may be missing when a bootstrap sample did not include the
> # longer survival times
>
>
>
> cleanEx(); ..nameEx <- "bpower"
>
> ### * bpower
>
> flush(stderr()); flush(stdout())
>
> ### Name: bpower
> ### Title: Power and Sample Size for Two-Sample Binomial Test
> ### Aliases: bpower bsamsize ballocation bpower.sim
> ### Keywords: htest category
>
> ### ** Examples
>
> bpower(.1, odds.ratio=.9, n=1000, alpha=c(.01,.05))
    Power1     Power2
0.01953539 0.07780432
> bpower.sim(.1, odds.ratio=.9, n=1000)
   Power    Lower    Upper
0.079200 0.073907 0.084493
> bsamsize(.1, .05, power=.95)
     n1      n2
718.238 718.238
> ballocation(.1, .5, n=100)
   fraction.group1.min.var.diff   fraction.group1.min.var.ratio
                      0.3750000                       0.7500000
fraction.group1.min.var.logodds       fraction.group1.max.power
                      0.6250000                       0.4745255
>
> # Plot power vs. n for various odds ratios (base prob.=.1)
> n <- seq(10, 1000, by=10)
> OR <- seq(.2,.9,by=.1)
> plot(0, 0, xlim=range(n), ylim=c(0,1), xlab="n", ylab="Power", type="n")
> for(or in OR) {
+   lines(n, bpower(.1, odds.ratio=or, n=n))
+   text(350, bpower(.1, odds.ratio=or, n=350)-.02, format(or))
+ }
>
> # Another way to plot the same curves, but letting labcurve do the
> # work, including labeling each curve at points of maximum separation
> pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)),
+               n=n)
> names(pow) <- format(OR)
> labcurve(pow, pl=TRUE, xlab='n', ylab='Power')
>
> # Contour graph for various probabilities of outcome in the control
> # group, fixing the odds ratio at .8 ([p2/(1-p2) / p1/(1-p1)] = .8)
> # n is varied also
> p1 <- seq(.01,.99,by=.01)
> n <- seq(100,5000,by=250)
> pow <- outer(p1, n, function(p1,n) bpower(p1, n=n, odds.ratio=.8))
> # This forms a length(p1)*length(n) matrix of power estimates
> contour(p1, n, pow)
>
>
>
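## Editor's sketch (not from the original examples): when odds.ratio is
## given, bpower works with the second-group probability implied by p1
## and the odds ratio, which can be recovered directly.
p1 <- .1; or <- .9
odds2 <- or * p1/(1-p1)   # odds in group 2
odds2/(1 + odds2)         # implied p2 = 0.0909...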
> cleanEx(); ..nameEx <- "bpplot"
>
> ### * bpplot
>
> flush(stderr()); flush(stdout())
>
> ### Name: bpplot
> ### Title: Box-percentile plots
> ### Aliases: bpplot
> ### Keywords: nonparametric hplot
>
> ### ** Examples
>
> set.seed(1)
> x1 <- rnorm(500)
> x2 <- runif(500, -2, 2)
> x3 <- abs(rnorm(500))-2
> bpplot(x1, x2, x3)
> g <- sample(1:2, 500, replace=TRUE)
> bpplot(split(x2, g), name=c('Group 1','Group 2'))
> rm(x1,x2,x3,g)
>
>
>
> cleanEx(); ..nameEx <- "bystats"
>
> ### * bystats
>
> flush(stderr()); flush(stdout())
>
> ### Name: bystats
> ### Title: Statistics by Categories
> ### Aliases: bystats print.bystats latex.bystats bystats2 print.bystats2
> ###   latex.bystats2
> ### Keywords: category
>
> ### ** Examples
>
> ## Not run:
> ##D bystats(sex==2, county, city)
> ##D bystats(death, race)
> ##D bystats(death, cut2(age,g=5), race)
> ##D bystats(cholesterol, cut2(age,g=4), sex, fun=median)
> ##D bystats(cholesterol, sex, fun=quantile)
> ##D bystats(cholesterol, sex, fun=function(x)c(Mean=mean(x),Median=median(x)))
> ##D latex(bystats(death,race,nmiss=FALSE,subset=sex=="female"), digits=2)
> ##D f <- function(y) c(Hazard=sum(y[,2])/sum(y[,1]))
> ##D # f() gets the hazard estimate for right-censored data from exponential dist.
> ##D bystats(cbind(d.time, death), race, sex, fun=f)
> ##D bystats(cbind(pressure, cholesterol), age.decile,
> ##D         fun=function(y) c(Median.pressure   =median(y[,1]),
> ##D                           Median.cholesterol=median(y[,2])))
> ##D y <- cbind(pressure, cholesterol)
> ##D bystats(y, age.decile,
> ##D         fun=function(y) apply(y, 2, median))   # same result as last one
> ##D bystats(y, age.decile, fun=function(y) apply(y, 2, quantile, c(.25,.75)))
> ##D # The last one computes separately the 0.25 and 0.75 quantiles of 2 vars.
> ##D latex(bystats2(death, race, sex, fun=table))
> ## End(Not run)
>
>
>
> cleanEx(); ..nameEx <- "ciapower"
>
> ### * ciapower
>
> flush(stderr()); flush(stdout())
>
> ### Name: ciapower
> ### Title: Power of Interaction Test for Exponential Survival
> ### Aliases: ciapower
> ### Keywords: survival htest
>
> ### ** Examples
>
> # Find the power of a race x treatment test.  25% of patients will
> # be non-white and the total sample size is 14000.
> # Accrual is for 1.5 years and minimum follow-up is 5y.
> # Reduction in 5-year mortality is 15% for whites, 0% or -5% for
> # non-whites.  5-year mortality for control subjects is assumed to
> # be 0.18 for whites, 0.23 for non-whites.
> n <- 14000
> for(nonwhite.reduction in c(0,-5)) {
+   cat("\n\n\n% Reduction in 5-year mortality for non-whites:",
+       nonwhite.reduction, "\n\n")
+   pow <- ciapower(5, .75*n, .25*n, .18, .23, 15, nonwhite.reduction,
+                   1.5, 5)
+   cat("\n\nPower:",format(pow),"\n")
+ }



% Reduction in 5-year mortality for non-whites: 0

Accrual duration: 1.5 y  Minimum follow-up: 5 y

Sample size
Stratum 1: 10500
Stratum 2:  3500

Alpha= 0.05

5-year Mortalities
          Control Intervention
Stratum 1    0.18        0.153
Stratum 2    0.23        0.230

Hazard Rates
             Control Intervention
Stratum 1 0.03969019   0.03321092
Stratum 2 0.05227295   0.05227295

Probabilities of an Event During Study
            Control Intervention
Stratum 1 0.2039322    0.1737512
Stratum 2 0.2594139    0.2594139

Expected Number of Events
          Control Intervention
Stratum 1  1070.6        912.2
Stratum 2   454.0        454.0

Ratio of hazard ratios: 0.8367538
Standard deviation of log ratio of ratios: 0.08022351

Power: 0.6032173



% Reduction in 5-year mortality for non-whites: -5

Accrual duration: 1.5 y  Minimum follow-up: 5 y

Sample size
Stratum 1: 10500
Stratum 2:  3500

Alpha= 0.05

5-year Mortalities
          Control Intervention
Stratum 1    0.18       0.1530
Stratum 2    0.23       0.2415

Hazard Rates
             Control Intervention
Stratum 1 0.03969019   0.03321092
Stratum 2 0.05227295   0.05528250

Probabilities of an Event During Study
            Control Intervention
Stratum 1 0.2039322    0.1737512
Stratum 2 0.2594139    0.2720973

Expected Number of Events
          Control Intervention
Stratum 1  1070.6        912.2
Stratum 2   454.0        476.2

Ratio of hazard ratios: 0.7912015
Standard deviation of log ratio of ratios: 0.07958098

Power: 0.8371925
>
>
>
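## Editor's sketch (not from the original examples): under the
## exponential model the hazard rates printed above follow directly
## from the 5-year mortalities, lambda = -log(1 - mort5)/5.
-log(1 - c(.18, .153, .23, .2415))/5
# 0.03969019 0.03321092 0.05227295 0.05528250, as in the tables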
> cleanEx(); ..nameEx <- "contents"
>
> ### * contents
>
> flush(stderr()); flush(stdout())
>
> ### Name: contents
> ### Title: Metadata for a Data Frame
> ### Aliases: contents contents.data.frame print.contents.data.frame
> ###   html.contents.data.frame contents.list print.contents.list
> ### Keywords: data interface
>
> ### ** Examples
>
> set.seed(1)
> dfr <- data.frame(x=rnorm(400),y=sample(c('male','female'),400,TRUE))
> contents(dfr)

Data frame:dfr	400 observations and 2 variables    Maximum # NAs:0

  Levels Storage
x        double
y      2 integer

+--------+-----------+
|Variable|Levels     |
+--------+-----------+
|   y    |female,male|
+--------+-----------+
> k <- contents(dfr)
> print(k, sort='names', prlevels=FALSE)

Data frame:dfr	400 observations and 2 variables    Maximum # NAs:0

  Levels Storage
x        double
y      2 integer
> ## Not run:
> ##D html(k)
> ##D html(contents(dfr))   # same result
> ##D w <- html(k, file='my.html')   # create my.html, don't display
> ## End(Not run)
>
>
>
> cleanEx(); ..nameEx <- "cpower"
>
> ### * cpower
>
> flush(stderr()); flush(stdout())
>
> ### Name: cpower
> ### Title: Power of Cox/log-rank Two-Sample Test
> ### Aliases: cpower
> ### Keywords: htest survival
>
> ### ** Examples
>
> # In this example, 4 plots are drawn on one page, one plot for each
> # combination of noncompliance percentage.  Within a plot, the
> # 5-year mortality % in the control group is on the x-axis, and
> # separate curves are drawn for several % reductions in mortality
> # with the intervention.  The accrual period is 1.5y, with all
> # patients followed at least 5y and some 6.5y.
>
> par(mfrow=c(2,2),oma=c(3,0,3,0))
>
> morts <- seq(10,25,length=50)
> red <- c(10,15,20,25)
>
> for(noncomp in c(0,10,15,-1)) {
+   if(noncomp>=0) nc.i <- nc.c <- noncomp else {nc.i <- 25; nc.c <- 15}
+   z <- paste("Drop-in ",nc.c,"%, Non-adherence ",nc.i,"%",sep="")
+   plot(0,0,xlim=range(morts),ylim=c(0,1),
+        xlab="5-year Mortality in Control Patients (%)",
+        ylab="Power",type="n")
+   title(z)
+   cat(z,"\n")
+   lty <- 0
+   for(r in red) {
+     lty <- lty+1
+     power <- morts
+     i <- 0
+     for(m in morts) {
+       i <- i+1
+       power[i] <- cpower(5, 14000, m/100, r, 1.5, 5, nc.c, nc.i, pr=FALSE)
+     }
+     lines(morts, power, lty=lty)
+   }
+   if(noncomp==0)legend(18,.55,rev(paste(red,"% reduction",sep="")),
+                        lty=4:1,bty="n")
+ }
Drop-in 0%, Non-adherence 0%
Drop-in 10%, Non-adherence 10%
Drop-in 15%, Non-adherence 15%
Drop-in 15%, Non-adherence 25%
> mtitle("Power vs Non-Adherence for Main Comparison",
+        ll="alpha=.05, 2-tailed, Total N=14000",cex.l=.8)
> #
> # Point sample size requirement vs. mortality reduction
> # Root finder (uniroot()) assumes needed sample size is between
> # 1000 and 40000
> #
> nc.i <- 25; nc.c <- 15; mort <- .18
> red <- seq(10,25,by=.25)
> samsiz <- red
>
> i <- 0
> for(r in red) {
+   i <- i+1
+   samsiz[i] <- uniroot(function(x) cpower(5, x, mort, r, 1.5, 5,
+                                           nc.c, nc.i, pr=FALSE) - .8,
+                        c(1000,40000))$root
+ }
>
> samsiz <- samsiz/1000
> par(mfrow=c(1,1))
> plot(red, samsiz, xlab='% Reduction in 5-Year Mortality',
+      ylab='Total Sample Size (Thousands)', type='n')
> lines(red, samsiz, lwd=2)
> title('Sample Size for Power=0.80\nDrop-in 15%, Non-adherence 25%')
> title(sub='alpha=0.05, 2-tailed', adj=0)
>
>
>
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "cut2"
>
> ### * cut2
>
> flush(stderr()); flush(stdout())
>
> ### Name: cut2
> ### Title: Cut a Numeric Variable into Intervals
> ### Aliases: cut2
> ### Keywords: category nonparametric
>
> ### ** Examples
>
> set.seed(1)
> x <- runif(1000, 0, 100)
> z <- cut2(x, c(10,20,30))
> table(z)
z
[ 0.131,10.000) [10.000,20.000) [20.000,30.000) [30.000,99.993]
             96             104              93             707
> table(cut2(x, g=10))   # quantile groups

[ 0.131,10.5) [10.505,20.2) [20.168,31.2) [31.204,39.8) [39.784,48.4)
          100           100           100           100           100
[48.435,59.6) [59.645,70.7) [70.666,79.7) [79.731,91.0) [91.037,100.0]
          100           100           100           100            100
> table(cut2(x, m=50))   # group x into intervals with at least 50 obs.

[ 0.131, 5.52) [ 5.516,10.51) [10.505,15.48) [15.483,20.17) [20.168,25.82)
            50             50             50             50             50
[25.817,31.20) [31.204,35.32) [35.320,39.78) [39.784,44.15) [44.146,48.43)
            50             50             50             50             50
[48.435,52.78) [52.778,59.64) [59.645,65.09) [65.087,70.67) [70.666,74.76)
            50             50             50             50             50
[74.764,79.73) [79.731,85.51) [85.508,91.04) [91.037,95.37) [95.373,99.99]
            50             50             50             50             50
>
>
>
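## Editor's sketch (not from the original examples): the g=10 cut
## points above are the sample deciles, so quantile() should roughly
## reproduce the interval endpoints.
round(quantile(x, seq(.1, .9, by=.1)), 1)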
> cleanEx(); ..nameEx <- "data.frame.create.modify.check"
>
> ### * data.frame.create.modify.check
>
> flush(stderr()); flush(stdout())
>
> ### Name: data.frame.create.modify.check
> ### Title: Tips for Creating, Modifying, and Checking Data Frames
> ### Aliases: data.frame.create.modify.check
> ### Keywords: data manip programming interface htest
>
> ### ** Examples
>
> ## Not run:
> ##D # First, we do steps that create or manipulate the data
> ##D # frame in its entirety.  These are done with .Data
> ##D # in search position one (the S-Plus default at the
> ##D # start of the session).
> ##D #
> ##D # -----------------------------------------------------------------------
> ##D # Step 1: Create initial draft of data frame
> ##D #
> ##D # We usually begin by importing a dataset from
> ##D # another application.  ASCII files may be imported
> ##D # using the scan and read.table functions.  SAS
> ##D # datasets may be imported using the Hmisc sas.get
> ##D # function (which will carry more attributes from
> ##D # SAS than using File ... Import) from the GUI
> ##D # menus.  But for most applications (especially
> ##D # Excel), File ... Import will suffice.  If using
> ##D # the GUI, it is often best to provide variable
> ##D # names during the import process, using the Options
> ##D # tab, rather than renaming all fields later.  Of
> ##D # course, if the data to be imported already have
> ##D # field names (e.g., in Excel), let S-Plus use those
> ##D # automatically.  If using S-Plus 4.x on Windows/NT,
> ##D # you can use a command to execute File ... Import,
> ##D # e.g.:
> ##D
> ##D import.data(FileName = "/windows/temp/fev.asc",
> ##D             FileType = "ASCII", DataFrame = "FEV")
> ##D
> ##D # Here we name the new data frame FEV rather than
> ##D # fev, because we wanted to distinguish a variable
> ##D # in the data frame named fev from the data frame
> ##D # name.  For S-Plus 6.x the command will look
> ##D # instead like the following:
> ##D
> ##D FEV <- importData("/tmp/fev.asc")
> ##D
> ##D
> ##D
> ##D # -----------------------------------------------------------------------
> ##D # Step 2: Clean up data frame / make it be more
> ##D # efficiently stored
> ##D #
> ##D # Unless using sas.get to import your dataset
> ##D # (sas.get already stores data efficiently), it is
> ##D # usually a good idea to run the data frame through
> ##D # the Hmisc cleanup.import function to change
> ##D # numeric variables that are always whole numbers to
> ##D # be stored as integers, the remaining numerics to
> ##D # single precision, strange values from Excel to
> ##D # NAs, and character variables that always contain
> ##D # legal numeric values to numeric variables.
> ##D # cleanup.import typically halves the size of the
> ##D # data frame.  If you do not specify any parameters
> ##D # to cleanup.import, the function assumes that no
> ##D # numeric variable needs more than 7 significant
> ##D # digits of precision, so all non-integer-valued
> ##D # variables will be converted to single precision.
> ##D
> ##D FEV <- cleanup.import(FEV)
> ##D
> ##D
> ##D
> ##D # -----------------------------------------------------------------------
> ##D # Step 3: Make global changes to the data frame
> ##D #
> ##D # A data frame has attributes that are "external" to
> ##D # its variables.  There are the vector of its
> ##D # variable names ("names" attribute), the
> ##D # observation identifiers ("row.names"), and the
> ##D # "class" (whose value is "data.frame").  The
> ##D # "names" attribute is the one most commonly in need
> ##D # of modification.  If we had wanted to change all
> ##D # the variable names to lower case, we could have
> ##D # specified lowernames=TRUE to the cleanup.import
> ##D # invocation above, or type
> ##D
> ##D names(FEV) <- casefold(names(FEV))
> ##D
> ##D # The upData function can also be used to change
> ##D # variable names in two ways (see below).
> ##D # To change names in a non-systematic way we use
> ##D # other options.  Under Windows/NT the most
> ##D # straightforward approach is to change the names
> ##D # interactively.  Click on the data frame in the
> ##D # left panel of the Object Browser, then in the
> ##D # right pane click twice (slowly) on a variable.
> ##D # Use the left arrow and other keys to edit the
> ##D # name.  Click outside that name field to commit the
> ##D # change.  You can also rename columns while in a
> ##D # Data Sheet.  To instead use programming commands
> ##D # to change names, use something like:
> ##D
> ##D names(FEV)[6] <- 'smoke'   # assumes you know the positions!
> ##D names(FEV)[names(FEV)=='smoking'] <- 'smoke'
> ##D names(FEV) <- edit(names(FEV))
> ##D
> ##D # The last example is useful if you are changing
> ##D # many names.  But none of the interactive
> ##D # approaches such as edit() are handy if you will be
> ##D # re-importing the dataset after it is updated in
> ##D # its original application.  This problem can be
> ##D # addressed by saving the new names in a permanent
> ##D # vector in .Data:
> ##D
> ##D new.names <- names(FEV)
> ##D
> ##D # Then if the data are re-imported, you can type
> ##D
> ##D names(FEV) <- new.names
> ##D
> ##D # to rename the variables.
> ##D
> ##D
> ##D
> ##D # -----------------------------------------------------------------------
> ##D # Step 4: Delete unneeded variables
> ##D #
> ##D # To delete some of the variables, you can
> ##D # right-click on variable names in the Object
> ##D # Browser's right pane, then select Delete.  You can
> ##D # also set variables to have NULL values, which
> ##D # causes the system to delete them.  We don't need
> ##D # to delete any variables from FEV but suppose we
> ##D # did need to delete some from mydframe.
> ##D
> ##D mydframe$x1 <- NULL
> ##D mydframe$x2 <- NULL
> ##D mydframe[c('age','sex')] <- NULL   # delete 2 variables
> ##D mydframe[Cs(age,sex)]    <- NULL   # same thing
> ##D
> ##D # The last example uses the Hmisc short-cut quoting
> ##D # function Cs.  See also the drop parameter to upData.
> ##D
> ##D
> ##D
> ##D # -----------------------------------------------------------------------
> ##D # Step 5: Make changes to individual variables
> ##D # within the data frame
> ##D #
> ##D # After importing data, the resulting variables are
> ##D # seldom self-documenting, so we commonly need to
> ##D # change or enhance attributes of individual
> ##D # variables within the data frame.
> ##D #
> ##D # If you are only changing a few variables, it is
> ##D # efficient to change them directly without
> ##D # attaching the entire data frame.
> ##D
> ##D FEV$sex <- factor(FEV$sex, 0:1, c('female','male'))
> ##D FEV$smoke <- factor(FEV$smoke, 0:1,
> ##D                     c('non-current smoker','current smoker'))
> ##D units(FEV$age) <- 'years'
> ##D units(FEV$fev) <- 'L'
> ##D label(FEV$fev) <- 'Forced Expiratory Volume'
> ##D units(FEV$height) <- 'inches'
> ##D
> ##D # When changing more than one or two variables it is
> ##D # more convenient to change the data frame using the
> ##D # Hmisc upData function.
> ##D
> ##D FEV2 <- upData(FEV,
> ##D                rename=c(smoking='smoke'),
> ##D                # omit if renamed above
> ##D                drop=c('var1','var2'),
> ##D                levels=list(sex  =list(female=0,male=1),
> ##D                            smoke=list('non-current smoker'=0,
> ##D                                       'current smoker'=1)),
> ##D                units=list(age='years', fev='L', height='inches'),
> ##D                labels=list(fev='Forced Expiratory Volume'))
> ##D
> ##D # An alternative to levels=list(...) is for example
> ##D # upData(FEV, sex=factor(sex,0:1,c('female','male'))).
> ##D #
> ##D # Note that we saved the changed data frame into a
> ##D # new data frame FEV2.  If we were confident of the
> ##D # correctness of our changes we could have stored
> ##D # the new data frame on top of the old one, under
> ##D # the original name FEV.
> ##D
> ##D # -----------------------------------------------------------------------
> ##D # Step 6: Check the data frame
> ##D #
> ##D # The Hmisc describe function is perhaps the first
> ##D # function that should be used on the new data
> ##D # frame.  It provides documentation of all the
> ##D # variables; the frequency tabulation, counts of
> ##D # NAs, and 5 largest and smallest values are
> ##D # helpful in detecting data errors.  Typing
> ##D # describe(FEV) will write the results to the
> ##D # current output window.  To put the results in a
> ##D # new window that can persist, even upon exiting
> ##D # S-Plus, we use the page function.  The describe
> ##D # output can be minimized to an icon but kept ready
> ##D # for guiding later steps of the analysis.
> ##D
> ##D page(describe(FEV2), multi=TRUE)
> ##D # multi=TRUE allows that window to persist while
> ##D # control is returned to other windows
> ##D
> ##D # The new data frame is OK.  Store it on top of the
> ##D # old FEV and then use the graphical user interface
> ##D # to delete FEV2 (click on it and hit the Delete
> ##D # key) or type rm(FEV2) after the next statement.
> ##D
> ##D FEV <- FEV2
> ##D
> ##D # Next, we can use a variety of other functions to
> ##D # check and describe all of the variables.  As we
> ##D # are analyzing all or almost all of the variables,
> ##D # this is best done without attaching the data
> ##D # frame.  Note that plot.data.frame plots inverted
> ##D # CDFs for continuous variables and dot plots
> ##D # showing frequency distributions of categorical
> ##D # ones.
> ##D
> ##D summary(FEV)
> ##D # basic summary function (summary.data.frame)
> ##D
> ##D plot(FEV)   # plot.data.frame
> ##D datadensity(FEV)
> ##D # rug plots and freq. bar charts for all var.
> ##D
> ##D hist.data.frame(FEV)
> ##D # for variables having > 2 values
> ##D
> ##D by(FEV, FEV$smoke, summary)
> ##D # use basic summary function with stratification
> ##D
> ##D
> ##D
> ##D # -----------------------------------------------------------------------
> ##D # Step 7: Do detailed analyses involving individual
> ##D # variables
> ##D #
> ##D # Analyses based on the formula language can use
> ##D # data= so attaching the data frame may not be
> ##D # required.  This saves memory.
> ##D # Here we use the
> ##D # Hmisc summary.formula function to compute 5
> ##D # statistics on height, stratified separately by age
> ##D # quartile and by sex.
> ##D
> ##D options(width=80)
> ##D summary(height ~ age + sex, data=FEV,
> ##D         fun=function(y)c(smean.sd(y),
> ##D                          smedian.hilow(y,conf.int=.5)))
> ##D # This computes mean height, S.D., median, outer quartiles
> ##D
> ##D fit <- lm(height ~ age*sex, data=FEV)
> ##D summary(fit)
> ##D
> ##D # For this analysis we could also have attached the
> ##D # data frame in search position 2.  For other
> ##D # analyses, it is mandatory to attach the data frame
> ##D # unless FEV$ prefixes each variable name.
> ##D # Important: DO NOT USE attach(FEV, 1) or
> ##D # attach(FEV, pos=1, ...) if you are only analyzing
> ##D # and not changing the variables, unless you really
> ##D # need to avoid conflicts with variables in search
> ##D # position 1 that have the same names as the
> ##D # variables in FEV.  Attaching into search position
> ##D # 1 will cause S-Plus to be more of a memory hog.
> ##D
> ##D attach(FEV)
> ##D # Use e.g. attach(FEV[,Cs(age,sex)]) if you only
> ##D # want to analyze a small subset of the variables
> ##D # Use e.g. attach(FEV[FEV$sex=='male',]) to
> ##D # analyze a subset of the observations
> ##D
> ##D summary(height ~ age + sex,
> ##D         fun=function(y)c(smean.sd(y),
> ##D                          smedian.hilow(y,conf.int=.5)))
> ##D fit <- lm(height ~ age*sex)
> ##D
> ##D # Run generic summary function on height and fev,
> ##D # stratified by sex
> ##D by(data.frame(height,fev), sex, summary)
> ##D
> ##D # Cross-classify into 4 sex x smoke groups
> ##D by(FEV, list(sex,smoke), summary)
> ##D
> ##D # Plot 5 quantiles
> ##D s <- summary(fev ~ age + sex + height,
> ##D              fun=function(y)quantile(y,c(.1,.25,.5,.75,.9)))
> ##D
> ##D plot(s, which=1:5, pch=c(1,2,15,2,1),   #pch=c('=','[','o',']','='),
> ##D      main='A Discovery', xlab='FEV')
> ##D
> ##D # Use the nonparametric bootstrap to compute a
> ##D # 0.95 confidence interval for the population mean fev
> ##D smean.cl.boot(fev)   # in Hmisc
> ##D
> ##D # Use the Statistics ... Compare Samples ... One Sample
> ##D # keys to get a normal-theory-based C.I.  Then do it
> ##D # more manually.  The following method assumes that
> ##D # there are no NAs in fev
> ##D
> ##D sd <- sqrt(var(fev))
> ##D xbar <- mean(fev)
> ##D xbar
> ##D sd
> ##D n <- length(fev)
> ##D qt(.975,n-1)
> ##D # prints 0.975 critical value of t dist. with n-1 d.f.
> ##D
> ##D xbar + c(-1,1)*sd/sqrt(n)*qt(.975,n-1)
> ##D # prints confidence limits
> ##D
> ##D # Fit a linear model
> ##D # fit <- lm(fev ~ other variables ...)
> ##D
> ##D detach()
> ##D
> ##D # The last command is only needed if you want to
> ##D # start operating on another data frame and you want
> ##D # to get FEV out of the way.
> ##D
> ##D
> ##D
> ##D # -----------------------------------------------------------------------
> ##D # Creating data frames from scratch
> ##D #
> ##D # Data frames can be created from within S-Plus.  To
> ##D # create a small data frame containing ordinary
> ##D # data, you can use something like
> ##D
> ##D dframe <- data.frame(age=c(10,20,30),
> ##D                      sex=c('male','female','male'))
> ##D
> ##D # You can also create a data frame using the Data
> ##D # Sheet.  Create an empty data frame with the
> ##D # correct variable names and types, then edit in the
> ##D # data.
> ##D
> ##D dd <- data.frame(age=numeric(0),sex=character(0))
> ##D
> ##D # The sex variable will be stored as a factor, and
> ##D # levels will be automatically added to it as you
> ##D # define new values for sex in the Data Sheet's sex
> ##D # column.
> ##D #
> ##D # When the data frame you need to create is defined
> ##D # by systematically varying variables (e.g., all
> ##D # possible combinations of values of each variable),
> ##D # the expand.grid function is useful for quickly
> ##D # creating the data.  Then you can add
> ##D # non-systematically-varying variables to the object
> ##D # created by expand.grid, using programming
> ##D # statements or editing the Data Sheet.  This
> ##D # process is useful for creating a data frame
> ##D # representing all the values in a printed table.
> ##D # In what follows we create a data frame
> ##D # representing the combinations of values from an 8
> ##D # x 2 x 2 x 2 (event x method x sex x what) table,
> ##D # and add a non-systematic variable percent to the
> ##D # data.
> ##D
> ##D jcetable <- expand.grid(
> ##D  event=c('Wheezing at any time',
> ##D          'Wheezing and breathless',
> ##D          'Wheezing without a cold',
> ##D          'Waking with tightness in the chest',
> ##D          'Waking with shortness of breath',
> ##D          'Waking with an attack of cough',
> ##D          'Attack of asthma',
> ##D          'Use of medication'),
> ##D  method=c('Mail','Telephone'),
> ##D  sex=c('Male','Female'),
> ##D  what=c('Sensitivity','Specificity'))
> ##D
> ##D jcetable$percent <-
> ##D   c(756,618,706,422,356,578,289,333,
> ##D     576,421,789,273,273,212,212,212,
> ##D     613,763,713,403,377,541,290,226,
> ##D     613,684,632,290,387,613,258,129,
> ##D     656,597,438,780,732,679,938,919,
> ##D     714,600,494,877,850,703,963,987,
> ##D     755,420,480,794,779,647,956,941,
> ##D     766,423,500,833,833,604,955,986) / 10
> ##D
> ##D # In jcetable, event varies most rapidly, then
> ##D # method, then sex, and what.
> ## End(Not run)
>
>
>
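## Editor's sketch (not from the original examples): expand.grid varies
## its first argument fastest, which is why event cycles most rapidly
## in jcetable above.
expand.grid(a=1:2, b=c('x','y'))
#   a b
# 1 1 x
# 2 2 x
# 3 1 y
# 4 2 y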
> cleanEx(); ..nameEx <- "dataRep"
>
> ### * dataRep
>
> flush(stderr()); flush(stdout())
>
> ### Name: dataRep
> ### Title: Representativeness of Observations in a Data Set
> ### Aliases: dataRep print.dataRep predict.dataRep print.predict.dataRep
> ###   roundN [.roundN
> ### Keywords: datasets category cluster manip models
>
> ### ** Examples
>
> set.seed(13)
> num.symptoms <- sample(1:4, 1000,TRUE)
> sex <- factor(sample(c('female','male'), 1000,TRUE))
> x <- runif(1000)
> x[1] <- NA
> table(num.symptoms, sex, .25*round(x/.25))
, ,  = 0

            sex
num.symptoms female male
           1     19   22
           2      9   11
           3     19   16
           4     19   12

, ,  = 0.25

            sex
num.symptoms female male
           1     30   31
           2     24   35
           3     37   35
           4     27   29

, ,  = 0.5

            sex
num.symptoms female male
           1     30   30
           2     31   30
           3     26   28
           4     44   28

, ,  = 0.75

            sex
num.symptoms female male
           1     24   36
           2     31   28
           3     31   31
           4     32   23

, ,  = 1

            sex
num.symptoms female male
           1     19   17
           2     17   16
           3     19   14
           4     26   13

>
> d <- dataRep(~ num.symptoms + sex + roundN(x,.25))
> print(d, long=TRUE)

Data Representativeness    n=999

dataRep(formula = ~num.symptoms + sex + roundN(x, 0.25))

Frequencies of Missing Values Due to Each Variable
   num.symptoms             sex roundN(x, 0.25)
              0               0               1


Specifications for Matching

             Type                  Parameters
num.symptoms exact numeric
sex          exact categorical     female male
x            round to nearest 0.25

Unique Combinations of Descriptor Variables

   num.symptoms    sex    x Frequency
1             1 female 0.00        19
2             2 female 0.00         9
3             3 female 0.00        19
4             4 female 0.00        19
5             1   male 0.00        22
6             2   male 0.00        11
7             3   male 0.00        16
8             4   male 0.00        12
9             1 female 0.25        30
10            2 female 0.25        24
11            3 female 0.25        37
12            4 female 0.25        27
13            1   male 0.25        31
14            2   male 0.25        35
15            3   male 0.25        35
16            4   male 0.25        29
17            1 female 0.50        30
18            2 female 0.50        31
19            3 female 0.50        26
20            4 female 0.50        44
21            1   male 0.50        30
22            2   male 0.50        30
23            3   male 0.50        28
24            4   male 0.50        28
25            1 female 0.75        24
26            2 female 0.75        31
27            3 female 0.75        31
28            4 female 0.75        32
29            1   male 0.75        36
30            2   male 0.75        28
31            3   male 0.75        31
32            4   male 0.75        23
33            1 female 1.00        19
34            2 female 1.00        17
35            3 female 1.00        19
36            4 female 1.00        26
37            1   male 1.00        17
38            2   male 1.00        16
39            3   male 1.00        14
40            4   male 1.00        13
>
> predict(d, data.frame(num.symptoms=1:3, sex=c('male','male','female'),
+                       x=c(.03,.5,1.5)))
Warning in approx(pctl[[nam[j]]], seq(0, 100, by = 1), xout = newdata[[nam[j]]],  :
	 collapsing to unique 'x' values

Descriptor Variable Values, Estimated Frequency in Original Dataset,
and Minimum Marginal Frequency for any Variable

  num.symptoms    sex    x Frequency Marginal.Freq
1            1   male 0.03        22           127
2            2   male 0.50        30           232
3            3 female 1.50         0             0

Percentiles for Continuous Descriptor Variables,
Percentage in Category for Categorical Variables

  num.symptoms sex   x
1           12  49   3
2           37  49  50
3           62  51 100
>
>
>
> cleanEx(); ..nameEx <- "deff"
>
> ### * deff
>
> flush(stderr()); flush(stdout())
>
> ### Name: deff
> ### Title: Design Effect and Intra-cluster Correlation
> ### Aliases: deff
> ### Keywords: htest
>
> ### ** Examples
>
> set.seed(1)
> blood.pressure <- rnorm(1000, 120, 15)
> clinic <- sample(letters, 1000, replace=TRUE)
> deff(blood.pressure, clinic)
           n     clusters          rho         deff
1.000000e+03 2.600000e+01 1.439064e-03 1.054975e+00
>
>
>
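## Editor's sketch (not from the original examples): the design effect
## is approximately 1 + (m - 1)*rho with m the average cluster size
## (1000/26 here); the small discrepancy from the printed deff comes
## from unequal cluster sizes.
1 + (1000/26 - 1) * 1.439064e-03   # ~1.0539, close to deff = 1.054975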
> cleanEx(); ..nameEx <- "describe"
>
> ### * describe
>
> flush(stderr()); flush(stdout())
>
> ### Name: describe
> ### Title: Concise Statistical Description of a Vector, Matrix, Data Frame,
> ###   or Formula
> ### Aliases: describe describe.default describe.vector describe.matrix
> ###   describe.formula describe.data.frame print.describe
> ###   print.describe.single [.describe latex.describe latex.describe.single
> ### Keywords: interface nonparametric category distribution robust models
>
> ### ** Examples
>
> set.seed(1)
> describe(runif(200),dig=2)   # single variable, continuous
runif(200)
      n missing  unique    Mean     .05     .10     .25     .50     .75     .90
    200       0     200    0.52   0.084   0.142   0.294   0.505   0.742   0.881
    .95
  0.927

lowest : 0.013 0.013 0.023 0.036 0.059, highest: 0.98 0.99 0.99 0.99 0.99
> # get quantiles .05,.10,...
>
> dfr <- data.frame(x=rnorm(400),y=sample(c('male','female'),400,TRUE))
> describe(dfr)
dfr

 2  Variables      400  Observations
---------------------------------------------------------------------------
x
      n missing  unique     Mean      .05      .10      .25      .50
    400       0     400 0.001083 -1.64182 -1.32308 -0.64280 -0.05831
     .75      .90      .95
 0.67754  1.35234  1.72182

lowest : -3.008 -2.889 -2.592 -2.403 -2.343
highest:  2.351  2.447  2.498  2.649  3.810
---------------------------------------------------------------------------
y
      n missing  unique
    400       0       2

female (187, 47%), male (213, 53%)
---------------------------------------------------------------------------
>
> ## Not run:
> ##D d <- sas.get(".","mydata",special.miss=TRUE,recode=TRUE)
> ##D describe(d)   # describe entire data frame
> ##D attach(d, 1)
> ##D describe(relig)   # Has special missing values .D .F .M .R .T
> ##D # attr(relig,"label") is "Religious preference"
> ##D
> ##D # relig : Religious preference  Format:relig
> ##D #     n missing  D  F  M  R  T unique
> ##D #  4038     263 45 33  7  2  1      8
> ##D #
> ##D # 0:none (251, 6%), 1:Jewish (372, 9%), 2:Catholic (1230, 30%)
> ##D # 3:Jehovah's Witnes (25, 1%), 4:Christ Scientist (7, 0%)
> ##D # 5:Seventh Day Adv (17, 0%), 6:Protestant (2025, 50%), 7:other (111, 3%)
> ##D
> ##D # Method for describing part of a data frame:
> ##D describe(death.time ~ age*sex + rcs(blood.pressure))
> ##D describe(~ age+sex)
> ##D describe(~ age+sex, weights=freqs)   # weighted analysis
> ##D
> ##D fit <- lrm(y ~ age*sex + log(height))
> ##D describe(formula(fit))
> ##D describe(y ~ age*sex, na.action=na.delete)
> ##D # report on number deleted for each variable
> ##D options(na.detail.response=TRUE)
> ##D # keep missings separately for each x, report on dist of y by x=NA
> ##D describe(y ~ age*sex)
> ##D options(na.fun.response="quantile")
> ##D describe(y ~ age*sex)   # same but use quantiles of y by x=NA
> ##D
> ##D d <- describe(my.data.frame)
> ##D d$age                # print description for just age
> ##D d[c('age','sex')]    # print description for two variables
> ##D d[sort(names(d))]    # print in alphabetic order by var. names
> ##D d2 <- d[20:30]       # keep variables 20-30
> ##D page(d2)             # pop-up window for these variables
> ##D
> ##D # Test date/time formats and suppression of times when they don't vary
> ##D library(chron)
> ##D d <- data.frame(a=chron((1:20)+.1),
> ##D                 b=chron((1:20)+(1:20)/100),
> ##D                 d=ISOdatetime(year=rep(2003,20),month=rep(4,20),day=1:20,
> ##D                               hour=rep(11,20),min=rep(17,20),sec=rep(11,20)),
> ##D                 f=ISOdatetime(year=rep(2003,20),month=rep(4,20),day=1:20,
> ##D                               hour=1:20,min=1:20,sec=1:20),
> ##D                 g=ISOdate(year=2001:2020,month=rep(3,20),day=1:20))
> ##D describe(d)
> ##D
> ## End(Not run)
>
>
>
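## Editor's sketch (not from the original examples): as the Not run
## block notes, describe() objects subscript like lists, so a single
## variable's report can be printed on its own.
d <- describe(dfr)
d$x   # reprints just the x block shown above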
> cleanEx(); ..nameEx <- "dropUnusedLevels"
>
> ### * dropUnusedLevels
>
> flush(stderr()); flush(stdout())
>
> ### Name: dropUnusedLevels
> ### Title: Create Temporary Factor Subsetting Function
> ### Aliases: dropUnusedLevels
> ### Keywords: category utilities programming methods
>
> ### ** Examples
>
> ## Not run:
> ##D x <- factor(c('a','b','c'))
> ##D x[1:2]   # keeps level c
> ##D dropUnusedLevels()
> ##D x[1:2]   # no c any more
> ## End(Not run)
>
>
> cleanEx(); ..nameEx <- "ecdf"
>
> ### * ecdf
>
> flush(stderr()); flush(stdout())
>
> ### Name: ecdf
> ### Title: Empirical Cumulative Distribution Plot
> ### Aliases: ecdf ecdf.default ecdf.data.frame ecdf.formula panel.ecdf
> ###   prepanel.ecdf
> ### Keywords: nonparametric hplot methods distribution
>
> ### ** Examples
>
> set.seed(1)
> ch <- rnorm(1000, 200, 40)
> ecdf(ch, xlab="Serum Cholesterol")
> scat1d(ch)   # add rug plot
> histSpike(ch, add=TRUE, frac=.15)   # add spike histogram
> # Better: add a data density display automatically:
> ecdf(ch, datadensity='density')
>
> label(ch) <- "Serum Cholesterol"
> ecdf(ch)
> other.ch <- rnorm(500, 220, 20)
> ecdf(other.ch,add=TRUE,lty=2)
>
> sex <- factor(sample(c('female','male'), 1000, TRUE))
> ecdf(ch, q=c(.25,.5,.75))   # show quartiles
> ecdf(ch, group=sex,
+      label.curves=list(method='arrow'))
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
>
> # Example showing how to draw multiple ECDFs from paired data
> pre.test <- rnorm(100,50,10)
> post.test <- rnorm(100,55,10)
> x <- c(pre.test, post.test)
> g <- c(rep('Pre',length(pre.test)),rep('Post',length(post.test)))
> ecdf(x, group=g, xlab='Test Results', label.curves=list(keys=1:2))
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
Warning in approx(x, y, xout = xx, method = if (type == "l") "linear" else "constant",  :
	 collapsing to unique 'x' values
Warning in approx(x, y, xout = xx, method = if (type == "l") "linear" else "constant",  :
	 collapsing to unique 'x' values
> # keys=1:2 causes symbols to be drawn periodically on top of curves
>
> # Draw a matrix of ECDFs for a data frame
> m <- data.frame(pre.test, post.test,
+                 sex=sample(c('male','female'),100,TRUE))
> ecdf(m, group=m$sex, datadensity='rug')
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1,  :
	 collapsing to unique 'x' values
"left") 0 else 1, : collapsing to unique 'x' values > > freqs <- sample(1:10, 1000, TRUE) > ecdf(ch, weights=freqs) # weighted estimates > > # Trellis/Lattice examples: > > region <- factor(sample(c('Europe','USA','Australia'),100,TRUE)) > year <- factor(sample(2001:2002,1000,TRUE)) > ecdf(~ch | region*year, groups=sex) Loading required package: grid Loading required package: lattice Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values > Key() # draw a key for sex at the default location > # Key(locator(1)) # user-specified positioning of key > age <- rnorm(1000, 50, 10) > ecdf(~ch | equal.count(age), groups=sex) # use overlapping shingles Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], 
xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx, yy[s], xout = xs, f = if (step.type == "left") 0 else 1, : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values Warning in approx(xx[s], yy[s], xout = c(xt[i] - w/2, xt[i] + w/2), rule = 2) : collapsing to unique 'x' values > ecdf(~ch | sex, datadensity='hist', side=3) # add spike histogram at top > > > > cleanEx(); ..nameEx <- "eip" > > ### * eip > > flush(stderr()); flush(stdout()) > > ### Name: eip > ### Title: Edit In Place > ### Aliases: eip > ### Keywords: utilities > > ### ** Examples > > ## Not run: > ##D eip(summary.formula > ## End(Not run) # make temporary bug fix in central area > > > > cleanEx(); ..nameEx <- "errbar" > > ### * errbar > > flush(stderr()); flush(stdout()) > > ### Name: errbar > ### Title: Plot Error Bars > ### Aliases: errbar > ### Keywords: hplot > > ### ** Examples > > set.seed(1) > x <- 1:10 > y <- x + rnorm(10) > delta <- runif(10) > errbar( x, y, y + delta, y - delta ) > > # Show bootstrap nonparametric CLs for 3 group means and for > # pairwise differences on same graph > group <- sample(c('a','b','d'), 200, TRUE) > y <- runif(200) + .25*(group=='b') + .5*(group=='d') > cla <- smean.cl.boot(y[group=='a'],B=100,reps=TRUE) # usually B=1000 > a <- attr(cla,'reps') > clb <- smean.cl.boot(y[group=='b'],B=100,reps=TRUE) > b <- attr(clb,'reps') > cld <- 
smean.cl.boot(y[group=='d'],B=100,reps=TRUE) > d <- attr(cld,'reps') > a.b <- quantile(a-b,c(.025,.975)) > a.d <- quantile(a-d,c(.025,.975)) > b.d <- quantile(b-d,c(.025,.975)) > errbar(c('a','b','d','a - b','a - d','b - d'), + c(cla[1],clb[1],cld[1],cla[1]-clb[1],cla[1]-cld[1],clb[1]-cld[1]), + c(cla[3],clb[3],cld[3],a.b[2],a.d[2],b.d[2]), + c(cla[2],clb[2],cld[2],a.b[1],a.d[1],b.d[1]), + Type=c(1,1,1,2,2,2)) > > > rm(x,y,delta,group,a,b,d,a.b,a.d,b.d,cla,clb,cld) > > > > cleanEx(); ..nameEx <- "event.chart" > > ### * event.chart > > flush(stderr()); flush(stdout()) > > ### Name: event.chart > ### Title: Flexible Event Chart for Time-to-Event Data > ### Aliases: event.chart event.convert > ### Keywords: hplot survival > > ### ** Examples > > # The sample data set is an augmented CDC AIDS dataset (ASCII) > # which is used in the examples in the help file. This dataset is > # described in Kalbfleisch and Lawless (JASA, 1989). > # Here, we have included only children 4 years old and younger. > # We have also added a new field, dethdate, which > # represents a fictitious death date for each patient. There was > # no recording of death date on the original dataset. > # > # All dates are julian with julian=0 being > # January 1, 1960, and julian=14000 being 14000 days beyond > # January 1, 1960 (i.e., May 1, 1998). > > cdcaids <- data.frame( + age=c(4,2,1,1,2,2,2,4,2,1,1,3,2,1,3,2,1,2,4,2,2,1,4,2,4,1,4,2,1,1,3,3,1,3), + infedate=c( + 7274,7727,7949,8037,7765,8096,8186,7520,8522,8609,8524,8213,8455,8739, + 8034,8646,8886,8549,8068,8682,8612,9007,8461,8888,8096,9192,9107,9001, + 9344,9155,8800,8519,9282,8673), + diagdate=c( + 8100,8158,8251,8343,8463,8489,8554,8644,8713,8733,8854,8855,8863,8983, + 9035,9037,9132,9164,9186,9221,9224,9252,9274,9404,9405,9433,9434,9470, + 9470,9472,9489,9500,9585,9649), + diffdate=c( + 826,431,302,306,698,393,368,1124,191,124,330,642,408,244,1001,391,246, + 615,1118,539,612,245,813,516,1309,241,327,469,126,317,689,981,303,976), + dethdate=c( + 8434,8304,NA,8414,8715,NA,8667,9142,8731,8750,8963,9120,9005,9028,9445, + 9180,9189,9406,9711,9453,9465,9289,9640,9608,10010,9488,9523,9633,9667, + 9547,9755,NA,9686,10084), + censdate=c( + NA,NA,8321,NA,NA,8519,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA, + NA,NA,NA,NA,NA,NA,NA,NA,NA,10095,NA,NA)) > > cdcaids <- upData(cdcaids, + labels=c(age ='Age, y', infedate='Date of blood transfusion', + diagdate='Date of AIDS diagnosis', + diffdate='Incubation period (days from HIV to AIDS)', + dethdate='Fictitious date of death', + censdate='Fictitious censoring date')) Input object size: 3796 bytes; 6 variables New object size: 4692 bytes; 6 variables > > # Note that the style options listed with these > # examples are best suited for output to a postscript file (i.e., using > # the postscript function with horizontal=TRUE) as opposed to a graphical > # window (e.g., motif). 
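> ## Not run: 
> ##D # A quick sanity check of the julian origin described above (an
> ##D # editorial sketch, not part of the original run): base R Date
> ##D # arithmetic reproduces the two reference dates from the
> ##D # 1960-01-01 origin.
> ##D as.Date('1960-01-01') + 0       # 1960-01-01, i.e., julian=0
> ##D as.Date('1960-01-01') + 14000   # 1998-05-01, i.e., julian=14000
> ## End(Not run)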
> > # To produce simple calendar event chart (with internal legend): > # postscript('example1.ps', horizontal=TRUE) > event.chart(cdcaids, + subset.c=c('infedate','diagdate','dethdate','censdate'), + x.lab = 'observation dates', + y.lab='patients (sorted by AIDS diagnosis date)', + titl='AIDS data calendar event chart 1', + point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), + legend.plot=TRUE, legend.location='i', legend.cex=1.0, + legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), + legend.point.at = list(c(7210, 8100), c(35, 27)), legend.bty='o') > > # To produce simple interval event chart (with internal legend): > # postscript('example2.ps', horizontal=TRUE) > event.chart(cdcaids, + subset.c=c('infedate','diagdate','dethdate','censdate'), + x.lab = 'time since transfusion (in days)', + y.lab='patients (sorted by AIDS diagnosis date)', + titl='AIDS data interval event chart 1', + point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), + legend.plot=TRUE, legend.location='i', legend.cex=1.0, + legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), + x.reference='infedate', x.julian=TRUE, + legend.bty='o', legend.point.at = list(c(1400, 1950), c(7, -1))) > > # To produce more complicated interval chart which is > # referenced by infection date, and sorted by age and incubation period: > # postscript('example3.ps', horizontal=TRUE) > event.chart(cdcaids, + subset.c=c('infedate','diagdate','dethdate','censdate'), + x.lab = 'time since diagnosis of AIDS (in days)', + y.lab='patients (sorted by age and incubation length)', + titl='AIDS data interval event chart 2 (sorted by age, incubation)', + point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), + legend.plot=TRUE, legend.location='i',legend.cex=1.0, + legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), + x.reference='diagdate', x.julian=TRUE, sort.by=c('age','diffdate'), + line.by='age', line.lty=c(1,3,2,4), line.lwd=rep(1,4), line.col=rep(1,4), + legend.bty='o', legend.point.at = list(c(-1350, -800), c(7, -1)), + legend.line.at = list(c(-1350, -800), c(16, 8)), + legend.line.text=c('age = 1', ' = 2', ' = 3', ' = 4')) > > # To produce the Goldman chart: > # postscript('example4.ps', horizontal=TRUE) > event.chart(cdcaids, + subset.c=c('infedate','diagdate','dethdate','censdate'), + x.lab = 'time since transfusion (in days)', y.lab='dates of observation', + titl='AIDS data Goldman event chart 1', + y.var = c('infedate'), y.var.type='d', now.line=TRUE, y.jitter=FALSE, + point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), mgp = c(3.1,1.6,0), + legend.plot=TRUE, legend.location='i',legend.cex=1.0, + legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), + x.reference='infedate', x.julian=TRUE, + legend.bty='o', legend.point.at = list(c(1500, 2800), c(9300, 10000))) > > # To convert coded time-to-event data, then, draw an event chart: > surv.time <- c(5,6,3,1,2) > cens.ind <- c(1,0,1,1,0) > surv.data <- cbind(surv.time,cens.ind) > event.data <- event.convert(surv.data) > event.chart(cbind(rep(0,5),event.data),x.julian=TRUE,x.reference=1) > > > > cleanEx(); ..nameEx <- "event.history" > > ### * event.history > > flush(stderr()); flush(stdout()) > > ### Name: event.history > ### Title: Produces event.history graph for survival data > ### Aliases: event.history > ### Keywords: survival > > ### ** Examples > > # Code to produce event history graphs for SIM paper > # > # before generating plots, some pre-processing needs to be performed, > # in order to get dataset in proper form for 
event.history function; > # need to create one line per subject and sort by time under observation, > # with those experiencing event coming before those tied with censoring time; > if(.R.) { # get access to heart data frame + require('survival') + data(heart) + } Loading required package: survival Loading required package: splines Attaching package: 'survival' The following object(s) are masked from package:Hmisc : untangle.specials > > # creation of event.history version of heart dataset (call heart.one): > > heart.one <- matrix(nrow=length(unique(heart$id)), ncol=8) > for(i in 1:length(unique(heart$id))) + { + if(length(heart$id[heart$id==i]) == 1) + heart.one[i,] <- as.numeric(unlist(heart[heart$id==i, ])) + else if(length(heart$id[heart$id==i]) == 2) + heart.one[i,] <- as.numeric(unlist(heart[heart$id==i,][2,])) + } > > heart.one[,3][heart.one[,3] == 0] <- 2 ## converting censored events to 2, from 0 > if(is.factor(heart$transplant)) + heart.one[,7] <- heart.one[,7] - 1 > ## getting back to correct transplantation coding > heart.one <- as.data.frame(heart.one[order(unlist(heart.one[,2]), unlist(heart.one[,3])),]) > names(heart.one) <- names(heart) > # back to usual censoring indicator: > heart.one[,3][heart.one[,3] == 2] <- 0 > # note: transplant says 0 (for no transplants) or 1 (for one transplant) > # and event = 1 is death, while event = 0 is censored > > # plot single Kaplan-Meier curve from heart data, first creating survival object > heart.surv <- survfit(Surv(heart.one$stop, heart.one$event), conf.int = FALSE) > > # figure 3: traditional Kaplan-Meier curve > # postscript('ehgfig3.ps', horiz=TRUE) > # omi <- par(omi=c(0,1.25,0.5,1.25)) > plot(heart.surv, ylab='estimated survival probability', + xlab='observation time (in days)') > title('Figure 3: Kaplan-Meier curve for Stanford data', cex=0.8) > # dev.off() > > ## now, draw event history graph for Stanford heart data; use as Figure 4 > > # postscript('ehgfig4.ps', horiz=TRUE, colors = seq(0, 1, len=20)) > # par(omi=c(0,1.25,0.5,1.25)) > event.history(heart.one, + survtime.col=heart.one[,2], surv.col=heart.one[,3], + covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]), + cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]), + num.colors=2, colors=c(6,10), + x.lab = 'time under observation (in days)', + title='Figure 4: Event history graph for\nStanford data', + cens.mark.right =TRUE, cens.mark = '-', + cens.mark.ahead = 30.0, cens.mark.cex = 0.85) > # dev.off() > > > # now, draw age-stratified event history graph for Stanford heart data; > # use as Figure 5 > > # two plots, stratified by age status > # postscript('c:\temp\ehgfig5.ps', horiz=TRUE, colors = seq(0, 1, len=20)) > # par(omi=c(0,1.25,0.5,1.25)) > par(mfrow=c(1,2)) > > event.history(data=heart.one, subset.rows = (heart.one[,4] < 0), + survtime.col=heart.one[,2], surv.col=heart.one[,3], + covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]), + cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]), + num.colors=2, colors=c(6,10), + x.lab = 'time under observation\n(in days)', + title = 'Figure 5a:\nStanford data\n(age < 48)', + cens.mark.right =TRUE, cens.mark = '-', + cens.mark.ahead = 40.0, cens.mark.cex = 0.85, + xlim=c(0,1900)) > > event.history(data=heart.one, subset.rows = (heart.one[,4] >= 0), + survtime.col=heart.one[,2], surv.col=heart.one[,3], + covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]), + cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]), + num.colors=2, colors=c(6,10), + x.lab = 'time under 
observation\n(in days)', + title = 'Figure 5b:\nStanford data\n(age >= 48)', + cens.mark.right =TRUE, cens.mark = '-', + cens.mark.ahead = 40.0, cens.mark.cex = 0.85, + xlim=c(0,1900)) > # dev.off() > # par(omi=omi) > > # we will not show liver cirrhosis data manipulation, as it was > # a bit detailed; however, here is the > # event.history code to produce Figure 7 / Plate 1 > > # Figure 7 / Plate 1 : prothrombin ehg with color > ## Not run: > ##D second.arg <- 1 ### second.arg is for shading > ##D third.arg <- c(rep(1,18),0,1) ### third.arg is for intensity > ##D > ##D # postscript('c:\temp\ehgfig7.ps', horiz=TRUE, > ##D # colors = cbind(seq(0, 1, len = 20), second.arg, third.arg)) > ##D # par(omi=c(0,1.25,0.5,1.25), col=19) > ##D event.history(cirrhos2.eh, subset.rows = NULL, > ##D survtime.col=cirrhos2.eh$time, surv.col=cirrhos2.eh$event, > ##D covtime.cols = as.matrix(cirrhos2.eh[, ((2:18)*2)]), > ##D cov.cols = as.matrix(cirrhos2.eh[, ((2:18)*2) + 1]), > ##D cut.cov = as.numeric(quantile(as.matrix(cirrhos2.eh[, ((2:18)*2) + 1]), > ##D c(0,.2,.4,.6,.8,1), na.rm=TRUE) + c(-1,0,0,0,0,1)), > ##D colors=c(20,4,8,11,14), > ##D x.lab = 'time under observation (in days)', > ##D title='Figure 7: Event history graph for liver cirrhosis data (color)', > ##D cens.mark.right =TRUE, cens.mark = '-', > ##D cens.mark.ahead = 100.0, cens.mark.cex = 0.85) > ##D # dev.off() > ## End(Not run) > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "find.matches" > > ### * find.matches > > flush(stderr()); flush(stdout()) > > ### Name: find.matches > ### Title: Find Close Matches > ### Aliases: find.matches summary.find.matches print.find.matches > ### matchCases > ### Keywords: math multivariate htest > > ### ** Examples > > y <- rbind(c(.1, .2),c(.11, .22), c(.3, .4), c(.31, .41), c(.32, 5)) > x <- rbind(c(.09,.21), c(.29,.39)) > y [,1] [,2] [1,] 0.10 0.20 [2,] 0.11 0.22 [3,] 0.30 0.40 [4,] 0.31 0.41 [5,] 0.32 5.00 > x [,1] [,2] [1,] 0.09 0.21 [2,] 0.29 0.39 > w <- find.matches(x, y, maxmatch=5, tol=c(.05,.05)) > > set.seed(111) # so can replicate results > x <- matrix(runif(500), ncol=2) > y <- matrix(runif(2000), ncol=2) > w <- find.matches(x, y, maxmatch=5, tol=c(.02,.03)) > w$matches[1:5,] Match #1 Match #2 Match #3 Match #4 Match #5 [1,] 999 694 0 0 0 [2,] 0 0 0 0 0 [3,] 235 0 0 0 0 [4,] 964 139 0 0 0 [5,] 906 427 204 0 0 > w$distance[1:5,] Distance #1 Distance #2 Distance #3 Distance #4 Distance #5 [1,] 0.1042884 0.1562084 NA NA NA [2,] NA NA NA NA NA [3,] 0.7272258 NA NA NA NA [4,] 0.2815041 0.7973284 NA NA NA [5,] 0.6135293 0.7162828 0.7189297 NA NA > # Find first x with 3 or more y-matches > num.match <- apply(w$matches, 1, function(x)sum(x > 0)) > j <- ((1:length(num.match))[num.match > 2])[1] > x[j,] [1] 0.3776632 0.6833354 > y[w$matches[j,],] [,1] [,2] [1,] 0.3708767 0.7045144 [2,] 0.3687917 0.7049588 [3,] 0.3821378 0.7078709 > > summary(w) Frequency table of number of matches found per observation m 0 1 2 3 4 5 27 53 64 54 32 20 Median minimum distance by number of matches 1 2 3 4 5 0.5859325 0.3376432 0.1917933 0.1407859 0.1398928 Observations selected first more than once (with frequencies) 57 73 91 101 116 165 191 251 256 292 415 422 438 443 467 552 592 593 650 691 2 2 2 2 2 3 2 2 2 3 2 2 2 2 2 2 2 2 2 2 719 733 747 754 818 820 824 849 871 926 945 964 970 2 2 2 2 2 2 3 2 2 2 2 2 2 > > # For many applications would do something like this: > # attach(df1) > # x <- cbind(age, sex) # Just do as.matrix(df1) if df1 has no factor objects > # attach(df2) > # y <- 
cbind(age, sex)
> # mat <- find.matches(x, y, tol=c(5,0)) # exact match on sex, 5y on age
> 
> # Demonstrate matchCases
> xcase     <- c(1,3,5,12)
> xcontrol  <- 1:6
> idcase    <- c('A','B','C','D')
> idcontrol <- c('a','b','c','d','e','f')
> ycase     <- c(11,33,55,122)
> ycontrol  <- c(11,22,33,44,55,66)
> matchCases(xcase, ycase, idcase,
+            xcontrol, ycontrol, idcontrol, tol=1)
Frequencies of Number of Matched Controls per Case:

matches
0 2 3 
1 1 2 
   idcase    type id x   y
1       A    case  A 1  11
2       A control  a 1  11
3       A control  b 2  22
4       B    case  B 3  33
5       B control  b 2  22
6       B control  c 3  33
7       B control  d 4  44
8       C    case  C 5  55
9       C control  d 4  44
10      C control  e 5  55
11      C control  f 6  66
> 
> # If y is a binary response variable, the following code
> # will produce a Mantel-Haenszel summary odds ratio that
> # utilizes the matching.
> # Standard variance formula will not work here because
> # a control will match more than one case
> # WARNING: The M-H procedure exemplified here is suspect
> # because of the small strata and widely varying number
> # of controls per case.
> 
> x <- c(1, 2, 3, 3, 3, 6, 7, 12, 1, 1:7)
> y <- c(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1)
> case <- c(rep(TRUE, 8), rep(FALSE, 8))
> id <- 1:length(x)
> 
> m <- matchCases(x[case], y[case], id[case],
+                 x[!case], y[!case], id[!case], tol=1)
Frequencies of Number of Matched Controls per Case:

matches
0 2 3 4 
1 1 5 1 
> iscase <- m$type=='case'
> # Note: the first tapply only ensures that event indicators are
> # sorted by case id.  The second actually does something.
> event.case    <- tapply(m$y[iscase],  m$idcase[iscase],  sum)
> event.control <- tapply(m$y[!iscase], m$idcase[!iscase], sum)
> n.control     <- tapply(!iscase, m$idcase, sum)
> n             <- tapply(m$y, m$idcase, length)
> or <- sum(event.case * (n.control - event.control) / n) /
+       sum(event.control * (1 - event.case) / n)
> or
[1] 1.666667
> 
> # Bootstrap this estimator by sampling with replacement from
> # subjects.  Assumes id is unique when combining cases+controls
> # (id was constructed this way above).  The following algorithm
> # puts all sampled controls back with the cases to whom they were
> # originally matched.
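> # (For reference -- an editorial sketch restating the code below: each
> # draw resamples matched sets, i.e. a case id together with all of its
> # controls, and recomputes the Mantel-Haenszel odds ratio
> #     OR = sum(a_i * d_i / n_i) / sum(b_i * c_i / n_i)
> # where, for matched set i, a_i = case events, b_i = control events,
> # c_i = cases without the event, d_i = controls without the event, and
> # n_i = number of subjects in the set.)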
> 
> ids <- unique(m$id)
> idgroups <- split(1:nrow(m), m$id)
> B <- 50   # in practice use many more
> ors <- numeric(B)
> # Function to order w by ids, leaving unassigned elements zero
> align <- function(ids, w) {
+   z <- structure(rep(0, length(ids)), names=ids)
+   z[names(w)] <- w
+   z
+ }
> for(i in 1:B) {
+   j <- sample(ids, replace=TRUE)
+   obs <- unlist(idgroups[j])
+   u <- m[obs,]
+   iscase <- u$type=='case'
+   n.case <- align(ids, tapply(u$type, u$idcase,
+                               function(v)sum(v=='case')))
+   n.control <- align(ids, tapply(u$type, u$idcase,
+                                  function(v)sum(v=='control')))
+   event.case <- align(ids, tapply(u$y[iscase], u$idcase[iscase], sum))
+   event.control <- align(ids, tapply(u$y[!iscase], u$idcase[!iscase], sum))
+   n <- n.case + n.control
+   # Remove sets having 0 cases or 0 controls in resample
+   s <- n.case > 0 & n.control > 0
+   denom <- sum(event.control[s] * (n.case[s] - event.case[s]) / n[s])
+   or <- if(denom==0) NA else
+     sum(event.case[s] * (n.control[s] - event.control[s]) / n[s]) / denom
+   ors[i] <- or
+ }
> describe(ors)
ors 
      n missing  unique    Mean     .05     .10     .25     .50     .75     .90 
     31      19      14  0.9076   0.000   0.000   0.000   0.000   1.143   2.917 
    .95 
  3.107 

          0.0000 0.3750 0.3846 0.8000 0.9375 1.0909 1.1429 1.6667 1.7143 2.2500
Frequency     17      1      1      1      1      1      2      1      1      1
%             55      3      3      3      3      3      6      3      3      3
          2.9167 3.0000 3.2143 7.5000
Frequency      1      1      1      1
%              3      3      3      3
> 
> 
> 
> cleanEx(); ..nameEx <- "first.word"
> 
> ### * first.word
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: first.word
> ### Title: First Word in a String or Expression
> ### Aliases: first.word
> ### Keywords: character manip
> 
> ### ** Examples
> 
> first.word(expr=expression(y ~ x + log(w)))
[1] "y"
> 
> 
> 
> cleanEx(); ..nameEx <- "format.df"
> 
> ### * format.df
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: format.df
> ### Title: Format a Data Frame or Matrix for LaTeX or HTML
> ### Aliases: format.df
> ### Keywords: utilities interface methods file character manip
> 
> ### ** Examples
> 
> x <- data.frame(a=1:2, b=3:4)
> x$m <- matrix(5:8,nrow=2)
> names(x)
[1] "a" "b" "m"
> dim(x)
[1] 2 3
> x
  a b m.1 m.2
1 1 3   5   7
2 2 4   6   8
> format.df(x)
  a   b   m 1 m 2
1 "1" "3" "5" "7"
2 "2" "4" "6" "8"
attr(,"col.just")
[1] "r" "r" "r" "r"
> dim(format.df(x))
[1] 2 4
> 
> 
> 
> cleanEx(); ..nameEx <- "gbayes"
> 
> ### * gbayes
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: gbayes
> ### Title: Gaussian Bayesian Posterior and Predictive Distributions
> ### Aliases: gbayes plot.gbayes gbayes2 gbayesMixPredNoData gbayesMixPost
> ###   gbayesMixPowerNP gbayes1PowerNP
> ### Keywords: htest
> 
> ### ** Examples
> 
> # Compare 2 proportions using the var stabilizing transformation
> # arcsin(sqrt((x+3/8)/(n+3/4))) (Anscombe), which has variance 
> # 1/[4(n+.5)]
> 
> m1 <- 100;     m2 <- 150
> deaths1 <- 10; deaths2 <- 30
> 
> f <- function(events,n) asin(sqrt((events+3/8)/(n+3/4)))
> stat <- f(deaths1,m1) - f(deaths2,m2)
> var.stat <- function(m1, m2) 1/4/(m1+.5) + 1/4/(m2+.5)
> cat("Test statistic:",format(stat)," s.d.:",
+     format(sqrt(var.stat(m1,m2))), "\n")
Test statistic: -0.1388297  s.d.: 0.06441034 
> #Use unbiased prior with variance 1000 (almost flat)
> b <- gbayes(0, 1000, m1, m2, stat, var.stat, 2*m1, 2*m2)
> print(b)
$mean.prior
[1] 0

$var.prior
[1] 1000

$mean.post
[1] -0.1388291

$var.post
[1] 0.004148675

$mean.pred
[1] -0.1388291

$var.pred
[1] 0.006227504

attr(,"class")
[1] "gbayes"

> plot(b)
> #To get posterior Prob[parameter > w] use
> # 1-pnorm(w, b$mean.post, sqrt(b$var.post))
> 
> #If g(effect, n1, n2) is the power function to
> #detect an effect of 'effect' with sample sizes for groups
1 and 2 > #of n1,n2, estimate the expected power by getting 1000 random > #draws from the posterior distribution, computing power for > #each value of the population effect, and averaging the 1000 powers > #This code assumes that g will accept vector-valued 'effect' > #For the 2-sample proportion problem just addressed, 'effect' > #could be taken approximately as the change in the arcsin of > #the square root of the probability of the event > > g <- function(effect, n1, n2, alpha=.05) { + sd <- sqrt(var.stat(n1,n2)) + z <- qnorm(1 - alpha/2) + effect <- abs(effect) + 1 - pnorm(z - effect/sd) + pnorm(-z - effect/sd) + } > > effects <- rnorm(1000, b$mean.post, sqrt(b$var.post)) > powers <- g(effects, 500, 500) > hist(powers, nclass=35, xlab='Power') > describe(powers) powers n missing unique Mean .05 .10 .25 .50 .75 .90 1000 0 997 0.8567 0.1581 0.4029 0.8509 0.9939 0.9999 1.0000 .95 1.0000 lowest : 0.05005 0.05009 0.05014 0.05056 0.05122 highest: 1 1 1 1 1 > > > > # gbayes2 examples > # First consider a study with a binary response where the > # sample size is n1=500 in the new treatment arm and n2=300 > # in the control arm. The parameter of interest is the > # treated:control log odds ratio, which has variance > # 1/[n1 p1 (1-p1)] + 1/[n2 p2 (1-p2)]. This is not > # really constant so we average the variance over plausible > # values of the probabilities of response p1 and p2. We > # think that these are between .4 and .6 and we take a > # further short cut > > v <- function(n1, n2, p1, p2) 1/(n1*p1*(1-p1)) + 1/(n2*p2*(1-p2)) > n1 <- 500; n2 <- 300 > ps <- seq(.4, .6, length=100) > vguess <- quantile(v(n1, n2, ps, ps), .75) > vguess 75% 0.02183459 > # 75% > # 0.02183459 > > # The minimally interesting treatment effect is an odds ratio > # of 1.1. The prior distribution on the log odds ratio is > # a 50:50 mixture of a vague Gaussian (mean 0, sd 100) and > # an informative prior from a previous study (mean 1, sd 1) > > prior <- function(delta) + 0.5*dnorm(delta, 0, 100)+0.5*dnorm(delta, 1, 1) > deltas <- seq(-5, 5, length=150) > plot(deltas, prior(deltas), type='l') > > # Now compute the power, averaged over this prior > gbayes2(sqrt(vguess), prior, log(1.1)) [1] 0.6133338 > # [1] 0.6133338 > > # See how much power is lost by ignoring the previous > # study completely > > gbayes2(sqrt(vguess), function(delta)dnorm(delta, 0, 100), log(1.1)) [1] 0.4984588 > # [1] 0.4984588 > > # What happens to the power if we really don't believe the treatment > # is very effective? Let's use a prior distribution for the log > # odds ratio that is uniform between log(1.2) and log(1.3). > # Also check the power against a true null hypothesis > > prior2 <- function(delta) dunif(delta, log(1.2), log(1.3)) > gbayes2(sqrt(vguess), prior2, log(1.1)) [1] 0.1385113 > # [1] 0.1385113 > > gbayes2(sqrt(vguess), prior2, 0) [1] 0.3264065 > # [1] 0.3264065 > > # Compare this with the power of a two-sample binomial test to > # detect an odds ratio of 1.25 > bpower(.5, odds.ratio=1.25, n1=500, n2=300) Power 0.3307486 > # Power > # 0.3307486 > > # For the original prior, consider a new study with equal > # sample sizes n in the two arms. Solve for n to get a > # power of 0.9. For the variance of the log odds ratio > # assume a common p in the center of a range of suspected > # probabilities of response, 0.3. 
For this example we > # use a zero null value and the uniform prior above > > v <- function(n) 2/(n*.3*.7) > pow <- function(n) gbayes2(sqrt(v(n)), prior2) > uniroot(function(n) pow(n)-0.9, c(50,10000))$root [1] 2119.688 > # [1] 2119.675 > # Check this value > pow(2119.675) [1] 0.8999984 > # [1] 0.9 > > # Get the posterior density when there is a mixture of two priors, > # with mixing probability 0.5. The first prior is almost > # non-informative (normal with mean 0 and variance 10000) and the > # second has mean 2 and variance 0.3. The test statistic has a value > # of 3 with variance 0.4. > f <- gbayesMixPost(3, 4, mix=0.5, d0=0, v0=10000, d1=2, v1=0.3) > > args(f) function (delta = numeric(0), x = 3, v = 4, mix = 0.5, d0 = 0, v0 = 10000, d1 = 2, v1 = 0.3, dist = function (x, mean = 0, sd = 1, log = FALSE) .Internal(dnorm(x, mean, sd, log))) NULL > > # Plot this density > delta <- seq(-2, 6, length=150) > plot(delta, f(delta), type='l') > > # Add to the plot the posterior density that used only > # the almost non-informative prior > lines(delta, f(delta, mix=1), lty=2) > > # The same but for an observed statistic of zero > lines(delta, f(delta, mix=1, x=0), lty=3) > > # Derive the CDF instead of the density > g <- gbayesMixPost(3, 4, mix=0.5, d0=0, v0=10000, d1=2, v1=0.3, + what='cdf') > # Had mix=0 or 1, gbayes1PowerNP could have been used instead > # of gbayesMixPowerNP below > > # Compute the power to detect an effect of delta=1 if the variance > # of the test statistic is 0.2 > gbayesMixPowerNP(g, 1, 0.2, interval=c(-10,12)) Critical value Power 0.5025818 0.8669870 > > # Do the same thing by simulation > gbayesMixPowerNP(g, 1, 0.2, interval=c(-10,12), nsim=20000) Power Lower 0.95 Upper 0.95 0.86675 0.86204 0.87146 > > # Compute by what factor the sample size needs to be larger > # (the variance needs to be smaller) so that the power is 0.9 > ratios <- seq(1, 4, length=50) > pow <- single(50) > for(i in 1:50) + pow[i] <- gbayesMixPowerNP(g, 1, 0.2/ratios[i], interval=c(-10,12))[2] > > # Solve for ratio using reverse linear interpolation > approx(pow, ratios, xout=0.9)$y [1] 1.214671 > > # Check this by computing power > gbayesMixPowerNP(g, 1, 0.2/2.1, interval=c(-10,12)) Critical value Power 0.4141731 0.9711715 > # So the study will have to be 2.1 times as large as earlier thought > > > > cleanEx(); ..nameEx <- "getHdata" > > ### * getHdata > > flush(stderr()); flush(stdout()) > > ### Name: getHdata > ### Title: Download and Install Datasets for Hmisc, Design, and Statistical > ### Modeling > ### Aliases: getHdata > ### Keywords: interface data > > ### ** Examples > > ## Not run: > ##D getHdata() # download list of available datasets > ##D getHdata(prostate) # downloads, load( ) or data.restore( ) > ##D # runs cleanup.import for S-Plus 6 > ##D getHdata(valung, "contents") # open browser (options(browser="whatever")) > ##D # after downloading valung.html > ##D # (result of html(contents())) > ##D getHdata(support, "all") # download and open one browser window > ##D datadensity(support) > ##D attach(support) # make individual variables available > ##D getHdata(plasma, "all") # download and open two browser windows > ##D # (description file is available for plasma) > ## End(Not run) > > > > cleanEx(); ..nameEx <- "hdquantile" > > ### * hdquantile > > flush(stderr()); flush(stdout()) > > ### Name: hdquantile > ### Title: Harrell-Davis Distribution-Free Quantile Estimator > ### Aliases: hdquantile > ### Keywords: univar > > ### ** Examples > > set.seed(1) > x <- runif(100) > hdquantile(x, 
(1:3)/4, se=TRUE) 0.25 0.50 0.75 0.3064350 0.5054821 0.7571213 attr(,"se") 0.25 0.50 0.75 0.03931112 0.04878284 0.02997026 > > ## Not run: > ##D # Compare jackknife standard errors with those from the bootstrap > ##D library(boot) > ##D boot(x, function(x,i) hdquantile(x[i], probs=(1:3)/4), R=400) > ## End(Not run) > > > > cleanEx(); ..nameEx <- "hist.data.frame" > > ### * hist.data.frame > > flush(stderr()); flush(stdout()) > > ### Name: hist.data.frame > ### Title: Histograms for Variables in a Data Frame > ### Aliases: hist.data.frame > ### Keywords: hplot dplot distribution > > ### ** Examples > > d <- data.frame(a=runif(200), b=rnorm(200), + w=factor(sample(c('green','red','blue'), 200, TRUE))) > hist.data.frame(d) # in R, just say hist(d) > > > > cleanEx(); ..nameEx <- "histbackback" > > ### * histbackback > > flush(stderr()); flush(stdout()) > > ### Name: histbackback > ### Title: Back to Back Histograms > ### Aliases: histbackback > ### Keywords: dplot hplot distribution > > ### ** Examples > > options(digits=3) > set.seed(1) > histbackback(rnorm(20), rnorm(30)) > > fool <- list(x=rnorm(40), y=rnorm(40)) > histbackback(fool) > age <- rnorm(1000,50,10) > sex <- sample(c('female','male'),1000,TRUE) > histbackback(split(age, sex)) > agef <- age[sex=='female']; agem <- age[sex=='male'] > histbackback(list(Female=agef,Male=agem), probability=TRUE, xlim=c(-.06,.06)) > > > > cleanEx(); ..nameEx <- "hoeffd" > > ### * hoeffd > > flush(stderr()); flush(stdout()) > > ### Name: hoeffd > ### Title: Matrix of Hoeffding's D Statistics > ### Aliases: hoeffd print.hoeffd > ### Keywords: nonparametric htest > > ### ** Examples > > x <- c(-2, -1, 0, 1, 2) > y <- c(4, 1, 0, 1, 4) > z <- c(1, 2, 3, 4, NA) > q <- c(1, 2, 3, 4, 5) > hoeffd(cbind(x,y,z,q)) D x y z q x 1 0 NA 1 y 0 1 NA 0 z NA NA 1 NA q 1 0 NA 1 n x y z q x 5 5 4 5 y 5 5 4 5 z 4 4 4 4 q 5 5 4 5 P x y z q x 0.363 0.000 y 0.363 0.363 z q 0.000 0.363 > > # Hoeffding's test can detect even one-to-many dependency > set.seed(1) > x <- seq(-10,10,length=200) > y <- x*sign(runif(200,-1,1)) > plot(x,y) > hoeffd(x,y) D x y x 1.00 0.06 y 0.06 1.00 n= 200 P x y x 0 y 0 > > > > cleanEx(); ..nameEx <- "html" > > ### * html > > flush(stderr()); flush(stdout()) > > ### Name: html > ### Title: Convert an S object to HTML > ### Aliases: html html.latex html.data.frame html.default show.html > ### print.html > ### Keywords: utilities interface methods file character manip > > ### ** Examples > > ## Not run: > ##D x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','e'))) > ##D w <- latex(x) > ##D h <- html(w) # run HeVeA to convert .tex to .html > ##D h <- html(x) # convert x directly to html > ##D options(browser='konqueror') # use help.browser for S-Plus > ##D h # launch html browser by running print.html > ##D w <- html(x, link=c('','B')) # hyperlink first row first col to B > ## End(Not run) > > > > cleanEx(); ..nameEx <- "impute" > > ### * impute > > flush(stderr()); flush(stdout()) > > ### Name: impute > ### Title: Generic Functions and Methods for Imputation > ### Aliases: impute impute.default print.impute summary.impute [.impute > ### is.imputed > ### Keywords: methods math htest models > > ### ** Examples > > age <- c(1,2,NA,4) > age.i <- impute(age) > # Could have used impute(age,2.5), impute(age,mean), impute(age,"random") > age.i 1 2 3 4 1 2 2* 4 > summary(age.i) 1 values imputed to 2 Min. 1st Qu. Median Mean 3rd Qu. Max. 
1.00 1.75 2.00 2.25 2.50 4.00 > is.imputed(age.i) [1] FALSE FALSE TRUE FALSE > > > > cleanEx(); ..nameEx <- "labcurve" > > ### * labcurve > > flush(stderr()); flush(stdout()) > > ### Name: labcurve > ### Title: Label Curves, Make Keys, and Interactively Draw Points and > ### Curves > ### Aliases: labcurve putKey putKeyEmpty largest.empty drawPlot > ### plot.drawPlot bezier > ### Keywords: hplot aplot dplot iplot > > ### ** Examples > > n <- 2:8 > m <- length(n) > type <- c('l','l','l','l','s','l','l') > # s=step function l=ordinary line (polygon) > curves <- vector('list', m) > > plot(0,1,xlim=c(0,1),ylim=c(-2.5,4),type='n') > > set.seed(39) > > for(i in 1:m) { + x <- sort(runif(n[i])) + y <- rnorm(n[i]) + lines(x, y, lty=i, type=type[i], col=i) + curves[[i]] <- list(x=x,y=y) + } > > labels <- paste('Label for',letters[1:m]) > labcurve(curves, labels, tilt=TRUE, type=type, col=1:m) > > # Put only single letters on curves at points of > # maximum space, and use key() to define the letters, > # with automatic positioning of the key in the most empty > # part of the plot > # Have labcurve do the plotting, leaving extra space for key > > names(curves) <- labels > labcurve(curves, keys=letters[1:m], type=type, col=1:m, + pl=TRUE, ylim=c(-2.5,4)) > > # Put plotting symbols at equally-spaced points, > # with a key for the symbols, ignoring line types > > labcurve(curves, keys=1:m, lty=1, type=type, col=1:m, + pl=TRUE, ylim=c(-2.5,4)) > > > > # Plot and label two curves, with line parameters specified with data > set.seed(191) > ages.f <- sort(rnorm(50,20,7)) > ages.m <- sort(rnorm(40,19,7)) > height.f <- pmin(ages.f,21)*.2+60 > height.m <- pmin(ages.m,21)*.16+63 > > labcurve(list(Female=list(ages.f,height.f,col=2), + Male =list(ages.m,height.m,col=3,lty='dashed')), + xlab='Age', ylab='Height', pl=TRUE) > # add ,keys=c('f','m') to label curves with single letters > # For S-Plus use lty=2 > > # Plot power for testing two proportions vs. n for various odds ratios, > # using 0.1 as the probability of the event in the control group. > # A separate curve is plotted for each odds ratio, and the curves are > # labeled at points of maximum separation > > n <- seq(10, 1000, by=10) > OR <- seq(.2,.9,by=.1) > pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)), + n=n) > names(pow) <- format(OR) > labcurve(pow, pl=TRUE, xlab='n', ylab='Power') > > # Plot some random data and find the largest empty rectangle > # that is at least .1 wide and .1 tall > > x <- runif(50) > y <- runif(50) > plot(x, y) > z <- largest.empty(x, y, .1, .1) > z $x [1] 0.876 $y [1] 0.595 > points(z,pch=3) # mark center of rectangle, or > #key(z$x, z$y, ... stuff for legend) > > > > # Use the mouse to draw a series of points using one symbol, and > # two smooth curves or straight lines (if two points are clicked), > # none of these being labeled > > # d <- drawPlot(Points(), Curve(), Curve()) > # plot(d, file='/tmp/z') # send result to /tmp/z.ps > > ## Not run: > ##D # Use the mouse to draw a Gaussian density, two series of points > ##D # using 2 symbols, one Bezier curve, a step function, and raw data > ##D # along the x-axis as a 1-d scatter plot (rug plot). Draw a key. 
> ##D # The density function is fit to 3 mouse clicks
> ##D # Abline draws a dotted horizontal reference line
> ##D d <- drawPlot(Curve('Normal',type='gauss'),
> ##D               Points('female'), Points('male'),
> ##D               Curve('smooth',ask=TRUE,lty=2), Curve('step',type='s',lty=3),
> ##D               Points(type='r'), Abline(h=.5, lty=2),
> ##D               xlab='X', ylab='y', xlim=c(0,100), key=TRUE)
> ##D plot(d, ylab='Y')
> ##D plot(d, key=FALSE)  # label groups using labcurve
> ## End(Not run)
> 
> 
> 
> cleanEx(); ..nameEx <- "label"
> 
> ### * label
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: label
> ### Title: Label Attribute of an Object
> ### Aliases: label label<- labelPlotmath [.labelled print.labelled Label
> ###   Label.data.frame llist plotmathTranslate as.data.frame.labelled
> ###   data.frame.labelled reLabelled
> ### Keywords: interface misc utilities
> 
> ### ** Examples
> 
> age <- c(21,65,43)
> y <- 1:3
> label(age) <- "Age in Years"
> plot(age, y, xlab=label(age))
> 
> x1 <- 1:10
> x2 <- 10:1
> label(x2) <- 'Label for x2'
> units(x2) <- 'mmHg'
> x2
Label for x2 [mmHg] 
 [1] 10  9  8  7  6  5  4  3  2  1
> x2[1:5]
Label for x2 [mmHg] 
[1] 10  9  8  7  6
> dframe <- data.frame(x1, x2)
> Label(dframe)
label(x1) <- ''
label(x2) <- 'Label for x2'
> 
> ##In these examples of llist, note that labels are printed after
> ##variable names, because of print.labelled
> a <- 1:3
> b <- 4:6
> label(b) <- 'B Label'
> llist(a,b)
$a
a
[1] 1 2 3

$b
B Label
[1] 4 5 6

> llist(a,b,d=0)
$a
a
[1] 1 2 3

$b
B Label
[1] 4 5 6

$d
d
[1] 0

> llist(a,b,0)
$a
a
[1] 1 2 3

$b
B Label
[1] 4 5 6

$"0"
0
[1] 0

> 
> w <- llist(a, b>5, d=101:103)
> sapply(w, function(x){
+   hist(as.numeric(x), xlab=label(x))
+   # locator(1)  ## wait for mouse click
+ })
            a               b > 5           d              
breaks      Numeric,5       Numeric,3       Numeric,5      
counts      Integer,4       Integer,2       Integer,4      
intensities Numeric,4       Numeric,2       Numeric,4      
density     Numeric,4       Numeric,2       Numeric,4      
mids        Numeric,4       Numeric,2       Numeric,4      
xname       "as.numeric(x)" "as.numeric(x)" "as.numeric(x)"
equidist    TRUE            TRUE            TRUE           
> 
> # Or:  for(u in w) {hist(u); title(label(u))}
> 
> 
> 
> cleanEx(); ..nameEx <- "latex"
> 
> ### * latex
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: latex
> ### Title: Convert an S object to LaTeX, and Related Utilities
> ### Aliases: latex latex.default latex.function latex.list latexTranslate
> ###   latexSN latexVerbatim dvi print.dvi dvi.latex dvips dvips.latex
> ###   dvips.dvi dvigv dvigv.latex dvigv.dvi print.latex show.latex show.dvi
> ### Keywords: utilities interface methods file character manip
> 
> ### ** Examples
> 
> ## Not run: 
> ##D x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','e\nLine 2')))
> ##D latex(x)   # creates x.tex in working directory
> ##D w <- latex(x, file='/tmp/my.tex')
> ##D d <- dvi(w)  # compile LaTeX document, make .dvi
> ##D              # latex assumed to be in path
> ##D d            # or show(d) : run xdvi (assumed in path) to display
> ##D w            # or show(w) : run dvi then xdvi
> ##D dvips(d)     # run dvips to print document
> ##D dvips(w)     # run dvi then dvips
> ##D latex(x, file="")   # just write out LaTeX code to screen
> ##D 
> ##D # After running latex( ) multiple times with different special styles in
> ##D # effect, make a file that will call for the needed LaTeX packages when
> ##D # latex is run (especially when using Sweave with R)
> ##D if(exists('latexStyles'))
> ##D   cat(paste('\\usepackage{',latexStyles,'}',sep=''),
> ##D       file='stylesused.tex', sep='\n')
> ##D # Then in the latex job have something like:
> ##D # \documentclass{article}
> ##D # \input{stylesused}
> ##D # \begin{document}
> ##D # ...
> ## End(Not run) > > > > cleanEx(); ..nameEx <- "ldBands" > > ### * ldBands > > flush(stderr()); flush(stdout()) > > ### Name: ldBands > ### Title: Group Sequential Boundaries using the Lan-DeMets Approach > ### Aliases: ldBands summary.ldBands print.ldBands plot.ldBands > ### print.summary.ldBands > ### Keywords: distribution htest design > > ### ** Examples > > ## Not run: > ##D # Get boundaries for O'Brien-Fleming spending function, 5 looks, alpha=.05 > ##D b <- ldBands(5, pr=FALSE) > ##D plot(b) > ##D # Same but times are irregular, and information times are different than > ##D # test times. Use Pocock spending function. > ##D b <- ldBands(times= c(.4, .6, .8, .9, .95), > ##D information=c(.42,.65,.83,.89,.94), spending='Pocock') > ##D > ##D # Get power calculations > ##D u <- ldBands(times=c(.4, .6, .8, .9, .95), power=.9) > ##D u$drift # standardize difference * sqrt(n per arm) > ##D # needed to provide power=.9 > ##D summary(u, n=50) # obtain detectable standardized difference > ##D summary(u, p1=.4, p2=.5) # get sample size per arm, two-sample binomial > ##D summary(u, hr=1.5) # get number of events per arm needed > ##D # to detect a hazard ratio of 1.5 > ##D > ##D # Asymmetric boundaries with different spending functions, truncate > ##D b <- ldBands(5, sided=3, spending='alpha*t^phi', phi=1, phi2=1.5, > ##D alphaLower=.01, alphaUpper=.04, truncate=4) > ##D b > ##D plot(b) > ##D # Compute differences in proportions and odds ratios needed to cross > ##D # the boundaries, given a mean probability in two treatment arms of 0.1 > ##D # and given a vector of sample sizes per arm corresponding to the looks > ##D s <- summary(b, n=seq(200,1000,by=200), pbar=.1) > ##D s > ##D d <- s$data > ##D plot(d$n, d$or.lower, xlab='N Per Arm', > ##D ylab='Critical Odds Ratio', type='b', > ##D ylim=range(d$or.lower, d$or.upper), log='y') > ##D lines(d$n, d$or.upper, type='b') > ##D abline(h=1, lty=2) > ## End(Not run) > > > > cleanEx(); ..nameEx <- "list.tree" > > ### * list.tree > > flush(stderr()); flush(stdout()) > > ### Name: list.tree > ### Title: Pretty-print the Structure of a Data Object > ### Aliases: list.tree > ### Keywords: documentation > > ### ** Examples > > X <- list(a=ordered(c(1:30,30:1)),b=c("Rick","John","Allan"), + c=diag(300),e=cbind(p=1008:1019,q=4)) > list.tree(X) X = list 4 (722788 bytes) . a = integer 60= category (30 levels)( ordered factor )= 1 2 3 4 5 6 7 8 ... . b = character 3= Rick John Allan . c = double 90000= array 300 X 300= 1 0 0 0 0 0 0 0 ... . e = double 24= named array 12 X 2= 1008 1009 1010 1011 ... 
> # In R you can say str(X) > > > > cleanEx(); ..nameEx <- "mgp.axis" > > ### * mgp.axis > > flush(stderr()); flush(stdout()) > > ### Name: mgp.axis > ### Title: Draw Axes With Side-Specific mgp Parameters > ### Aliases: mgp.axis mgp.axis.labels > ### Keywords: iplot dplot environment > > ### ** Examples > > ## Not run: > ##D mgp.axis.labels(type='x') # get default value for x-axis > ##D mgp.axis.labels(type='y') # get value for y-axis > ##D mgp.axis.labels(type='xy') # get 2nd element of both mgps > ##D mgp.axis.labels(type='x and y') # get a list with 2 elements > ##D mgp.axis.labels(c(3,.5,0), type='x') # set > ##D options('mgp.axis.labels') # retrieve > ##D > ##D plot(..., axes=FALSE) > ##D mgp.axis(1, "X Label") > ##D mgp.axis(2, "Y Label") > ##D > ## End(Not run) > > > cleanEx(); ..nameEx <- "minor.tick" > > ### * minor.tick > > flush(stderr()); flush(stdout()) > > ### Name: minor.tick > ### Title: Minor Tick Marks > ### Aliases: minor.tick > ### Keywords: aplot hplot > > ### ** Examples > > plot(runif(20),runif(20)) > minor.tick() > > > > cleanEx(); ..nameEx <- "mtitle" > > ### * mtitle > > flush(stderr()); flush(stdout()) > > ### Name: mtitle > ### Title: Margin Titles > ### Aliases: mtitle > ### Keywords: hplot > > ### ** Examples > > #Set up for 1 plot on figure, give a main title, > #use date for lr > plot(runif(20),runif(20)) > mtitle("Main Title") > > #Set up for 2 x 2 matrix of plots with a lower left subtitle and overall title > par(mfrow=c(2,2), oma=c(3,0,3,0)) > plot(runif(20),runif(20)) > plot(rnorm(20),rnorm(20)) > plot(exp(rnorm(20)),exp(rnorm(20))) > mtitle("Main Title",ll="n=20") > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "na.delete" > > ### * na.delete > > flush(stderr()); flush(stdout()) > > ### Name: na.delete > ### Title: Row-wise Deletion na.action > ### Aliases: na.delete > ### Keywords: models > > ### ** Examples > > # options(na.action="na.delete") > # ols(y ~ x) > > > > cleanEx(); ..nameEx <- "na.detail.response" > > ### * na.detail.response > > flush(stderr()); flush(stdout()) > > ### Name: na.detail.response > ### Title: Detailed Response Variable Information > ### Aliases: na.detail.response > ### Keywords: models regression > > ### ** Examples > > # sex > # [1] m f f m f f m m m m m m m m f f f m f m > # age > # [1] NA 41 23 30 44 22 NA 32 37 34 38 36 36 50 40 43 34 22 42 30 > # y > # [1] 0 1 0 0 1 0 1 0 0 1 1 1 0 0 1 1 0 1 0 0 > # options(na.detail.response=TRUE, na.action="na.delete", digits=3) > # lrm(y ~ age*sex) > # > # Logistic Regression Model > # > # lrm(formula = y ~ age * sex) > # > # > # Frequencies of Responses > # 0 1 > # 10 8 > # > # Frequencies of Missing Values Due to Each Variable > # y age sex > # 0 2 0 > # > # > # Statistics on Response by Missing/Non-Missing Status of Predictors > # > # age=NA age!=NA sex!=NA Any NA No NA > # N 2.0 18.000 20.00 2.0 18.000 > # Mean 0.5 0.444 0.45 0.5 0.444 > # > # ...... > # options(na.action="na.keep") > # describe(y ~ age*sex) > # Statistics on Response by Missing/Non-Missing Status of Predictors > # > # age=NA age!=NA sex!=NA Any NA No NA > # N 2.0 18.000 20.00 2.0 18.000 > # Mean 0.5 0.444 0.45 0.5 0.444 > # > # ... > # options(na.fun.response="table") #built-in function table() > # describe(y ~ age*sex) > # > # Statistics on Response by Missing/Non-Missing Status of Predictors > # > # age=NA age!=NA sex!=NA Any NA No NA > # 0 1 10 11 1 10 > # 1 1 8 9 1 8 > # > # ... 
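> # A sketch of cleanup, not in the original example: the settings used
> # above are global options, so after experimenting they can be restored
> # with something like (assuming the usual factory defaults):
> # options(na.action="na.omit", na.detail.response=NULL,
> #         na.fun.response=NULL)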
> > > > cleanEx(); ..nameEx <- "na.keep" > > ### * na.keep > > flush(stderr()); flush(stdout()) > > ### Name: na.keep > ### Title: Do-nothing na.action > ### Aliases: na.keep > ### Keywords: models > > ### ** Examples > > options(na.action="na.keep", na.detail.response=TRUE) > x1 <- runif(20) > x2 <- runif(20) > x2[1:4] <- NA > y <- rnorm(20) > describe(y ~ x1*x2) y ~ x1 * x2 3 Variables 20 Observations --------------------------------------------------------------------------- y n missing unique Mean .05 .10 .25 .50 20 0 20 -0.006472 -1.49668 -1.38643 -0.39947 -0.05497 .75 .90 .95 0.65566 0.93708 1.11296 lowest : -1.9894 -1.4708 -1.3771 -0.4782 -0.4150 highest: 0.7632 0.7821 0.9190 1.1000 1.3587 --------------------------------------------------------------------------- x1 n missing unique Mean .05 .10 .25 .50 .75 .90 20 0 20 0.5552 0.1708 0.1992 0.3455 0.6010 0.7717 0.9119 .95 0.9470 lowest : 0.06179 0.17656 0.20168 0.20597 0.26551 highest: 0.7774 0.8984 0.9082 0.9447 0.9919 --------------------------------------------------------------------------- x2 n missing unique Mean .05 .10 .25 .50 .75 .90 16 4 16 0.4721 0.0843 0.1471 0.3221 0.4467 0.6823 0.8108 .95 0.8380 0.01339 0.10794 0.18622 0.26722 0.34035 0.38239 0.38611 0.41127 Frequency 1 1 1 1 1 1 1 1 % 6 6 6 6 6 6 6 6 0.48208 0.49354 0.59957 0.66847 0.72371 0.79424 0.82737 0.86969 Frequency 1 1 1 1 1 1 1 1 % 6 6 6 6 6 6 6 6 --------------------------------------------------------------------------- > > > > cleanEx(); ..nameEx <- "panel.bpplot" > > ### * panel.bpplot > > flush(stderr()); flush(stdout()) > > ### Name: panel.bpplot > ### Title: Box-Percentile Panel Function for Trellis > ### Aliases: panel.bpplot bpplt > ### Keywords: nonparametric hplot distribution > > ### ** Examples > > set.seed(13) > x <- rnorm(1000) > g <- sample(1:6, 1000, replace=TRUE) > x[g==1][1:20] <- rnorm(20)+3 # contaminate 20 x's for group 1 > > # default trellis box plot > if(.R.) 
library(lattice) > bwplot(g ~ x) > > # box-percentile plot with data density (rug plot) > bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.49,by=.01), datadensity=TRUE) Loading required package: grid > # add ,scat1d.opts=list(tfrac=1) to make all tick marks the same size > # when a group has > 125 observations > > # small dot for means, show only .05,.125,.25,.375,.625,.75,.875,.95 quantiles > bwplot(g ~ x, panel=panel.bpplot, cex=.3) > > # suppress means and reference lines for lower and upper quartiles > bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,.1,.25), means=FALSE, qref=FALSE) > > # continuous plot up until quartiles ("Tootsie Roll plot") > bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.25,by=.01)) > > # start at quartiles then make it continuous ("coffin plot") > bwplot(g ~ x, panel=panel.bpplot, probs=seq(.25,.49,by=.01)) > > # same as previous but add a spike to give 0.95 interval > bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,seq(.25,.49,by=.01))) > > # decile plot with reference lines at outer quintiles and median > bwplot(g ~ x, panel=panel.bpplot, probs=c(.1,.2,.3,.4), qref=c(.5,.2,.8)) > > # default plot with tick marks showing all observations outside the outer > # box (.05 and .95 quantiles), with very small ticks > bwplot(g ~ x, panel=panel.bpplot, nout=.05, scat1d.opts=list(frac=.01)) > > # show 5 smallest and 5 largest observations > bwplot(g ~ x, panel=panel.bpplot, nout=5) > > # Use a scat1d option (preserve=TRUE) to ensure that the right peak extends > # to the same position as the extreme scat1d > bwplot(~x , panel=panel.bpplot, probs=seq(.00,.5,by=.001), + datadensity=TRUE, scat1d.opt=list(preserve=TRUE)) > > # Draw a prototype showing how to interpret the plots > bpplt() > > # make a local copy of bwplot that always uses panel.bpplot (S-Plus only) > # bwplot$panel <- panel.bpplot > # bwplot(g ~ x, nout=.05) > > > > cleanEx(); ..nameEx <- "pc1" > > ### * pc1 > > flush(stderr()); flush(stdout()) > > ### Name: pc1 > ### Title: First Principal Component > ### Aliases: pc1 > ### Keywords: multivariate > > ### ** Examples > > set.seed(1) > x1 <- rnorm(100) > x2 <- x1 + rnorm(100) > w <- pc1(cbind(x1,x2)) Fraction variance explained by PC1: 0.842 Coefficients to obtain PC1: Intercept x1 x2 -0.124 0.787 0.539 > attr(w,'coef') Intercept x1 x2 -0.124 0.787 0.539 > > > > cleanEx(); ..nameEx <- "plotCorrPrecision" > > ### * plotCorrPrecision > > flush(stderr()); flush(stdout()) > > ### Name: plotCorrPrecision > ### Title: Plot Precision of Estimate of Pearson Correlation Coefficient > ### Aliases: plotCorrPrecision > ### Keywords: htest > > ### ** Examples > > plotCorrPrecision() > plotCorrPrecision(rho=0) > > > > cleanEx(); ..nameEx <- "plsmo" > > ### * plsmo > > flush(stderr()); flush(stdout()) > > ### Name: plsmo > ### Title: Plot smoothed estimates > ### Aliases: plsmo panel.plsmo > ### Keywords: smooth nonparametric hplot > > ### ** Examples > > set.seed(1) > x <- 1:100 > y <- x + runif(100, -10, 10) > plsmo(x,y,"supsmu",xlab="Time of Entry") > #Use label(y) or "y" for ylab > > plsmo(x,y,add=TRUE,lty=2) > #Add lowess smooth to existing plot, with different line type > > age <- rnorm(500, 50, 15) > survival.time <- rexp(500) > sex <- sample(c('female','male'), 500, TRUE) > race <- sample(c('black','non-black'), 500, TRUE) > plsmo(age, survival.time < 1, fun=qlogis, group=sex) # plot logit by sex > > #Plot points and smooth trend line using trellis > # (add type='l' to suppress points or type='p' to suppress trend lines) > if(.R.) 
library(lattice)
> xyplot(survival.time ~ age, panel=panel.plsmo)
Loading required package: grid
> 
> #Do this for multiple panels
> xyplot(survival.time ~ age | sex, panel=panel.plsmo)
> 
> #Do this for subgroups of points on each panel, show the data
> #density on each curve, and draw a key at the default location
> xyplot(survival.time ~ age | sex, groups=race, panel=panel.plsmo,
+        datadensity=TRUE)
> Key()
> 
> #Use wtd.loess.noiter to do a fast weighted smooth
> plot(x, y)
> lines(wtd.loess.noiter(x, y))
> lines(wtd.loess.noiter(x, y, weights=c(rep(1,50), 100, rep(1,49))), col=2)
> points(51, y[51], pch=18)   # show overly weighted point
> #Try to duplicate this smooth by replicating 51st observation 100 times
> lines(wtd.loess.noiter(c(x,rep(x[51],99)),c(y,rep(y[51],99)),
+                        type='ordered all'), col=3)
> #Note: These two don't agree exactly
> 
> 
> 
> cleanEx(); ..nameEx <- "popower"
> 
> ### * popower
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: popower
> ### Title: Power and Sample Size for Ordinal Response
> ### Aliases: popower posamsize print.popower print.posamsize
> ### Keywords: htest category
> 
> ### ** Examples
> 
> #For a study of back pain (none, mild, moderate, severe) here are the
> #expected proportions (averaged over 2 treatments) that will be in
> #each of the 4 categories:
> 
> p <- c(.1,.2,.4,.3)
> popower(p, 1.2, 1000)   # OR=1.2, total n=1000
Power: 0.351 
Efficiency of design compared with continuous response: 0.9 
> posamsize(p, 1.2)
Total sample size: 3148 
Efficiency of design compared with continuous response: 0.9 
> popower(p, 1.2, 3148)
Power: 0.8 
Efficiency of design compared with continuous response: 0.9 
> 
> 
> 
> cleanEx(); ..nameEx <- "print.char.matrix"
> 
> ### * print.char.matrix
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: print.char.matrix
> ### Title: Function to print a matrix with stacked cells
> ### Aliases: print.char.matrix
> ### Keywords: print array
> 
> ### ** Examples
> 
> data(HairEyeColor)
> print.char.matrix(HairEyeColor[ , , "Male"], col.names = TRUE)
+-----+-----+----+-----+-----+
| Hair|Brown|Blue|Hazel|Green|
+-----+-----+----+-----+-----+
|Black| 32  | 11 | 10  |  3  |
+-----+-----+----+-----+-----+
|Brown| 38  | 50 | 25  | 15  |
+-----+-----+----+-----+-----+
|  Red| 10  | 10 |  7  |  7  |
+-----+-----+----+-----+-----+
|Blond|  3  | 30 |  5  |  8  |
+-----+-----+----+-----+-----+
> print.char.matrix(HairEyeColor[ , , "Female"], col.txt.align = "left", col.names = TRUE)
+-----+-----+----+-----+-----+
| Hair|Brown|Blue|Hazel|Green|
+-----+-----+----+-----+-----+
|Black| 36  | 9  | 5   | 2   |
+-----+-----+----+-----+-----+
|Brown| 81  | 34 | 29  | 14  |
+-----+-----+----+-----+-----+
|Red  | 16  | 7  | 7   | 7   |
+-----+-----+----+-----+-----+
|Blond| 4   | 64 | 5   | 8   |
+-----+-----+----+-----+-----+
> 
> z <- rbind(c("", "N", "y"),
+            c("[ 1.34,40.3)\n[40.30,48.5)\n[48.49,58.4)\n[58.44,87.8]",
+              " 50\n 50\n 50\n 50",
+              "0.530\n0.489\n0.514\n0.507"),
+            c("female\nmale", " 94\n106", "0.552\n0.473" ),
+            c("", "200", "0.510"))
> dimnames(z) <- list(c("", "age", "sex", "Overall"),NULL)
> 
> print.char.matrix(z)
+-------+------------+---+-----+
|       |            |N  |y    |
+-------+------------+---+-----+
|age    |[ 1.34,40.3)| 50|0.530|
|       |[40.30,48.5)| 50|0.489|
|       |[48.49,58.4)| 50|0.514|
|       |[58.44,87.8]| 50|0.507|
+-------+------------+---+-----+
|sex    |female      | 94|0.552|
|       |male        |106|0.473|
+-------+------------+---+-----+
|Overall|            |200|0.510|
+-------+------------+---+-----+
> 
> 
> 
> cleanEx(); ..nameEx <- "prnz"
> 
> ### * prnz
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: prnz
> cleanEx(); ..nameEx <- "prnz"
>
> ### * prnz
>
> flush(stderr()); flush(stdout())
>
> ### Name: prnz
> ### Title: Print an Object with its Name
> ### Aliases: prn
> ### Keywords: print
>
> ### ** Examples
>
> x <- 1:5
> prn(x)

x
[1] 1 2 3 4 5
> # prn(fit, 'Full Model Fit')
>
>
>
> cleanEx(); ..nameEx <- "ps.slide"
>
> ### * ps.slide
>
> flush(stderr()); flush(stdout())
>
> ### Name: ps.slide
> ### Title: Postscript and Adobe PDF Setup for 35mm Slides and Other Formats
> ### Aliases: ps.slide setps setpdf topdf tex showPsfrag
> ### Keywords: hplot device
>
> ### ** Examples
>
> ## Not run:
> ##D ps.slide("myslide")   # myslide is file name prefix
> ##D # use ps.slide("myslide",back="green") to use e.g. green background
> ##D plot(x, y)
> ##D title("My Title")
> ##D
> ##D ps.slide(view=TRUE)   # makes myslide.ps file
> ##D # use ps.slide(close=TRUE) to close file without viewing with
> ##D # ghostview.
> ##D ps.slide(view=TRUE, pcx=TRUE)
> ##D # converts myslide.ps into myslide.pcx (PC Paintbrush
> ##D # format suitable for importing in PC graphics packages)
> ##D mgp.axis.labels(c(.4,1.2))  # override 2nd mgp parameters for x- and y-axes
> ##D mgp.axis.labels(type='x')   # retrieve 3 mgp parameters for x-axis
> ##D
> ##D setps(myfile)  # equiv. to setps('myfile', type='char')
> ##D # setps(myfile, trellis=TRUE, other args) for Trellis
> ##D # plotting commands
> ##D dev.off()
> ##D topdf()   # topdf created by setps
> ##D # makes Ghostscript create "myfile.pdf"
> ##D setpdf(myfile)
> ##D # plotting commands
> ##D dev.off()
> ##D
> ##D # Put math and Greek symbols in a graph
> ##D setps(test)
> ##D x <- seq(0,15,length=100)
> ##D plot(x, dchisq(x, 5), xlab=tex('$x$'),
> ##D      ylab=tex('$f(x)$'), type='l')
> ##D title(tex('Density Function of the $\\chi_{5}^{2}$ Distribution'))
> ##D dev.off()
> ##D # To process this file in LaTeX do something like
> ##D #\documentclass{article}
> ##D #\usepackage[scanall]{psfrag}
> ##D #\begin{document}
> ##D #\begin{figure}
> ##D #\includegraphics{test.ps}
> ##D #\caption{This is an example}
> ##D #\end{figure}
> ##D #\end{document}
> ## End(Not run)
>
>
>
> cleanEx(); ..nameEx <- "pstamp"
>
> ### * pstamp
>
> flush(stderr()); flush(stdout())
>
> ### Name: pstamp
> ### Title: Date/Time/Directory Stamp the Current Plot
> ### Aliases: pstamp
> ### Keywords: aplot
>
> ### ** Examples
>
> plot(1:20)
> pstamp(pwd=TRUE, time=FALSE)
>
>
>
> cleanEx(); ..nameEx <- "rMultinom"
>
> ### * rMultinom
>
> flush(stderr()); flush(stdout())
>
> ### Name: rMultinom
> ### Title: Generate Multinomial Random Variables with Varying Probabilities
> ### Aliases: rMultinom
> ### Keywords: distribution
>
> ### ** Examples
>
> set.seed(1)
> w <- rMultinom(rbind(c(.1,.2,.3,.4),c(.4,.3,.2,.1)),200)
> t(apply(w, 1, table)/200)
         1     2     3    4
[1,] 0.080 0.205 0.315 0.40
[2,] 0.445 0.280 0.185 0.09
>
>
>
> cleanEx(); ..nameEx <- "rcorr"
>
> ### * rcorr
>
> flush(stderr()); flush(stdout())
>
> ### Name: rcorr
> ### Title: Matrix of Correlations and Generalized Spearman Rank Correlation
> ### Aliases: rcorr print.rcorr spearman2 spearman2.default
> ###   spearman2.formula print.spearman2.formula plot.spearman2.formula
> ###   spearman spearman.test
> ### Keywords: nonparametric htest
>
> ### ** Examples
>
> x <- c(-2, -1, 0, 1, 2)
> y <- c(4, 1, 0, 1, 4)
> z <- c(1, 2, 3, 4, NA)
> v <- c(1, 2, 3, 4, 5)
> rcorr(cbind(x,y,z,v))
  x     y     z v
x 1  0.00  1.00 1
y 0  1.00 -0.75 0
z 1 -0.75  1.00 1
v 1  0.00  1.00 1

n
  x y z v
x 5 5 4 5
y 5 5 4 5
z 4 4 4 4
v 5 5 4 5

P
  x     y     z     v
x       1.000 0.000 0.000
y 1.000       0.255 1.000
z 0.000 0.255       0.000
v 0.000 1.000 0.000
>
> spearman2(x, y)
 rho2     F   df1   df2     P
0.000 0.000 1.000 3.000 1.000
    n Adjusted rho2
5.000        -0.333
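> # With the default p=1, spearman2 looks only for a monotonic trend, so the
> # U-shaped relation y = x^2 above yields rho2 = 0.  The p argument raises
> # the order of the polynomial in the ranks; a hedged follow-up (not run
> # here) that should detect the U shape:
> # spearman2(x, y, p=2)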
> plot(spearman2(z ~ x + y + v, p=2)) > > > > cleanEx(); ..nameEx <- "rcorr.cens" > > ### * rcorr.cens > > flush(stderr()); flush(stdout()) > > ### Name: rcorr.cens > ### Title: Rank Correlation for Censored Data > ### Aliases: rcorr.cens > ### Keywords: survival > > ### ** Examples > > set.seed(1) > x <- round(rnorm(200)) > y <- rnorm(200) > rcorr.cens(x, y, outx=TRUE) # can correlate non-censored variables C Index Dxy S.D. n missing 4.76e-01 -4.74e-02 6.49e-02 2.00e+02 0.00e+00 uncensored Relevant Pairs Concordant Uncertain 2.00e+02 2.83e+04 1.35e+04 0.00e+00 > if(.R.) library(survival) Loading required package: splines Attaching package: 'survival' The following object(s) are masked from package:Hmisc : untangle.specials > age <- rnorm(400, 50, 10) > d.time <- rexp(400) > cens <- runif(400,.5,2) > death <- d.time <= cens > d.time <- pmin(d.time, cens) > rcorr.cens(age, Surv(d.time, death)) C Index Dxy S.D. n missing 4.98e-01 -4.14e-03 3.94e-02 4.00e+02 0.00e+00 uncensored Relevant Pairs Concordant Uncertain 2.67e+02 1.33e+05 6.61e+04 2.68e+04 > > > > cleanEx(); ..nameEx <- "rcorrp.cens" > > ### * rcorrp.cens > > flush(stderr()); flush(stdout()) > > ### Name: rcorrp.cens > ### Title: Rank Correlation for Paired Predictors with a Censored Response > ### Aliases: rcorrp.cens > ### Keywords: survival nonparametric > > ### ** Examples > > set.seed(1) > if(.R.) library(survival) Loading required package: splines Attaching package: 'survival' The following object(s) are masked from package:Hmisc : untangle.specials > x1 <- rnorm(400) > x2 <- x1 + rnorm(400) > d.time <- rexp(400) + (x1 - min(x1)) > cens <- runif(400,.5,2) > death <- d.time <= cens > d.time <- pmin(d.time, cens) > rcorrp.cens(x1, x2, Surv(d.time, death)) Dxy S.D. x1 more concordant x2 more concordant -8.21e-02 1.37e-01 4.59e-01 5.41e-01 n missing uncensored Relevant Pairs 4.00e+02 0.00e+00 1.10e+01 4.26e+03 Uncertain C X1 C X2 Dxy X1 1.55e+05 9.92e-01 9.26e-01 9.84e-01 Dxy X2 8.52e-01 > # rcorrp.cens(x1, x2, y) ## no censoring > > > > cleanEx(); ..nameEx <- "rcspline.eval" > > ### * rcspline.eval > > flush(stderr()); flush(stdout()) > > ### Name: rcspline.eval > ### Title: Restricted Cubic Spline Design Matrix > ### Aliases: rcspline.eval > ### Keywords: regression smooth > > ### ** Examples > > x <- 1:100 > rcspline.eval(x, nk=4, inclx=TRUE) x [1,] 1 0.00e+00 0.00e+00 [2,] 2 0.00e+00 0.00e+00 [3,] 3 0.00e+00 0.00e+00 [4,] 4 0.00e+00 0.00e+00 [5,] 5 0.00e+00 0.00e+00 [6,] 6 1.57e-08 0.00e+00 [7,] 7 1.46e-04 0.00e+00 [8,] 8 1.09e-03 0.00e+00 [9,] 9 3.57e-03 0.00e+00 [10,] 10 8.37e-03 0.00e+00 [11,] 11 1.62e-02 0.00e+00 [12,] 12 2.79e-02 0.00e+00 [13,] 13 4.41e-02 0.00e+00 [14,] 14 6.57e-02 0.00e+00 [15,] 15 9.34e-02 0.00e+00 [16,] 16 1.28e-01 0.00e+00 [17,] 17 1.70e-01 0.00e+00 [18,] 18 2.20e-01 0.00e+00 [19,] 19 2.80e-01 0.00e+00 [20,] 20 3.49e-01 0.00e+00 [21,] 21 4.29e-01 0.00e+00 [22,] 22 5.21e-01 0.00e+00 [23,] 23 6.24e-01 0.00e+00 [24,] 24 7.41e-01 0.00e+00 [25,] 25 8.71e-01 0.00e+00 [26,] 26 1.02e+00 0.00e+00 [27,] 27 1.17e+00 0.00e+00 [28,] 28 1.35e+00 0.00e+00 [29,] 29 1.54e+00 0.00e+00 [30,] 30 1.75e+00 0.00e+00 [31,] 31 1.98e+00 0.00e+00 [32,] 32 2.23e+00 0.00e+00 [33,] 33 2.49e+00 0.00e+00 [34,] 34 2.78e+00 0.00e+00 [35,] 35 3.09e+00 0.00e+00 [36,] 36 3.42e+00 5.40e-06 [37,] 37 3.77e+00 3.10e-04 [38,] 38 4.15e+00 1.63e-03 [39,] 39 4.55e+00 4.74e-03 [40,] 40 4.97e+00 1.04e-02 [41,] 41 5.42e+00 1.93e-02 [42,] 42 5.90e+00 3.23e-02 [43,] 43 6.41e+00 5.00e-02 [44,] 44 6.94e+00 7.33e-02 [45,] 45 7.50e+00 1.03e-01 [46,] 46 
8.09e+00 1.40e-01 [47,] 47 8.71e+00 1.84e-01 [48,] 48 9.37e+00 2.37e-01 [49,] 49 1.00e+01 3.00e-01 [50,] 50 1.08e+01 3.72e-01 [51,] 51 1.15e+01 4.56e-01 [52,] 52 1.23e+01 5.51e-01 [53,] 53 1.31e+01 6.58e-01 [54,] 54 1.40e+01 7.78e-01 [55,] 55 1.49e+01 9.13e-01 [56,] 56 1.58e+01 1.06e+00 [57,] 57 1.68e+01 1.23e+00 [58,] 58 1.78e+01 1.41e+00 [59,] 59 1.88e+01 1.60e+00 [60,] 60 1.99e+01 1.82e+00 [61,] 61 2.10e+01 2.05e+00 [62,] 62 2.22e+01 2.30e+00 [63,] 63 2.34e+01 2.58e+00 [64,] 64 2.46e+01 2.87e+00 [65,] 65 2.59e+01 3.18e+00 [66,] 66 2.73e+01 3.52e+00 [67,] 67 2.87e+01 3.88e+00 [68,] 68 3.01e+01 4.26e+00 [69,] 69 3.16e+01 4.66e+00 [70,] 70 3.31e+01 5.08e+00 [71,] 71 3.46e+01 5.52e+00 [72,] 72 3.62e+01 5.98e+00 [73,] 73 3.78e+01 6.45e+00 [74,] 74 3.94e+01 6.94e+00 [75,] 75 4.11e+01 7.45e+00 [76,] 76 4.28e+01 7.97e+00 [77,] 77 4.46e+01 8.51e+00 [78,] 78 4.63e+01 9.06e+00 [79,] 79 4.81e+01 9.62e+00 [80,] 80 5.00e+01 1.02e+01 [81,] 81 5.18e+01 1.08e+01 [82,] 82 5.37e+01 1.14e+01 [83,] 83 5.55e+01 1.20e+01 [84,] 84 5.74e+01 1.26e+01 [85,] 85 5.94e+01 1.32e+01 [86,] 86 6.13e+01 1.39e+01 [87,] 87 6.32e+01 1.45e+01 [88,] 88 6.52e+01 1.51e+01 [89,] 89 6.72e+01 1.58e+01 [90,] 90 6.91e+01 1.64e+01 [91,] 91 7.11e+01 1.71e+01 [92,] 92 7.31e+01 1.78e+01 [93,] 93 7.51e+01 1.84e+01 [94,] 94 7.71e+01 1.91e+01 [95,] 95 7.91e+01 1.98e+01 [96,] 96 8.11e+01 2.04e+01 [97,] 97 8.31e+01 2.11e+01 [98,] 98 8.51e+01 2.18e+01 [99,] 99 8.71e+01 2.24e+01 [100,] 100 8.91e+01 2.31e+01 attr(,"knots") [1] 5.95 35.65 65.35 95.05 > #lrm.fit(rcspline.eval(age,nk=4,inclx=TRUE), death) > > > > cleanEx(); ..nameEx <- "rcspline.plot" > > ### * rcspline.plot > > flush(stderr()); flush(stdout()) > > ### Name: rcspline.plot > ### Title: Plot Restricted Cubic Spline Function > ### Aliases: rcspline.plot > ### Keywords: regression models > > ### ** Examples > > # rcspline.plot(cad.dur, tvdlm, m=150) > # rcspline.plot(log10(cad.dur+1), tvdlm, m=150) > > > > cleanEx(); ..nameEx <- "rcspline.restate" > > ### * rcspline.restate > > flush(stderr()); flush(stdout()) > > ### Name: rcspline.restate > ### Title: Re-state Restricted Cubic Spline Function > ### Aliases: rcspline.restate > ### Keywords: regression interface character > > ### ** Examples > > set.seed(1) > x <- 1:100 > y <- x + rnorm(100, 0, 5) > xx <- rcspline.eval(x, inclx=TRUE, nk=4) > knots <- attr(xx, "knots") > coef <- lsfit(xx, y)$coef > options(digits=4) > # rcspline.restate must ignore intercept > w <- rcspline.restate(knots, coef[-1], x="{\\rm BP}") > # could also have used coef instead of coef[-1], to include intercept > cat(attr(w,"latex"), sep="\n") & & 0.97492033{\rm BP}+1.3543140\!\times\!10^{-5}({\rm BP}-5.95)_{+}^{3} \ & & -4.4634511\!\times\!10^{-5}({\rm BP}-35.65)_{+}^{3}+4.8639601\!\times\!10^{-5}({\rm BP}-65.35)_{+}^{3} \ & & -1.7548230\!\times\!10^{-5}({\rm BP}-95.05)_{+}^{3} \ > > xtrans <- eval(attr(w, "function")) > # This is an S function of a single argument > plot(x, xtrans(x), type="l") > # Plots fitted transformation > > #x <- blood.pressure > xx.simple <- cbind(x, pmax(x-knots[1],0)^3, pmax(x-knots[2],0)^3, + pmax(x-knots[3],0)^3, pmax(x-knots[4],0)^3) > pred.value <- coef[1] + xx.simple %*% w > plot(x, pred.value, type='l') # same as above > > > > cleanEx(); ..nameEx <- "reShape" > > ### * reShape > > flush(stderr()); flush(stdout()) > > ### Name: reShape > ### Title: Reshape Matrices and Serial Data > ### Aliases: reShape > ### Keywords: manip array > > ### ** Examples > > if(.R.) 
{ + set.seed(1) + Solder <- factor(sample(c('Thin','Thick'),200,TRUE),c('Thin','Thick')) + Opening <- factor(sample(c('S','M','L'), 200,TRUE),c('S','M','L')) + } else attach(solder[solder$skips > 10, ]) > tab <- table(Opening, Solder) > tab Solder Opening Thin Thick S 44 34 M 30 35 L 24 33 > reShape(tab) $Opening [1] "S" "M" "L" "S" "M" "L" $Solder [1] "Thin" "Thin" "Thin" "Thick" "Thick" "Thick" $tab [1] 44 30 24 34 35 33 > # attach(tab) # do further processing > > if(!.R.) { + g <- crosstabs( ~ Solder + Opening, data = solder, subset = skips > 10) + rowpct <- 100*attr(g,'marginals')$"N/RowTotal" # compute row pcts + rowpct + + r <- reShape(rowpct) + # note names "Solder" and "Opening" came originally from formula + # given to crosstabs + r + dotplot(Solder ~ rowpct, groups=Opening, panel=panel.superpose, data=r) + } > > # An example where a matrix is created from irregular vectors > follow <- data.frame(id=c('a','a','b','b','b','d'), + month=c(1, 2, 1, 2, 3, 2), + cholesterol=c(225,226, 320,319,318, 270)) > follow id month cholesterol 1 a 1 225 2 a 2 226 3 b 1 320 4 b 2 319 5 b 3 318 6 d 2 270 > attach(follow) > reShape(cholesterol, id=id, colvar=month) 1 2 3 a 225 226 NA b 320 319 318 d NA 270 NA > detach('follow') > # Could have done : > # reShape(cholesterol, triglyceride=trig, id=id, colvar=month) > > # Create a data frame, reshaping a long dataset in which groups are > # formed not just by subject id but by combinations of subject id and > # visit number. Also carry forward a variable that is supposed to be > # constant within subject-visit number combinations. In this example, > # it is not constant, so an arbitrary visit number will be selected. > w <- data.frame(id=c('a','a','a','a','b','b','b','d','d','d'), + visit=c( 1, 1, 2, 2, 1, 1, 2, 2, 2, 2), + k=c('A','A','B','B','C','C','D','E','F','G'), + var=c('x','y','x','y','x','y','y','x','y','z'), + val=1:10) > with(w, + reShape(val, id=data.frame(id,visit), + constant=data.frame(k), colvar=var)) id visit k x y z 1 a 1 A 1 2 NA 3 a 2 B 3 4 NA 5 b 1 C 5 6 NA 7 b 2 D NA 7 NA 8 d 2 E 8 9 10 > > # Get predictions from a regression model for 2 systematically > # varying predictors. 
Convert the predictions into a matrix, with > # rows corresponding to the predictor having the most values, and > # columns corresponding to the other predictor > # d <- expand.grid(x2=0:1, x1=1:100) > # pred <- predict(fit, d) > # reShape(pred, id=d$x1, colvar=d$x2) # makes 100 x 2 matrix > > # Reshape a wide data frame containing multiple variables representing > # repeated measurements (3 repeats on 2 variables; 4 subjects) > set.seed(33) > n <- 4 > w <- data.frame(age=rnorm(n, 40, 10), + sex=sample(c('female','male'), n,TRUE), + sbp1=rnorm(n, 120, 15), + sbp2=rnorm(n, 120, 15), + sbp3=rnorm(n, 120, 15), + dbp1=rnorm(n, 80, 15), + dbp2=rnorm(n, 80, 15), + dbp3=rnorm(n, 80, 15), row.names=letters[1:n]) > options(digits=3) > w age sex sbp1 sbp2 sbp3 dbp1 dbp2 dbp3 a 38.6 female 109 123 131 81.8 90.9 88.9 b 39.6 female 132 120 120 71.1 86.9 110.0 c 50.1 male 131 148 118 73.4 82.8 52.4 d 38.4 female 104 124 125 84.4 83.5 67.1 > > u <- reShape(w, base=c('sbp','dbp'), reps=3) > u seqno age sex sbp dbp a 1 1 38.6 female 109 81.8 a 2 2 38.6 female 123 90.9 a 3 3 38.6 female 131 88.9 b 1 1 39.6 female 132 71.1 b 2 2 39.6 female 120 86.9 b 3 3 39.6 female 120 110.0 c 1 1 50.1 male 131 73.4 c 2 2 50.1 male 148 82.8 c 3 3 50.1 male 118 52.4 d 1 1 38.4 female 104 84.4 d 2 2 38.4 female 124 83.5 d 3 3 38.4 female 125 67.1 > reShape(w, base=c('sbp','dbp'), reps=3, timevar='week', times=c(0,3,12)) week age sex sbp dbp a 1 0 38.6 female 109 81.8 a 2 3 38.6 female 123 90.9 a 3 12 38.6 female 131 88.9 b 1 0 39.6 female 132 71.1 b 2 3 39.6 female 120 86.9 b 3 12 39.6 female 120 110.0 c 1 0 50.1 male 131 73.4 c 2 3 50.1 male 148 82.8 c 3 12 50.1 male 118 52.4 d 1 0 38.4 female 104 84.4 d 2 3 38.4 female 124 83.5 d 3 12 38.4 female 125 67.1 > > > > cleanEx(); ..nameEx <- "reorder.factor" > > ### * reorder.factor > > flush(stderr()); flush(stdout()) > > ### Name: reorder.factor > ### Title: Reorder Factor Levels > ### Aliases: reorder.factor > ### Keywords: manip > > ### ** Examples > > x <- factor(c('a','b','b','c')) > v <- c(3,-1,1,-5) > w <- reorder.factor(x, v) # uses FUN=mean > w [1] a b b c Levels: c < b < a > levels(w) [1] "c" "b" "a" > class(w) [1] "ordered" "factor" > > > > cleanEx(); ..nameEx <- "rm.boot" > > ### * rm.boot > > flush(stderr()); flush(stdout()) > > ### Name: rm.boot > ### Title: Bootstrap Repeated Measurements Model > ### Aliases: rm.boot plot.rm.boot > ### Keywords: regression multivariate htest hplot > > ### ** Examples > > # Generate multivariate normal responses with equal correlations (.7) > # within subjects and no correlation between subjects > # Simulate realizations from a piecewise linear population time-response > # profile with large subject effects, and fit using a 6-knot spline > # Estimate the correlation structure from the residuals, as a function > # of the absolute time difference > > # Function to generate n p-variate normal variates with mean vector u and > # covariance matrix S > # Slight modification of function written by Bill Venables > # See also the built-in function rmvnorm > mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) { + Z <- matrix(rnorm(n * p), p, n) + t(u + t(chol(S)) %*% Z) + } > > n <- 20 # Number of subjects > sub <- .5*(1:n) # Subject effects > > # Specify functional form for time trend and compute non-stochastic component > times <- seq(0, 1, by=.1) > g <- function(times) 5*pmax(abs(times-.5),.3) > ey <- g(times) > > # Generate multivariate normal errors for 20 subjects at 11 times > # Assume equal correlations of rho=.7, independent 
subjects > > nt <- length(times) > rho <- .7 > > > set.seed(19) > errors <- mvrnorm(n, p=nt, S=diag(rep(1-rho,nt))+rho) > # Note: first random number seed used gave rise to mean(errors)=0.24! > > # Add E[Y], error components, and subject effects > y <- matrix(rep(ey,n), ncol=nt, byrow=TRUE) + errors + + matrix(rep(sub,nt), ncol=nt) > > # String out data into long vectors for times, responses, and subject ID > y <- as.vector(t(y)) > times <- rep(times, n) > id <- sort(rep(1:n, nt)) > > # Show lowess estimates of time profiles for individual subjects > f <- rm.boot(times, y, id, plot.individual=TRUE, B=25, cor.pattern='estimate', + smoother=lowess, bootstrap.type='x fixed', nk=6) 10 20 Spearman rank correlation between 26 log likelihoods assuming independence and assuming dependence: 0.899 > # In practice use B=400 or 500 > # This will compute a dependent-structure log-likelihood in addition > # to one assuming independence. By default, the dep. structure > # objective will be used by the plot method (could have specified rho=.7) > # NOTE: Estimating the correlation pattern from the residual does not > # work in cases such as this one where there are large subject effects > > # Plot fits for a random sample of 10 of the 25 bootstrap fits > plot(f, individual.boot=TRUE, ncurves=10, ylim=c(6,8.5)) > > # Plot pointwise and simultaneous confidence regions > plot(f, pointwise.band=TRUE, col.pointwise=1, ylim=c(6,8.5)) > > # Plot population response curve at average subject effect > ts <- seq(0, 1, length=100) > lines(ts, g(ts)+mean(sub), lwd=3) > > ## Not run: > ##D # > ##D # Handle a 2-sample problem in which curves are fitted > ##D # separately for males and females and we wish to estimate the > ##D # difference in the time-response curves for the two sexes. > ##D # The objective criterion will be taken by plot.rm.boot as the > ##D # total of the two sums of squared errors for the two models > ##D # > ##D knots <- rcspline.eval(c(time.f,time.m), nk=6, knots.only=TRUE) > ##D # Use same knots for both sexes, and use a times vector that > ##D # uses a range of times that is included in the measurement > ##D # times for both sexes > ##D # > ##D tm <- seq(max(min(time.f),min(time.m)), > ##D min(max(time.f),max(time.m)),length=100) > ##D > ##D f.female <- rm.boot(time.f, bp.f, id.f, knots=knots, times=tm) > ##D f.male <- rm.boot(time.m, bp.m, id.m, knots=knots, times=tm) > ##D plot(f.female) > ##D plot(f.male) > ##D # The following plots female minus male response, with > ##D # a sequence of shaded confidence band for the difference > ##D plot(f.female,f.male,multi=TRUE) > ##D > ##D # Do 1000 simulated analyses to check simultaneous coverage > ##D # probability. 
Use a null regression model with Gaussian errors
> ##D
> ##D n.per.pt <- 30
> ##D n.pt <- 10
> ##D
> ##D null.in.region <- 0
> ##D
> ##D for(i in 1:1000) {
> ##D   y <- rnorm(n.pt*n.per.pt)
> ##D   time <- rep(1:n.per.pt, n.pt)
> ##D # Add the following line and add ,id=id to rm.boot to use clustering
> ##D # id <- sort(rep(1:n.pt, n.per.pt))
> ##D # Because we are ignoring patient id, this simulation is effectively
> ##D # using 1 point from each of 300 patients, with times 1,2,3,...,30
> ##D
> ##D   f <- rm.boot(time, y, B=500, nk=5, bootstrap.type='x fixed')
> ##D   g <- plot(f, ylim=c(-1,1), pointwise=FALSE)
> ##D   null.in.region <- null.in.region + all(g$lower<=0 & g$upper>=0)
> ##D   prn(c(i=i,null.in.region=null.in.region))
> ##D }
> ##D
> ##D # Simulation Results: 905/1000 simultaneous confidence bands
> ##D # fully contained the horizontal line at zero
> ## End(Not run)
>
>
>
> cleanEx(); ..nameEx <- "samplesize.bin"
>
> ### * samplesize.bin
>
> flush(stderr()); flush(stdout())
>
> ### Name: samplesize.bin
> ### Title: Sample Size for 2-sample Binomial
> ### Aliases: samplesize.bin
> ### Keywords: htest category
>
> ### ** Examples
>
> alpha <- .05
> beta <- c(.70,.80,.90,.95)
>
> # N1 is a matrix of total sample sizes whose
> # rows vary by hypothesized treatment success probability and
> # columns vary by power
> # See Meinert's book for formulae.
>
> N1 <- samplesize.bin(alpha, beta, pit=.55, pic=.5)
> N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.60, pic=.5))
> N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.65, pic=.5))
> N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.70, pic=.5))
> attr(N1,"dimnames") <- NULL
>
> #Accounting for 5% noncompliance in the treated group
> inflation <- (1/.95)**2
> print(round(N1*inflation+.5,0))
     [,1] [,2] [,3] [,4]
[1,] 2079 2732 3784 4782
[2,]  516  676  937 1184
[3,]  225  296  409  518
[4,]  125  163  225  284
>
>
>
> cleanEx(); ..nameEx <- "sas.get"
>
> ### * sas.get
>
> flush(stderr()); flush(stdout())
>
> ### Name: sas.get
> ### Title: Convert a SAS Dataset to an S Data Frame
> ### Aliases: sas.get is.special.miss [.special.miss print.special.miss
> ###   format.special.miss sas.codes code.levels timePOSIXt
> ### Keywords: interface manip
>
> ### ** Examples
>
> ## Not run:
> ##D sas.contents("saslib", "mice")
> ##D # [1] "dose"   "ld50"   "strain" "lab_no"
> ##D # attr(, "n"):
> ##D # [1] 117
> ##D mice <- sas.get("saslib", mem="mice", var=c("dose", "strain", "ld50"))
> ##D plot(mice$dose, mice$ld50)
> ##D
> ##D nude.mice <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice",
> ##D                      ifs="if strain='nude'")
> ##D
> ##D nude.mice.dl <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice",
> ##D                         var=c("dose", "ld50"), ifs="if strain='nude'")
> ##D
> ##D # Get a dataset from current directory, recode PROC FORMAT; VALUE ...
> ##D # variables into factors with labels of the form "good(1)" "better(2)",
> ##D # get special missing values, recode missing codes .D and .R into new
> ##D # factor levels "Don't know" and "Refused to answer" for variable q1
> ##D d <- sas.get(".", "mydata", recode=2, special.miss=TRUE)
> ##D attach(d)
> ##D nl <- length(levels(q1))
> ##D lev <- c(levels(q1), "Don't know", "Refused")
> ##D q1.new <- as.integer(q1)
> ##D q1.new[is.special.miss(q1,"D")] <- nl+1
> ##D q1.new[is.special.miss(q1,"R")] <- nl+2
> ##D q1.new <- factor(q1.new, 1:(nl+2), lev)
> ##D # Note: would like to use factor() in place of as.integer ...
but > ##D # factor in this case adds "NA" as a category level > ##D > ##D d <- sas.get(".", "mydata") > ##D sas.codes(d$x) # for PROC FORMATted variables returns original data codes > ##D d$x <- code.levels(d$x) # or attach(d); x <- code.levels(x) > ##D # This makes levels such as "good" "better" "best" into e.g. > ##D # "1:good" "2:better" "3:best", if the original SAS values were 1,2,3 > ##D > ##D # Retrieve the same variables from another dataset (or an update of > ##D # the original dataset) > ##D mydata2 <- sas.get('mydata2', var=names(d)) > ##D # This only works if none of the original SAS variable names contained _ > ##D mydata2 <- cleanup.import(mydata2) # will make true integer variables > ##D > ##D # Code from Don MacQueen to generate SAS dataset to test import of > ##D # date, time, date-time variables > ##D # data ssd.test; > ##D # d1='3mar2002'd ; > ##D # dt1='3mar2002 9:31:02'dt; > ##D # t1='11:13:45't; > ##D # output; > ##D # > ##D # d1='3jun2002'd ; > ##D # dt1='3jun2002 9:42:07'dt; > ##D # t1='11:14:13't; > ##D # output; > ##D # format d1 mmddyy10. dt1 datetime. t1 time.; > ##D # run; > ## End(Not run) > > > > cleanEx(); ..nameEx <- "sasxport.get" > > ### * sasxport.get > > flush(stderr()); flush(stdout()) > > ### Name: sasxport.get > ### Title: Enhanced Importing of SAS Transport Files using read.xport > ### Aliases: sasxport.get sasdsLabels > ### Keywords: interface manip > > ### ** Examples > > ## Not run: > ##D # SAS code to generate test dataset: > ##D # libname y SASV5XPT "test2.xpt"; > ##D # > ##D # PROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN; > ##D # PROC FORMAT CNTLOUT=format;RUN; * Name, e.g. 'format', unimportant; > ##D # data test; > ##D # LENGTH race 3 age 4; > ##D # age=30; label age="Age at Beginning of Study"; > ##D # race=2; > ##D # d1='3mar2002'd ; > ##D # dt1='3mar2002 9:31:02'dt; > ##D # t1='11:13:45't; > ##D # output; > ##D # > ##D # age=31; > ##D # race=4; > ##D # d1='3jun2002'd ; > ##D # dt1='3jun2002 9:42:07'dt; > ##D # t1='11:14:13't; > ##D # output; > ##D # format d1 mmddyy10. dt1 datetime. t1 time. 
race race.; > ##D # run; > ##D # data z; LENGTH x3 3 x4 4 x5 5 x6 6 x7 7 x8 8; > ##D # DO i=1 TO 100; > ##D # x3=ranuni(3); > ##D # x4=ranuni(5); > ##D # x5=ranuni(7); > ##D # x6=ranuni(9); > ##D # x7=ranuni(11); > ##D # x8=ranuni(13); > ##D # output; > ##D # END; > ##D # DROP i; > ##D # RUN; > ##D # PROC MEANS; RUN; > ##D # PROC COPY IN=work OUT=y;SELECT test format z;RUN; *Creates test2.xpt; > ##D w <- sasxport.get('test2.xpt') > ##D # To use an existing copy of test2.xpt available on the web: > ##D w <- sasxport.get('http://hesweb1.med.virginia.edu/biostat/s/data/sas/test2.xpt') > ##D > ##D describe(w$test) # see labels, format names for dataset test > ##D # Note: if only one dataset (other than format) had been exported, > ##D # just do describe(w) as sasxport.get would not create a list for that > ##D lapply(w, describe)# see descriptive stats for both datasets > ##D contents(w$test) # another way to see variable attributes > ##D lapply(w, contents)# show contents of both datasets > ##D options(digits=7) # compare the following matrix with PROC MEANS output > ##D t(sapply(w$z, function(x) > ##D c(Mean=mean(x),SD=sqrt(var(x)),Min=min(x),Max=max(x)))) > ## End(Not run) > > > > cleanEx(); ..nameEx <- "scat1d" > > ### * scat1d > > flush(stderr()); flush(stdout()) > > ### Name: scat1d > ### Title: One-Dimensional Scatter Diagram, Spike Histogram, or Density > ### Aliases: scat1d jitter2 jitter2.default jitter2.data.frame datadensity > ### datadensity.data.frame histSpike > ### Keywords: dplot aplot hplot distribution > > ### ** Examples > > plot(x <- rnorm(50), y <- 3*x + rnorm(50)/2 ) > scat1d(x) # density bars on top of graph > scat1d(y, 4) # density bars at right > histSpike(x, add=TRUE) # histogram instead, 100 bins > histSpike(y, 4, add=TRUE) > histSpike(x, type='density', add=TRUE) # smooth density at bottom > histSpike(y, 4, type='density', add=TRUE) > > smooth <- lowess(x, y) # add nonparametric regression curve > lines(smooth) # Note: plsmo() does this > scat1d(x, y=approx(smooth, xout=x)$y) # data density on curve > scat1d(x, curve=smooth) # same effect as previous command > histSpike(x, curve=smooth, add=TRUE) # same as previous but with histogram > histSpike(x, curve=smooth, type='density', add=TRUE) > # same but smooth density over curve > > plot(x <- rnorm(250), y <- 3*x + rnorm(250)/2) > scat1d(x, tfrac=0) # dots randomly spaced from axis > scat1d(y, 4, frac=-.03) # bars outside axis > scat1d(y, 2, tfrac=.2) # same bars with smaller random fraction > > x <- c(0:3,rep(4,3),5,rep(7,10),9) > plot(x, jitter2(x)) # original versus jittered values > abline(0,1) # unique values unjittered on abline > points(x+0.1, jitter2(x, limit=FALSE), col=2) > # allow locally maximum jittering > points(x+0.2, jitter2(x, fill=1), col=3); abline(h=seq(0.5,9,1), lty=2) > # fill 3/3 instead of 1/3 > x <- rnorm(200,0,2)+1; y <- x^2 > x2 <- round((x+rnorm(200))/2)*2 > x3 <- round((x+rnorm(200))/4)*4 > dfram <- data.frame(y,x,x2,x3) > plot(dfram$x2, dfram$y) # jitter2 via scat1d > scat1d(dfram$x2, y=dfram$y, preserve=TRUE, col=2) > scat1d(dfram$x2, preserve=TRUE, frac=-0.02, col=2) > scat1d(dfram$y, 4, preserve=TRUE, frac=-0.02, col=2) > > pairs(jitter2(dfram)) # pairs for jittered data.frame > # This gets reasonable pairwise scatter plots for all combinations of > # variables where > # > # - continuous variables (with unique values) are not jittered at all, thus > # all relations between continuous variables are shown as they are, > # extreme values have exact positions. 
> # > # - discrete variables get a reasonable amount of jittering, whether they > # have 2, 3, 5, 10, 20 ... levels > # > # - different from adding noise, jitter2() will use the available space > # optimally and no value will randomly mask another > # > # If you want a scatterplot with lowess smooths on the *exact* values and > # the point clouds shown jittered, you just need > # > pairs( dfram ,panel=function(x,y) { points(jitter2(x),jitter2(y)) + lines(lowess(x,y)) } ) > > > > datadensity(dfram) # graphical snapshot of entire data frame > datadensity(dfram, group=cut2(dfram$x2,g=3)) > # stratify points and frequencies by > # x2 tertiles and use 3 colors > > # datadensity.data.frame(split(x, grouping.variable)) > # need to explicitly invoke datadensity.data.frame when the > # first argument is a list > > > > cleanEx(); ..nameEx <- "score.binary" > > ### * score.binary > > flush(stderr()); flush(stdout()) > > ### Name: score.binary > ### Title: Score a Series of Binary Variables > ### Aliases: score.binary > ### Keywords: manip > > ### ** Examples > > set.seed(1) > age <- rnorm(25, 70, 15) > previous.disease <- sample(0:1, 25, TRUE) > #Hierarchical scale, highest of 1:age>70 2:previous.disease > score.binary(age>70, previous.disease, retfactor=FALSE) [1] 0 2 0 1 1 0 1 2 2 0 2 1 0 0 2 0 0 2 1 2 1 2 1 0 1 > #Same as above but return factor variable with levels "none" "age>70" > # "previous.disease" > score.binary(age>70, previous.disease) [1] none previous.disease none age > 70 [5] age > 70 none age > 70 previous.disease [9] previous.disease none previous.disease age > 70 [13] none none previous.disease none [17] none previous.disease age > 70 previous.disease [21] age > 70 previous.disease age > 70 none [25] age > 70 Levels: none age > 70 previous.disease > > #Additive scale with weights 1:age>70 2:previous.disease > score.binary(age>70, previous.disease, fun=sum) [1] 0 3 0 1 1 0 1 3 3 0 3 1 0 0 3 0 0 3 1 3 1 3 1 0 1 > #Additive scale, equal weights > score.binary(age>70, previous.disease, fun=sum, points=c(1,1)) [1] 0 2 0 1 1 0 1 2 2 0 2 1 0 0 2 0 0 2 1 2 1 2 1 0 1 > #Same as saying points=1 > > #Union of variables, to create a new binary variable > score.binary(age>70, previous.disease, fun=any) [1] FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE [13] FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE [25] TRUE > > > > cleanEx(); ..nameEx <- "sedit" > > ### * sedit > > flush(stderr()); flush(stdout()) > > ### Name: sedit > ### Title: Character String Editing and Miscellaneous Character Handling > ### Functions > ### Aliases: sedit substring.location substring2 substring2<- > ### replace.substring.wild numeric.string all.digits > ### Keywords: manip character > > ### ** Examples > > x <- 'this string' > substring2(x, 3, 4) <- 'IS' > x [1] "thIS string" > substring2(x, 7) <- '' > x [1] "thIS s" > > substring.location('abcdefgabc', 'ab') $first [1] 1 8 $last [1] 2 9 > substring.location('abcdefgabc', 'ab', restrict=c(3,999)) $first [1] 8 $last [1] 9 > > replace.substring.wild('this is a cat','this*cat','that*dog') [1] "that is a dog" > replace.substring.wild('there is a cat','is a*', 'is not a*') [1] "there is not a cat" > replace.substring.wild('this is a cat','is a*', 'Z') [1] "this Z" > > qualify <- function(x) x==' 1.5 ' | x==' 2.5 ' > replace.substring.wild('He won 1.5 million $','won*million', + 'lost*million', test=qualify) [1] "He lost 1.5 million $" > replace.substring.wild('He won 1 million $','won*million', + 'lost*million', test=qualify) [1] "He won 
1 million $" > replace.substring.wild('He won 1.2 million $','won*million', + 'lost*million', test=numeric.string) [1] "He lost 1.2 million $" > > x <- c('a = b','c < d','hello') > sedit(x, c('=','he*o'),c('==','he*')) [1] "a == b" "c < d" "hell" > > sedit('x23', '*$', '[*]', test=numeric.string) [1] "x[23]" > sedit('23xx', '^*', 'Y_{*} ', test=all.digits) [1] "Y_{23} xx" > > replace.substring.wild("abcdefabcdef", "d*f", "xy") [1] "abcxy" > > x <- "abcd" > substring2(x, "bc") <- "BCX" > x [1] "aBCXd" > substring2(x, "B*d") <- "B*D" > x [1] "aBCXD" > > > > cleanEx(); ..nameEx <- "show.pch" > > ### * show.pch > > flush(stderr()); flush(stdout()) > > ### Name: show.pch > ### Title: Display Colors, Plotting Symbols, and Symbol Numeric Equivalents > ### Aliases: show.pch show.col character.table > ### Keywords: aplot > > ### ** Examples > > ## Not run: > ##D show.pch() > ##D show.col() > ##D character.table() > ## End(Not run) > > > cleanEx(); ..nameEx <- "smean.sd" > > ### * smean.sd > > flush(stderr()); flush(stdout()) > > ### Name: smean.sd > ### Title: Compute Summary Statistics on a Vector > ### Aliases: smean.cl.normal smean.sd smean.sdl smean.cl.boot smedian.hilow > ### Keywords: nonparametric htest > > ### ** Examples > > set.seed(1) > x <- rnorm(100) > smean.sd(x) Mean SD 0.109 0.898 > smean.sdl(x) Mean Lower Upper 0.109 -1.688 1.905 > smean.cl.normal(x) Mean Lower Upper 0.1089 -0.0693 0.2871 > smean.cl.boot(x) Mean Lower Upper 0.109 -0.062 0.270 > smedian.hilow(x, conf.int=.5) # 25th and 75th percentiles Median Lower Upper 0.114 -0.494 0.692 > > > > cleanEx(); ..nameEx <- "somers2" > > ### * somers2 > > flush(stderr()); flush(stdout()) > > ### Name: somers2 > ### Title: Somers' Dxy Rank Correlation > ### Aliases: somers2 > ### Keywords: nonparametric > > ### ** Examples > > set.seed(1) > predicted <- runif(200) > dead <- sample(0:1, 200, TRUE) > roc.area <- somers2(predicted, dead)["C"] > > > > cleanEx(); ..nameEx <- "spower" > > ### * spower > > flush(stderr()); flush(stdout()) > > ### Name: spower > ### Title: Simulate Power of 2-Sample Test for Survival under Complex > ### Conditions > ### Aliases: spower Quantile2 print.Quantile2 plot.Quantile2 logrank > ### Gompertz2 Lognorm2 Weibull2 > ### Keywords: htest survival > > ### ** Examples > > # Simulate a simple 2-arm clinical trial with exponential survival so > # we can compare power simulations of logrank-Cox test with cpower() > # Hazard ratio is constant and patients enter the study uniformly > # with follow-up ranging from 1 to 3 years > # Drop-in probability is constant at .1 and drop-out probability is > # constant at .175. Two-year survival of control patients in absence > # of drop-in is .8 (mortality=.2). 
Note that hazard rate is -log(.8)/2 > # Total sample size (both groups combined) is 1000 > # % mortality reduction by intervention (if no dropin or dropout) is 25 > # This corresponds to a hazard ratio of 0.7283 (computed by cpower) > > cpower(2, 1000, .2, 25, accrual=2, tmin=1, + noncomp.c=10, noncomp.i=17.5) Accrual duration: 2 y Minimum follow-up: 1 y Total sample size: 1000 Alpha= 0.05 2-year Mortalities Control Intervention 0.20 0.15 Hazard Rates Control Intervention 0.1116 0.0813 Probabilities of an Event During Study Control Intervention 0.198 0.149 Expected Number of Events Control Intervention 99.2 74.5 Hazard ratio: 0.728 Drop-in rate (controls):10% Non-adherence rate (intervention):17.5% Effective hazard ratio with non-compliance: 0.795 Standard deviation of log hazard ratio: 0.153 Power 0.323 > > ranfun <- Quantile2(function(x)exp(log(.8)/2*x), + hratio=function(x)0.7283156, + dropin=function(x).1, + dropout=function(x).175) Interval of time for evaluating functions:[0, 61.9 ] > > rcontrol <- function(n) ranfun(n, what='control') > rinterv <- function(n) ranfun(n, what='int') > rcens <- function(n) runif(n, 1, 3) > > set.seed(11) # So can reproduce results > spower(rcontrol, rinterv, rcens, nc=500, ni=500, + test=logrank, nsim=50) # normally use nsim=500 or 1000 10 20 30 40 50 [1] 0.24 > > # Simulate a 2-arm 5-year follow-up study for which the control group's > # survival distribution is Weibull with 1-year survival of .95 and > # 3-year survival of .7. All subjects are followed at least one year, > # and patients enter the study with linearly increasing probability after that > # Assume there is no chance of dropin for the first 6 months, then the > # probability increases linearly up to .15 at 5 years > # Assume there is a linearly increasing chance of dropout up to .3 at 5 years > # Assume that the treatment has no effect for the first 9 months, then > # it has a constant effect (hazard ratio of .75) > > # First find the right Weibull distribution for compliant control patients > sc <- Weibull2(c(1,3), c(.95,.7)) > sc function (times = NULL, alpha = 0.0512932943875506, gamma = 1.76519490623438) { exp(-alpha * (times^gamma)) } > > # Inverse cumulative distribution for case where all subjects are followed > # at least a years and then between a and b years the density rises > # as (time - a) ^ d is a + (b-a) * u ^ (1/(d+1)) > > rcens <- function(n) 1 + (5-1) * (runif(n) ^ .5) > # To check this, type hist(rcens(10000), nclass=50) > > # Put it all together > > f <- Quantile2(sc, + hratio=function(x)ifelse(x<=.75, 1, .75), + dropin=function(x)ifelse(x<=.5, 0, .15*(x-.5)/(5-.5)), + dropout=function(x).3*x/5) Interval of time for evaluating functions:[0, 16.1 ] > > par(mfrow=c(2,2)) > # par(mfrow=c(1,1)) to make legends fit > plot(f, 'all', label.curves=list(keys='lines')) > > rcontrol <- function(n) f(n, 'control') > rinterv <- function(n) f(n, 'intervention') > > set.seed(211) > spower(rcontrol, rinterv, rcens, nc=350, ni=350, + test=logrank, nsim=50) # normally nsim=500 or more 10 20 30 40 50 [1] 0.5 > par(mfrow=c(1,1)) > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "spss.get" > > ### * spss.get > > flush(stderr()); flush(stdout()) > > ### Name: spss.get > ### Title: Enhanced Importing of SPSS Files > ### Aliases: spss.get > ### Keywords: interface manip > > ### ** Examples > > ## Not run: > ##D w <- spss.get('/tmp/my.sav', datevars=c('birthdate','deathdate')) > ##D > ## End(Not run) > > > > cleanEx(); ..nameEx <- "src" > > ### * src > 
> flush(stderr()); flush(stdout()) > > ### Name: src > ### Title: Source a File from the Current Working Directory > ### Aliases: src > ### Keywords: file programming utilities > > ### ** Examples > > ## Not run: > ##D src(myfile) # source("myfile.s") > ##D src() # re-source myfile.s > ## End(Not run) > > > > cleanEx(); ..nameEx <- "store" > > ### * store > > flush(stderr()); flush(stdout()) > > ### Name: store > ### Title: Store an Object Permanently > ### Aliases: store stores storeTemp > ### Keywords: data > > ### ** Examples > > ## Not run: > ##D attach(database, 1) #this database takes precedence > ##D store() #new objects to go under database in memory > ##D #this doesn't work in R > ##D f <- lm(y ~ x) > ##D store(f) #store f under name "f" in .Data or .GlobalEnv > ##D #uses assign() with immediate=T > ##D store(f, "final.model") #immediately store f under "final.model" in .Data > ##D store() #store future objects in .Data.tempnnnn > ##D x <- runif(1000) #x will disappear at session end unless > ##D store(x) #this statement appears -> store in .Data > ##D stores(x, y, z) #store x,y,z in .Data under names x,y,z > ##D storeTemp(x) #put x as name 'x' in frame 0 > ##D #for R, attach .GlobalTemp and store it there > ##D storeTemp(x,'X') #same as previous but under the name X > ## End(Not run) > > > > cleanEx(); ..nameEx <- "summarize" > > ### * summarize > > flush(stderr()); flush(stdout()) > > ### Name: summarize > ### Title: Summarize Scalars or Matrices by Cross-Classification > ### Aliases: summarize mApply asNumericMatrix matrix2dataFrame subsAttr > ### Keywords: category manip > > ### ** Examples > > ## Not run: > ##D s <- summarize(ap>1, llist(size=cut2(sz, g=4), bone), mean, > ##D stat.name='Proportion') > ##D dotplot(Proportion ~ size | bone, data=s7) > ## End(Not run) > > set.seed(1) > temperature <- rnorm(300, 70, 10) > month <- sample(1:12, 300, TRUE) > year <- sample(2000:2001, 300, TRUE) > g <- function(x)c(Mean=mean(x,na.rm=TRUE),Median=median(x,na.rm=TRUE)) > summarize(temperature, month, g) month temperature Median 1 1 72.9 73.8 5 2 70.4 68.0 6 3 72.3 72.2 7 4 65.1 67.0 8 5 71.0 70.7 9 6 70.3 67.4 10 7 68.7 69.3 11 8 68.9 69.4 12 9 72.4 72.1 2 10 69.4 68.6 3 11 67.7 68.6 4 12 71.6 73.3 > mApply(temperature, month, g) Mean Median 1 72.9 73.8 2 70.4 68.0 3 72.3 72.2 4 65.1 67.0 5 71.0 70.7 6 70.3 67.4 7 68.7 69.3 8 68.9 69.4 9 72.4 72.1 10 69.4 68.6 11 67.7 68.6 12 71.6 73.3 > > mApply(temperature, month, mean, na.rm=TRUE) 1 2 3 4 5 6 7 8 9 10 11 12 72.9 70.4 72.3 65.1 71.0 70.3 68.7 68.9 72.4 69.4 67.7 71.6 > w <- summarize(temperature, month, mean, na.rm=TRUE) > if(.R.) 
library(lattice) > xyplot(temperature ~ month, data=w) # plot mean temperature by month > > w <- summarize(temperature, llist(year,month), + quantile, probs=c(.5,.25,.75), na.rm=TRUE, type='matrix') > xYplot(Cbind(temperature[,1],temperature[,-1]) ~ month | year, data=w) Loading required package: grid > mApply(temperature, llist(year,month), + quantile, probs=c(.5,.25,.75), na.rm=TRUE) , , = 50% year month 2000 2001 1 75.1 73.7 2 67.2 68.4 3 68.2 74.6 4 67.0 66.7 5 69.5 72.5 6 70.4 66.1 7 69.0 70.0 8 65.4 70.3 9 74.0 70.1 10 69.5 65.7 11 63.9 74.1 12 74.8 71.8 , , = 25% year month 2000 2001 1 64.9 66.8 2 64.3 66.1 3 64.0 67.8 4 58.5 65.0 5 63.3 69.2 6 64.7 65.2 7 64.1 61.8 8 59.2 66.7 9 66.7 67.4 10 63.3 63.7 11 60.3 69.2 12 66.7 63.8 , , = 75% year month 2000 2001 1 79.5 76.4 2 76.4 71.3 3 77.6 77.6 4 72.9 69.0 5 77.2 75.5 6 77.7 77.2 7 73.3 77.3 8 72.9 78.2 9 77.6 77.9 10 75.3 74.3 11 68.5 77.4 12 77.6 73.9 > > # Compute the median and outer quartiles. The outer quartiles are > # displayed using "error bars" > set.seed(111) > dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100) > attach(dfr) > y <- abs(month-6.5) + 2*runif(length(month)) + year-1997 > s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5) > s month year y Lower Upper 7 1 2000 9.58 9.41 10.12 8 1 2001 10.56 10.11 10.89 9 2 2000 8.27 8.02 9.14 10 2 2001 8.88 8.73 10.11 11 3 2000 7.23 6.96 7.79 12 3 2001 8.74 8.24 8.86 13 4 2000 6.65 6.20 6.83 14 4 2001 7.88 7.64 8.07 15 5 2000 4.97 4.67 5.55 16 5 2001 6.66 6.12 7.00 17 6 2000 4.98 4.39 5.16 18 6 2001 5.47 5.22 5.82 19 7 2000 4.76 4.25 5.24 20 7 2001 5.40 5.11 6.00 21 8 2000 5.11 4.89 5.47 22 8 2001 6.14 6.12 6.86 23 9 2000 6.69 6.53 7.19 24 9 2001 7.58 6.93 7.85 1 10 2000 7.65 7.41 7.99 2 10 2001 8.99 8.65 9.27 3 11 2000 8.63 8.22 8.95 4 11 2001 9.03 8.75 9.71 5 12 2000 9.45 9.04 9.98 6 12 2001 10.65 10.57 10.81 > mApply(y, llist(month,year), smedian.hilow, conf.int=.5) , , = Median month year 1 2 3 4 5 6 7 8 9 10 11 12 2000 9.58 8.27 7.23 6.65 4.97 4.98 4.76 5.11 6.69 7.65 8.63 9.45 2001 10.56 8.88 8.74 7.88 6.66 5.47 5.40 6.14 7.58 8.99 9.03 10.65 , , = Lower month year 1 2 3 4 5 6 7 8 9 10 11 12 2000 9.41 8.02 6.96 6.20 4.67 4.39 4.25 4.89 6.53 7.41 8.22 9.04 2001 10.11 8.73 8.24 7.64 6.12 5.22 5.11 6.12 6.93 8.65 8.75 10.57 , , = Upper month year 1 2 3 4 5 6 7 8 9 10 11 12 2000 10.1 9.14 7.79 6.83 5.55 5.16 5.24 5.47 7.19 7.99 8.95 9.98 2001 10.9 10.11 8.86 8.07 7.00 5.82 6.00 6.86 7.85 9.27 9.71 10.81 > > xYplot(Cbind(y,Lower,Upper) ~ month, groups=year, data=s, + keys='lines', method='alt') > # Can also do: > s <- summarize(y, llist(month,year), quantile, probs=c(.5,.25,.75), + stat.name=c('y','Q1','Q3')) > xYplot(Cbind(y, Q1, Q3) ~ month, groups=year, data=s, keys='lines') > # To display means and bootstrapped nonparametric confidence intervals > # use for example: > s <- summarize(y, llist(month,year), smean.cl.boot) > xYplot(Cbind(y, Lower, Upper) ~ month | year, data=s) > > # For each subject use the trapezoidal rule to compute the area under > # the (time,response) curve using the Hmisc trap.rule function > x <- cbind(time=c(1,2,4,7, 1,3,5,10),response=c(1,3,2,4, 1,3,2,4)) > subject <- c(rep(1,4),rep(2,4)) > trap.rule(x[1:4,1],x[1:4,2]) [1] 16 > summarize(x, subject, function(y) trap.rule(y[,1],y[,2])) subject x 1 1 16 2 2 24 > > ## Not run: > ##D # Another approach would be to properly re-shape the mm array below > ##D # This assumes no missing cells. There are many other approaches. 
> ##D # mApply will do this well while allowing for missing cells. > ##D m <- tapply(y, list(year,month), quantile, probs=c(.25,.5,.75)) > ##D mm <- array(unlist(m), dim=c(3,2,12), > ##D dimnames=list(c('lower','median','upper'),c('1997','1998'), > ##D as.character(1:12))) > ##D # aggregate will help but it only allows you to compute one quantile > ##D # at a time; see also the Hmisc mApply function > ##D dframe <- aggregate(y, list(Year=year,Month=month), quantile, probs=.5) > ##D > ##D # Compute expected life length by race assuming an exponential > ##D # distribution - can also use summarize > ##D g <- function(y) { # computations for one race group > ##D futime <- y[,1]; event <- y[,2] > ##D sum(futime)/sum(event) # assume event=1 for death, 0=alive > ##D } > ##D mApply(cbind(followup.time, death), race, g) > ##D > ##D # To run mApply on a data frame: > ##D m <- mApply(asNumericMatrix(x), race, h) > ##D # Here assume h is a function that returns a matrix similar to x > ##D at <- subsAttr(x) # get original attributes and storage modes > ##D matrix2dataFrame(m, at) > ##D > ##D # Get stratified weighted means > ##D g <- function(y) wtd.mean(y[,1],y[,2]) > ##D summarize(cbind(y, wts), llist(sex,race), g, stat.name='y') > ##D mApply(cbind(y,wts), llist(sex,race), g) > ##D > ##D # Compare speed of mApply vs. by for computing > ##D d <- data.frame(sex=sample(c('female','male'),100000,TRUE), > ##D country=sample(letters,100000,TRUE), > ##D y1=runif(100000), y2=runif(100000)) > ##D g <- function(x) { > ##D y <- c(median(x[,'y1']-x[,'y2']), > ##D med.sum =median(x[,'y1']+x[,'y2'])) > ##D names(y) <- c('med.diff','med.sum') > ##D y > ##D } > ##D > ##D system.time(by(d, llist(sex=d$sex,country=d$country), g)) > ##D system.time({ > ##D x <- asNumericMatrix(d) > ##D a <- subsAttr(d) > ##D m <- mApply(x, llist(sex=d$sex,country=d$country), g) > ##D }) > ##D system.time({ > ##D x <- asNumericMatrix(d) > ##D summarize(x, llist(sex=d$sex, country=d$country), g) > ##D }) > ##D > ##D # An example where each subject has one record per diagnosis but sex of > ##D # subject is duplicated for all the rows a subject has. 
Get the cross- > ##D # classified frequencies of diagnosis (dx) by sex and plot the results > ##D # with a dot plot > ##D > ##D count <- rep(1,length(dx)) > ##D d <- summarize(count, llist(dx,sex), sum) > ##D Dotplot(dx ~ count | sex, data=d) > ## End(Not run) > detach('dfr') > > # Run summarize on a matrix to get column means > x <- c(1:19,NA) > y <- 101:120 > z <- cbind(x, y) > g <- c(rep(1, 10), rep(2, 10)) > summarize(z, g, colMeans, na.rm=TRUE, stat.name='x') g x y 1 1 5.5 106 2 2 15.0 116 > # Also works on an all numeric data frame > summarize(as.data.frame(z), g, colMeans, na.rm=TRUE, stat.name='x') g x y 1 1 5.5 106 2 2 15.0 116 > > > > cleanEx(); ..nameEx <- "summary.formula" > > ### * summary.formula > > flush(stderr()); flush(stdout()) > > ### Name: summary.formula > ### Title: Summarize Data for Making Tables and Plots > ### Aliases: summary.formula stratify print.summary.formula.response > ### plot.summary.formula.response latex.summary.formula.response > ### print.summary.formula.reverse plot.summary.formula.reverse > ### latex.summary.formula.reverse [.summary.formula.response > ### print.summary.formula.cross latex.summary.formula.cross > ### formula.summary.formula.cross na.retain cumcategory mChoice > ### as.character.mChoice > ### Keywords: category interface hplot manip > > ### ** Examples > > options(digits=3) > set.seed(173) > sex <- factor(sample(c("m","f"), 500, rep=TRUE)) > age <- rnorm(500, 50, 5) > treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE)) > > # Generate a 3-choice variable; each of 3 variables has 5 possible levels > symp <- c('Headache','Stomach Ache','Hangnail', + 'Muscle Ache','Depressed') > symptom1 <- sample(symp, 500,TRUE) > symptom2 <- sample(symp, 500,TRUE) > symptom3 <- sample(symp, 500,TRUE) > Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms') > table(as.character(Symptoms)) Depressed Depressed,Hangnail 5 26 Depressed,Hangnail,Headache Depressed,Hangnail,Muscle Ache 29 27 Depressed,Hangnail,Stomach Ache Depressed,Headache 17 27 Depressed,Headache,Muscle Ache Depressed,Headache,Stomach Ache 20 19 Depressed,Muscle Ache Depressed,Muscle Ache,Stomach Ache 16 26 Depressed,Stomach Ache Hangnail 27 6 Hangnail,Headache Hangnail,Headache,Muscle Ache 25 27 Hangnail,Headache,Stomach Ache Hangnail,Muscle Ache 24 23 Hangnail,Muscle Ache,Stomach Ache Hangnail,Stomach Ache 20 33 Headache Headache,Muscle Ache 3 21 Headache,Muscle Ache,Stomach Ache Headache,Stomach Ache 22 27 Muscle Ache Muscle Ache,Stomach Ache 2 24 Stomach Ache 4 > > # Note: In this example, some subjects have the same symptom checked > # multiple times; in practice these redundant selections would be NAs > # mChoice will ignore these redundant selections > # If the multiple choices to a single survey question were already > # stored as a series of T/F yes/no present/absent questions we could do: > # Symptoms <- cbind(headache,stomach.ache,hangnail,muscle.ache,depressed) > # where the 5 input variables are all of the same type: 0/1,logical,char. > # These variables cannot be factors in this case as cbind would > # store integer codes instead of character strings. > # To give better column names can use > # cbind(Headache=headache, 'Stomach Ache'=stomach.ache, ...) 
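> # A hedged sketch of that indicator-variable route, reusing the symptom
> # variables defined above (only 2 of the 5 columns shown; Symptoms2 is a
> # made-up name):
> # headache  <- symptom1=='Headache'  | symptom2=='Headache'  | symptom3=='Headache'
> # depressed <- symptom1=='Depressed' | symptom2=='Depressed' | symptom3=='Depressed'
> # Symptoms2 <- cbind(Headache=headache, Depressed=depressed)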
> > # Following 8 commands only for checking mChoice > data.frame(symptom1,symptom2,symptom3)[1:10,] symptom1 symptom2 symptom3 1 Depressed Depressed Hangnail 2 Depressed Hangnail Headache 3 Depressed Stomach Ache Muscle Ache 4 Headache Muscle Ache Headache 5 Depressed Headache Stomach Ache 6 Depressed Headache Hangnail 7 Hangnail Headache Depressed 8 Headache Depressed Headache 9 Hangnail Muscle Ache Hangnail 10 Headache Hangnail Headache > Symptoms[1:10,] # Print first 10 subjects' new binary indicators Primary Symptoms Depressed Hangnail Headache Muscle Ache Stomach Ache [1,] TRUE TRUE FALSE FALSE FALSE [2,] TRUE TRUE TRUE FALSE FALSE [3,] TRUE FALSE FALSE TRUE TRUE [4,] FALSE FALSE TRUE TRUE FALSE [5,] TRUE FALSE TRUE FALSE TRUE [6,] TRUE TRUE TRUE FALSE FALSE [7,] TRUE TRUE TRUE FALSE FALSE [8,] TRUE FALSE TRUE FALSE FALSE [9,] FALSE TRUE FALSE TRUE FALSE [10,] FALSE TRUE TRUE FALSE FALSE > > meanage <- if(.R.)double(5) else single(5) > for(j in 1:5) meanage[j] <- mean(age[Symptoms[,j]]) > names(meanage) <- dimnames(Symptoms)[[2]] > meanage Depressed Hangnail Headache Muscle Ache Stomach Ache 50.2 50.0 49.8 49.7 50.2 > > # Manually compute mean age for 2 symptoms > mean(age[symptom1=='Headache' | symptom2=='Headache' | symptom3=='Headache']) [1] 49.8 > mean(age[symptom1=='Hangnail' | symptom2=='Hangnail' | symptom3=='Hangnail']) [1] 50 > > #Frequency table sex*treatment, sex*Symptoms > summary(sex ~ treatment + Symptoms, fun=table) sex N=500 +----------------+------------+---+---+---+ | | |N |f |m | +----------------+------------+---+---+---+ |treatment |Drug |263|140|123| | |Placebo |237|133|104| +----------------+------------+---+---+---+ |Primary Symptoms|Depressed |239|130|109| | |Hangnail |257|143|114| | |Headache |244|131|113| | |Muscle Ache |228|127|101| | |Stomach Ache|243|133|110| +----------------+------------+---+---+---+ |Overall | |500|273|227| +----------------+------------+---+---+---+ > # could also do summary(sex ~ treatment + mChoice(symptom1,...),...) 
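> # A compact equivalent of the meanage loop above, assuming Symptoms
> # subsets like the logical matrix printed earlier:
> # sapply(1:5, function(j) mean(age[Symptoms[, j]]))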
> > #Compute mean age, separately by 3 variables > summary(age ~ sex + treatment + Symptoms) age N=500 +----------------+------------+---+----+ | | |N |age | +----------------+------------+---+----+ |sex |f |273|50.1| | |m |227|49.9| +----------------+------------+---+----+ |treatment |Drug |263|49.9| | |Placebo |237|50.1| +----------------+------------+---+----+ |Primary Symptoms|Depressed |239|50.2| | |Hangnail |257|50.0| | |Headache |244|49.8| | |Muscle Ache |228|49.7| | |Stomach Ache|243|50.2| +----------------+------------+---+----+ |Overall | |500|50.0| +----------------+------------+---+----+ > > summary(age ~ sex + treatment, method="cross") mean by sex, treatment +---+ |N | |age| +---+ +---+----+-------+----+ |sex|Drug|Placebo| ALL| +---+----+-------+----+ |f |140 | 133 |273 | | |50.1| 50.1 |50.1| +---+----+-------+----+ |m |123 | 104 |227 | | |49.6| 50.2 |49.9| +---+----+-------+----+ |ALL|263 | 237 |500 | | |49.9| 50.1 |50.0| +---+----+-------+----+ > # Note: method="cross" will not allow mChoice variables > > f <- summary(treatment ~ age + sex + Symptoms, method="reverse", test=TRUE) > f Descriptive Statistics by treatment +----------------------------+----------------------+----------------------+------------------------------+ | |Drug |Placebo | Test | | |(N=263) |(N=237) |Statistic | +----------------------------+----------------------+----------------------+------------------------------+ |age | 46.5/49.9/53.2| 46.7/50.0/53.4| F=0.1 d.f.=1,498 P=0.754 | +----------------------------+----------------------+----------------------+------------------------------+ |sex : m | 47% (123) | 44% (104) |Chi-square=0.42 d.f.=1 P=0.517| +----------------------------+----------------------+----------------------+------------------------------+ |Primary Symptoms : Depressed| 48% (126) | 48% (113) |Chi-square=0.00 d.f.=1 P=0.959| +----------------------------+----------------------+----------------------+------------------------------+ | Hangnail | 53% (140) | 49% (117) |Chi-square=0.75 d.f.=1 P=0.388| +----------------------------+----------------------+----------------------+------------------------------+ | Headache | 46% (122) | 51% (122) |Chi-square=1.29 d.f.=1 P=0.256| +----------------------------+----------------------+----------------------+------------------------------+ | Muscle Ache | 46% (121) | 45% (107) |Chi-square=0.04 d.f.=1 P=0.847| +----------------------------+----------------------+----------------------+------------------------------+ | Stomach Ache | 49% (130) | 48% (113) |Chi-square=0.15 d.f.=1 P=0.696| +----------------------------+----------------------+----------------------+------------------------------+ > # trio of numbers represent 25th, 50th, 75th percentile > print(f, long=TRUE) Descriptive Statistics by treatment +----------------+----------------------+----------------------+------------------------------+ | |Drug |Placebo | Test | | |(N=263) |(N=237) |Statistic | +----------------+----------------------+----------------------+------------------------------+ |age | 46.5/49.9/53.2| 46.7/50.0/53.4| F=0.1 d.f.=1,498 P=0.754 | +----------------+----------------------+----------------------+------------------------------+ |sex | | |Chi-square=0.42 d.f.=1 P=0.517| +----------------+----------------------+----------------------+------------------------------+ | m | 47% (123) | 44% (104) | | +----------------+----------------------+----------------------+------------------------------+ |Primary Symptoms| | | | 
+----------------+----------------------+----------------------+------------------------------+ | Depressed | 48% (126) | 48% (113) |Chi-square=0.00 d.f.=1 P=0.959| +----------------+----------------------+----------------------+------------------------------+ | Hangnail | 53% (140) | 49% (117) |Chi-square=0.75 d.f.=1 P=0.388| +----------------+----------------------+----------------------+------------------------------+ | Headache | 46% (122) | 51% (122) |Chi-square=1.29 d.f.=1 P=0.256| +----------------+----------------------+----------------------+------------------------------+ | Muscle Ache | 46% (121) | 45% (107) |Chi-square=0.04 d.f.=1 P=0.847| +----------------+----------------------+----------------------+------------------------------+ | Stomach Ache| 49% (130) | 48% (113) |Chi-square=0.15 d.f.=1 P=0.696| +----------------+----------------------+----------------------+------------------------------+ > plot(f) > plot(f, conType='bp', prtest='P') > bpplt() # annotated example showing layout of bp plot > > #Compute predicted probability from a logistic regression model > #For different stratifications compute receiver operating > #characteristic curve areas (C-indexes) > predicted <- plogis(.4*(sex=="m")+.15*(age-50)) > positive.diagnosis <- ifelse(runif(500)<=predicted, 1, 0) > roc <- function(z) { + x <- z[,1]; + y <- z[,2]; + n <- length(x); + if(n<2)return(c(ROC=NA)); + n1 <- sum(y==1); + c(ROC= (mean(rank(x)[y==1])-(n1+1)/2)/(n-n1) ); + } > y <- cbind(predicted, positive.diagnosis) > options(digits=2) > summary(y ~ age + sex, fun=roc) y N=500 +-------+-----------+---+----+ | | |N |ROC | +-------+-----------+---+----+ |age |[36.8,46.7)|125|0.63| | |[46.7,50.0)|125|0.56| | |[50.0,53.3)|125|0.55| | |[53.3,67.5]|125|0.52| +-------+-----------+---+----+ |sex |f |273|0.70| | |m |227|0.65| +-------+-----------+---+----+ |Overall| |500|0.68| +-------+-----------+---+----+ > > options(digits=3) > summary(y ~ age + sex, fun=roc, method="cross") roc by age, sex +---+ |N | |ROC| +---+ +-----------+-----+-----+-----+ | age | f | m | ALL | +-----------+-----+-----+-----+ |[36.8,46.7)| 73 | 52 |125 | | |0.597|0.656|0.629| +-----------+-----+-----+-----+ |[46.7,50.0)| 62 | 63 |125 | | |0.454|0.550|0.556| +-----------+-----+-----+-----+ |[50.0,53.3)| 69 | 56 |125 | | |0.489|0.590|0.546| +-----------+-----+-----+-----+ |[53.3,67.5]| 69 | 56 |125 | | |0.521|0.593|0.515| +-----------+-----+-----+-----+ |ALL |273 |227 |500 | | |0.697|0.647|0.676| +-----------+-----+-----+-----+ > > #Use stratify() to produce a table in which time intervals go down the > #page and going across 3 continuous variables are summarized using > #quartiles, and are stratified by two treatments > > set.seed(1) > d <- expand.grid(visit=1:5, treat=c('A','B'), reps=1:100) > d$sysbp <- rnorm(100*5*2, 120, 10) > label(d$sysbp) <- 'Systolic BP' > d$diasbp <- rnorm(100*5*2, 80, 7) > d$diasbp[1] <- NA > d$age <- rnorm(100*5*2, 50, 12) > g <- function(y) { + N <- apply(y, 2, function(w) sum(!is.na(w))) + h <- function(x) { + qu <- quantile(x, c(.25,.5,.75), na.rm=TRUE) + names(qu) <- c('Q1','Q2','Q3') + c(N=sum(!is.na(x)), qu) + } + w <- as.vector(apply(y, 2, h)) + names(w) <- as.vector( outer(c('N','Q1','Q2','Q3'), dimnames(y)[[2]], + function(x,y) paste(y,x))) + w + } > #Use na.rm=FALSE to count NAs separately by column > s <- summary(cbind(age,sysbp,diasbp) ~ visit + stratify(treat), + na.rm=FALSE, fun=g, data=d) > #The result is very wide. 
> 
> #Use stratify() to produce a table in which visits go down the page
> #and, going across, 3 continuous variables are summarized using
> #quartiles, stratified by two treatments
> 
> set.seed(1)
> d <- expand.grid(visit=1:5, treat=c('A','B'), reps=1:100)
> d$sysbp  <- rnorm(100*5*2, 120, 10)
> label(d$sysbp) <- 'Systolic BP'
> d$diasbp <- rnorm(100*5*2, 80, 7)
> d$diasbp[1] <- NA
> d$age    <- rnorm(100*5*2, 50, 12)
> g <- function(y) {
+   N <- apply(y, 2, function(w) sum(!is.na(w)))
+   h <- function(x) {
+     qu <- quantile(x, c(.25,.5,.75), na.rm=TRUE)
+     names(qu) <- c('Q1','Q2','Q3')
+     c(N=sum(!is.na(x)), qu)
+   }
+   w <- as.vector(apply(y, 2, h))
+   names(w) <- as.vector(outer(c('N','Q1','Q2','Q3'), dimnames(y)[[2]],
+                               function(x,y) paste(y,x)))
+   w
+ }
> #Use na.rm=FALSE to count NAs separately by column
> s <- summary(cbind(age,sysbp,diasbp) ~ visit + stratify(treat),
+              na.rm=FALSE, fun=g, data=d)
> #The result is very wide.  Re-do, putting treatment vertically
> x <- with(d, factor(paste('Visit', visit, treat)))
> summary(cbind(age,sysbp,diasbp) ~ x, na.rm=FALSE, fun=g, data=d)
cbind(age, sysbp, diasbp)    N=1000

+-------+---------+----+-----+------+------+------+-------+--------+--------+--------+--------+---------+---------+---------+
|       |         |N   |age N|age Q1|age Q2|age Q3|sysbp N|sysbp Q1|sysbp Q2|sysbp Q3|diasbp N|diasbp Q1|diasbp Q2|diasbp Q3|
+-------+---------+----+-----+------+------+------+-------+--------+--------+--------+--------+---------+---------+---------+
|x      |Visit 1 A| 100| 100 |43.6  |49.8  |59.5  | 100   |113     |118     |127     | 99     |75.6     |79.3     |83.4     |
|       |Visit 1 B| 100| 100 |42.1  |49.1  |56.5  | 100   |111     |119     |127     |100     |75.8     |79.6     |84.1     |
|       |Visit 2 A| 100| 100 |41.6  |48.1  |58.3  | 100   |116     |122     |129     |100     |76.7     |79.9     |85.1     |
|       |Visit 2 B| 100| 100 |43.3  |49.9  |58.0  | 100   |113     |119     |127     |100     |73.4     |79.2     |86.0     |
|       |Visit 3 A| 100| 100 |43.8  |51.0  |61.8  | 100   |113     |120     |127     |100     |73.9     |78.9     |83.5     |
|       |Visit 3 B| 100| 100 |42.3  |50.3  |58.9  | 100   |115     |120     |127     |100     |75.8     |79.5     |85.3     |
|       |Visit 4 A| 100| 100 |44.9  |50.1  |58.6  | 100   |111     |117     |122     |100     |74.6     |81.4     |86.1     |
|       |Visit 4 B| 100| 100 |38.6  |47.2  |56.2  | 100   |113     |120     |126     |100     |74.3     |79.5     |84.7     |
|       |Visit 5 A| 100| 100 |46.2  |52.1  |57.6  | 100   |114     |120     |127     |100     |74.8     |80.6     |85.3     |
|       |Visit 5 B| 100| 100 |39.2  |49.1  |59.5  | 100   |115     |120     |127     |100     |76.9     |80.0     |84.5     |
+-------+---------+----+-----+------+------+------+-------+--------+--------+--------+--------+---------+---------+---------+
|Overall|         |1000|1000 |42.4  |49.9  |58.6  |1000   |113     |120     |127     |999     |75.1     |79.8     |85.2     |
+-------+---------+----+-----+------+------+------+-------+--------+--------+--------+--------+---------+---------+---------+
> 
> #Compose LaTeX code directly
> g <- function(y) {
+   h <- function(x) {
+     qu <- format(round(quantile(x, c(.25,.5,.75), na.rm=TRUE), 1), nsmall=1)
+     paste('{\\scriptsize(', sum(!is.na(x)),
+           ')} \\hfill{\\scriptsize ', qu[1], '} \\textbf{', qu[2],
+           '} {\\scriptsize ', qu[3], '}', sep='')
+   }
+   apply(y, 2, h)
+ }
> s <- summary(cbind(age,sysbp,diasbp) ~ visit + stratify(treat),
+              na.rm=FALSE, fun=g, data=d)
> # latex(s, prn=FALSE)
> ## need option in latex to not print n
> #Put treatment vertically
> s <- summary(cbind(age,sysbp,diasbp) ~ x, fun=g, data=d, na.rm=FALSE)
> # latex(s, prn=FALSE)
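> # Each cell produced by this g() is a small LaTeX fragment rather than a
> # number.  For the Visit 1 A age cell of the table above, for example,
> # the string would be (per the quartiles shown there)
> #   {\scriptsize(100)} \hfill{\scriptsize 43.6} \textbf{49.8} {\scriptsize 59.5}
> # i.e. the cell count, then Q1, a bold median, and Q3.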
> 
> #Plot estimated mean life length (assuming an exponential distribution)
> #separately by levels of 4 other variables.  Repeat the analysis
> #by levels of a stratification variable, drug.  Automatically break
> #continuous variables into tertiles.
> #We are using the default, method='response'
> ## Not run: 
> ##D life.expect <- function(y) c(Years=sum(y[,1])/sum(y[,2]))
> ##D attach(pbc)
> ##D S <- Surv(follow.up.time, death)
> ##D s2 <- summary(S ~ age + albumin + ascites + edema + stratify(drug),
> ##D               fun=life.expect, g=3)
> ##D 
> ##D #Note: You can summarize other response variables using the same
> ##D #independent variables using e.g. update(s2, response~.), or you
> ##D #can change the list of independent variables using e.g.
> ##D #update(s2, response ~ . - ascites) or update(s2, . ~ . - ascites)
> ##D #You can also print, typeset, or plot subsets of s2, e.g.
> ##D #plot(s2[c('age','albumin'),]) or plot(s2[1:2,])
> ##D 
> ##D s2   # invokes print.summary.formula.response
> ##D 
> ##D #Plot results as a separate dot chart for each of the 3 strata levels
> ##D par(mfrow=c(2,2))
> ##D plot(s2, cex.labels=.6, xlim=c(0,40), superposeStrata=FALSE)
> ##D 
> ##D #Typeset table, creating s2.tex
> ##D w <- latex(s2, cdec=1)
> ##D #Typeset table but just print the LaTeX code
> ##D latex(s2, file="")   # useful for Sweave
> ##D 
> ##D #Take control of the groups used for age.  Compute 3 quartiles for
> ##D #both cholesterol and bilirubin (excluding observations that are
> ##D #missing on EITHER ONE)
> ##D 
> ##D age.groups <- cut2(age, c(45,60))
> ##D g <- function(y) apply(y, 2, quantile, c(.25,.5,.75))
> ##D y <- cbind(Chol=chol, Bili=bili)
> ##D label(y) <- 'Cholesterol and Bilirubin'
> ##D #You can give new column names that are not legal S-Plus names
> ##D #by enclosing them in quotes, e.g. 'Chol (mg/dl)'=chol
> ##D 
> ##D s3 <- summary(y ~ age.groups + ascites, fun=g)
> ##D 
> ##D par(mfrow=c(1,2), oma=c(3,0,3,0))  # allow outer margins for overall title
> ##D for(ivar in 1:2) {
> ##D   isub <- (1:3)+(ivar-1)*3         # *3 = number of quantiles/variable
> ##D   plot(s3, which=isub, main='',
> ##D        xlab=c('Cholesterol','Bilirubin')[ivar],
> ##D        pch=c(91,16,93))            # [, closed circle, ]
> ##D }
> ##D mtext(paste('Quartiles of', label(y)), adj=.5, outer=TRUE, cex=1.75)
> ##D #Overall (outer) title
> ##D 
> ##D prlatex(latex(s3, trios=TRUE))
> ##D # trios -> collapse the 3 quartiles into one cell
> ##D 
> ##D #Summarize only bilirubin, but do it with two statistics:
> ##D #the mean and the median.  Make separate tables for the two randomized
> ##D #groups and make plots for the active arm.
> ##D 
> ##D g <- function(y) c(Mean=mean(y), Median=median(y))
> ##D 
> ##D for(sub in c("D-penicillamine", "placebo")) {
> ##D   ss <- summary(bili ~ age.groups + ascites + chol, fun=g,
> ##D                 subset=drug==sub)
> ##D   cat('\n',sub,'\n\n')
> ##D   print(ss)
> ##D 
> ##D   if(sub=='D-penicillamine') {
> ##D     par(mfrow=c(1,1))
> ##D     plot(ss, which=1:2, dotfont=c(1,-1), subtitles=FALSE, main='')
> ##D     #1=mean, 2=median; -1 font = open circle
> ##D     title(sub='Closed circle: mean; Open circle: median', adj=0)
> ##D     title(sub=sub, adj=1)
> ##D   }
> ##D 
> ##D   w <- latex(ss, append=TRUE, fi='my.tex',
> ##D              label=if(sub=='placebo') 's4b' else 's4a',
> ##D              caption=paste(label(bili),' {\\em (',sub,')}', sep=''))
> ##D   #Note symbolic labels for the tables for the two subsets: s4a, s4b
> ##D   prlatex(w)
> ##D }
> ##D 
> ##D #Now consider examples in 'reverse' format, where the lone dependent
> ##D #variable tells the summary function how to stratify all the
> ##D #'independent' variables.  This is typically used to make tables
> ##D #comparing baseline variables by treatment group, for example.
> ##D 
> ##D s5 <- summary(drug ~ bili + albumin + stage + protime + sex +
> ##D               age + spiders,
> ##D               method='reverse')
> ##D #To summarize all variables, use summary(drug ~ ., data=pbc)
> ##D #To summarize all variables with no stratification, use
> ##D #summary(~a+b+c) or summary(~., data=...)
> ##D 
> ##D options(digits=1)
> ##D print(s5, npct='both')
> ##D #npct='both' : print both numerators and denominators
> ##D plot(s5, which='categorical')
> ##D Key(locator(1))       # draw legend at mouse click
> ##D par(oma=c(3,0,0,0))   # leave outer margin at bottom
> ##D plot(s5, which='continuous')
> ##D Key2()                # draw legend at lower left corner of plot;
> ##D                       # oma= above makes this default key fit the page better
> ##D 
> ##D options(digits=3)
> ##D w <- latex(s5, npct='both', here=TRUE)   # creates s5.tex
> ##D 
> ##D #Turn to a different dataset and do cross-classifications on possibly
> ##D #more than one independent variable.  The summary function with
> ##D #method='cross' produces a data frame containing the cross-
> ##D #classifications.  This data frame is suitable for multi-panel
> ##D #trellis displays, although summarize works better for that.
> ##D 
> ##D attach(prostate)
> ##D size.quartile <- cut2(sz, g=4)
> ##D bone <- factor(bm, labels=c("no mets","bone mets"))
> ##D 
> ##D s7 <- summary(ap>1 ~ size.quartile + bone, method='cross')
> ##D #In this case, quartiles are the default, so could have said sz + bone
> ##D 
> ##D options(digits=3)
> ##D print(s7, twoway=FALSE)
> ##D s7   # same as print(s7)
> ##D w <- latex(s7, here=TRUE)   # makes s7.tex
> ##D 
> ##D library(lattice)
> ##D invisible(ps.options(reset=TRUE))
> ##D trellis.device(postscript, file='demo2.ps')
> ##D 
> ##D dotplot(S ~ size.quartile|bone, data=s7,   # s7 holds the summary stats
> ##D         xlab="Fraction ap>1", ylab="Quartile of Tumor Size")
> ##D #Can do this more quickly with summarize:
> ##D # s7 <- summarize(ap>1, llist(size=cut2(sz, g=4), bone), mean,
> ##D #                 stat.name='Proportion')
> ##D # dotplot(Proportion ~ size | bone, data=s7)
> ##D 
> ##D summary(age ~ stage, method='cross')
> ##D summary(age ~ stage, fun=quantile, method='cross')
> ##D summary(age ~ stage, fun=smean.sd, method='cross')
> ##D summary(age ~ stage, fun=smedian.hilow, method='cross')
> ##D summary(age ~ stage, fun=function(x) c(Mean=mean(x), Median=median(x)),
> ##D         method='cross')
> ##D #The next statements print real two-way tables
> ##D summary(cbind(age,ap) ~ stage + bone,
> ##D         fun=function(y) apply(y, 2, quantile, c(.25,.75)),
> ##D         method='cross')
> ##D options(digits=2)
> ##D summary(log(ap) ~ sz + bone,
> ##D         fun=function(y) c(Mean=mean(y), quantile(y)),
> ##D         method='cross')
> ##D 
> ##D #Summarize an ordered categorical response by all of the needed
> ##D #cumulative proportions
> ##D summary(cumcategory(disease.severity) ~ age + sex)
> ##D 
> ## End(Not run)
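> # cut2 (used above for age.groups and size.quartile) differs from base
> # cut: it can cut at quantiles via g=, and intervals are left-closed.
> # A small illustration, unexecuted here and based on cut2's documented
> # defaults:
> # cut2(1:10, g=2)       # two quantile groups: [ 1, 6) and [ 6,10]
> # cut2(age, c(45,60))   # three groups: [min,45), [45,60), [60,max]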
> 
> 
> 
> cleanEx(); ..nameEx <- "symbol.freq"
> 
> ### * symbol.freq
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: symbol.freq
> ### Title: Graphic Representation of a Frequency Table
> ### Aliases: symbol.freq
> ### Keywords: hplot
> 
> ### ** Examples
> 
> ## Not run: 
> ##D getHdata(titanic)
> ##D attach(titanic)
> ##D age.tertile <- cut2(titanic$age, g=3)
> ##D symbol.freq(age.tertile, pclass, marginals=TRUE, srtx=45)
> ##D detach(2)
> ## End(Not run)
> 
> 
> cleanEx(); ..nameEx <- "t.test.cluster"
> 
> ### * t.test.cluster
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: t.test.cluster
> ### Title: t-test for Clustered Data
> ### Aliases: t.test.cluster print.t.test.cluster
> ### Keywords: htest
> 
> ### ** Examples
> 
> set.seed(1)
> y <- rnorm(800)
> group <- sample(1:2, 800, TRUE)
> cluster <- sample(1:40, 800, TRUE)
> table(cluster,group)
       group
cluster  1  2
      1 11  9
      2  9 10
      3 14  8
      4 11 10
      5 15  7
      6  9 16
      7  9  5
      8 10 12
      9 11  9
     10  6 11
     11 12  8
     12 16  9
     13 12  5
     14 11 11
     15  6 11
     16 14 11
     17 14  7
     18 10 10
     19 12  8
     20 12  9
     21  5  6
     22 16 15
     23  6 11
     24 10 11
     25  8 11
     26 10  8
     27  6  9
     28 11  8
     29  6  9
     30  9 10
     31 10 10
     32  9 10
     33  9  9
     34  8  8
     35 17 12
     36  5 13
     37  9 13
     38 10  9
     39 13 10
     40 15  6
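> # With roughly 10 observations per cluster-group cell, even a small
> # intracluster correlation rho inflates the variance of the group
> # difference by approximately the design effect 1 + (nbar-1)*rho
> # (Kish's formula).  From the cluster-adjusted output below,
> # 1 + (9.85-1)*0.0114 is about 1.10, in line with the printed variance
> # correction factors.  Compare the naive and cluster-adjusted tests: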
> t.test(y ~ group)   # R only

        Welch Two Sample t-test

data:  y by group 
t = -0.757, df = 784, p-value = 0.449
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -0.1982  0.0878 
sample estimates:
mean in group 1 mean in group 2 
        -0.0428          0.0124 

> t.test.cluster(y, cluster, group)

                                          1       2
N                                       416     384
Clusters                                 40      40
Mean                                -0.0428  0.0124
SS among clusters within groups        45.9    44.7
SS within clusters within groups        369     382
MS among clusters within groups        1.16
d.f.                                     78
MS within clusters within groups       1.04
d.f.                                    720
Na                                     9.85
Intracluster correlation             0.0114
Variance Correction Factor             1.12    1.10
Variance of effect                   0.0058
Variance without cluster adjustment 0.00523
Design Effect                          1.11
Effect (Difference in Means)         0.0552
S.E. of Effect                       0.0762
0.95 Confidence limits              -0.0942  0.2045
Z Statistic                           0.724
2-sided P Value                       0.469
> # Note: negate the estimates of differences from t.test to
> # compare with t.test.cluster
> 
> 
> 
> cleanEx(); ..nameEx <- "transace"
> 
> ### * transace
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: transace
> ### Title: Additive Regression and Transformations using ace or avas
> ### Aliases: transace areg.boot print.areg.boot plot.areg.boot
> ###   predict.areg.boot summary.areg.boot print.summary.areg.boot
> ###   Function.areg.boot Mean Mean.areg.boot Quantile Quantile.areg.boot
> ###   monotone smearingEst
> ### Keywords: nonparametric smooth multivariate nonlinear regression
> 
> ### ** Examples
> 
> # xtrans <- transace(cbind(age,sex,blood.pressure,race.code),
> #                    binary='sex', monotonic='age',
> #                    categorical='race.code')
> 
> # Generate random data from the model y = exp(x1 + epsilon/3), where
> # x1 and epsilon are Gaussian(0,1)
> set.seed(171)   # to be able to reproduce the example
> x1 <- rnorm(200)
> x2 <- runif(200)   # a variable that is really unrelated to y
> x3 <- factor(sample(c('cat','dog','cow'), 200, TRUE))   # also unrelated to y
> y <- exp(x1 + rnorm(200)/3)
> f <- areg.boot(y ~ x1 + x2 + x3, B=40)
Loading required package: acepack
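> # Because y = exp(x1 + epsilon/3), the ideal response transformation is
> # log(y), which is exactly linear in x1.  The transformations estimated
> # by areg.boot should therefore look approximately logarithmic in y and
> # linear in x1, while x2 and x3 should come out flat (no association).
> # A quick visual check of the underlying linearity: plot(x1, log(y))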