### Name: anova.Design
### Title: Analysis of Variance (Wald and F Statistics)
### Aliases: anova.Design print.anova.Design text.anova.Design
###   plot.anova.Design latex.anova.Design
### Keywords: models regression htest aplot

### ** Examples

n <- 1000    # define sample size
set.seed(17) # so can reproduce the results
treat <- factor(sample(c('a','b','c'), n,TRUE))
num.diseases <- sample(0:4, n,TRUE)
age <- rnorm(n, 50, 10)
cholesterol <- rnorm(n, 200, 25)
weight <- rnorm(n, 150, 20)
sex <- factor(sample(c('female','male'), n,TRUE))
label(age) <- 'Age'      # label is in Hmisc
label(num.diseases) <- 'Number of Comorbid Diseases'
label(cholesterol) <- 'Total Cholesterol'
label(weight) <- 'Weight, lbs.'
label(sex) <- 'Sex'
units(cholesterol) <- 'mg/dl'   # uses units.default in Hmisc

# Specify population model for log odds that Y=1
L <- .1*(num.diseases-2) + .045*(age-50) +
     (log(cholesterol - 10)-5.2)*(-2*(treat=='a') +
     3.5*(treat=='b')+2*(treat=='c'))
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)

fit <- lrm(y ~ treat + scored(num.diseases) + rcs(age) +
               log(cholesterol+10) + treat:log(cholesterol+10))
anova(fit)                       # Test all factors
anova(fit, treat, cholesterol)   # Test these 2 by themselves
                                 # to get their pooled effects
g <- lrm(y ~ treat*rcs(age))
dd <- datadist(treat, num.diseases, age, cholesterol)
options(datadist='dd')
plot(g, age=NA, treat="b")
s <- anova(g)
print(s)
#p <- locator(1)                  # click mouse at upper left corner of table
p <- list(x=32,y=2.1)
text(s, at=p)                    # add anova table to regression plot
plot(s)                          # new plot - dot chart of chisq-d.f.
# latex(s)                       # nice printout - creates anova.g.tex
options(datdist=NULL)


# Simulate data with from a given model, and display exactly which
# hypotheses are being tested

set.seed(123)
age <- rnorm(500, 50, 15)
treat <- factor(sample(c('a','b','c'), 500,TRUE))
bp  <- rnorm(500, 120, 10)
y   <- ifelse(treat=='a', (age-50)*.05, abs(age-50)*.08) + 3*(treat=='c') +
       pmax(bp, 100)*.09 + rnorm(500)
f   <- ols(y ~ treat*lsp(age,50) + rcs(bp,4))
print(names(coef(f)), quote=FALSE)
specs(f)
anova(f)
an <- anova(f)
options(digits=3)
print(an, 'subscripts')
print(an, 'dots')

an <- anova(f, test='Chisq', ss=FALSE)
plot(0:1)                        # make some plot
text(an, at=list(x=1.5,y=.6))    # add anova table to plot
plot(an)                         # new plot - dot chart of chisq-d.f.
# latex(an)                      # nice printout - creates anova.f.tex

# Suppose that a researcher wants to make a big deal about a variable 
# because it has the highest adjusted chi-square.  We use the
# bootstrap to derive 0.95 confidence intervals for the ranks of all
# the effects in the model.  We use the plot method for anova, with
# pl=FALSE to suppress actual plotting of chi-square - d.f. for each
# bootstrap repetition.  We rank the negative of the adjusted
# chi-squares so that a rank of 1 is assigned to the highest.
# It is important to tell plot.anova.Design not to sort the results,
# or every bootstrap replication would have ranks of 1,2,3 for the stats.

mydata <- data.frame(x1=runif(200), x2=runif(200),
                     sex=factor(sample(c('female','male'),200,TRUE)))
set.seed(9)  # so can reproduce example
mydata$y <- ifelse(runif(200)<=plogis(mydata$x1-.5 + .5*(mydata$x2-.5) + 
                   .5*(mydata$sex=='male')),1,0)

if(.R.) {
library(boot)
b <- boot(mydata, function(data, i, ...) rank(-plot(anova(
                lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,data,subset=i)), 
                sort='none', pl=FALSE)),
                R=25)  # should really do R=500 but will take a while
Rank <- b$t0
lim <- t(apply(b$t, 2, quantile, probs=c(.025,.975)))
} else {
b <- bootstrap(mydata, rank(-plot(anova(
                lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,mydata)), sort='none', pl=FALSE)),
               B=25)  # should really do B=500 but will take a while
Rank <- b$observed
lim <- limits.emp(b)[,c(1,4)]  # get 0.025 and 0.975 quantiles
}

# Use the Hmisc Dotplot function to display ranks and their confidence
# intervals.  Sort the categories by descending adj. chi-square, for ranks
original.chisq <- plot(anova(lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,data=mydata)),
                       sort='none', pl=FALSE)
predictor <- as.factor(names(original.chisq))
predictor <- reorder.factor(predictor, -original.chisq)

Dotplot(predictor ~ Cbind(Rank, lim), pch=3, xlab='Rank', 
                main=if(.R.) expression(paste(
'Ranks and 0.95 Confidence Limits for ',chi^2,' - d.f.')) else
'Ranks and 0.95 Confidence Limits for Chi-square - d.f.')



