\name{ACF1}
\alias{ACF1}
\non_function{}
\title{Aberrant Crypt Foci in Rat Colons}
\usage{data(ACF1)}
\description{
Numbers of aberrant crypt foci (ACF) in 
section 1 of the colons of 22 rats subjected to a single
dose of the carcinogen azoxymethane (AOM), sacrificed
at 3 different times. 
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{count}{The number of ACF observed in section 1 of
each rat colon}
    \item{endtime}{Time of sacrifice, in weeks following injection
of AOM}
  }
}
\source{
 Ranjana P. Bird, Faculty of Human Ecology, University of Manitoba,
Winnipeg, Canada.
}
\references{
E.A. McLellan, A. Medline and R.P. Bird.  Dose response and
proliferative characteristics of aberrant crypt foci: putative
preneoplastic lesions in rat colon.  Carcinogenesis, 12(11): 2093-2098, 
1991.
}
\examples{
# Load the ACF1 data frame
data(ACF1)
# Variance of the counts within each sacrifice time
sapply(split(ACF1$count,ACF1$endtime),var)
plot(count ~ endtime, data=ACF1, pch=16)
pause()
print("Poisson Regression - Example 8.3")
# Poisson regression of count on endtime
ACF.glm0 <- glm(formula = count ~ endtime, family = poisson, data = ACF1)
summary(ACF.glm0)

# Is there a quadratic effect?
pause()

# Add a squared endtime term to the Poisson model
ACF.glm <- glm(formula = count ~ endtime + I(endtime^2),
  family = poisson, data = ACF1)
summary(ACF.glm)

# But is the data really Poisson?  If not, try quasipoisson:
pause()

# Same formula refitted with the quasipoisson family; this overwrites ACF.glm
ACF.glm <- glm(formula = count ~ endtime + I(endtime^2),
  family = quasipoisson, data = ACF1)
summary(ACF.glm)
}
\keyword{datasets}

\eof
\name{Cars93.summary}
\alias{Cars93.summary}
\non_function{}
\title{A Summary of the Cars93 Data set}
\usage{data(Cars93.summary)}
\description{
The \code{Cars93.summary} data frame has 6 rows and 4 columns
created from information in the \code{Cars93} data set in the Venables 
and Ripley MASS package. Each row corresponds to a different
class of car (e.g. Compact, Large, etc.).
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Min.passengers}{minimum
passenger capacity for each class of car}
    \item{Max.passengers}{maximum
passenger capacity for each class of car}
    \item{No.of.cars}{number of
cars in each class}
    \item{abbrev}{a factor with levels
    \code{C} Compact, \code{L} Large, 
    \code{M} Mid-Size, \code{Sm} Small, 
    \code{Sp} Sporty, \code{V} Van}
  }
}
\source{
Lock, R. H. (1993) 1993 New Car Data. Journal of Statistics
     Education 1(1)
}
\references{
MASS library
}
\examples{
data(Cars93.summary)
# Four equivalent ways of extracting the abbrev column (the fourth column)
type <- Cars93.summary$abbrev
type <- Cars93.summary[,4]
type <- Cars93.summary[,"abbrev"]
type <- Cars93.summary[[4]] # Take the object that is stored
                            # in the fourth list element.
type
pause()

attach(Cars93.summary)
  # R can now access the columns of Cars93.summary directly
abbrev
detach("Cars93.summary")
pause()

#  To change the name of the abbrev variable (the fourth column)
names(Cars93.summary)[4] <- "code"
pause()

#  To change all of the names, try
names(Cars93.summary) <- c("minpass","maxpass","number","code")

}
\keyword{datasets}

\eof
\name{Lottario} 
\alias{Lottario} 
\non_function{} 
\title{Ontario Lottery Data} 
\usage{data(Lottario)} 
\description{ The data frame \code{Lottario}
is a summary of 122 weekly draws of an Ontario lottery, beginning in
November, 1978.  Each draw consists of 7 numbered balls, drawn without
replacement from an urn consisting of balls numbered from 1 through 39.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Number}{the integers from 1 to 39, representing the
numbered balls}
    \item{Frequency}{the number of occurrences of each numbered ball}
}
} 
  \source{ 
The Ontario Lottery Corporation
} 
  \references{
Bellhouse, D.R. (1982). Fair is fair:  new rules for Canadian lotteries.
Canadian Public Policy - Analyse de Politiques 8: 311-320.
} 
\examples{ 
data(Lottario)
# order() returns row positions sorted by increasing Frequency.  Index the
# Number column with them so that ball numbers are reported whatever the
# row order of the data frame.  The last 7 of the 39 positions are the
# 7 most frequently chosen numbers.
Lottario$Number[order(Lottario$Frequency)][33:39]
} 
\keyword{datasets}

\eof
\name{Manitoba.lakes}
\alias{Manitoba.lakes}
\non_function{}
\title{The Nine Largest Lakes in Manitoba}
\usage{data(Manitoba.lakes)}
\description{
The \code{Manitoba.lakes} data frame has 9 rows and 2 columns.
The areas and elevations of the nine largest lakes in
Manitoba, Canada.  The geography of Manitoba (a relatively
flat province) can be divided crudely into three main
areas: a very flat prairie in the south which is at a 
relatively high elevation, a middle region consisting
mainly of forest and Precambrian rock, and a northern
region which drains more rapidly into Hudson
Bay.  All water in Manitoba that does not evaporate eventually drains 
into Hudson Bay.  
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{elevation}{a numeric vector consisting of the elevations
of the lakes (in meters)}
    \item{area}{a numeric vector consisting of the areas of
the lakes (in square kilometers)}
  }
}
\source{
The CANSIM data base at Statistics Canada.
}
\examples{
data(Manitoba.lakes)
# Plot the two columns of the data frame against each other
plot(Manitoba.lakes)
# Repeat the plot with the first row omitted
plot(Manitoba.lakes[-1,])
}
\keyword{datasets}

\eof
\name{allbacks}
\alias{allbacks}
\non_function{}
\title{Measurements on a Selection of Books}
\usage{data(allbacks)}
\description{
The \code{allbacks} data frame gives measurements
on the volume and weight of 15 books, some of which
are softback (pb) and some of which are hardback (hb).  Area
of the hardback covers is also included.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{volume}{book volumes
in cubic centimeters}
    \item{area}{hard board cover
areas in square centimeters}
    \item{weight}{book weights in grams}
    \item{cover}{a factor with levels
   \code{hb} hardback,   \code{pb} paperback}
  }
}
\source{
The bookshelf of J. H. Maindonald.
}
\examples{
print("Multiple Regression - Example 6.1")
data(allbacks)  
# Attach so that volume, weight, cover and area can be referenced directly
attach(allbacks)
# Volumes and weights split by cover type
volume.split <- split(volume, cover)
weight.split <- split(weight, cover)
# Plot the hb books first, on axes wide enough for all books,
# then add the pb books in colour 2
plot(weight.split$hb ~ volume.split$hb, pch=16, xlim=range(volume), ylim=range(weight),
     ylab="Weight (g)", xlab="Volume (cc)")
points(weight.split$pb ~ volume.split$pb, pch=16, col=2)
pause()

# Regression of weight on volume and area, fitted while allbacks is attached
allbacks.lm <- lm(weight ~ volume+area)
summary(allbacks.lm)
detach(allbacks)
pause()

anova(allbacks.lm)
pause()

model.matrix(allbacks.lm)
pause()

print("Example 6.1.1")
# Same terms, with the intercept removed by the -1 in the formula
allbacks.lm0 <- lm(weight ~ -1+volume+area, data=allbacks);
summary(allbacks.lm0)
pause()

print("Example 6.1.2")
# Diagnostic plots on a 2 by 2 layout; the previous layout is then restored
oldpar <- par(mfrow=c(2,2))
plot(allbacks.lm0)
par(oldpar)
# Refit the no-intercept model with row 13 omitted
allbacks.lm13 <- lm(weight ~ -1+volume+area, data=allbacks[-13,])
summary(allbacks.lm13)
pause()

print("Example 6.1.3")
round(coef(allbacks.lm0),2)  # Baseline for changes
round(lm.influence(allbacks.lm0)$coef,2)

}
\keyword{datasets}

\eof
\name{anesthetic}
\alias{anesthetic}
\non_function{}
\title{Anesthetic Effectiveness}
\usage{data(anesthetic)}
\description{
Thirty patients were given an anesthetic agent maintained
at a predetermined level (conc) for 15 minutes before making
an incision.  It was then noted whether the patient
moved, i.e. jerked or twisted.  
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{move}{a binary numeric vector coded for 
patient movement (0 = no movement, 1 = movement)}
    \item{conc}{anesthetic concentration}
    \item{logconc}{logarithm of concentration}
    \item{nomove}{the complement of move}
  }
}
\details{
The interest is in estimating
how the probability of jerking or twisting varies with
increasing concentration of the anesthetic agent.
}
\source{
unknown
}
\examples{
print("Logistic Regression - Example 8.1.4")

data(anesthetic)
# Cross-tabulate nomove (rows) against concentration (columns)
z <- table(anesthetic$nomove, anesthetic$conc)
tot <- apply(z, 2, sum)         # totals at each concentration
prop <- z[2,  ]/(tot)           # second row of the nomove table as a proportion of each total
oprop <- sum(z[2,  ])/sum(tot)  # overall proportion for that row, i.e. expected if concentration had no effect
# NOTE(review): row 2 is presumably nomove = 1 (no movement), given that
# nomove is documented as the complement of the 0/1 variable move - confirm
conc <- as.numeric(dimnames(z)[[2]])
plot(conc, prop, xlab = "Concentration", ylab = "Proportion", xlim = c(.5,2.5),
    ylim = c(0, 1), pch = 16)
# Label each point with its total, placed just to the left of the point
chw <- par()$cxy[1]
text(conc - 0.75 * chw, prop, paste(tot), adj = 1)
# Dashed horizontal line at the overall proportion
abline(h = oprop, lty = 2)

pause()

# Logistic regression of nomove on concentration
anes.logit <- glm(nomove ~ conc, family = binomial(link = logit),
  data = anesthetic)
anova(anes.logit)
summary(anes.logit)

}
\keyword{datasets}

\eof
\name{austpop}
\alias{austpop}
\non_function{}
\title{Population figures for Australian States and Territories}
\usage{data(austpop)}
\description{
Population figures for Australian states and territories for 1917, 1927,
..., 1997.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{year}{a numeric vector}
    \item{NSW}{New South Wales population counts }
    \item{Vic}{Victoria population counts}
    \item{Qld}{Queensland population counts}
    \item{SA}{South Australia population counts}
    \item{WA}{Western Australia population counts}
    \item{Tas}{Tasmania population counts}
    \item{NT}{Northern Territory population 
counts}
    \item{ACT}{Australian Capital Territory 
population counts}
    \item{Aust}{Population counts for
the whole country}
  }
}
\source{
Australian Bureau of Statistics
}
\examples{
data(austpop)

pause()
print("Looping - Example 1.7")

# Relative change between the first and ninth rows, for columns 2 to 9
growth.rates <- numeric(8)
for (j in seq(2,9)) {
    growth.rates[j-1] <- (austpop[9, j]-austpop[1, j])/austpop[1, j] }
growth.rates <- data.frame(growth.rates)
row.names(growth.rates) <- names(austpop[c(-1,-10)])
  # Note the use of row.names() to name the rows of the data frame
growth.rates

pause()
print("Avoiding Loops - Example 1.7b")

# The same calculation without an explicit loop
sapply(austpop[,-c(1,10)], function(x){(x[9]-x[1])/x[1]})

pause()
print("Plot - Example 1.8a")
attach(austpop)
plot(year, ACT, type="l") # Join the points ("l" = "line")
detach(austpop)

pause()
print("Exercise 1.12.9")
# One panel per column 2 to 9, log scale, on a 2 by 4 layout.  The loop
# indexes austpop directly, so the data frame does not need to be attached.
oldpar <- par(mfrow=c(2,4))  
for (i in 2:9){
plot(austpop[,1], log(austpop[, i]), xlab="Year",
    ylab=names(austpop)[i], pch=16, ylim=c(0,10))}
par(oldpar) 

}
\keyword{datasets}

\eof
\name{bestset.noise}
\alias{bestset.noise}
\title{Best Subset Selection Applied to Noise}
\description{
Best subset selection applied to completely random noise.  This
function demonstrates how variable selection techniques in 
regression can often err in suggesting that more variables be
included in a regression model than necessary.
}
\usage{
bestset.noise(m=100, n=40)
}
\arguments{
  \item{m}{the number of observations to be simulated. }
  \item{n}{the number of predictor variables in the simulated
model. }
}
\details{
A set of \code{n} predictor variables are simulated as independent
standard normal variates, in addition to a response variable which
is also independent of the predictors.  The best three variable
model relating the response to the predictors is selected using
functions from the leaps package.  (The leaps package
must be installed in order for this function to work.)
}
\value{
\code{bestset.noise} returns a list obtained from the 
\code{\link{summary.lm}} function.
}
\author{J.H. Maindonald}

\seealso{ \code{\link{lm}}}

\examples{
# The details section states that the leaps package must be installed
require(leaps)
bestset.noise(20,6) # `best' 3-variable regression for 20 simulated observations 
                    # on 7 unrelated variables (including the response)
}
\keyword{models}

\eof
\name{bomsoi}
\alias{bomsoi}
\non_function{}
\title{Southern Oscillation Index Data}
\usage{data(bomsoi)}
\description{
The Southern Oscillation Index (SOI) is the difference in barometric  
pressure at sea level between Tahiti and Darwin.  Annual SOI and
Australian rainfall data, for the years 1900-2001, are given.
Australia's annual mean rainfall is an area-weighted average of the total 
annual precipitation at approximately 370 rainfall stations 
around the country.

}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Year}{a numeric vector}
\item{Jan}{average January SOI values for each year}
\item{Feb}{average February SOI values for each year}
\item{Mar}{average March SOI values for each year}
\item{Apr}{average April SOI values for each year}
\item{May}{average May SOI values for each year}
\item{Jun}{average June SOI values for each year}
\item{Jul}{average July SOI values for each year}
\item{Aug}{average August SOI values for each year}
\item{Sep}{average September SOI values for each year}
\item{Oct}{average October SOI values for each year}
\item{Nov}{average November SOI values for each year}
\item{Dec}{average December SOI values for each year}
\item{SOI}{a numeric vector consisting of average annual SOI
values}
\item{avrain}{a numeric vector consisting of a weighted average annual
rainfall at a large number of Australian sites}
  } 
} 
  \source{ 
Australian Bureau of Meteorology web pages:

http://www.bom.gov.au/climate/change/rain02.txt and
http://www.bom.gov.au/climate/current/soihtm1.shtml
} 
  \references{
 Nicholls, N., Lavery, B., Frederiksen, C. and Drosdowsky, W. 1996.
Recent apparent changes in relationships between the El Nino --
southern oscillation and Australian rainfall and temperature.
Geophysical Research Letters 23: 3357-3360.
} 
  \examples{ 
require(ts)
data(bomsoi) 
# Columns 15 and 14 as time series starting in 1900, each with a smooth
plot(ts(bomsoi[, 15:14], start=1900),
     panel=function(y,...)panel.smooth(1900:2001, y,...))

# Check for skewness by comparing the normal probability plots for 
# different a, e.g.
par(mfrow = c(2,3))
for (a in c(50, 100, 150, 200, 250, 300))
qqnorm(log(bomsoi[, "avrain"] - a))
  # a = 250 leads to a nearly linear plot
par(mfrow = c(1,1))

plot(bomsoi$SOI, log(bomsoi$avrain - 250), xlab = "SOI",
     ylab = "log(avrain - 250)")
lines(lowess(bomsoi$SOI)$y, lowess(log(bomsoi$avrain - 250))$y, lwd=2)
  # NB: separate lowess fits against time
lines(lowess(bomsoi$SOI, log(bomsoi$avrain - 250)))

# Detrended series: each variable minus its own lowess fit
detsoi <- data.frame(
  detSOI = bomsoi[, "SOI"] - lowess(bomsoi[, "SOI"])$y,
  detrain = log(bomsoi$avrain - 250) - lowess(log(bomsoi$avrain - 250))$y)
row.names(detsoi) <- paste(1900:2001)

par(mfrow = c(1,2))  
plot(log(avrain-250) ~ SOI, data = bomsoi, ylab = 
 "log(Average rainfall - 250)")
lines(lowess(bomsoi$SOI, log(bomsoi$avrain-250)))
plot(detrain ~ detSOI, data = detsoi,
  xlab="Detrended SOI", ylab = "Detrended log(Rainfall-250)")
lines(lowess(detsoi$detrain ~ detsoi$detSOI)) 
par(mfrow = c(1,1))

require(nlme)
# Generalised least squares fit with MA(12) errors
soi.gls <- gls(detrain ~ detSOI, data = detsoi, correlation = 
corARMA(q=12))
summary(soi.gls)

soi1ML.gls <- update(soi.gls, method = "ML")
soi0ML.gls <- update(soi.gls, detrain ~ 1, method = "ML")
# I() is required for a quadratic term: inside a formula a bare detSOI^2
# is a formula operator and reduces to detSOI, giving the same model
# as soi1ML.gls
soi2ML.gls <- update(soi.gls, detrain ~ detSOI + I(detSOI^2), method = "ML")
anova(soi2ML.gls, soi1ML.gls)

# compare with MA(11) and MA(13)
soi11.gls <- update(soi.gls, correlation=corARMA(q=11))
soi13.gls <- update(soi.gls, correlation=corARMA(q=13))
anova(soi11.gls, soi.gls, soi13.gls)

# compare with the white noise model
soi0.gls <- gls(detrain ~ detSOI, data=detsoi)
anova(soi0.gls, soi.gls)

# a Portmanteau test of whiteness for the white noise model residuals

Box.test(resid(soi0.gls), lag=20, type="Ljung-Box")
# check residual properties
acf(resid(soi.gls))                     # (Correlated) residuals
acf(resid(soi.gls, type="normalized"))  # Innovation estimates, uncorrelated
qqnorm(resid(soi.gls, type="normalized"))  ## Examine  normality
# Now extract the moving average parameters, and plot the
# the theoretical autocorrelation function that they imply.
beta <- summary(soi.gls$modelStruct)$corStruct
plot(ARMAacf(ma=beta,lag.max=20), type="h")   
# Next, plot several simulated autocorrelation functions
# We can plot autocorrelation functions as though they were time series!

plot.ts(ts(cbind(
  "Series 1" = acf(arima.sim(list(ma=beta), n=83), plot=FALSE, 
lag.max = 20)$acf,
  "Series 2" = acf(arima.sim(list(ma=beta), n=83), plot=FALSE, 
lag.max = 20)$acf,
  "Series 3" = acf(arima.sim(list(ma=beta), n=83), plot=FALSE, 
lag.max = 20)$acf), start=0), type="h", main = "", xlab = "Lag")
# Show confidence bounds for the MA parameters
intervals(soi.gls)

} 
\keyword{datasets}

\eof
\name{bounce}
\alias{bounce}
\title{Bounce}
\description{
A utility function for oneway.plot
}
\author{J.H. Maindonald}

\keyword{misc}

\eof
\name{carprice}
\alias{carprice}
\non_function{}
\title{US Car Price Data}
\usage{data(carprice)}
\description{
U.S. data extracted from \code{Cars93}, a data frame in the 
MASS package.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Type}{Type of car, e.g. Sporty, Van, Compact}
    \item{Min.Price}{Price for a basic model}
    \item{Price}{Price for a mid-range model}
    \item{Max.Price}{Price for a `premium' model}
    \item{Range.Price}{Difference between Max.Price and Min.Price}
    \item{RoughRange}{Range.Price plus some N(0,.0001) noise}
    \item{gpm100}{The number of gallons required to travel 100 miles}
    \item{MPG.city}{Average number of miles per gallon for city driving}
    \item{MPG.highway}{Average number of miles per gallon for highway 
driving}
  } 
} 
  \source{ 
MASS package
} 
  \references{
 Venables, W.N. and Ripley, B.D., 3rd edn 1999. Modern
Applied Statistics with S-Plus. Springer, New York.  See also
`R' Complements to Modern Applied Statistics with S-Plus,
available from \url{http://www.stats.ox.ac.uk/pub/MASS3/}.
} 
  \examples{ 
print("Multicollinearity - Example 6.8")
data(carprice) 
# Scatterplot matrix, omitting columns 1, 8 and 9
pairs(carprice[,-c(1,8,9)])

# Model with all three price variables plus Range.Price
carprice1.lm <- lm(gpm100 ~ Type+Min.Price+Price+Max.Price+Range.Price,
    data=carprice)
round(summary(carprice1.lm)$coef,3)
pause()

# Range.Price is documented as Max.Price minus Min.Price, so check
# the fit for aliased terms
alias(carprice1.lm)
pause()

# Same model with RoughRange in place of Range.Price
carprice2.lm <- lm(gpm100 ~ Type+Min.Price+Price+Max.Price+RoughRange, data=carprice)
round(summary(carprice2.lm)$coef, 2)
pause()

# Model with Type and the single Price variable
carprice.lm <- lm(gpm100 ~ Type + Price, data = carprice)
round(summary(carprice.lm)$coef,4)  
pause()

summary(carprice1.lm)$sigma   # residual standard error when fitting all 3 price variables
pause()

summary(carprice.lm)$sigma    # residual standard error when only price is used
pause()

vif(lm(gpm100 ~ Price, data=carprice)) # Baseline Price
pause()

vif(carprice1.lm)    # includes Min.Price, Price & Max.Price
pause()

vif(carprice2.lm)    # includes Min.Price, Price, Max.Price & RoughRange
pause()

vif(carprice.lm)     # Price alone
} 
\keyword{datasets}

\eof
\name{cfseal}
\alias{cfseal}
\non_function{}
\title{Cape Fur Seal Data}
\usage{data(cfseal)}
\description{
The \code{cfseal} data frame has 30 rows and 11 columns consisting
of weight measurements for various organs taken from 30 Cape Fur
Seals that died as an unintended consequence of commercial fishing.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{age}{a numeric vector}
    \item{weight}{a numeric vector}
    \item{heart}{a numeric vector}
    \item{lung}{a numeric vector}
    \item{liver}{a numeric vector}
    \item{spleen}{a numeric vector}
    \item{stomach}{a numeric vector}
    \item{leftkid}{a numeric vector}
    \item{rightkid}{a numeric vector}
    \item{kidney}{a numeric vector}
    \item{intestines}{a numeric vector}
  }
}
\source{
 Stewardson, C.L., Hemsley, S., Meyer, M.A., Canfield,
P.J. and Maindonald, J.H. 1999.  Gross and microscopic visceral
anatomy of the male Cape fur seal, Arctocephalus pusillus pusillus
(Pinnepedia: Otariidae), with reference to organ size and growth.
Journal of Anatomy (Cambridge) 195: 235-255.  (WWF project ZA-348)
}
\examples{
data(cfseal)
print("Allometric Growth - Example 5.7")

# Regress log heart weight on log body weight
cfseal.lm <- lm(log(heart) ~ log(weight), data=cfseal); summary(cfseal.lm)
# The formula puts log(weight) on the x-axis and log(heart) on the y-axis,
# so the body weight label belongs to x and the heart weight label to y.
# Axes are suppressed here and drawn below with labels on the original scale.
plot(log(heart) ~ log(weight), data = cfseal, pch=16, xlab = "Body weight (kg, log scale)", 
ylab = "Heart weight (g, log scale)", axes=FALSE)
heartaxis <- 100*(2^seq(0,3))
bodyaxis <- c(20,40,60,100,180)
axis(1, at = log(bodyaxis), lab = bodyaxis)
axis(2, at = log(heartaxis), lab = heartaxis)
box()
abline(cfseal.lm)
}
\keyword{datasets}

\eof
\name{cities}
\alias{cities}
\non_function{}
\title{Populations of Major Canadian Cities (1992-96)}
\usage{data(cities)}
\description{
Population estimates for several Canadian cities.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{CITY}{a factor, consisting of the city names}
    \item{REGION}{a factor with 5 levels (ATL=Atlantic, ON=Ontario,
QC=Quebec, PR=Prairies, WEST=Alberta and British Columbia) representing the location
of the cities}
    \item{POP1992}{a numeric vector giving population in 1000's for 1992}
    \item{POP1993}{a numeric vector giving population in 1000's for 1993}
    \item{POP1994}{a numeric vector giving population in 1000's for 1994}
    \item{POP1995}{a numeric vector giving population in 1000's for 1995}
    \item{POP1996}{a numeric vector giving population in 1000's for 1996}
  }
}
\source{
Statistics Canada
}
\examples{
data(cities)
# Logical factor: TRUE for cities whose REGION is ON or WEST
cities$have <- factor((cities$REGION=="ON")|(cities$REGION=="WEST"))
# Colour the points by the integer codes of that factor
plot(POP1996~POP1992, data=cities, col=as.integer(cities$have))
}
\keyword{datasets}

\eof
\name{component.residual}
\alias{component.residual}
\title{Component + Residual Plot}
\description{
Component + Residual plot for a term in a \code{lm} model.
}
\usage{
component.residual(lm.obj = mice12.lm, which = 1, xlab = "Component",
    ylab = "C+R")
}
\arguments{
  \item{lm.obj}{ A \code{lm} object }
  \item{which}{numeric code for the term in the \code{lm} formula to be 
plotted}
  \item{xlab}{label for the x-axis}
  \item{ylab}{label for the y-axis}
}
\value{
A scatterplot with a smooth curve overlaid.
}
\author{J.H. Maindonald}
\seealso{\code{\link{lm}}}

\examples{
data(litters)
# Fit the model whose terms are to be plotted
mice12.lm <- lm(brainwt ~ bodywt + lsize, data=litters)
# One panel per term, side by side; the previous layout is restored afterwards
oldpar <- par(mfrow = c(1,2))
component.residual(mice12.lm, 1, xlab = "Body weight", ylab= "t(Body weight) + e")
component.residual(mice12.lm, 2, xlab = "Litter size", ylab= "t(Litter size) + e")
par(oldpar)
}
\keyword{models}

\eof
\name{cuckoos}
\alias{cuckoos}
\non_function{}
\title{Cuckoo Eggs Data}
\usage{data(cuckoos)}
\description{
Length and breadth measurements of 120 eggs laid in the nests of six 
different species of host bird.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{length}{the egg lengths in tenths of millimeters}
    \item{breadth}{the egg breadths in tenths of millimeters}
    \item{species}{a factor with levels
   \code{hedge.sparrow}, 
   \code{meadow.pipit}, 
   \code{pied.wagtail}, 
   \code{robin}, 
   \code{tree.pipit}, 
   \code{wren} 
  } 
   \item{id}{a numeric vector}
} 
}
  \source{ Latter, O.H. (1902). The eggs of Cuculus canorus. An
Inquiry into the dimensions of the cuckoo's egg and the relation of the
variations to the size of the eggs of the foster-parent, with notes on
coloration, &c. Biometrika i, 164.  
} 
  \references{
 Tippett, L.H.C. 1931: "The Methods of Statistics". Williams & Norgate, 
London.
} 
  \examples{ data(cuckoos) 
print("Strip and Boxplots - Example 2.1.2")

attach(cuckoos)
oldpar <- par(las = 2) # labels at right angle to axis.
# Egg length by host species, as a strip chart and as horizontal boxplots
stripchart(length ~ species) 
boxplot(split(cuckoos$length, cuckoos$species),
         xlab="Length of egg", horizontal=TRUE)
detach(cuckoos)
par(oldpar)
pause()

print("Summaries - Example 2.2.2")
# Standard deviation of egg length within each species
sapply(split(cuckoos$length, cuckoos$species), sd)
pause()

print("Example 4.1.4")
# Egg lengths for the wren level of species only
wren <- split(cuckoos$length, cuckoos$species)$wren
median(wren)
n <- length(wren)
sqrt(pi/2)*sd(wren)/sqrt(n)  # this s.e. computation assumes normality
} 
\keyword{datasets}

\eof
\name{cv.binary}
\alias{cv.binary}
\title{Cross-Validation for Regression with a Binary Response}
\description{
This function gives internal and cross-validation measures of predictive
accuracy for regression with a binary response.  The data are 
randomly assigned to a number of `folds'.  
Each fold is removed, in turn, while the remaining data is used
to re-fit the regression model and to predict at the deleted observations.
}
\usage{
cv.binary(obj=frogs.glm, rand=NULL, nfolds=10, print.details=TRUE)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{obj}{ a \code{glm} object}
  \item{rand}{ a vector which assigns each observation to a fold }
  \item{nfolds}{ the number of folds}
  \item{print.details}{ logical variable (TRUE = print detailed output, 
the default) }
}
\value{
  \itemize{
    \item the order in which folds were deleted
    \item internal estimate of accuracy
    \item cross-validation estimate of accuracy
  }
}
\author{J.H. Maindonald}

\seealso{ \code{glm} }

\examples{
data(frogs)
# Binomial glm object to pass to cv.binary
frogs.glm <- glm(pres.abs ~ log(distance) + log(NoOfPools), 
   family=binomial,data=frogs)
# Remaining arguments are left at the defaults shown in the usage section
cv.binary(frogs.glm)
}
\keyword{models}

\eof
\name{cv.lm}
\alias{cv.lm}
\title{Cross-Validation for Linear Regression}
\description{
This function gives internal and cross-validation measures of predictive
accuracy for ordinary linear regression.  The data are 
randomly assigned to a number of `folds'.  
Each fold is removed, in turn, while the remaining data is used
to re-fit the regression model and to predict at the deleted observations.
}
\usage{
cv.lm(df = houseprices, form.lm = formula(sale.price ~ area), m=3, dots = 
FALSE, seed=29, plotit=TRUE, printit=TRUE)
}
\arguments{
  \item{df}{a data frame}
  \item{form.lm}{a formula object}
  \item{m}{the number of folds}
  \item{dots}{uses pch=16 for the plotting character}
  \item{seed}{random number generator seed}
  \item{plotit}{if TRUE, a plot is constructed on the active device}
  \item{printit}{if TRUE, output is printed to the screen}
}
\value{
For each fold, a table listing
\itemize{
\item the explanatory variable values
\item the predicted values
\item the observed values
\item the residuals
}
ms = the overall mean square of prediction error 

}
\author{J.H. Maindonald}

\seealso{ \code{lm} }

\examples{
# The default df argument of cv.lm is houseprices, so load it first
data(houseprices)
# All arguments are left at the defaults shown in the usage section
cv.lm()
}
\keyword{models}

\eof
\name{dewpoint}
\alias{dewpoint}
\non_function{}
\title{Dewpoint Data}
\usage{data(dewpoint)}
\description{
The \code{dewpoint} data frame has 72 rows and 3 columns.
Monthly data were obtained for a number of sites (in Australia)
and a number of months. 
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{maxtemp}{monthly maximum temperatures}
    \item{mintemp}{monthly minimum temperatures}
    \item{dewpoint}{monthly average dewpoint for each combination of
minimum and maximum temperature readings}
  }
}
\source{
Dr Edward Linacre,
visiting fellow in the Australian National University Department
of Geography. 
}
\examples{
print("Additive Model - Example 7.5")
data(dewpoint)
require(splines)
attach(dewpoint)   
# Additive model: a B-spline term in each of maxtemp and mintemp
ds.lm <- lm(dewpoint ~ bs(maxtemp,5) + bs(mintemp,5), data=dewpoint)
# Term-by-term fitted contributions, with standard errors
ds.fit <-predict(ds.lm, type="terms", se=TRUE)
oldpar <- par(mfrow=c(1,2))
plot(maxtemp, ds.fit$fit[,1], xlab="Maximum temperature",
     ylab="Change from dewpoint mean",type="n")
# Sort by the x variable before joining points, as is done for mintemp
# below, so that the curve does not depend on the row order of the data
ord<-order(maxtemp)
lines(maxtemp[ord],ds.fit$fit[ord,1])
lines(maxtemp[ord],ds.fit$fit[ord,1]-2*ds.fit$se[ord,1],lty=2)
lines(maxtemp[ord],ds.fit$fit[ord,1]+2*ds.fit$se[ord,1],lty=2)
plot(mintemp,ds.fit$fit[,2],xlab="Minimum temperature",
     ylab="Change from dewpoint mean",type="n")
ord<-order(mintemp)
lines(mintemp[ord],ds.fit$fit[ord,2])
lines(mintemp[ord],ds.fit$fit[ord,2]-2*ds.fit$se[ord,2],lty=2)
lines(mintemp[ord],ds.fit$fit[ord,2]+2*ds.fit$se[ord,2],lty=2)
detach(dewpoint)
par(oldpar)

}
\keyword{datasets}

\eof
\name{elastic1}
\alias{elastic1}
\non_function{}
\title{Elastic Band Data Replicated}
\usage{data(elastic1)}
\description{
The \code{elastic1} data frame has 7 rows and 2 columns
giving, for each amount by which an elastic band is stretched
over the end of a ruler, the distance that the band traveled when
released.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{stretch}{the amount by which the
elastic band was stretched}
    \item{distance}{the distance traveled}
  }
}
\source{
 J. H. Maindonald
}
\examples{
data(elastic1)
plot(elastic1)

print("Inline Functions - Example 12.2.2")
# Column means, using a named function
sapply(elastic1, mean)
pause()

# The same result, using a function defined inline
sapply(elastic1, function(x)mean(x))
pause()

# Sum of the logged values in each column
sapply(elastic1, function(x)sum(log(x)))
pause()

print("Data Output - Example 12.3.2")
# Writes the data frame to the file bandsframe.txt
write.table(elastic1, file="bandsframe.txt")

}
\keyword{datasets}

\eof
\name{elastic2}
\alias{elastic2}
\non_function{}
\title{Elastic Band Data Replicated Again}
\usage{data(elastic2)}
\description{
The \code{elastic2} data frame has 9 rows and 2 columns
giving, for each amount by which an elastic band is stretched
over the end of a ruler, the distance that the band traveled when
released.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{stretch}{the amount by which the
elastic band was stretched}
    \item{distance}{the distance traveled}
  }
}
\source{
 J. H. Maindonald
}
\examples{
data(elastic2)
plot(elastic2)
pause()

print("Chapter 5 Exercise")
data(elastic1)

# Axis limits wide enough to show both data sets on one plot
yrange <- range(c(elastic1$distance, elastic2$distance))
xrange <- range(c(elastic1$stretch, elastic2$stretch))
plot(distance ~ stretch, data = elastic1, pch = 16, ylim = yrange, xlim = 
xrange)
points(distance ~ stretch, data = elastic2, pch = 15, col = 2)
legend(xrange[1], yrange[2], legend = c("Data set 1", "Data set 2"), pch = 
c(16, 15), col = c(1, 2))

# Separate straight-line fits, each added to the plot
elastic1.lm <- lm(distance ~ stretch, data = elastic1)
elastic2.lm <- lm(distance ~ stretch, data = elastic2)
abline(elastic1.lm)
abline(elastic2.lm, col = 2)
summary(elastic1.lm)
summary(elastic2.lm)
pause()

# Fitted values together with their standard errors
predict(elastic1.lm, se.fit=TRUE)
predict(elastic2.lm, se.fit=TRUE)
}
\keyword{datasets}

\eof
\name{elasticband}
\alias{elasticband}
\non_function{}
\title{Elastic Band Data}
\usage{data(elasticband)}
\description{
The \code{elasticband} data frame has 7 rows and 2 columns
giving, for each amount by which an elastic band is stretched
over the end of a ruler, the distance that the band traveled when
released.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{stretch}{the amount by which the
elastic band was stretched}
    \item{distance}{the distance traveled}
  }
}
\source{
 J. H. Maindonald
}
\examples{
data(elasticband)

print("Example 1.8.1")

attach(elasticband)     # R now knows where to find stretch and distance
plot(stretch, distance) # Alternative: plot(distance ~ stretch)
detach(elasticband)
pause()

print("Output of Data Frames - Example 12.3.2")

# Writes the transposed data to bands.txt, two values per line
write(t(elasticband),file="bands.txt",ncol=2)

# Divert printed output to bands2.txt, then switch the diversion off
sink("bands2.txt")
elasticband   # NB: No output on screen
sink()

print("Lists - Example 12.7")

elastic.lm <- lm(distance ~ stretch, data=elasticband)
 names(elastic.lm)
# Two equivalent ways of extracting a named list element
 elastic.lm$coefficients
elastic.lm[["coefficients"]]
pause()

# Double brackets return the first element itself
elastic.lm[[1]]
pause()

# Single brackets return a list holding the first element
elastic.lm[1]
pause()

# Print the residuals with digits=3, then restore the previous setting
# so that the example does not change a global option for the session
oldopt <- options(digits=3)
elastic.lm$residuals 
options(oldopt)
pause()

elastic.lm$call
pause()

 mode(elastic.lm$call)

}
\keyword{datasets}

\eof
\name{frogs}
\alias{frogs}
\non_function{}
\title{Frogs Data}
\usage{data(frogs)}
\description{
The \code{frogs} data frame has 212 rows and 11 columns.
The data are on the distribution of the Southern Corroboree
frog, which occurs in the Snowy Mountains area of New South Wales,
Australia.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{pres.abs}{0 = frogs were absent, 1 = frogs were present}
    \item{northing}{reference point}
    \item{easting}{reference point}
    \item{altitude}{altitude, in meters}
    \item{distance}{distance in meters to nearest extant population}
    \item{NoOfPools}{number of potential breeding pools}
    \item{NoOfSites}{number of potential breeding sites within a 2 km 
radius}
    \item{avrain}{mean rainfall for Spring period}
    \item{meanmin}{mean minimum Spring temperature}
    \item{meanmax}{mean maximum Spring temperature}
  }
}
\source{
 Hunter, D. (2000) The conservation and demography of 
the southern corroboree frog (Pseudophryne corroboree). M.Sc. thesis,
University of Canberra, Canberra.
}
\examples{
data(frogs)

print("Multiple Logistic Regression - Example 8.2")

# Map of the sites: plotting symbol 1 where pres.abs is 0, symbol 16 where it is 1
plot(northing ~ easting, data=frogs, pch=c(1,16)[frogs$pres.abs+1],
  xlab="Meters east of reference point", ylab="Meters north")

pause()

pairs(frogs[,4:10], oma=c(2,2,2,2), cex=0.5)

pause()

# Three density plots side by side.  The layout is restored only after the
# loop has finished; restoring it inside the loop would cancel the 1 by 3
# layout after the first plot.
oldpar <- par(mfrow=c(1,3))
for(nam in c("distance","NoOfPools","NoOfSites")){
  y <- frogs[,nam]
  plot(density(y),main="",xlab=nam)
}
par(oldpar)

pause()

attach(frogs)
pairs(cbind(altitude,log(distance),log(NoOfPools),NoOfSites),
  panel=panel.smooth, labels=c("altitude","log(distance)",
  "log(NoOfPools)","NoOfSites"))
detach(frogs)

# Logistic regression on all explanatory variables
frogs.glm0 <- glm(formula = pres.abs ~ altitude + log(distance) +
  log(NoOfPools) + NoOfSites + avrain + meanmin + meanmax,
  family = binomial, data = frogs)
summary(frogs.glm0)
pause()

# Reduced model
frogs.glm <- glm(formula = pres.abs ~ log(distance) + log(NoOfPools) + 
meanmin +
  meanmax, family = binomial, data = frogs)
oldpar <- par(mfrow=c(2,2))
termplot(frogs.glm, data=frogs)
par(oldpar)
pause()

termplot(frogs.glm, data=frogs, partial.resid=TRUE)

cv.binary(frogs.glm0)   # All explanatory variables
pause()

cv.binary(frogs.glm)    # Reduced set of explanatory variables

pause()

# Compare the two models on the same random fold assignment, four times.
# One fold label is drawn per row of frogs rather than a hard-coded count.
for (j in 1:4){
 rand <- sample(1:10, nrow(frogs), replace=TRUE)
 all.acc <- cv.binary(frogs.glm0, rand=rand, print.details=FALSE)$acc.cv
 reduced.acc <- cv.binary(frogs.glm, rand=rand, print.details=FALSE)$acc.cv
 cat("\nAll:", round(all.acc,3), "  Reduced:", round(reduced.acc,3))
}

}
\keyword{datasets}

\eof
\name{fruitohms}
\alias{fruitohms}
\non_function{}
\title{Electrical Resistance of Kiwi Fruit}
\usage{data(fruitohms)}
\description{
Data are from a study that examined how the electrical
resistance of a slab of kiwifruit changed with the apparent
juice content.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{juice}{apparent juice content (percent) }
    \item{ohms}{electrical resistance (in ohms)}
  }
}
\source{
Harker, F. R. and Maindonald J.H. 1994. Ripening of nectarine
fruit. Plant Physiology 106: 165 - 171.
}
\examples{
data(fruitohms)
plot(ohms ~ juice, xlab="Apparent juice content (\%)",ylab="Resistance (ohms)", data=fruitohms)
lines(lowess(fruitohms$juice, fruitohms$ohms), lwd=2)
pause()

require(splines)
# Attach so that ohms and juice can be referred to directly below.
attach(fruitohms)
# Set up the axes only (type="n"); the fitted curve is added afterwards.
plot(ohms ~ juice, cex=0.8, xlab="Apparent juice content (\%)",
     ylab="Resistance (ohms)", type="n")
# Regression on a B-spline basis for juice, with 4 degrees of freedom.
fruit.lmb4 <- lm(ohms ~ bs(juice,4))
# Draw lines in increasing order of juice so that they do not zig-zag.
ord <- order(juice)
lines(juice[ord], fitted(fruit.lmb4)[ord], lwd=2)
# Lower and upper confidence limits for the fitted curve.
ci <- predict(fruit.lmb4, interval="confidence")
lines(juice[ord], ci[ord,"lwr"])
lines(juice[ord], ci[ord,"upr"])
# Undo the attach so the data frame does not stay on the search path.
detach(fruitohms)
}
\keyword{datasets}

\eof
\name{geophones}
\alias{geophones}
\non_function{}
\title{Seismic Timing Data}
\usage{data(geophones)}
\description{
The \code{geophones} data frame has 56 rows and 2 columns.
Thickness of a layer of Alberta substratum as measured by
a line of geophones.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{distance}{location of geophone.}
    \item{thickness}{time for signal to pass through substratum.}
  }
}
\examples{
data(geophones) 
plot(geophones)
lines(lowess(geophones, f=.25))
}
\keyword{datasets}

\eof
\name{head.injury}
\alias{head.injury}
\non_function{}
\title{Minor Head Injury (Simulated) Data}
\usage{data(head.injury)}
\description{
The \code{head.injury} data frame has 3121 rows and 11 columns.
The data were simulated according to a simple logistic
regression model to match roughly the clinical characteristics
of a sample of individuals who suffered minor head injuries.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{age.65}{age factor (0 = under 65, 1 = over 65).}
    \item{amnesia.before}{amnesia before impact (less than 30 minutes = 0,
more than 30 minutes = 1).}
    \item{basal.skull.fracture}{(0 = no fracture, 1 = fracture).}
    \item{GCS.decrease}{Glasgow Coma Scale decrease (0 = no 
deterioration, 1 = deterioration).}
    \item{GCS.13}{initial Glasgow Coma Scale (0 = not `13', 1 = `13'). }
    \item{GCS.15.2hours}{Glasgow Coma Scale after 2 hours
(0 = not `15', 1 = `15').}
    \item{high.risk}{assessed by clinician as high risk for neurological
intervention (0 = not high risk, 1 = high risk).}
    \item{loss.of.consciousness}{(0 = conscious, 1 = loss of 
consciousness).}
    \item{open.skull.fracture}{(0 = no fracture, 1 = fracture) }
    \item{vomiting}{(0 = no vomiting, 1 = vomiting)}
    \item{clinically.important.brain.injury}{any acute brain finding
revealed on CT (0 = not present, 1 = present).}
  }
}
\references{
Stiell, I.G., Wells, G.A., Vandemheen, K., Clement, C., Lesiuk, H.,
Laupacis, A., McKnight, R.D., Verbee, R., Brison, R., Cass, D., 
Eisenhauer, M., Greenberg, G.H., and Worthington, J. (2001) 
The Canadian CT Head Rule for Patients with Minor Head Injury,
The Lancet. 357: 1391-1396. 
}
\examples{
data(head.injury)
## maybe str(head.injury) ; plot(head.injury) ...
}
\keyword{datasets}

\eof
\name{hills}
\alias{hills}
\non_function{}
\title{Scottish Hill Races Data}
\usage{data(hills)}
\description{
The record times in 1984 for 35 Scottish hill races.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{dist}{distance, in miles (on the map)}
    \item{climb}{total height gained during the route, in feet}
    \item{time}{record time in hours}
  }
}
\source{
 A.C. Atkinson (1986) Comment: Aspects of diagnostic regression
     analysis. Statistical Science  1, 397-402.

     Also, in MASS library, with time in minutes.
}
\references{
     A.C. Atkinson (1988) Transformations unmasked. Technometrics 30,
     311-318. ["corrects" the time for Knock Hill from 78.65 to 18.65. It
     is unclear if this is based on the original records.]

}
\examples{
print("Transformation - Example 6.4.3")
data(hills)
pairs(hills, labels=c("dist\n\n(miles)", "climb\n\n(feet)", 
"time\n\n(hours)"))
pause()

pairs(log(hills), labels=c("dist\n\n(log(miles))", "climb\n\n(log(feet))",
  "time\n\n(log(hours))"))
pause()

hills0.loglm <- lm(log(time) ~ log(dist) + log(climb), data = hills)  
oldpar <- par(mfrow=c(2,2))
plot(hills0.loglm)
pause()


hills.loglm <- lm(log(time) ~ log(dist) + log(climb), data = hills[-18,])
summary(hills.loglm) 
plot(hills.loglm)
pause()

hills2.loglm <- lm(log(time) ~ log(dist)+log(climb)+log(dist):log(climb), 
data=hills[-18,])
anova(hills.loglm, hills2.loglm)
pause()

step(hills2.loglm)
pause()

summary(hills.loglm, corr=TRUE)$coef
pause()

summary(hills2.loglm, corr=TRUE)$coef
par(oldpar)
pause()

print("Nonlinear - Example 6.9.4")
require(nls)
hills.nls0 <- nls(time ~ (dist^alpha)*(climb^beta), start =
   c(alpha = .909, beta = .260), data = hills[-18,])
summary(hills.nls0)
plot(residuals(hills.nls0) ~ predict(hills.nls0)) # residual plot
pause()

hills$climb.mi <- hills$climb/5280
hills.nls <- nls(time ~ alpha + beta*dist + gamma*(climb.mi^delta),
  start=c(alpha = 1, beta = 1, gamma = 1, delta = 1), data=hills[-18,])
summary(hills.nls)
plot(residuals(hills.nls) ~ predict(hills.nls)) # residual plot


}
\keyword{datasets}

\eof
\name{hills2000}
\alias{hills2000}
\non_function{}
\title{Scottish Hill Races Data - 2000}
\usage{data(hills2000)}
\description{
The record times in 2000 for 77 Scottish hill races.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{h}{male record time in hours}
    \item{m}{plus minutes}
    \item{s}{plus seconds}
    \item{h0}{female record time in hours}
    \item{m0}{plus minutes}
    \item{s0}{plus seconds}
    \item{dist}{distance, in miles (on the map)}
    \item{climb}{total height gained during the route, in feet}
    \item{time}{record time in hours}
    \item{timef}{record time in hours for females}
    \item{type}{a factor, with levels indicating type of race, 
i.e. hill, marathon, relay, uphill or other}
  }
}
\source{
The Scottish Running Resource, http://www.hillrunning.co.uk
}
\examples{
data(hills2000)
}
\keyword{datasets}

\eof
\name{houseprices}
\alias{houseprices}
\non_function{}
\title{Aranda House Prices}
\usage{data(houseprices)}
\description{
The \code{houseprices} data frame consists of the floor
area, price, and the number
of bedrooms for a sample of houses sold in Aranda in 1999. 
Aranda is a suburb of Canberra, Australia.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{area}{a numeric vector giving the floor area}
    \item{bedrooms}{a numeric vector giving the number of bedrooms}
    \item{sale.price}{a numeric vector giving the sale price
in thousands of Australian dollars}
  }
}
\source{
J.H. Maindonald
}
\examples{
data(houseprices)
plot(sale.price~area, data=houseprices)
pause()

coplot(sale.price~area|bedrooms, data=houseprices)
pause()

print("Cross-Validation - Example 5.5.2")

houseprices.lm <- lm(sale.price ~ area, data=houseprices)
summary(houseprices.lm)$sigma^2
pause()

cv.lm()
pause()

print("Bootstrapping - Example 5.5.3")
# Bootstrap statistic: slope of the regression of sale.price on area,
# refitted to the rows of the data frame picked out by index.
houseprices.fn <- function (houseprices, index){
  resampled <- houseprices[index, ]
  fit <- lm(sale.price ~ area, data = resampled)
  coef(fit)[2]
}
require(boot)       # ensure that the boot package is loaded
houseprices.boot <- boot(houseprices, R=999, statistic=houseprices.fn)

# Bootstrap statistic: predicted sale price at a floor area of 1200,
# from the line refitted to the rows picked out by index.
houseprices1.fn <- function (houseprices, index){
  fit <- lm(sale.price ~ area, data = houseprices[index, ])
  target <- data.frame(area = 1200)
  predict(fit, newdata = target)
}

houseprices1.boot <- boot(houseprices, R=999, statistic=houseprices1.fn)
boot.ci(houseprices1.boot, type="perc") # "basic" is an alternative to "perc"
# Bootstrap statistic: prediction errors for every house in the full data
# frame, using the line refitted to the rows picked out by index.
houseprices2.fn <- function (houseprices, index){
  fit <- lm(sale.price ~ area, data = houseprices[index, ])
  predicted <- predict(fit, houseprices)
  houseprices$sale.price - predicted
}

n <- length(houseprices$area)
R <- 200   
houseprices2.boot <- boot(houseprices, R=R, statistic=houseprices2.fn)
house.fac <- factor(rep(1:n, rep(R, n)))
plot(house.fac, as.vector(houseprices2.boot$t), ylab="Prediction Errors", 
xlab="House")
pause()

plot(apply(houseprices2.boot$t,2, sd)/predict.lm(houseprices.lm, se.fit=TRUE)$se.fit,
     ylab="Ratio of Bootstrap SE's to Model-Based SE's", xlab="House", pch=16)
abline(1,0)

}
\keyword{datasets}

\eof
\name{ironslag}
\alias{ironslag}
\non_function{}
\title{Iron Content Measurements}
\usage{data(ironslag)}
\description{
The \code{ironslag} data frame has 53 rows and 2 columns.
Two methods for measuring the iron content in samples of slag 
were compared, a chemical and a magnetic method.  The chemical
method requires greater effort than the magnetic method.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{chemical}{a numeric vector containing the measurements
coming from the chemical method}
    \item{magnetic}{a numeric vector containing the measurements
coming from the magnetic method}
  }
}
\source{
Hand, D.J., Daly, F., McConway, K., Lunn, D., and Ostrowski, E. eds (1993)
A Handbook of Small Data Sets. London: Chapman & Hall. 
}
\examples{
data(ironslag)
iron.lm <- lm(chemical ~ magnetic, data = ironslag)
oldpar <- par(mfrow = c(2,2))
plot(iron.lm)
par(oldpar)
}
\keyword{datasets}

\eof
\name{jobs}
\alias{jobs}
\non_function{}
\title{Canadian Labour Force Summary Data (1995-96)}
\usage{data(jobs)}
\description{
The number of workers in the Canadian
labour force broken down by region (BC, Alberta, Prairies,
Ontario, Quebec, Atlantic) for the 24-month period from January,
1995 to December, 1996 (a time when Canada was emerging from a
deep economic recession).
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{BC}{monthly labour force counts in British Columbia}
    \item{Alberta}{monthly labour force counts in Alberta}
    \item{Prairies}{monthly labour force counts in Saskatchewan and 
Manitoba}
    \item{Ontario}{monthly labour force counts in Ontario}
    \item{Quebec}{monthly labour force counts in Quebec}
    \item{Atlantic}{monthly labour force counts in Newfoundland, Nova 
Scotia, Prince Edward Island and New Brunswick}
    \item{Date}{year (in decimal form)}
  }
}
\details{
These data have been seasonally adjusted.
}
\source{
Statistics Canada
}
\examples{
print("Multiple Variables and Times - Example 2.1.4")
data(jobs)
sapply(jobs, range)
pause()

matplot(jobs[,7], jobs[,-7], type="l", xlim=c(95,97.1))
 # Notice that we have been able to use a data frame as the second argument to matplot().
 # For more information on matplot(), type help(matplot)
text(rep(jobs[24,7], 6), jobs[24,1:6], names(jobs)[1:6], adj=0)
pause()

sapply(log(jobs[,-7]), range)
apply(sapply(log(jobs[,-7]), range), 2, diff)
pause()

oldpar <- par(mfrow=c(2,3))
range.log <- sapply(log(jobs[,-7], 2), range)
maxdiff <- max(apply(range.log, 2, diff))
range.log[2,] <- range.log[1,] + maxdiff
titles <- c("BC Jobs","Alberta Jobs","Prairie Jobs",
   "Ontario Jobs", "Quebec Jobs", "Atlantic Jobs")
for (i in 1:6){
plot(jobs$Date, log(jobs[,i], 2), type = "l", ylim = range.log[,i],
    xlab = "Time", ylab = "Number of jobs", main = titles[i])
}
par(oldpar)
}
\keyword{datasets}

\eof
\name{kiwishade}
\alias{kiwishade}
\non_function{}
\title{Kiwi Shading Data}
\usage{data(kiwishade)}
\description{
The \code{kiwishade} data frame has 48 rows and 4 columns.
The data are from a designed experiment that  
compared different kiwifruit shading treatments.
There are four vines in each plot, and four plots (one for each of four 
treatments) in each of the three blocks. Each 
plot has the same number of vines, each block has the same number of 
plots, with each treatment occurring the same number of times.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{yield}{Total yield (in kg)}
    \item{plot}{a factor with levels    \code{east.Aug2Dec},
    \code{east.Dec2Feb},    \code{east.Feb2May},
    \code{east.none},    \code{north.Aug2Dec},
    \code{north.Dec2Feb},    \code{north.Feb2May},
    \code{north.none},    \code{west.Aug2Dec},
    \code{west.Dec2Feb},    \code{west.Feb2May},
    \code{west.none} }
    \item{block}{a factor indicating the location of the plot, with levels
\code{east}, \code{north}, \code{west} }
\item{shade}{a factor representing the period for which
the experimenter placed shading over the vines; with levels:
    \code{none} no shading,    \code{Aug2Dec} August - December,
    \code{Dec2Feb} December - February,    \code{Feb2May} February - May}
}
}
\details{
The northernmost plots were grouped together because they
were similarly affected by shading from the sun in the north.
For the remaining two blocks shelter effects, whether from the
west or from the east, were thought more important.
}

\source{
Snelgar, W.P., Manson, P.J., Martin, P.J. 1992. Influence of
time of shading on flowering and yield of kiwifruit vines. Journal of
Horticultural Science 67: 481-487.
}
\references{
 Maindonald J H 1992. Statistical design, analysis and presentation
issues. New Zealand Journal of Agricultural Research 35: 121-141.
}
\examples{
data(kiwishade)
print("Data Summary - Example 2.2.1")
attach(kiwishade)
kiwimeans <- aggregate(yield, by=list(block, shade), mean)
names(kiwimeans) <- c("block","shade","meanyield")

kiwimeans[1:4,]
pause()
 
print("Multilevel Design - Example 9.3")
kiwishade.aov <- aov(yield ~ shade+Error(block/shade),data=kiwishade)
summary(kiwishade.aov)
pause()


sapply(split(yield, shade), mean)

pause()

kiwi.table <- t(sapply(split(yield, plot), as.vector))
kiwi.means <- sapply(split(yield, plot), mean)
kiwi.means.table <- matrix(rep(kiwi.means,4), nrow=12, ncol=4)   
kiwi.summary <- data.frame(kiwi.means, kiwi.table-kiwi.means.table)   
names(kiwi.summary)<- c("Mean", "Vine 1", "Vine 2", "Vine 3", "Vine 4")
kiwi.summary
mean(kiwi.means) # the grand mean (only for balanced design)

require(nlme)
kiwishade.lme <- lme(fixed = yield ~ shade, random = ~ 1 | block/plot, 
data=kiwishade)
res <- residuals(kiwishade.lme)
hat <- fitted(kiwishade.lme) # By default fitted(kiwishade.lme, level=2)
coplot(res ~ hat | kiwishade$block, pch=16, columns=3,
  xlab= "Fitted", ylab="Residuals")
 
res <- residuals(kiwishade.lme)
hat <- fitted(kiwishade.lme, level=0) # shade effects only
unique(hat) # There are just four distinct values, one per treatment
coplot(res ~ hat | kiwishade$block, pch=16, columns=3,
  xlab="Fitted", ylab="Residuals")

# Refit the model with n.omit randomly chosen rows (vines) left out.
n.omit <- 2
take <- rep(TRUE, 48)
take[sample(1:48, n.omit)] <- FALSE
kiwishade.lme <- lme(yield ~ shade, data = kiwishade,
                     random = ~1 | block/plot, subset=take)
# For this nested model VarCorr() has five rows: a header and an intercept
# row for block, the same pair for plot, and finally the residual.
VarCorr(kiwishade.lme)[4, 1]  # Plot component of variance
VarCorr(kiwishade.lme)[5, 1]  # Vine (residual) component of variance

detach(kiwishade)


}
\keyword{datasets}

\eof
\name{leafshape}
\alias{leafshape}
\non_function{}
\title{Full Leaf Shape Data Set}
\usage{data(leafshape)}
\description{
Leaf length, width and petiole measurements taken at various
sites in Australia and elsewhere.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{bladelen}{leaf length (in mm)}
    \item{petiole}{a numeric vector}
    \item{bladewid}{leaf width (in mm)}
    \item{latitude}{latitude}
    \item{logwid}{natural logarithm of width}
    \item{logpet}{logarithm of petiole}
    \item{loglen}{logarithm of length}
    \item{arch}{leaf architecture (0 = plagiotropic, 1 = orthotropic)}
    \item{location}{a factor with levels
\code{Sabah}, \code{Panama}, \code{Costa Rica},
\code{N Queensland}, \code{S Queensland}, 
\code{Tasmania} }
  }
}
\source{
  King, D.A. and Maindonald, J.H. 1999. Tree architecture in relation to
leaf dimensions and tree stature in temperate and tropical rain
forests. Journal of Ecology 87: 1012-1024.
}
\examples{
data(leafshape)
}
\keyword{datasets}

\eof
\name{leafshape17}
\alias{leafshape17}
\non_function{}
\title{Subset of Leaf Shape Data Set}
\usage{data(leafshape17)}
\description{
The \code{leafshape17} data frame has 61 rows and 8 columns.
These are leaf length, width and petiole measurements taken
at several sites in Australia.  This is a subset of the 
\code{leafshape} data frame.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{bladelen}{leaf length (in mm)}
    \item{petiole}{a numeric vector}
    \item{bladewid}{leaf width (in mm)}
    \item{latitude}{latitude}
    \item{logwid}{natural logarithm of width}
    \item{logpet}{logarithm of petiole measurement}
    \item{loglen}{logarithm of length}
    \item{arch}{leaf architecture (0 = orthotropic, 1 = plagiotropic)}
  }
}
\source{
King, D.A. and Maindonald, J.H. 1999. Tree architecture in relation to
leaf dimensions and tree stature in temperate and tropical rain
forests. Journal of Ecology 87: 1012-1024.
}
\examples{
print("Discriminant Analysis - Example 11.2")

data(leafshape17)
require(MASS)
leaf17.lda <- lda(arch ~ logwid+loglen, data=leafshape17)
leaf17.hat <- predict(leaf17.lda)
leaf17.lda
 table(leafshape17$arch, leaf17.hat$class)
pause()

# Resubstitution accuracy: proportion of leaves on the diagonal of the
# table of actual against predicted class.
tab <- table(leafshape17$arch, leaf17.hat$class)
sum(tab[row(tab)==col(tab)])/sum(tab)
# Repeat with leave-one-out cross-validated predictions (CV=TRUE), and
# show the table and accuracy so they can be compared with the above.
leaf17cv.lda <- lda(arch ~ logwid+loglen, data=leafshape17, CV=TRUE)
tab <- table(leafshape17$arch, leaf17cv.lda$class)
tab
sum(tab[row(tab)==col(tab)])/sum(tab)
pause()

leaf17.glm <- glm(arch ~ logwid + loglen, family=binomial, data=leafshape17)
 options(digits=3)
summary(leaf17.glm)$coef
pause()

leaf17.one <- cv.binary(leaf17.glm)
table(leafshape17$arch, round(leaf17.one$internal))     # Resubstitution
pause()

table(leafshape17$arch, round(leaf17.one$cv))           # Cross-validation
}
\keyword{datasets}

\eof
\name{leaftemp}
\alias{leaftemp}
\non_function{}
\title{Leaf and Air Temperature Data}
\usage{data(leaftemp)}
\description{
These data consist of measurements of vapour pressure and of the 
difference between leaf and air temperature.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{CO2level}{Carbon Dioxide level
\code{low}, \code{medium}, \code{high} }
    \item{vapPress}{Vapour pressure}
    \item{tempDiff}{Difference between leaf and air temperature}
    \item{BtempDiff}{a numeric vector}
  }
}
\source{
Katharina Siebke and Susan von Cammerer, Australian National University.
}
\examples{
data(leaftemp)
print("Fitting Multiple Lines - Example 7.3")

leaf.lm1 <- lm(tempDiff ~ 1 , data = leaftemp)
leaf.lm2 <- lm(tempDiff ~ vapPress, data = leaftemp)
leaf.lm3 <- lm(tempDiff ~ CO2level + vapPress, data = leaftemp)
leaf.lm4 <- lm(tempDiff ~ CO2level + vapPress + vapPress:CO2level,
  data = leaftemp)

anova(leaf.lm1, leaf.lm2, leaf.lm3, leaf.lm4)

summary(leaf.lm2)
plot(leaf.lm2)

}
\keyword{datasets}

\eof
\name{leaftemp.all}
\alias{leaftemp.all}
\non_function{}
\title{Full Leaf and Air Temperature Data Set}
\usage{data(leaftemp.all)}
\description{
The \code{leaftemp.all} data frame has 62 rows and 9 columns.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{glasshouse}{a factor with levels
\code{A},
\code{B},
\code{C} }
    \item{CO2level}{a factor with Carbon Dioxide Levels:
\code{high},
\code{low},
\code{medium} }
    \item{day}{a factor}
    \item{light}{a numeric vector}
    \item{CO2}{a numeric vector}
    \item{tempDiff}{Difference between Leaf and Air Temperature}
    \item{BtempDiff}{a numeric vector}
    \item{airTemp}{Air Temperature}
    \item{vapPress}{Vapour Pressure}
  }
}
\source{
J.H. Maindonald
}
\examples{
data(leaftemp.all)
}
\keyword{datasets}

\eof
\name{litters}
\alias{litters}
\non_function{}
\title{Mouse Litters}
\usage{data(litters)}
\description{
Data on the body and brain weights of 20 mice, together
with the size of the litter.  Two mice were taken from each
litter size.

}
\format{
  This data frame contains the following columns:
  \describe{
    \item{lsize}{litter size}
    \item{bodywt}{body weight}
    \item{brainwt}{brain weight}
  }
}
\source{
Wainright P, Pelkman C and Wahlsten D 1989. The quantitative
relationship between nutritional effects on preweaning growth      
and behavioral development in mice. Developmental Psychobiology   
22: 183-193.
}
\examples{
print("Multiple Regression - Example 6.2")

data(litters)
pairs(litters, labels=c("lsize\n\n(litter size)", "bodywt\n\n(Body Weight)",  "brainwt\n\n(Brain Weight)"))
  # pairs(litters) gives a scatterplot matrix with less adequate labeling

mice1.lm <- lm(brainwt ~ lsize, data = litters) # Regress on lsize
mice2.lm <- lm(brainwt ~ bodywt, data = litters) #Regress on bodywt
mice12.lm <- lm(brainwt ~ lsize + bodywt, data = litters) # Regress on lsize & bodywt

summary(mice1.lm)$coef # Similarly for other coefficients.
# results are consistent with the biological concept of brain sparing

pause()

hat(model.matrix(mice12.lm))  # hat diagonal
pause()

plot(lm.influence(mice12.lm)$hat, residuals(mice12.lm))

print("Diagnostics - Example 6.3")

mice12.lm <- lm(brainwt ~ bodywt+lsize, data=litters)
oldpar <-par(mfrow = c(1,2))
bx <- mice12.lm$coef[2]; bz <- mice12.lm$coef[3]
res <- residuals(mice12.lm)
plot(litters$bodywt, bx*litters$bodywt+res, xlab="Body weight",
  ylab="Component + Residual")
panel.smooth(litters$bodywt, bx*litters$bodywt+res) # Overlay
plot(litters$lsize, bz*litters$lsize+res, xlab="Litter size", 
  ylab="Component + Residual")
panel.smooth(litters$lsize, bz*litters$lsize+res)
par(oldpar)
}
\keyword{datasets}

\eof
\name{mifem}
\alias{mifem}
\non_function{}
\title{Mortality Outcomes for Females Suffering Myocardial Infarction}
\usage{data(mifem)}
\description{
The \code{mifem} data frame has 1295 rows and 10 columns.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{outcome}{mortality outcome, a factor with levels \code{live}, 
\code{dead}} 
    \item{age}{age at onset}
    \item{yronset}{year of onset}
    \item{premi}{previous myocardial infarction event, a 
factor with levels \code{y}, \code{n}, \code{nk} not known }
    \item{smstat}{smoking status, a factor with levels \code{c} 
current, \code{x} ex-smoker, \code{n} non-smoker, \code{nk} not known}
    \item{diabetes}{a factor with levels \code{y}, \code{n}, \code{nk} not 
known}
    \item{highbp}{high blood pressure, a factor with levels
\code{y}, \code{n}, \code{nk} not known }
    \item{hichol}{high cholesterol, a factor with levels
\code{y}, \code{n}, \code{nk} not known}
    \item{angina}{a factor with levels \code{y},
\code{n}, \code{nk} not known}
    \item{stroke}{a factor with levels
\code{y}, \code{n}, \code{nk} not known}
  }
}
\source{
 Newcastle (Australia) centre of the Monica
  project; see the web site http://www.ktl.fi/monicaindex.html
}
\examples{
data(mifem)

print("CART - Example 10.7")
summary(mifem)
pause()

require(rpart)
mifem.rpart <- rpart(outcome ~ ., data = mifem, cp = 0.0025)
plotcp(mifem.rpart)
printcp(mifem.rpart)
pause()

mifemb.rpart <- prune(mifem.rpart, cp=0.006)
print(mifemb.rpart)
}
\keyword{datasets}

\eof
\name{mignonette}
\alias{mignonette}
\non_function{}
\title{Darwin's Wild Mignonette Data}
\usage{data(mignonette)}
\description{
Data which compare the heights of crossed plants with
self-fertilized plants.  Plants were paired within the
pots in which they were grown, with one on one side and
one on the other.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{cross}{heights of 
the crossed plants}
    \item{self}{heights of
the self-fertilized plants}
  }
}
\source{
Darwin, Charles. 1877.  The Effects of Cross and Self
Fertilisation in the Vegetable Kingdom.  Appleton and Company, New
York.
}
\examples{
print("Is Pairing Helpful? - Example 4.3.1")

data(mignonette)
attach(mignonette)
plot(cross ~ self, pch=rep(c(4,1), c(3,12))); abline(0,1) 
abline(mean(cross-self), 1, lty=2)
detach(mignonette)
}
\keyword{datasets}

\eof
\name{milk}
\alias{milk}
\non_function{}
\title{Milk Sweetness Study}
\usage{data(milk)}
\description{
The \code{milk} data frame has 17 rows and 2 columns.
Each of 17 panelists compared two milk samples
for sweetness. 
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{four}{a numeric vector consisting of the assessments for four units
of additive}
    \item{one}{a numeric vector consisting of the assessments for one unit
of additive}
  }
}
\source{
 ??
}
\references{
??
}
\examples{
print("Rug Plot - Example 1.8.1")
data(milk)
xyrange <- range(milk)
plot(four ~ one, data = milk, xlim = xyrange, ylim = xyrange, pch = 16)
rug(milk$one)
rug(milk$four, side = 2)
abline(0, 1)
}
\keyword{datasets}

\eof
\name{modelcars}
\alias{modelcars}
\non_function{}
\title{Model Car Data}
\usage{data(modelcars)}
\description{
The \code{modelcars} data frame has 12 rows and 2 columns.
The data are for an experiment in which a model car was released
three times at each of four different distances up a 20 degree
ramp.  The experimenter recorded distances traveled from the 
bottom of the ramp across a concrete floor.  
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{distance.traveled}{a numeric vector consisting
of the lengths traveled (in cm)}
    \item{starting.point}{a numeric vector consisting
of the distance of the starting point from the top of
the ramp (in cm)}
  }
}
\source{
J.H. Maindonald
}
\examples{
data(modelcars)
plot(modelcars)
modelcars.lm <- lm(distance.traveled ~ starting.point, data=modelcars)
aov(modelcars.lm)
pause()

print("Response Curves - Example 4.6")
attach(modelcars)
stripchart(distance.traveled ~ starting.point, vertical=TRUE, pch=15,  xlab = "Distance up ramp", ylab="Distance traveled")
detach(modelcars)

}
\keyword{datasets}

\eof
\name{monica}
\alias{monica}
\non_function{}
\title{WHO Monica Data}
\usage{data(monica)}
\description{
The \code{monica} data frame has 6357 rows and 12 columns. Note
that \code{mifem} is the female subset of this data frame.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{outcome}{mortality outcome, a factor with levels \code{live}, 
\code{dead}} 
    \item{age}{age at onset}
    \item{sex}{m = male, f = female}
    \item{hosp}{y = hospitalized, n = not hospitalized}
    \item{yronset}{year of onset}
    \item{premi}{previous myocardial infarction event, a 
factor with levels \code{y}, \code{n}, \code{nk} not known }
    \item{smstat}{smoking status, a factor with levels \code{c} 
current, \code{x} ex-smoker, \code{n} non-smoker, \code{nk} not known}
    \item{diabetes}{a factor with levels \code{y}, \code{n}, \code{nk} not 
known}
    \item{highbp}{high blood pressure, a factor with levels
\code{y}, \code{n}, \code{nk} not known }
    \item{hichol}{high cholesterol, a factor with levels
\code{y}, \code{n}, \code{nk} not known}
    \item{angina}{a factor with levels \code{y},
\code{n}, \code{nk} not known}
    \item{stroke}{a factor with levels
\code{y}, \code{n}, \code{nk} not known}
  }
}
\source{
 Newcastle (Australia) centre of the Monica
  project; see the web site http://www.ktl.fi
}
\examples{
data(monica)

print("CART - Example 10.7")
summary(monica)
pause()

require(rpart)
monica.rpart <- rpart(outcome ~ ., data = monica, cp = 0.0025)
plotcp(monica.rpart)
printcp(monica.rpart)
pause()

monicab.rpart <- prune(monica.rpart, cp=0.006)
print(monicab.rpart)
}
\keyword{datasets}

\eof
\name{moths}
\alias{moths}
\non_function{}
\title{Moths Data}
\usage{data(moths)}
\description{
The \code{moths} data frame has 41 rows and 4 columns.
These data are from a study of the effect of habitat on the
densities of two species of moth (A and P).  Transects were
set across the search area.  Within transects, sections
were identified according to habitat type.  
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{meters}{length of transect}
    \item{A}{number of type A moths found}
    \item{P}{number of type P moths found}
    \item{habitat}{a factor with levels
\code{Bank},
\code{Disturbed},
\code{Lowerside},
\code{NEsoak},
\code{NWsoak},
\code{SEsoak},
\code{SWsoak},
\code{Upperside} }
  }
}
\source{
Sharyn Wragg, formerly of Australian National University
}
\examples{
print("Quasi Poisson Regression - Example 8.3")
data(moths)
rbind(table(moths[,4]), sapply(split(moths[,-4], moths$habitat), apply,2, 
sum))
A.glm <- glm(formula = A ~ log(meters) + factor(habitat), family = 
quasipoisson, data = moths)
summary(A.glm)
moths$habitat <- relevel(moths$habitat, ref="Lowerside")
A.glm <- glm(A ~ habitat + log(meters), family=quasipoisson, data=moths)
summary(A.glm)$coef
}
\keyword{datasets}

\eof
\name{nsw74psid1}
\alias{nsw74psid1}
\non_function{}
\title{Labour Training Evaluation Data}
\usage{data(nsw74psid1)}
\description{
This data frame contains 2675 rows and 10 columns.
These data are pertinent to an investigation of the way that   
earnings changed, between 1974-1975 and 1978, in the absence
of training.  Data for the experimental treatment 
group (NSW) were combined with control data results from the 
Panel Study of Income Dynamics (PSID) study.  
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{trt}{a numeric vector 
identifying the study in which the subjects were enrolled
(0 = PSID, 1 = NSW).}
    \item{age}{ age (in years).}
    \item{educ}{ years of education.}
    \item{black}{ (0 = not black, 1 = black).}
    \item{hisp}{ (0 = not hispanic, 1 = hispanic).}
    \item{marr}{ (0 = not married, 1 = married).}
    \item{nodeg}{ (0 = completed high school, 1 = dropout).}
    \item{re74}{ real earnings in 1974.}
    \item{re75}{ real earnings in 1975.}
    \item{re78}{ real earnings in 1978.} 
  }
}
\source{
  http://www.columbia.edu/~rd247/nswdata.html
}
\references{
 Dehejia, R.H. and Wahba, S. 1999. Causal effects in
non-experimental studies: re-evaluating the evaluation of training
programs. Journal of the American Statistical Association 94:
1053-1062.

 Lalonde, R. 1986. Evaluating the economic evaluations of
training programs. American Economic Review 76: 604-620.

}
\examples{
data(nsw74psid1)
print("Interpretation of Regression Coefficients - Example 6.6")

 nsw74psid1.lm <- lm(re78~ trt+ (age + educ + re74 + re75) +
   (black + hisp + marr + nodeg), data = nsw74psid1)
 summary(nsw74psid1.lm)$coef
options(digits=4)
sapply(nsw74psid1[, c(2,3,8,9,10)], quantile, prob=c(.25,.5,.75,.95,1))
attach(nsw74psid1)
sapply(nsw74psid1[trt==1, c(2,3,8,9,10)], quantile, 
prob=c(.25,.5,.75,.95,1))
pause()

here <- age <= 40 & re74<=5000 & re75 <= 5000 & re78 < 30000 
nsw74psidA <- nsw74psid1[here, ]
detach(nsw74psid1)
table(nsw74psidA$trt)
pause()

A1.lm <- lm(re78 ~ trt + (age + educ + re74 + re75) + (black +
      hisp + marr + nodeg), data = nsw74psidA)
summary(A1.lm)$coef
pause()

A2.lm <- lm(re78 ~ trt + (age + educ + re74 + re75) * (black +   
      hisp + marr + nodeg), data = nsw74psidA)
anova(A1.lm, A2.lm)

}
\keyword{datasets}

\eof
\name{nsw74psid3}
\alias{nsw74psid3}
\non_function{}
\title{Labour Training Evaluation Data}
\usage{data(nsw74psid3)}
\description{
These data are pertinent to an investigation of the way that   
earnings changed, between 1974-1975 and 1978, in the absence
of training.  The data frame combines data for the experimental treatment 
group (NSW, 185 observations), using as control data results from the PSID 
(Panel Study of Income Dynamics) study (128 observations).  The latter
were chosen to mimic the characteristics of the NSW training
and control groups.  These are a subset of the \code{nsw74psid1} data.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{trt}{a numeric vector 
identifying the study in which the subjects were enrolled
(0 = PSID, 1 = NSW)
}
    \item{age}{ age (in years)}
    \item{educ}{ years of education}
    \item{black}{ (0 = not black, 1 = black)}
    \item{hisp}{ (0 = not hispanic, 1 = hispanic)}
    \item{marr}{ (0 = not married, 1 = married)}
    \item{nodeg}{ (0 = completed high school, 1 = dropout)}
    \item{re74}{ real earnings in 1974}
    \item{re75}{ real earnings in 1975}
    \item{re78}{ real earnings in 1978} 
  }
}
\source{
  http://www.columbia.edu/~rd247/nswdata.html
}
\references{
 Dehejia, R.H. and Wahba, S. 1999. Causal effects in
non-experimental studies: re-evaluating the evaluation of training
programs. Journal of the American Statistical Association 94:
1053-1062.

 Lalonde, R. 1986. Evaluating the economic evaluations of
training programs. American Economic Review 76: 604-620.
}
\examples{
print("Contingency Tables - Example 4.4")
data(nsw74psid3)
table(nsw74psid3$trt, nsw74psid3$nodeg)
chisq.test(table(nsw74psid3$trt,nsw74psid3$nodeg))
}
\keyword{datasets}

\eof
\name{nsw74psidA}
\alias{nsw74psidA}
\non_function{}
\title{A Subset of the nsw74psid1 Data Set}
\usage{data(nsw74psidA)}
\description{
The \code{nsw74psidA} data frame has 252 rows and 10 columns.
See \code{nsw74psid1} for more information.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{trt}{a numeric vector}
    \item{age}{a numeric vector}
    \item{educ}{a numeric vector}
    \item{black}{a numeric vector}
    \item{hisp}{a numeric vector}
    \item{marr}{a numeric vector}
    \item{nodeg}{a numeric vector}
    \item{re74}{a numeric vector}
    \item{re75}{a numeric vector}
    \item{re78}{a numeric vector}
  }
}
\details{
This data set was obtained using:

\code{here <- age <= 40 & re74<=5000 & re75 <= 5000 & re78 < 30000}
\code{nsw74psidA <- nsw74psid1[here, ]}
}
\examples{
data(nsw74psidA)
table(nsw74psidA$trt)


A1.lm <- lm(re78 ~ trt + (age + educ + re74 + re75) + (black +
      hisp + marr + nodeg), data = nsw74psidA)
summary(A1.lm)$coef

discA.glm <- glm(formula = trt ~ age + educ + black + hisp +
  marr + nodeg + re74 + re75, family = binomial, data = nsw74psidA)
A.scores <- predict(discA.glm)

options(digits=4)
overlap <- A.scores > -3.5 & A.scores < 3.8
A.lm <- lm(re78 ~ trt + A.scores, data=nsw74psidA, subset = overlap)
summary(A.lm)$coef

}
\keyword{datasets}

\eof
\name{onesamp}
\alias{onesamp}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Paired Sample t-test }
\description{
This function performs a t-test for the mean difference for paired data,
and produces a scatterplot of one column against the other column, showing 
whether there was any benefit to using the paired design.
}
\usage{
onesamp(dset=corn, x="unsprayed", y="sprayed", xlab=NULL, ylab=NULL, dubious=NULL, conv=NULL, dig=2)
}
\arguments{
  \item{dset}{a matrix or dataframe having two columns}
  \item{x}{ name of column to play the role of the `predictor' }
  \item{y}{ name of column to play the role of the `response' }
  \item{xlab}{ horizontal axis label }
  \item{ylab}{ vertical axis label }
  \item{dubious}{ }
  \item{conv}{ }
  \item{dig}{ }
}
\value{

A scatterplot of \code{y} against \code{x} together with estimates
of standard errors and standard errors of the difference 
(\code{y}-\code{x}).

Also produced is a confidence interval and p-value for the test.

}
\author{J.H. Maindonald}

\examples{
data(pair65)
onesamp(dset = pair65, x = "ambient", y = "heated", xlab =
        "Amount of stretch (ambient)", ylab =
        "Amount of stretch (heated)") 
}
\keyword{models}

\eof
\name{onet.permutation}
\alias{onet.permutation}
\title{One Sample Permutation t-test}
\description{
This function computes the p-value for the one sample
t-test using a permutation test.  The permutation
density can  also be plotted.
}
\usage{
onet.permutation(x=pair65$heated - pair65$ambient, nsim=2000, plotit=TRUE)
}
\arguments{
  \item{x}{a numeric vector containing the sample values (centered
at the null hypothesis value) }
  \item{nsim}{the number of permutations (randomly selected)}
  \item{plotit}{if TRUE, the permutation density is plotted }
}
\value{
The p-value for the test of the hypothesis that the mean of \code{x}
differs from 0
}
\references{ 
Good, P. 2000. Permutation Tests. Springer, New York.
}
\author{J.H. Maindonald}
\examples{
data(pair65)
onet.permutation()
}
\keyword{models}

\eof
\name{oneway.plot}
\alias{oneway.plot}
\title{Display of One Way Analysis Results}
\description{
A line plot of means for unstructured comparison.
}
\usage{
oneway.plot(obj = rice.aov, axisht = 6, xlim = NULL, xlab = NULL,
    lsdht = 1.5, hsdht = 0.5, textht = axisht - 2.5, oma = rep(1,
        4), angle = 80, alpha = 0.05)
}
\arguments{
  \item{obj}{One way analysis of variance object (from aov)}
  \item{axisht}{Axis height}
  \item{xlim}{Range on horizontal axis}
  \item{xlab}{Horizontal axis label}
  \item{lsdht}{Height adjustment parameter for LSD comparison plot}
  \item{hsdht}{Height adjustment parameter for Tukey's HSD comparison 
plot}
  \item{textht}{Height of text}
  \item{oma}{Outer margin area}
  \item{angle}{Text angle (in degrees)}
  \item{alpha}{Test size}
}
\value{
A line plot
}
\author{J.H. Maindonald}

\examples{
data(rice)
rice.aov <- aov(ShootDryMass ~ trt, data=rice)
oneway.plot(obj=rice.aov)
}
\keyword{models}

\eof
\name{orings}
\alias{orings}
\non_function{}
\title{Challenger O-rings Data}
\usage{data(orings)}
\description{
Record of the number and type of O-ring failures prior to the
tragic Challenger mission in January, 1986.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Temperature}{O-ring temperature for each test firing or
actual launch of the shuttle rocket engine}
    \item{Erosion}{Number of erosion incidents }
    \item{Blowby}{Number of blowby incidents}
    \item{Total}{Total number of incidents}
  }
}
\source{
 Presidential Commission on the Space Shuttle Challenger Accident, 
Vol. 1, 1986: 129-131.  
}
\references{
 Tufte, E. R. 1997. Visual Explanations. Graphics Press, Cheshire,
Connecticut, U.S.A.
}
\examples{
data(orings)
oldpar <- par(mfrow=c(1,2))
plot(Total~Temperature, data = orings[c(1,2,4,11,13,18),]) # the 
               # observations included in the pre-launch charts
plot(Total~Temperature, data = orings)
par(oldpar)
}
\keyword{datasets}

\eof
\name{overlap.density}
\alias{overlap.density}
\title{Overlapping Density Plots}
\description{
Densities for two independent samples are estimated and plotted.  
}
\usage{
overlap.density(x0, x1, ratio=c(0.05, 20), plotit=TRUE)
}
\arguments{
  \item{x0}{control group measurements}
  \item{x1}{treatment group measurements}
  \item{ratio}{the range within which the relative numbers of
           observations from the two groups are
           required to lie.  [The relative numbers at
           any point are estimated from (density1*n1)/(density0*n0)]}
  \item{plotit}{If TRUE, a plot is produced}
}
\author{J.H. Maindonald}
\seealso{ \code{t.test} }

\examples{
data(two65)
# attach() puts the columns ambient and heated on the search path
attach(two65)
overlap.density(ambient,heated)
t.test(ambient,heated)
# Remove two65 from the search path again, so that its columns do not
# mask objects used by later code.
detach(two65)
}
\keyword{models}

\eof
\name{ozone}
\alias{ozone}
\non_function{}
\title{Ozone Data}
\usage{data(ozone)}
\description{
Monthly provisional mean total ozone (in Dobson units) at Halley Bay 
(approximately corrected to Bass-Paur).
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Year}{the year}
    \item{Aug}{August mean total ozone}
    \item{Sep}{September mean total ozone}
    \item{Oct}{October mean total ozone}
    \item{Nov}{November mean total ozone}
    \item{Dec}{December mean total ozone}
    \item{Jan}{January mean total ozone}
    \item{Feb}{February mean total ozone}
    \item{Mar}{March mean total ozone}
    \item{Apr}{April mean total ozone}
    \item{Annual}{Yearly mean total ozone}
  }
}
\source{
Shanklin, J. (2001) Ozone at Halley, Rothera and Vernadsky/Faraday.  

http://www.antarctica.ac.uk/met/jds/ozone/data/zoz5699.dat
}
\references{
Christie, M. (2000) The Ozone Layer: a Philosophy of Science Perspective.
Cambridge University Press.
}
\examples{
data(ozone)
}
\keyword{datasets}

\eof
\name{pair65}
\alias{pair65}
\non_function{}
\title{Heated Elastic Bands}
\usage{data(pair65)}
\description{
The \code{pair65} data frame has 9 rows and 2 columns.
Eighteen elastic bands were divided into nine pairs, with bands
of similar stretchiness placed in the same pair. One member of
each pair was placed in hot water (60-65 degrees C) for four
minutes, while the other was left at ambient temperature.  After
a wait of about ten minutes, the amounts of stretch, under a 1.35 kg
weight, were recorded.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{heated}{a numeric vector giving the stretch lengths for
the heated bands}
    \item{ambient}{a numeric vector giving the stretch lengths for
the unheated bands}
  }
}
\source{
J.H. Maindonald
}
\examples{
data(pair65)
mean(pair65$heated - pair65$ambient)
sd(pair65$heated - pair65$ambient)
}
\keyword{datasets}

\eof
\name{panel.corr}
\alias{panel.corr}
\title{Scatterplot Panel}
\description{
This function produces a bivariate scatterplot with the Pearson 
correlation.  This is for use with the function \code{panelplot}.
}
\usage{
panel.corr(data, ...)
}
\arguments{
  \item{data}{A data frame with columns x and y}
  \item{...}{Additional arguments} 
}
\author{J.H. Maindonald}

\examples{

# correlation between body and brain weights for 20 mice:

data(litters)
weights <- litters[,-1]
names(weights) <-  c("x","y")
weights <- list(weights)
weights[[1]]$xlim <- range(litters[,2])
weights[[1]]$ylim <- range(litters[,3])
panelplot(weights, panel.corr, totrows=1, totcols=1)
}
\keyword{models}

\eof
\name{panelplot}
\alias{panelplot}
\title{Panel Plot}
\description{
Panel plots of various types.
}
\usage{
panelplot(data, panel=points, totrows=3, totcols=2, oma=rep(2.5, 4), par.strip.text=NULL)
}
\arguments{
  \item{data}{ A list consisting of elements, each of which
consists of x, y, xlim and ylim vectors}
  \item{panel}{The panel function to be plotted}
  \item{totrows}{The number of rows in the plot layout}
  \item{totcols}{The number of columns in the plot layout}
  \item{oma}{Outer margin area}
  \item{par.strip.text}{A data frame with column cex}
}
\author{J.H. Maindonald}

\examples{
     x1 <- x2 <- x3 <- (11:30)/5
     y1 <- x1 + rnorm(20)/2
     y2 <- 2 - 0.05 * x1 + 0.1 * ((x1 - 1.75))^4 + 1.25 * rnorm(20)
     r <- round(cor(x1, y2), 3)
     rho <- round(cor(rank(x1), rank(y2)), 3)
     y3 <- (x1 - 3.85)^2 + 0.015 + rnorm(20)/4
     theta <- ((2 * pi) * (1:20))/20
     x4 <- 10 + 4 * cos(theta)
     y4 <- 10 + 4 * sin(theta) + (0.5 * rnorm(20))
     r1 <- cor(x1, y1)
     xy <- data.frame(x = c(rep(x1, 3), x4), y = c(y1, y2, y3, y4), gp = rep(                1:4, rep(20, 4)))
     xy <- split(xy,xy$gp)
     xlimdf<-lapply(list(x1,x2,x3,x4),range)
     ylimdf<-lapply(list(y1,y2,y3,y4),range)
     xy <- lapply(1:4,function(i,u,v,w){u[[i]]$xlim<-v[[i]];
     u[[i]]$ylim<-w[[i]]; u[[i]]},u=xy,v=xlimdf,w=ylimdf)
 panelplot(xy,panel=panel.corr,totrows=2,totcols=2, oma=rep(1,4))
}
\keyword{models}

\eof
\name{pause}
\alias{pause}
\title{
Pause before continuing execution  
}
\description{
If a program produces several plots, insertion of \code{pause()} between
two plots suspends execution until the <Enter> key is pressed, to
allow inspection of the current plot.
}
\usage{
pause()
}
\author{
From the `sm' package of Bowman and Azzalini (1997)
}
\keyword{misc}
% Converted by Sd2Rd version 1.15.

\eof
\name{possum}
\alias{possum}
\non_function{}
\title{Possum Measurements}
\usage{data(possum)}
\description{
The \code{possum} data frame consists of nine morphometric
measurements on each of 104 mountain brushtail possums, trapped
at seven sites from Southern Victoria to central Queensland.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{case}{observation number}
    \item{site}{one of seven locations where possums were
trapped}
    \item{Pop}{a factor which classifies the sites as
\code{Vic} Victoria,
\code{other} New South Wales or Queensland}
    \item{sex}{a factor with levels
\code{f} female,
\code{m} male }
\item{age}{age}
    \item{hdlngth}{head length}
    \item{skullw}{skull width}
    \item{totlngth}{total length}
    \item{taill}{tail length}
    \item{footlgth}{foot length}
    \item{earconch}{ear conch length}
    \item{eye}{distance from medial canthus to lateral canthus of
right eye}
    \item{chest}{chest girth (in cm)}
    \item{belly}{belly girth (in cm)}
  }
}
\source{
Lindenmayer, D. B., Viggers, K. L., Cunningham, R. B., and
Donnelly, C. F. 1995. Morphological variation among populations of the
mountain brushtail possum, Trichosurus caninus Ogilby
(Phalangeridae: Marsupialia). Australian Journal of Zoology 43:
449-458. 
}
\examples{
data(possum)
boxplot(earconch~sex, data=possum)
pause()

sex <- as.integer(possum$sex)
pairs(possum[, c(9:11)], oma=c(2,4,5,4), pch=c(0,2:7), col=c("red","blue"),
  labels=c("tail\nlength","foot\nlength","ear conch\nlength"))
chh <- par()$cxy[2]; xleg <- 0.05; yleg <- 1.04
oldpar <- par(xpd=TRUE)  
legend(xleg, yleg, c("Cambarville", "Bellbird", "Whian Whian  ",
  "Byrangery", "Conondale  ","Allyn River", "Bulburin"), pch=c(0,2:7),
  x.intersp=1, y.intersp=0.75, cex=0.8, xjust=0, bty="n", ncol=4)
text(x=0.2, y=yleg - 2.25*chh, "female", col="red", cex=0.8, bty="n")
text(x=0.75, y=yleg - 2.25*chh, "male", col="blue", cex=0.8, bty="n")
par(oldpar)
pause()

sapply(possum[,6:14], function(x)max(x,na.rm=TRUE)/min(x,na.rm=TRUE))
pause()

require(mva)           # Load multivariate analysis library
# Logical index of the rows for which footlgth was recorded.  (na.omit()
# would return the non-missing footlgth values themselves, which are not
# valid row indices.)
here <- !is.na(possum$footlgth)
# Principal components of columns 6 to 14, using only those rows
possum.prc <- princomp(possum[here, 6:14])
pause()

plot(possum.prc$scores[,1] ~ possum.prc$scores[,2],
  col=c("red","blue")[as.numeric(possum$sex[here])],
  pch=c(0,2:7)[possum$site[here]], xlab = "PC1", ylab = "PC2")
  # NB: We have abbreviated the axis titles
chh <- par()$cxy[2]; xleg <- -15; yleg <- 20.5
oldpar <- par(xpd=TRUE)
legend(xleg, yleg, c("Cambarville", "Bellbird", "Whian Whian  ",
  "Byrangery", "Conondale  ","Allyn River", "Bulburin"), pch=c(0,2:7),
  x.intersp=1, y.intersp=0.75, cex=0.8, xjust=0, bty="n", ncol=4)
text(x=-9, y=yleg - 2.25*chh, "female", col="red", cex=0.8, bty="n")
summary(possum.prc, loadings=TRUE, digits=2)
par(oldpar)
pause()

require(MASS)
here <- !is.na(possum$footlgth)
possum.lda <- lda(site ~ hdlngth+skullw+totlngth+ taill+footlgth+
  earconch+eye+chest+belly, data=possum, subset=here)
options(digits=4)
possum.lda$svd   # Examine the singular values   
plot(possum.lda, dimen=3)
  # Scatterplot matrix - scores on 1st 3 canonical variates (Figure 11.4)
possum.lda
}
\keyword{datasets}

\eof
\name{possumsites}
\alias{possumsites}
\non_function{}
\title{Possum Sites}
\usage{data(possumsites)}
\description{
The \code{possumsites} data frame consists of latitudes, longitudes,
and altitudes for the seven sites from Southern Victoria to central Queensland
where the \code{possum} observations were made.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{latitude}{a numeric vector}
    \item{longitude}{a numeric vector}
    \item{altitude}{in meters}
  }
}
\source{
Lindenmayer, D. B., Viggers, K. L., Cunningham, R. B., and
Donnelly, C. F. 1995. Morphological variation among populations of the
mountain brushtail possum, Trichosurus caninus Ogilby
(Phalangeridae: Marsupialia). Australian Journal of Zoology 43:
449-458. 
}
\examples{
require(oz)
oz(sections=c(3:5, 11:16))
data(possumsites)
# Make the columns of possumsites available by name
attach(possumsites)
points(latitude, longitude, pch=16, col=2)
# Character width and height in user coordinates
chw <- par()$cxy[1]
chh <- par()$cxy[2]
# Label position for each site (values are passed to the pos argument of text())
posval <- c(2,4,2,2,4,2,2)
text(latitude+(3-posval)*chw/4, longitude, row.names(possumsites), pos=posval)
# Remove possumsites from the search path again
detach(possumsites)
}
\keyword{datasets}

\eof
\name{powerplot}
\alias{powerplot}
\title{Plot of Power Functions}
\description{
This function plots powers of a variable on the interval [0,10].
}
\usage{
powerplot(expr="x^2", xlab="x", ylab="y")
}
\arguments{
  \item{expr}{Functional form to be plotted}
  \item{xlab}{x-axis label}
  \item{ylab}{y-axis label}
}
\value{
A plot of the given expression on the interval [0,10]. 
}
\author{J.H. Maindonald}
\note{Other expressions such as "sin(x)" and "cos(x)", etc.
could also be plotted with this function, but results are
not guaranteed.
}
\examples{
   oldpar <- par(mfrow = c(2, 3), mar = par()$mar - c(
        1, 1, 1.0, 1),  mgp = c(1.5, 0.5, 0),  oma=c(0,1,0,1))
#    on.exit(par(oldpar))
    powerplot(expr="sqrt(x)", xlab="")
    powerplot(expr="x^0.25", xlab="", ylab="")
    powerplot(expr="log(x)", xlab="", ylab="")
    powerplot(expr="x^2")
    powerplot(expr="x^4", ylab="")  
    powerplot(expr="exp(x)", ylab="")
par(oldpar)}
\keyword{models}

\eof
\name{primates}
\alias{primates}
\non_function{}
\title{Primate Body and Brain Weights}
\usage{data(primates)}
\description{
A subset of the \code{Animals} data frame from the MASS library.
It contains the average body and brain measurements of five
primates.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Bodywt}{a numeric vector consisting of the body weights (in kg) 
of five
    different primates}
    \item{Brainwt}{a numeric vector consisting of the corresponding brain weights (in g) }
  }
}
\source{
P. J. Rousseeuw  and A. M. Leroy (1987) Robust Regression and
     Outlier Detection. Wiley, p. 57.
}
\examples{
data(primates)
attach(primates)
plot(x=Bodywt, y=Brainwt, pch=16,
       xlab="Body weight (kg)", ylab="Brain weight (g)",
       xlim=c(5,300), ylim=c(0,1500))
chw <- par()$cxy[1]
chh <- par()$cxy[2]
text(x=Bodywt+chw, y=Brainwt+c(-.1,0,0,.1,0)*chh,
       labels=row.names(primates), adj=0)
detach(primates)
}
\keyword{datasets}

\eof
\name{qreference}
\alias{qreference}
\title{Normal QQ Reference Plot}
\description{
This function computes the normal QQ plot for given data and
allows for comparison with normal QQ plots of simulated data.
}
\usage{
qreference(test=NULL, mu = 10, sigma = 1, m = 50, nrep = 5,
             seed=NULL, nrows=NULL, cex.points=0.65, cex.strip=0.75)
}
\arguments{
  \item{test}{a vector containing a sample to be tested}
  \item{mu}{the mean of the simulated reference samples}
  \item{sigma}{the standard deviation of the reference samples}
  \item{m}{the sample size for the reference samples; default is
test sample size if test sample is supplied}
  \item{nrep}{the total number of samples, including reference
samples and test sample if any}
  \item{seed}{the random number generator seed}
  \item{nrows}{number of rows in the plot layout}
  \item{cex.points}{character expansion factor for plotted points}
  \item{cex.strip}{character expansion factor for labels}
}
\value{
QQ plots of the sample (if non-null) and all reference samples
}
\author{J.H. Maindonald}

\examples{
qreference(rt(180,1))

qreference(rexp(180), nrep = 4)

data(toycars)
toycars.lm <- lm(distance ~ angle + factor(car), data = toycars)
qreference(residuals(toycars.lm), nrep = 9)
}
\keyword{models}

\eof
\name{rainforest}
\alias{rainforest}
\non_function{}
\title{Rainforest Data}
\usage{data(rainforest)}
\description{
The \code{rainforest} data frame has 65 rows and 7 columns.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{dbh}{a numeric vector}
    \item{wood}{a numeric vector}
    \item{bark}{a numeric vector}
    \item{root}{a numeric vector}
    \item{rootsk}{a numeric vector}
    \item{branch}{a numeric vector}
    \item{species}{a factor with levels
\code{Acacia mabellae},
\code{C. fraseri},
\code{Acmena smithii},
\code{B. myrtifolia} }
  }
}
\source{
J. Ash, Australian National University
}
\examples{
data(rainforest)
table(rainforest$species)
}
\keyword{datasets}

\eof
\name{rareplants}
\alias{rareplants}
\non_function{}
\title{Rare and Endangered Plant Species}
\usage{data(rareplants)}
\description{
These data were taken from species lists for South Australia, Victoria and 
Tasmania.  Species were classified as CC, CR, RC and RR, with C denoting
common and R denoting rare.  The first code relates to South Australia 
and Victoria, and the second to Tasmania.  They were further classified
by habitat according to the Victorian register, where D = dry only, 
W = wet only, and WD = wet or dry.
}
\format{
  The format is:
 chr "rareplants"
}
\source{
Jasmyn Lynch, Department of Botany and Zoology at Australian National 
University
}
\examples{
data(rareplants)
chisq.test(rareplants)
}
\keyword{datasets}

\eof
\name{rice}
\alias{rice}
\non_function{}
\title{Genetically Modified and Wild Type Rice Data}
\usage{data(rice)}
\description{
The \code{rice} data frame has 72 rows and 5 columns.
The data are from an experiment that compared wild type (wt)
and genetically modified rice plants (ANU843), each
with three different chemical treatments (F10, NH4Cl, and NH4NO3).   
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{PlantNo}{a numeric vector}
    \item{Block}{a numeric vector}
    \item{RootDryMass}{a numeric vector}
    \item{ShootDryMass}{a numeric vector}
    \item{trt}{a factor with levels
\code{F10},
\code{NH4Cl},
\code{NH4NO3},
\code{F10 +ANU843},
\code{NH4Cl +ANU843},
\code{NH4NO3 +ANU843}}
  }
}
\source{
 Perrine, F.M., Prayitno, J., Weinman, J.J., Dazzo, F.B. and
Rolfe, B. 2001.  Rhizobium plasmids are involved in the inhibition or
stimulation of rice growth and development.  Australian Journal of
Plant Physiology 28: 923-927.
}
\examples{
data(rice)
print("One and Two-Way Comparisons - Example 4.5")
attach(rice)
oldpar <- par(las = 2)
stripchart(ShootDryMass ~ trt, pch=1, cex=1, xlab="Level of factor 1")
detach(rice)
pause()

rice.aov <- aov(ShootDryMass ~ trt, data=rice); anova(rice.aov)
anova(rice.aov)
pause()

summary.lm(rice.aov)$coef
pause()

rice$trt <- relevel(rice$trt, ref="NH4Cl")
  # Set NH4Cl as the baseline

fac1 <- factor(sapply(strsplit(as.character(rice$trt)," \\\+"), function(x)x[1]))
anu843 <- sapply(strsplit(as.character(rice$trt), "\\\+"), 
function(x)c("wt","ANU843")[length(x)])
anu843 <- factor(anu843, levels=c("wt", "ANU843"))
attach(rice)
interaction.plot(fac1, anu843, ShootDryMass)
detach(rice)
par(oldpar)

}
\keyword{datasets}

\eof
\name{roller}
\alias{roller}
\non_function{}
\title{Lawn Roller Data}
\usage{data(roller)}
\description{
The \code{roller} data frame has 10 rows and 2 columns.
Different weights of roller were rolled over different parts
of a lawn, and the depression was recorded.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{weight}{a numeric vector consisting of the roller weights}
    \item{depression}{the depth of the depression made in the grass
under the roller}
  }
}
\source{
Stewart, K.M., Van Toor, R.F., Crosbie,
S.F. 1988. Control of grass grub (Coleoptera: Scarabaeidae) with
rollers of different design. N.Z. Journal of Experimental Agriculture
16: 141-150.
}
\examples{
data(roller)
plot(roller)
roller.lm <- lm(depression ~ weight, data = roller)
plot(roller.lm, which = 4)
}
\keyword{datasets}

\eof
\name{science}
\alias{science}
\non_function{}
\title{School Science Survey Data}
\usage{data(science)}
\description{
The \code{science} data frame has 1385 rows and 7 columns.

The data are on attitudes to science, from a survey where there were
results from 20 classes in private schools and 46 classes in public
schools.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{State}{a factor with levels
   \code{ACT} Australian Capital Territory, 
   \code{NSW} New South Wales}
    \item{PrivPub}{a factor with levels
        \code{private} school, 
        \code{public} school}
    \item{school}{a numeric code identifying the school}
    \item{class}{a numeric vector identifying the class}
    \item{sex}{a factor with levels
    \code{f}, \code{m} }
    \item{like}{ a summary score
  based on two of the questions, on a scale from 1 (dislike)
  to 12 (like)}
    \item{Class}{a factor with levels corresponding to each class}
}
}
\source{
 Francine Adams,  Rosemary Martin and Murali Nayadu, Australian
National University
}
\examples{
data(science)
attach(science)
classmeans <- aggregate(like, by=list(PrivPub, Class), mean)
names(classmeans) <- c("PrivPub","Class","like")
dim(classmeans)

attach(classmeans)
boxplot(split(like, PrivPub), ylab = "Class average of attitude to science score", boxwex = 0.4)
rug(like[PrivPub == "private"], side = 2)
rug(like[PrivPub == "public"], side = 4)
detach(classmeans)

require(nlme)
science.lme <- lme(fixed = like ~ sex + PrivPub,
  data = science, random = ~ 1 | school/Class, na.action=na.omit)
summary(science.lme)$tTable # Print coefficients.

science1.lme <- lme(fixed = like ~ sex + PrivPub, data = science,
random = ~ 1 | Class, na.action=na.exclude)
summary(science1.lme)$tTable # Table of coefficients

intervals(science1.lme, which="var-cov")[[1]]$Class^2
intervals(science1.lme, which="var-cov")[[2]]^2 

# Refit with classes nested within schools (school/Class, as in the first
# fit above).  na.omit drops the incomplete rows, so that the residuals and
# fitted values have one entry for each row actually used in the fit.
science.lme <- lme(fixed = like ~ sex + PrivPub,
  data = science, random =  ~ 1 | school/Class, na.action=na.omit)
res <- residuals(science.lme)
hat <- fitted(science.lme)
# The conditioning variable is restricted to the rows where sex is recorded,
# to match the rows kept by na.omit (this assumes sex is the only model
# variable with missing values -- TODO confirm).
coplot(res ~ hat|science$PrivPub[!is.na(science$sex)],
  xlab="Fitted values", ylab="Residuals")
detach(science)
}
\keyword{datasets}

\eof
\name{seedrates}
\alias{seedrates}
\non_function{}
\title{Barley Seeding Rate Data}
\usage{data(seedrates)}
\description{
The \code{seedrates} data frame has 5 rows and 2 columns on 
the effect of seeding rate of barley on yield.

}
\format{
  This data frame contains the following columns:
  \describe{
    \item{rate}{the seeding rate}
    \item{grain}{the number of grains per head of barley}
  }
}
\source{
 McLeod, C.C. 1982.
Effect of rates of seeding on barley grown for grain. New Zealand 
Journal of Agriculture 10: 133-136.}
\references{
 Maindonald J H 1992. Statistical design, analysis and presentation
issues. New Zealand Journal of Agricultural Research 35: 121-141.}
\examples{
data(seedrates)
plot(grain~rate,data=seedrates,xlim=c(50,180),ylim=c(15.5,22),axes=FALSE)
new.df<-data.frame(rate=(2:8)*25)
seedrates.lm1<-lm(grain~rate,data=seedrates)
seedrates.lm2<-lm(grain~rate+I(rate^2),data=seedrates)
hat1<-predict(seedrates.lm1,newdata=new.df,interval="confidence")
hat2<-predict(seedrates.lm2,newdata=new.df,interval="confidence")
axis(1,at=new.df$rate); axis(2); box()
z1<-spline(new.df$rate, hat1[,"fit"]); z2<-spline(new.df$rate,   
hat2[,"fit"])
rate<-new.df$rate; lines(z1$x,z1$y)
lines(spline(rate,hat1[,"lwr"]),lty=1,col=3)
lines(spline(rate,hat1[,"upr"]),lty=1,col=3)
lines(z2$x,z2$y,lty=4)
lines(spline(rate,hat2[,"lwr"]),lty=4,col=3)
lines(spline(rate,hat2[,"upr"]),lty=4,col=3)}
\keyword{datasets}

\eof
\name{show.colors}
\alias{show.colors}
\title{Show R's Colors}
\description{
This function displays the built-in colors.
}
\usage{
show.colors(type=c("singles", "shades", "grayshades"), order.cols=TRUE)
}
\arguments{
  \item{type}{type of display - single, multiple or gray shades}
  \item{order.cols}{ Arrange colors in order }
}
\value{
A plot of colors for which there is a single shade (type = "singles"),
multiple shades (type = "shades"), or gray shades (type = "grayshades")
}
\author{J.H. Maindonald}

\examples{
require(MASS)
show.colors()
}
\keyword{models}

\eof
\name{simulate.linear}
\alias{simulate.linear}
\title{Simulation of Linear Models for ANOVA vs. Regression Comparison}
\description{
This function simulates a number of bivariate data sets
in which there are replicates at each level of the predictor.
The p-values for ANOVA and for the regression slope are 
compared.
}
\usage{
simulate.linear(sd=2, npoints=5, nrep=4, nsets=200, type="xy", seed=21)
}
\arguments{
  \item{sd}{The error standard deviation}
  \item{npoints}{Number of distinct predictor levels}
  \item{nrep}{Number of replications at each level}
  \item{nsets}{Number of simulation runs}
  \item{type}{Type of data}
  \item{seed}{Random Number generator seed}
}
\value{
The proportion of regression p-values that are less than the ANOVA p-values is
printed
}
\author{J.H. Maindonald}
\examples{
simulate.linear()
}
\keyword{models}

\eof
\name{socsupport}
\alias{socsupport}
\non_function{}
\title{Social Support Data}
\usage{data(socsupport)}
\description{
Data from a survey on social and other kinds of support.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{gender}{a factor with levels
\code{female}, \code{male} }
    \item{age}{age, in years, with levels
    \code{18-20}, \code{21-24}, \code{25-30}, 
 \code{31-40},\code{40+} }
    \item{country}{a factor with levels \code{australia}, 
\code{other} }
    \item{marital}{a factor with levels \code{married},
\code{other}, \code{single} }
    \item{livewith}{a factor with levels \code{alone}, 
\code{friends}, \code{other}, \code{parents}, 
\code{partner}, \code{residences}}
    \item{employment}{a factor with levels
\code{employed fulltime}, \code{employed part-time}, 
\code{govt assistance}, \code{other}, \code{parental support}} 
    \item{firstyr}{a factor with levels \code{first year}, 
\code{other} }
    \item{enrolment}{a factor with levels
\code{}, \code{full-time}, \code{part-time}}
    \item{emotional}{summary of 5 questions on emotional support availability}
    \item{emotionalsat}{summary of 5 questions on emotional support satisfaction}
    \item{tangible}{summary of 4 questions on availability of tangible support}
    \item{tangiblesat}{summary of 4 questions on satisfaction with tangible support}
    \item{affect}{summary of 3 questions on availability of affectionate support sources}
    \item{affectsat}{summary of 3 questions on satisfaction with affectionate support sources}
    \item{psi}{summary of 3 questions on availability of positive social interaction}
    \item{psisat}{summary of 3 questions on satisfaction with positive social 
interaction}
    \item{esupport}{summary of 4 questions on extent of emotional support sources}
    \item{psupport}{summary of 4 questions on extent of practical support sources}
    \item{socsupport}{summary of 4 questions on extent of social support sources}
    \item{BDI}{Score on the Beck depression index (summary of 21 questions)}
  }
}
\source{
Melissa Manning, Psychology, Australian National University
}
\examples{
data(socsupport)
attach(socsupport)
require(mva)
# Flag the rows with no missing values in columns 9 to 19
not.na <- apply(socsupport[,9:19], 1, function(x)!any(is.na(x)))
# Principal components of the complete rows, using the correlation matrix
ss.pr1 <- princomp(as.matrix(socsupport[not.na, 9:19]), cor=TRUE)
pairs(ss.pr1$scores[,1:3])
sort(-ss.pr1$scores[,1])        # Scores are negated, so the largest score is listed first
pause()

# Also exclude row 36, then repeat the principal components analysis
not.na[36] <- FALSE
ss.pr <- princomp(as.matrix(socsupport[not.na, 9:19]), cor=TRUE)
summary(ss.pr)          # Examine the contribution of the components
pause()

# We now regress BDI on the first six principal components:
ss.lm <- lm(BDI[not.na] ~ ss.pr$scores[, 1:6], data=socsupport)
summary(ss.lm)$coef
pause()

# Loadings on the first component, then BDI against the first component scores.
# gender is subset with not.na so that colours and symbols line up with
# the rows that are actually plotted.
ss.pr$loadings[,1]
plot(BDI[not.na] ~  ss.pr$scores[ ,1], col=as.numeric(gender[not.na]),
  pch=as.numeric(gender[not.na]),  xlab ="1st principal component", ylab="BDI")
topleft <- par()$usr[c(1,4)]
legend(topleft[1], topleft[2], col=1:2, pch=1:2, legend=levels(gender))
detach(socsupport)      # Take the data frame off the search path again
}
\keyword{datasets}

\eof
\name{softbacks}
\alias{softbacks}
\non_function{}
\title{Measurements on a Selection of Paperback Books}
\usage{data(softbacks)}
\description{
This is a subset of the \code{allbacks} data frame 
which gives measurements
on the volume and weight of 8 paperback books.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{volume}{a numeric vector giving the book volumes
in cubic centimeters}
    \item{weight}{a numeric vector giving the weights in grams}
  }
}
\source{
The bookshelf of J. H. Maindonald.
}
\examples{
data(softbacks)
print("Outliers in Simple Regression - Example 5.2")
# Regress book weight on book volume
paperback.lm <- lm(weight ~ volume, data=softbacks)
summary(paperback.lm)
# Default diagnostic plots for the fitted model
plot(paperback.lm)
}
\keyword{datasets}

\eof
\name{spam7}
\alias{spam7}
\non_function{}
\title{Spam E-mail Data}
\usage{data(spam7)}
\description{
The data consist of 4601 email items, of which 1813 items were identified
as spam.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{crl.tot}{total length of words in capitals}
    \item{dollar}{number of occurrences of the \$ symbol}
    \item{bang}{number of occurrences of the ! symbol}
    \item{money}{number of occurrences of the word `money'}
    \item{n000}{number of occurrences of the string `000'}
    \item{make}{number of occurrences of the word `make'}
    \item{yesno}{outcome variable, a factor with levels
    \code{n} not spam,
    \code{y} spam}
  }
}
\source{
 George Forman, Hewlett-Packard Laboratories

These data are available from the University
of California at Irvine Repository of Machine Learning Databases
and Domain Theories. The address is:
\url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
}
\examples{
data(spam7)
require(rpart)
# Fit a tree for the spam indicator yesno, using the six predictors
spam.rpart <- rpart(formula = yesno ~ crl.tot + dollar + bang +
   money + n000 + make, data=spam7)
# Draw the fitted tree, then add the text labels
plot(spam.rpart)
text(spam.rpart)

}
\keyword{datasets}

\eof
\name{sugar}
\alias{sugar}
\non_function{}
\title{Sugar Data}
\usage{data(sugar)}
\description{
The \code{sugar} data frame has 12 rows and 2 columns.
They are from an experiment that
compared an unmodified wild type plant with three different
genetically modified forms.  The measurements are
weights  of sugar that were obtained by breaking down the
cellulose.  
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{weight}{weight, in mg}
    \item{trt}{a factor with levels
\code{Control} i.e. unmodified Wild form,  
\code{A} Modified 1,
\code{B} Modified 2,
\code{C} Modified 3}
  }
}
\source{
Anonymous
}
\examples{
data(sugar)
# One-way analysis of variance of sugar weight on treatment
sugar.aov <- aov(formula = weight ~ trt, data = sugar)
fitted.values(sugar.aov)
# Coefficient table, as for a linear model fit
summary.lm(sugar.aov)
}
\keyword{datasets}

\eof
\name{tinting}
\alias{tinting}
\non_function{}
\title{Car Window Tinting Experiment Data}
\usage{data(tinting)}
\description{
These data are from an experiment that aimed to model the
effects of the tinting of car windows on visual
performance. The authors
were mainly interested in effects on side window vision, and hence in
visual recognition tasks that would be performed when looking through
side windows.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{case}{observation number}
    \item{id}{subject identifier code (1-26)}
    \item{age}{age (in years) }
    \item{sex}{a factor with levels
\code{f} female,
\code{m} male}
    \item{tint}{an ordered factor with levels representing degree of
tinting: \code{no} < \code{lo} < \code{hi}}
    \item{target}{a factor with levels
   \code{locon}: low contrast,
   \code{hicon}: high contrast }
    \item{it}{the inspection time, the time required
to perform a simple discrimination task (in milliseconds)}
    \item{csoa}{critical stimulus onset asynchrony, the time 
to recognize an alphanumeric target (in milliseconds)}
    \item{agegp}{a factor with levels
\code{young}, 21-27, 
\code{elderly}, 70-78 }
  }
}
\details{
Visual light transmittance (VLT) levels were 100\% (tint=no), 
81.3\% (tint=lo), and 35.1\% (tint=hi).  Based on these and other 
data, Burns et al. argue that road safety may be compromised if 
the front side windows of cars are tinted to 35\% VLT.
}
\source{
 Burns, N.R., Nettelbeck, T., White, M. and Willson, J., 1999.
Effects of car window tinting on visual performance: a comparison of
elderly and young drivers. Ergonomics 42: 428-443. 
}
\examples{
data(tinting)
require(lattice)
xyplot(csoa ~ it | sex * agegp, data=tinting) # Simple use of xyplot()
pause()

# Distinguish the two target contrasts within each panel
xyplot(csoa ~ it|sex*agegp, data=tinting, panel=panel.superpose, groups=target)
pause()

# As above, with explicit colours and a matching key
xyplot(csoa ~ it|sex*agegp, data=tinting, panel=panel.superpose, col=1:2,
  groups=target, key=list(x=0.14, y=0.84, points=list(pch=rep(1,2),
  col=1:2), text=list(levels(tinting$target), col=1:2), border=TRUE))
pause()

# Group by level of tinting instead, adding a smooth curve for each group.
# tint has three levels, so the key needs three plotting symbols.
xyplot(csoa ~ it|sex*agegp, data=tinting, panel=panel.superpose,
  groups=tint, type=c("p","smooth"), span=0.8, col=1:3,
  key=list(x=0.14, y=0.84, points=list(pch=rep(1,3), col=1:3),
  text=list(levels(tinting$tint), col=1:3), border=TRUE))
}
\keyword{datasets}

\eof
\name{toycars}
\alias{toycars}
\non_function{}
\title{Toy Cars Data}
\usage{data(toycars)}
\description{
The \code{toycars} data frame has 27 rows and 3 columns.
Observations are on the
distance traveled by one of three different toy cars on
a smooth surface, starting from rest at the top of a 16 inch long ramp
tilted at varying angles.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{angle}{tilt of ramp, in degrees}
    \item{distance}{distance traveled, in meters}
    \item{car}{a numeric code (1 = first car, 2 = second car, 3 = third 
car)}
  }
}
\examples{
data(toycars)
# Regress distance on ramp angle, treating the numeric car code as a factor
toycars.lm <- lm(distance ~ angle + factor(car), data=toycars)
summary(toycars.lm)
}
\keyword{datasets}

\eof
\name{two65}
\alias{two65}
\non_function{}
\title{Unpaired Heated Elastic Bands}
\usage{data(two65)}
\description{
Twenty-one elastic bands were divided into two groups.

One of the sets was placed in hot water (60-65 degrees C) for four
minutes, while the other was left at ambient temperature.  After
a wait of about ten minutes, the amounts of stretch, under a 1.35 kg
weight, were recorded.
}
\format{
  This list contains the following elements:
  \describe{
    \item{heated}{a numeric vector giving the stretch lengths for
the heated bands}
    \item{ambient}{a numeric vector giving the stretch lengths for
the unheated bands}
  }
}
\source{
J.H. Maindonald
}
\examples{
data(two65)
# Compare the stretch of the unheated (ambient) and heated bands
twot.permutation(two65$ambient,two65$heated) # two sample permutation test
}
\keyword{datasets}

\eof
\name{twot.permutation}
\alias{twot.permutation}
\title{Two Sample Permutation Test}
\description{
This function computes the p-value for the two sample
t-test using a permutation test.  The permutation density
can  also be plotted.
}
\usage{
twot.permutation(x1=two65$ambient, x2=two65$heated, nsim=2000, plotit=TRUE)
}
\arguments{
  \item{x1}{Sample 1}
  \item{x2}{Sample 2}
  \item{nsim}{Number of simulations}
  \item{plotit}{If TRUE, the permutation density will be plotted}
}
\details{
Suppose we have n1 values in one group and n2 in a
second, with n = n1 + n2. The permutation distribution
results from taking all possible samples of n2 values from
the total of n values.
}
\value{
The p-value for the test of the hypothesis that the mean of
\code{x1} differs from the mean of \code{x2}
}
\references{ 
Good, P. 2000. Permutation Tests. Springer, New York.
}
\author{J.H. Maindonald}

\examples{
data(two65)
# Called with no arguments, the defaults shown in the usage section apply:
# x1=two65$ambient, x2=two65$heated, nsim=2000, plotit=TRUE
twot.permutation()
}
\keyword{models}

\eof
\name{vif}
\alias{vif}
\title{Variance Inflation Factors}
\description{
Variance inflation factors are computed for the standard
errors of linear model coefficient estimates.
}
\usage{
vif(obj, digits=5)
}
\arguments{
  \item{obj}{ A \code{lm} object }
  \item{digits}{ Number of digits}
}
\value{
A vector of variance inflation factors corresponding to 
the coefficient estimates given in the \code{lm} object.
}
\author{J.H. Maindonald}
\seealso{\code{lm}}

\examples{
data(litters)
litters.lm <- lm(brainwt ~ bodywt + lsize, data = litters)
vif(litters.lm)

data(carprice)
# Model that includes all four price variables
carprice1.lm <- lm(gpm100 ~ Type+Min.Price+Price+Max.Price+Range.Price,
    data=carprice)
vif(carprice1.lm)

# Reduced model with a single price variable; compare its variance
# inflation factors with those of the larger model above
carprice.lm <- lm(gpm100 ~ Type + Price, data = carprice)
vif(carprice.lm)

}
\keyword{models}

\eof
\name{wages1833}
\alias{wages1833}
\non_function{}
\title{Wages of Lancashire Cotton Factory Workers in 1833}
\usage{data(wages1833)}
\description{
The \code{wages1833} data frame gives the wages 
  of Lancashire cotton factory workers in 1833. 
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{age}{age in years}
    \item{mnum}{number of male workers}
    \item{mwage}{average wage of male workers}
    \item{fnum}{number of female workers}
    \item{fwage}{average wage of female workers}
  }
}
\source{
Boot, H.M. 1995. How Skilled Were the Lancashire Cotton Factory
  Workers in 1833? Economic History Review 48: 283-303.
}
\examples{
data(wages1833)
attach(wages1833)
# Male wages against age; the y-axis range also covers the positive female wages
plot(mwage~age,ylim=range(c(mwage,fwage[fwage>0])))
# Add female wages in red, omitting the entries where fwage is not positive
points(fwage[fwage>0]~age[fwage>0],pch=15,col="red")
# Lowess smooths for each group
lines(lowess(age,mwage))
lines(lowess(age[fwage>0],fwage[fwage>0]),col="red")
detach(wages1833)    # Take the data frame off the search path again
}
\keyword{datasets}

\eof
