\name{Adler}
\alias{Adler}
\non_function{}

\title{Experimenter Expectations} 

\description{
The \code{Adler} data frame has 97 rows and 3 columns.  
                                                        
The ``experimenters'' were the actual subjects of the study. 
They collected ratings of the apparent successfulness of
people in pictures who were pre-selected for their
average appearance. The experimenters were told prior to collecting 
data that the pictures were either high or low in their 
appearance of success, and were instructed to get good data, 
scientific data, or were given no such instruction. Each 
experimenter collected ratings from 18 randomly assigned
respondents.                                             
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{instruction}{
     a factor with levels:
     \code{GOOD}, good data;
     \code{NONE}, no stress;
     \code{SCIENTIFIC}, scientific data.
     }
    \item{expectation}{
     a factor with levels:
     \code{HIGH}, expect high ratings;
     \code{LOW}, expect low ratings.
     }
    \item{rating}{
     The average rating obtained.  
     }
   }
 }

\source{
 Adler, N. E. (1973)
 Impact of prior sets given experimenters and subjects on the experimenter
 expectancy effect.
 \emph{Sociometry} \bold{36}, 113--126.
}

\references{
  Erickson, B. H., and Nosanchuk, T. A. (1977)
  \emph{Understanding Data.} McGraw-Hill Ryerson.
}

\usage{
data(Adler)
}

\keyword{datasets}

\eof
\name{Angell}
\alias{Angell}
\non_function{}

\title{Moral Integration of American Cities}

\description{
The \code{Angell} data frame has 43 rows and 4 columns.
The observations are 43 U. S. cities around 1950.

}

\format{
  This data frame contains the following columns:
  \describe{
    \item{moral}{
      Moral Integration: Composite of crime rate and
      welfare expenditures.
      }
    \item{hetero}{
      Ethnic Heterogeneity: From percentages of nonwhite and
      foreign-born white residents.
      }
    \item{mobility}{
      Geographic Mobility: From percentages of residents
      moving into and out of the city.
      }
    \item{region}{
      A factor with levels: 
      \code{E} Northeast;
      \code{MW} Midwest;
      \code{S} Southeast;
      \code{W} West.
      }
    }
  } 

\source{
 Angell, R. C. (1951) The moral integration of American Cities.
 \emph{American Journal of Sociology} \bold{57} (part 2), 1--140.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Angell)
}

\keyword{datasets}

\eof
\name{Anova}
\alias{Anova}
\alias{Anova.lm}
\alias{Anova.aov}
\alias{Anova.II.lm}
\alias{Anova.III.lm}
\alias{Anova.glm}
\alias{Anova.II.F.glm}
\alias{Anova.II.LR.glm}
\alias{Anova.II.Wald.glm}
\alias{Anova.III.F.glm}
\alias{Anova.III.LR.glm}
\alias{Anova.III.Wald.glm}
\title{Anova Tables for Linear and Generalized Linear Models}
\description{
  Calculates type-II or type-III analysis-of-variance tables for
  model objects produced by \code{lm} and \code{glm}. For linear
  models, F-tests are calculated; for generalized linear models, 
  likelihood-ratio chi-square, Wald chi-square, or F-tests are calculated.
}
\usage{
Anova(mod, ...)

\method{Anova}{lm}(mod, error, type=c("II", "III"), ...)

\method{Anova}{aov}(mod, ...)

\method{Anova}{glm}(mod, type=c("II", "III"), test.statistic=c("LR", "Wald", "F"), 
    error, error.estimate=c("pearson", "dispersion", "deviance"), ...)
}

\arguments{
  \item{mod}{\code{lm} or \code{glm} model object.}
  \item{error}{for a linear model, an \code{lm} model object from which the
    error sum of squares and degrees of freedom are to be calculated. For 
    F-tests for a generalized linear model, a \code{glm} object from which the
    dispersion is to be estimated. If not specified, \code{mod} is used.}
  \item{type}{type of test, \code{"II"} or \code{"III"}.}
  \item{test.statistic}{for a generalized linear model, whether to calculate 
    \code{"LR"} (likelihood-ratio), \code{"Wald"}, or \code{"F"} tests.}
  \item{error.estimate}{for F-tests for a generalized linear model, base the
    dispersion estimate on the Pearson residuals (\code{pearson}, the default); use the
    dispersion estimate in the model object (\code{dispersion}), which, e.g., is
    fixed to 1 for binomial and Poisson models; or base the dispersion estimate on
    the residual deviance (\code{deviance}).}
  \item{\dots}{arguments to be passed to \code{linear.hypothesis}; only use
    \code{white.adjust} for a linear model.}
}
\details{
  The designations ``type-II'' and ``type-III'' are borrowed from SAS, but the
  definitions used here do not correspond precisely to those employed by SAS. 
  Type-II tests are calculated according to the principle of marginality,
  testing each term after all others, except ignoring the term's higher-order relatives;
  so-called type-III tests violate marginality, testing 
  each term in the model after all of the others. This definition of Type-II tests 
  corresponds to the tests produced by SAS for analysis-of-variance models, where all of the predictors
  are factors, but not more generally (i.e., when there are quantitative predictors).
  Be very careful in formulating the model for type-III tests, or the hypotheses tested
  will not make sense. 
  
  As implemented here, type-II Wald tests for generalized linear models are actually
  \emph{differences} of Wald statistics.
  
  For all but type-II likelihood-ratio and \emph{F} tests for generalized linear models, 
  \code{Anova} finds the test statistics without refitting the model.
  
  The standard R \code{anova} function calculates sequential (``type-I'') tests.
  These rarely test interesting hypotheses.
}
\value{
  An object of class \code{anova}, usually printed.
}
\references{ 
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}
\author{John Fox \email{jfox@mcmaster.ca}}

\section{Warning}{Be careful of type-III tests.}

\seealso{ \code{\link{linear.hypothesis}}, \code{\link[base]{anova}}}

\examples{
data(Moore)
mod<-lm(conformity~fcategory*partner.status, data=Moore, 
  contrasts=list(fcategory=contr.sum, partner.status=contr.sum))
Anova(mod)
## Anova Table (Type II tests)
##
## Response: conformity
##                         Sum Sq Df F value   Pr(>F)
## fcategory                 11.61  2  0.2770 0.759564
## partner.status           212.21  1 10.1207 0.002874
## fcategory:partner.status 175.49  2  4.1846 0.022572
## Residuals                817.76 39                 
Anova(mod, type="III")
## Anova Table (Type III tests)
##
## Response: conformity
##                          Sum Sq Df  F value    Pr(>F)
## (Intercept)              5752.8  1 274.3592 < 2.2e-16
## fcategory                  36.0  2   0.8589  0.431492
## partner.status            239.6  1  11.4250  0.001657
## fcategory:partner.status  175.5  2   4.1846  0.022572
## Residuals                 817.8 39                   
}
\keyword{htest}
\keyword{models}
\keyword{regression}

\eof
\name{Anscombe}
\alias{Anscombe}
\non_function{}

\title{U. S. State Public-School Expenditures} 

\description{
The \code{Anscombe} data frame has 51 rows and 4 columns.
The observations are the U. S. states plus Washington, D. C. in 1970.

}

\format{
  This data frame contains the following columns:
  \describe{
    \item{education}{
      Per-capita education expenditures, dollars.
      }
    \item{income}{
     Per-capita income, dollars. 
      }
    \item{young}{
      Proportion under 18, per 1000. 
      }
    \item{urban}{
      Proportion urban, per 1000. 
      }
    }
  }


\source{
  Anscombe, F. J. (1981)
  \emph{Computing in Statistical Science Through APL}. Springer-Verlag.
}

\references{
  Fox, J.  (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.}
Sage.
}

\usage{
data(Anscombe)
}
\keyword{datasets}

\eof
\name{Ask}
\alias{Ask}

\title{Change Argument to a Function Interactively}
\description{
  \code{Ask} allows you to change the argument to a function interactively.
  It is meant to be used, in lieu of a graphical control such as a slidebar,
  to adjust plotting parameters, which are
  most naturally passed as the argument to an anonymous function that sets
  up the plot.
}
\usage{
Ask(arg, fun, ...)
}

\arguments{
  \item{arg}{argument to \code{fun} to change. By specifying a vector of values,
    you can change several parameters via an argument to an anonymous function.}
  \item{fun}{function to call; often an anonymous function that sets up
    a call to plotting functions.}
  \item{\dots}{other arguments to \code{fun}; not necessary if \code{fun}
    is an anonymous function.}
}
\details{
  \code{Ask} repeatedly prompts in the R Console for the value of \code{arg}.
  To exit, enter a blank line.
}
\value{
  \code{Ask} returns invisibly the value of the last call to \code{fun};
  usually this will be \code{NULL}, and in any event is probably not of
  interest. If it is, use \code{print(Ask(arg, fun, ...))}.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\examples{
    \dontrun{
data(UN)
attach(UN)

# enter the power-transformation parameter
# start with 1
Ask(p, function(p) qq.plot(box.cox(gdp, p), 
        ylab=paste("transformed gdp, power =",p)))

# enter an expression that evaluates to a 2-vector
# of powers; e.g., start with c(1,1); then interactively
# identify points in each plot
Ask(p, function(p) scatterplot(box.cox(gdp,p[1]), 
    box.cox(infant.mortality, p[2]), 
    xlab=paste("transformed GDP/capita, power =",p[1]),
    ylab=paste("transformed infant mortality, power =",p[2]),
    labels=rownames(UN)))    
    }
}

\keyword{iplot}
\keyword{misc}

\eof
\name{Baumann}
\alias{Baumann}
\non_function{}
\title{Methods of Teaching Reading Comprehension}
\description{
The \code{Baumann} data frame has 66 rows and 6 columns.
The data are from an experimental study conducted by Baumann and Jones, as reported
by Moore and McCabe (1993). Students were randomly assigned to one of three
experimental groups.
}
\format{
This data frame contains the following columns:
\describe{
\item{group}{ 
Experimental group;
a factor with levels:
\code{Basal}, traditional method of teaching;
\code{DRTA}, an innovative method;
\code{Strat}, another innovative method.
}
\item{pretest.1}{
First pretest.
}
\item{pretest.2}{
Second pretest.
}
\item{post.test.1}{
First post-test.
}
\item{post.test.2}{
Second post-test.
}
\item{post.test.3}{
Third post-test.
}
}
}

\source{
  Moore, D. S. and McCabe, G. P. (1993)
  \emph{Introduction to the Practice of Statistics, Second Edition.}
  Freeman [pp. 794--795].
}
\usage{
data(Baumann)
}
\keyword{datasets}

\eof
\name{Bfox}
\alias{Bfox}
\non_function{}

\title{Canadian Women's Labour-Force Participation}
\description{
The \code{Bfox} data frame has 30 rows and 7 columns.
Time-series data on Canadian women's labour-force participation,
1946--1975.

}

\format{
  This data frame contains the following columns:
  \describe{
    \item{partic}{
      Percent of adult women in the workforce.
      }
    \item{tfr}{
      Total fertility rate: expected births to a cohort of 1000
      women at current age-specific fertility rates.
      }
    \item{menwage}{
      Men's average weekly wages, in constant 1935 dollars and
      adjusted for current tax rates.
      }
    \item{womwage}{
      Women's average weekly wages.
      }
    \item{debt}{
      Per-capita consumer debt, in constant dollars.
      }
    \item{parttime}{
      Percent of the active workforce working 34 hours per week or
      less.
      }
    }
  }

\source{
   Fox, B. (1980) 
   \emph{Women's Domestic Labour and their Involvement 
   in Wage Work.} Unpublished doctoral dissertation [p. 449].
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Bfox)
}

\keyword{datasets}

\eof
\name{Blackmoor}
\alias{Blackmoor}
\non_function{}
\title{Exercise Histories of Eating-Disordered and Control Subjects}
\usage{data(Blackmoor)}
\description{
The \code{Blackmoor} data frame has 945 rows and 4 columns.
Blackmoor and Davis's data on exercise histories of 138 teenaged
girls hospitalized for eating disorders and 98 control subjects.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{subject}{a factor with subject id codes.}
    \item{age}{age in years.}
    \item{exercise}{hours per week of exercise.}
    \item{group}{a factor with levels:
        \code{control}, Control subjects;
        \code{patient}, Eating-disordered patients.
    }
  }
}

\source{
 Personal communication from Elizabeth Blackmoor and Caroline Davis,
 York University.
}

\keyword{datasets}

\eof
\name{Burt}
\alias{Burt}
\non_function{}

\title{Fraudulent Data on IQs of Twins Raised Apart}

\description{
The \code{Burt} data frame has 27 rows and 4 columns.
The ``data'' were simply (and notoriously) manufactured.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{IQbio}{
      IQ of twin raised by biological parents
      }
    \item{IQfoster}{
      IQ of twin raised by foster parents
      }
    \item{class}{
      A factor with levels (note: out of order):
      \code{high};
      \code{low};
      \code{medium}.
      }
    }
  }

\source{
 Burt, C. (1966) 
 The genetic determination of differences in intelligence: 
 A study of monozygotic twins reared together and apart. 
 \emph{British Journal of Psychology} \bold{57}, 137--153.

}


\usage{
data(Burt)
}

\keyword{datasets}

\eof
\name{Can.pop}
\alias{Can.pop}
\non_function{}

\title{Canadian Population Data}

\description{
The \code{Can.pop} data frame has 15 rows and 2 columns.
Decennial time-series of Canadian population, 1851--1991.

}

\format{
  This data frame contains the following columns:
  \describe{
    \item{year}{
        census year.
        }
    \item{population}{
      Population, in millions
      }
    }
  }

\source{
 Urquhart, M. C. and Buckley, K. A. H. (Eds.) (1965)
 \emph{Historical Statistics of Canada}. Macmillan [p. 1369].
 
 Canada (1994)
 \emph{Canada Year Book}. Statistics Canada [Table 3.2].
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Can.pop)
}
\keyword{datasets}

\eof
\name{Chile}
\alias{Chile}
\non_function{}
\title{Voting Intentions in the 1988 Chilean Plebiscite}
\description{
The \code{Chile} data frame has 2700 rows and 8 columns.
The data are from a national survey conducted in April and May of 1988
by FLACSO/Chile. There are some missing data.
}
\format{
This data frame contains the following columns:
\describe{
\item{region}{
A factor with levels:
\code{C}, Central;
\code{M}, Metropolitan Santiago area;
\code{N}, North;
\code{S}, South;
\code{SA}, city of Santiago.
}
\item{population}{
Population size of respondent's community.
}
\item{sex}{
A factor with levels:
\code{F}, female;
\code{M}, male.
}
\item{age}{
in years.
}
\item{education}{
A factor with levels (note: out of order):
\code{P}, Primary;
\code{PS}, Post-secondary;
\code{S}, Secondary.
}
\item{income}{
Monthly income, in Pesos.
}
\item{statusquo}{
Scale of support for the status-quo.
}
\item{vote}{
a factor with levels:
\code{A}, will abstain; 
\code{N}, will vote no (against Pinochet);
\code{U}, undecided;
\code{Y}, will vote yes (for Pinochet).
}
}
}

\source{
 Personal communication from FLACSO/Chile.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Chile)
}
\keyword{datasets}

\eof
\name{Chirot}
\alias{Chirot}
\non_function{}

\title{The 1907 Romanian Peasant Rebellion}

\description{
The \code{Chirot} data frame has 32 rows and 5 columns.
The observations are counties in Romania.

}

\format{
 This data frame contains the following columns:
 \describe{
   \item{intensity}{
     Intensity of the rebellion
     }
   \item{commerce}{
     Commercialization of agriculture
     }
   \item{tradition}{
     Traditionalism
     }
   \item{midpeasant}{
     Strength of middle peasantry
     }
   \item{inequality}{
     Inequality of land tenure
     }
   }
 }

\source{
 Chirot, D. and C. Ragin (1975)
 The market, tradition and peasant rebellion: The case of Romania.
 \emph{American Sociological Review} \bold{40}, 428--444 [Table 1].
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Chirot)
}

\keyword{datasets}

\eof
\name{Contrasts}       
\alias{Contrasts}
\alias{contr.Treatment}
\alias{contr.Sum}
\alias{contr.Helmert}

\title{Functions to Construct Contrasts}
\description{
    These are substitutes for similarly named functions in the base package 
    (note the uppercase letter starting the second word in each function name).
    The only difference is that the contrast functions from the car package
    produce easier-to-read names for the contrasts when they are used in statistical models. 
    
    The functions and this documentation are adapted from the base package.
    }

\usage{
contr.Treatment(n, base = 1, contrasts = TRUE)

contr.Sum(n, contrasts = TRUE)

contr.Helmert(n, contrasts = TRUE)
}

\arguments{
  \item{n}{a vector of levels for a factor, or the number of levels.}
  \item{base}{an integer specifying which level is considered the baseline level. 
    Ignored if \code{contrasts} is \code{FALSE}.}
  \item{contrasts}{a logical indicating whether contrasts should be computed.}
}

\details{
    These functions are used for creating contrast matrices for use in fitting analysis of variance and regression models. 
    The columns of the resulting matrices contain contrasts which can be used for coding a factor with \code{n} levels. 
    The returned value contains the computed contrasts. If the argument \code{contrasts} is \code{FALSE} then a square matrix is returned.
    
    Several aspects of these contrast functions are controlled by options set via the \code{options} command:
    \describe{
        \item{\code{decorate.contrasts}}{This option should be set to a 2-element character vector containing the prefix and suffix
            characters to surround contrast names. If the option is not set, then \code{c("[", "]")} is used. For example, setting
            \code{options(decorate.contrasts=c(".", ""))} produces contrast names that are separated from factor names by a period.
            Setting \code{options(decorate.contrasts=c("", ""))} reproduces the behaviour of the R base contrast functions.} 
        \item{\code{decorate.contr.Treatment}}{A character string to be prefixed to level names within contrast names to signify treatment contrasts;
            if the option is unset, then \code{"T."} is used.}
        \item{\code{decorate.contr.Sum}}{Similar to the above, with default \code{"S."}.}
        \item{\code{decorate.contr.Helmert}}{Similar to the above, with default \code{"H."}.} 
        \item{\code{contr.Sum.show.levels}}{Logical value: if \code{TRUE} (the default if unset), 
            then level names are used for contrasts; if \code{FALSE}, then numbers are used, as in \code{contr.sum}
            in the \code{base} package.} 
        }
        
    Note that there is no replacement for \code{contr.poly} in the \code{base} package (which produces 
    orthogonal-polynomial contrasts) since this function already constructs easy-to-read contrast names.
}

\value{
    A matrix with \code{n} rows and \code{k} columns, with \code{k = n - 1} if \code{contrasts} is \code{TRUE} 
    and \code{k = n} if \code{contrasts} is \code{FALSE}.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{contr.treatment}}, \code{\link[base]{contr.sum}}, 
  \code{\link[base]{contr.helmert}}, \code{\link[base]{contr.poly}} }

\examples{
# contr.Treatment vs. contr.treatment in the base package:

data(Prestige)
lm(prestige ~ (income + education)*type, data=Prestige, 
    contrasts=list(type="contr.Treatment"))

##  Call:
##  lm(formula = prestige ~ (income + education) * type, data = Prestige,
##      contrasts = list(type = "contr.Treatment"))
##  
##  Coefficients:
##          (Intercept)                  income               education  
##              2.275753                0.003522                1.713275  
##          type[T.prof]              type[T.wc]     income:type[T.prof]  
##              15.351896              -33.536652               -0.002903  
##      income:type[T.wc]  education:type[T.prof]    education:type[T.wc]  
##              -0.002072                1.387809                4.290875  

lm(prestige ~ (income + education)*type, data=Prestige, 
    contrasts=list(type="contr.treatment"))    

##  Call:
##  lm(formula = prestige ~ (income + education) * type, data = Prestige,
##      contrasts = list(type = "contr.treatment"))
##  
##  Coefficients:
##      (Intercept)              income           education  
##          2.275753            0.003522            1.713275  
##          typeprof              typewc     income:typeprof  
##          15.351896          -33.536652           -0.002903  
##      income:typewc  education:typeprof    education:typewc  
##          -0.002072            1.387809            4.290875      
}

\keyword{models}
\keyword{regression}

\eof
\name{Cowles}
\alias{Cowles}
\non_function{}
\title{Cowles and Davis's Data on Volunteering}
\usage{data(Cowles)}
\description{
The \code{Cowles} data frame has 1421 rows and 4 columns.
These data come from a study of the personality determinants
of volunteering for psychological research.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{neuroticism}{scale from Eysenck personality inventory}
    \item{extraversion}{scale from Eysenck personality inventory}
    \item{sex}{a factor with levels: \code{female}; \code{male}}
    \item{volunteer}{volunteering, a factor with levels: \code{no}; \code{yes}}
  }
}

\source{
 Cowles, M. and C. Davis (1987)
 The subject matter of psychology: Volunteers.
 \emph{British Journal of Social Psychology} \bold{26}, 97--102.
}
 

\keyword{datasets}

\eof
\name{Davis}
\alias{Davis}
\non_function{}

\title{Self-Reports of Height and Weight}

\description{
The \code{Davis} data frame has 200 rows and 5 columns.
The subjects were men and women engaged in regular exercise.
There are some missing data.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{sex}{
      A factor with levels:
      \code{F}, female;
      \code{M}, male.
      }
    \item{weight}{
      Measured weight in kg. 
      }
    \item{height}{
      Measured height in cm. 
      }
    \item{repwt}{
      Reported weight in kg. 
      }
    \item{repht}{
      Reported height in cm. 
      }
    }
  }


\source{
 Personal communication from C. Davis, Departments of
 Physical Education and Psychology, York University.
}

\references{
  Davis, C. (1990)
  Body image and weight preoccupation: A comparison between exercising
  and non-exercising women.
  \emph{Appetite}, \bold{15}, 13--21.
  
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Davis)
}

\keyword{datasets}

\eof
\name{DavisThin}
\alias{DavisThin}
\non_function{}
\title{Davis's Data on Drive for Thinness}
\usage{data(DavisThin)}
\description{
The \code{DavisThin} data frame has 191 rows and 7 columns.
This is part of a larger dataset for a study of eating disorders.
The seven variables in the data frame comprise a ``drive for thinness''
scale, to be formed by summing the items.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{DT1}{a numeric vector}
    \item{DT2}{a numeric vector}
    \item{DT3}{a numeric vector}
    \item{DT4}{a numeric vector}
    \item{DT5}{a numeric vector}
    \item{DT6}{a numeric vector}
    \item{DT7}{a numeric vector}
  }
}

\source{
    Davis, C., G. Claridge, and D. Cerullo (1997) 
    Personality factors 
    predisposing to weight preoccupation: A continuum approach to the 
    association between eating disorders and personality disorders. 
    \emph{Journal of Psychiatric Research} \bold{31}, 467--480.
}


\keyword{datasets}

\eof
\name{Duncan}
\alias{Duncan}
\non_function{}

\title{Duncan's Occupational Prestige Data}

\description{
  The \code{Duncan} data frame has 45 rows and 4 columns.
  Data on the prestige and other characteristics of 45 U. S. occupations in 1950.
  }
  
\format{
  This data frame contains the following columns:
  \describe{
    \item{type}{ Type of occupation.
      A factor with the following levels:
        \code{prof}, professional and managerial;
        \code{wc}, white-collar;
        \code{bc}, blue-collar.
      }
    \item{income}{
      Percent of males in occupation earning \$3500 or more in 1950.
      }
    \item{education}{
      Percent of males in occupation in 1950 who were high-school graduates.
      }
    \item{prestige}{
      Percent of raters in NORC study rating occupation as excellent or good in prestige.
      }
    }
  }

\source{
  Duncan, O. D. (1961) 
  A socioeconomic index for all occupations.
  In Reiss, A. J., Jr. (Ed.)
  \emph{Occupations and Social Status.} Free Press 
  [Table VI-1].
  }
 
\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.

}

\usage{
data(Duncan)
}

\keyword{datasets}

\eof
\name{Ellipses}
\alias{ellipse}
\alias{data.ellipse}
\alias{confidence.ellipse}
\alias{confidence.ellipse.lm}
\alias{confidence.ellipse.glm}

\title{Ellipses, Data Ellipses, and Confidence Ellipses}
\description{
  These functions draw ellipses, including data ellipses, and
  confidence ellipses for linear and generalized linear models.
}
\usage{
ellipse(center, shape, radius, center.pch=19, center.cex=1.5, 
  segments=51, add=TRUE, xlab="", ylab="", 
   las=par('las'), col=palette()[2], lwd=2, lty=1, ...)

data.ellipse(x, y, levels=c(0.5, 0.9), center.pch=19, center.cex=1.5, 
  plot.points=TRUE, add=!plot.points, segments=51, robust=FALSE,
  xlab=deparse(substitute(x)), 
  ylab=deparse(substitute(y)), 
  las=par('las'), col=palette()[2], pch=1, lwd=2, lty=1, ...)

confidence.ellipse(model, ...)

\method{confidence.ellipse}{lm}(model, which.coef, levels=0.95, Scheffe=FALSE, 
  center.pch=19, center.cex=1.5, segments=51, xlab, ylab, 
  las=par('las'), col=palette()[2], lwd=2, lty=1, ...)

\method{confidence.ellipse}{glm}(model, which.coef, levels=0.95, Scheffe=FALSE, 
  center.pch=19, center.cex=1.5, segments=51, xlab, ylab, 
  las=par('las'), col=palette()[2], lwd=2, lty=1, ...)
}

\arguments{
  \item{center}{2-element vector with coordinates of center of ellipse.}
  \item{shape}{\eqn{2\times 2}{2 * 2} shape (or covariance) matrix.}
  \item{radius}{radius of circle generating the ellipse.}
  \item{center.pch}{character for plotting ellipse center.}
  \item{center.cex}{relative size of character for plotting ellipse center.}
  \item{segments}{number of line-segments used to draw ellipse.}
  \item{add}{if \code{TRUE} add ellipse to current plot.}
  \item{xlab}{label for horizontal axis.}
  \item{ylab}{label for vertical axis.}
  \item{x}{a numeric vector, or (if \code{y} is missing) a 2-column numeric matrix.}
  \item{y}{a numeric vector, of the same length as \code{x}.}
  \item{plot.points}{if \code{FALSE} data ellipses are added to the current scatterplot,
    but points are not plotted.}
  \item{levels}{draw elliptical contours at these (normal) probability or confidence levels.}
  \item{robust}{if \code{TRUE} use the \code{cov.trob} function in the \code{MASS} package
    to calculate the center and covariance matrix for the data ellipse.}
  \item{model}{a model object produced by \code{lm} or \code{glm}.}
  \item{which.coef}{2-element vector giving indices of coefficients to plot; 
    if missing, the first two coefficients (disregarding the regression constant)
    will be selected.}
  \item{Scheffe}{if \code{TRUE} scale the ellipse so that its projections onto the
    axes give Scheffe confidence intervals for the coefficients.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{pch}{plotting character for points; default is \code{1} 
    (a circle, see \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).}
  \item{lty}{line type; default is \code{1}, a solid line (see \code{\link[base]{par}}).}
  \item{\dots}{other plotting parameters to be passed to \code{plot} and
    \code{line}.}
}
\details{
  The ellipse is computed by suitably transforming a unit circle.
  
  \code{data.ellipse} superimposes the normal-probability contours over a scatterplot
  of the data.
}
\value{
  \code{NULL}. These functions are used for their side effect: producing
  plots.
}
\references{ 
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
  
  Monette, G. (1990)
  Geometry of multiple regression and 3D graphics.
  In Fox, J. and Long, J. S. (Eds.) 
  \emph{Modern Methods of Data Analysis.} Sage.
 }
 
\author{Georges Monette \email{Georges.Monette@mathstat.YorkU.CA} 
  and John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[MASS]{cov.trob}}.}
  
\examples{
data(Prestige)
attach(Prestige)
data.ellipse(income, education, levels=0.1*1:9, lty=2)
confidence.ellipse(lm(prestige~income+education), Scheffe=TRUE)
}

\keyword{hplot}
\keyword{aplot}

\eof
\name{Ericksen}
\alias{Ericksen}
\non_function{}

\title{The 1980 U.S. Census Undercount}

\description{
The \code{Ericksen} data frame has 66 rows and 9 columns.
The observations are 16 large cities, the remaining parts of
the states in which these cities are located, and the other
U. S. states.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{minority}{
      Percentage black or Hispanic.
      }
    \item{crime}{
      Rate of serious crimes per 1000 population.
      }
    \item{poverty}{
      Percentage poor.
      }
    \item{language}{
      Percentage having difficulty speaking or writing English.
      }
    \item{highschool}{
      Percentage age 25 or older who had not finished high school.
      }
    \item{housing}{
      Percentage of housing in small, multiunit buildings.
      }
    \item{city}{A factor with levels: 
      \code{city}, major city; 
      \code{state}, state or state-remainder.
      }
    \item{conventional}{
      Percentage of households counted by conventional personal enumeration.
      }
    \item{undercount}{
      Preliminary estimate of percentage undercount.
      }
    }
  }

\source{
Ericksen, E. P., Kadane, J. B. and Tukey, J. W. (1989)
 Adjusting the 1980 Census of Population and Housing. 
 \emph{Journal of the American Statistical Association} \bold{84}, 927--944 [Tables 7 and 8].
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Ericksen)
}

\keyword{datasets}

\eof
\name{Florida}
\alias{Florida}
\non_function{}
\title{Florida County Voting}
\description{
The \code{Florida} data frame has 67 rows and 11 columns.
Vote by county in Florida for President in the 2000 election.
}
\format{
This data frame contains the following columns:
\describe{
\item{GORE}{
Number of votes for Gore.
}
\item{BUSH}{
Number of votes for Bush.
}
\item{BUCHANAN}{
Number of votes for Buchanan.
}
\item{NADER}{
Number of votes for Nader.
}
\item{BROWNE}{
Number of votes for Browne (whoever that is).
}
\item{HAGELIN}{
Number of votes for Hagelin (whoever that is).
}
\item{HARRIS}{
Number of votes for Harris (whoever that is).
}
\item{MCREYNOLDS}{
Number of votes for McReynolds (whoever that is).
}
\item{MOOREHEAD}{
Number of votes for Moorehead (whoever that is).
}
\item{PHILLIPS}{
Number of votes for Phillips (whoever that is).
}
\item{Total}{
Total number of votes.
}
}
}

\source{
Adams, G. D. and Fastnow, C. F. (2000)
A note on the voting irregularities in Palm Beach, FL.
\url{http://madison.hss.cmu.edu/}.
}
\usage{
data(Florida)
}
\keyword{datasets}

\eof
\name{Freedman}
\alias{Freedman}
\non_function{}
\title{Crowding and Crime in U. S. Metropolitan Areas}
\description{
The \code{Freedman} data frame has 110 rows and 4 columns.
The observations are U. S. metropolitan areas with 1968 populations
of 250,000 or more. There are some missing data.
}
\format{
This data frame contains the following columns:
\describe{
\item{population}{
Total 1968 population, 1000s.
}
\item{nonwhite}{
Percent nonwhite population, 1960.
}
\item{density}{
Population per square mile, 1968.
}
\item{crime}{
Crime rate per 100,000, 1969.
}
}
}

\source{
 United States (1970)
 \emph{Statistical Abstract of the United States}. 
 Bureau of the Census.
}

\references{
Freedman, J. (1975)
\emph{Crowding and Behavior.} Viking.
}

\usage{
data(Freedman)
}

\keyword{datasets}

\eof
\name{Friendly}
\alias{Friendly}
\non_function{}

\title{Format Effects on Recall}

\description{
 The \code{Friendly} data frame has 30 rows and 2 columns.
 The data are from an experiment on subjects' ability to remember words
 based on the presentation format.

}

\format{
  This data frame contains the following columns:
    \describe{
      \item{condition}{
        A factor with levels:
          \code{Before}, Recalled words presented before others;
          \code{Meshed}, Recalled words meshed with others; 
          \code{SFR}, Standard free recall.
          }
      \item{correct}{
        Number of words correctly recalled, out of 40 on final trial of the experiment.
        }
     }
  }


\source{
 Friendly, M. and Franklin, P. (1980) 
 Interactive presentation in multitrial free recall. 
 \emph{Memory and Cognition}
 \bold{8}, 265--270. 
 
 Personal communication from M. Friendly, Department of Psychology,
 York University.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Friendly)
}

\keyword{datasets}

\eof
\name{Ginzberg}
\alias{Ginzberg}
\non_function{}

\title{Data on Depression}

\description{
The \code{Ginzberg} data frame has 82 rows and 6 columns.
The data are for psychiatric patients hospitalized for depression.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{simplicity}{
      Measures subject's need to see the world in black and white.
      }
    \item{fatalism}{
      Fatalism scale.
      }
    \item{depression}{
      Beck self-report depression scale.
      }
    \item{adjsimp}{
      Adjusted Simplicity: Simplicity adjusted (by regression) for other
      variables thought to influence depression.
      }
    \item{adjfatal}{
      Adjusted Fatalism.
      }
    \item{adjdep}{
      Adjusted Depression.
      }
    }
  }

\source{
 Personal communication from Georges Monette, Department of
 Mathematics and Statistics, York University, with the permission
 of the original investigator.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Ginzberg)
}

\keyword{datasets}

\eof
\name{Greene}
\alias{Greene}
\non_function{}

\title{Refugee Appeals}

\description{
The \code{Greene} data frame has 384 rows and 7 columns.
These are cases filed in 1990, in which refugee claimants rejected
by the Canadian Immigration and Refugee Board asked the Federal
Court of Appeal for leave to appeal the negative ruling of the Board.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{judge}{
      Name of judge hearing case.
      A factor with levels:
      \code{Desjardins}, 
      \code{Heald}, 
      \code{Hugessen}, 
      \code{Iacobucci}, 
      \code{MacGuigan}, 
      \code{Mahoney},
      \code{Marceau}, 
      \code{Pratte},
      \code{Stone},
      \code{Urie}. 
      }
    \item{nation}{
      Nation of origin of claimant.
      A factor with levels:
      \code{Argentina},
      \code{Bulgaria},
      \code{China},
      \code{Czechoslovakia},
      \code{El.Salvador},
      \code{Fiji},
      \code{Ghana},
      \code{Guatemala},
      \code{India},
      \code{Iran},
      \code{Lebanon},
      \code{Nicaragua},
      \code{Nigeria},
      \code{Pakistan},
      \code{Poland},
      \code{Somalia},
      \code{Sri.Lanka}.
      }
    \item{rater}{
      Judgment of independent rater.
      A factor with levels:
      \code{no}, case has no merit;
      \code{yes}, case has some merit (leave to appeal should be granted).
      }
    \item{decision}{
      Judge's decision.
      A factor with levels:
      \code{no}, leave to appeal not granted;
      \code{yes}, leave to appeal granted.
      }
    \item{language}{
      Language of case. 
      A factor with levels:
      \code{English},
      \code{French}.
      }
    \item{location}{
      Location of original refugee claim.
      A factor with levels:
      \code{Montreal},
      \code{other},
      \code{Toronto}.
      }
    \item{success}{
      Logit of success rate, for all cases from the applicant's nation.
      }
    }
  }


\source{
 Personal communication from Ian Greene, Department of
 Political Science, York University.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Greene)
}
\keyword{datasets}

\eof
\name{Guyer}
\alias{Guyer}
\non_function{}

\title{Anonymity and Cooperation}

\description{
The \code{Guyer} data frame has 20 rows and 3 columns.
The data are from an experiment in which four-person groups
played a prisoner's dilemma game for 30 trials, each person
making either a cooperative or competitive choice on each
trial. Choices were made either anonymously or in public;
groups were composed either of females or of males.
The observations are 20 groups.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{cooperation}{
      Number of cooperative choices (out of 120 in all).
      }
    \item{condition}{
      A factor with levels:
      \code{A}, Anonymous;
      \code{P}, Public-Choice.
      }
    \item{sex}{
      Sex.  A factor with levels:
      \code{F}, Female;
      \code{M}, Male.
      }
    }
  }
 
\source{
 Fox, J. and Guyer, M. (1978)
 Public choice and cooperation in n-person prisoner's dilemma.
 \emph{Journal of Conflict Resolution} \bold{22}, 469--481.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Guyer)
}

\keyword{datasets}

\eof
\name{Hartnagel}
\alias{Hartnagel}
\non_function{}

\title{Canadian Crime-Rates Time Series}

\description{
The \code{Hartnagel} data frame has 38 rows and 8 columns.
The data are an annual time-series from 1931 to 1968. There are
some missing data.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{year}{
      1931--1968.
      }
    \item{tfr}{
      Total fertility rate per 1000 women.
      }
    \item{partic}{
      Women's labor-force participation rate per 1000.
      }
    \item{degrees}{
      Women's post-secondary degree rate per 10,000.
      }
    \item{fconvict}{
      Female indictable-offense conviction rate per 100,000.
      }
    \item{ftheft}{
      Female theft conviction rate per 100,000.
      }
    \item{mconvict}{
      Male indictable-offense conviction rate per 100,000.
      }
    \item{mtheft}{
      Male theft conviction rate per 100,000.
      }
    }
  }

\details{
 The post-1948 crime rates have been adjusted to account for
 a difference in method of recording. Some of your results will differ
 in the last decimal place from those in Table 14.1 of Fox (1997) due
 to rounding of the data. Missing values for 1950 were interpolated.
}


\source{
 Personal communication from T. Hartnagel, 
 Department of Sociology, University of Alberta.
}

\references{
  Fox, J., and Hartnagel, T. F. (1979)
  Changing social roles and female crime in Canada:
  A time series analysis.
  \emph{Canadian Review of Sociology and Anthropology}, 
  \bold{16}, 96--104.
  
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Hartnagel)
}

\keyword{datasets}

\eof
\name{Leinhardt}
\alias{Leinhardt}
\non_function{}

\title{Data on Infant-Mortality}

\description{
The \code{Leinhardt} data frame has 105 rows and 4 columns.
The observations are nations of the world around 1970.

}

\format{
  This data frame contains the following columns:
  \describe{
    \item{income}{
      Per-capita income in U. S. dollars.
      }
    \item{infant}{
      Infant-mortality rate per 1000 live births.
      }
    \item{region}{
      A factor with levels:
      \code{Africa}; 
      \code{Americas};
      \code{Asia}, Asia and Oceania;
      \code{Europe}.
      }
    \item{oil}{
      Oil-exporting country. 
      A factor with levels:
      \code{no}, 
      \code{yes}.
      }
    }
  }

\details{
The infant-mortality rate for Jamaica is misprinted in
 Leinhardt and Wasserman; the correct value is given here.
 Some of the values given in Leinhardt and Wasserman do not appear
 in the original New York Times table.
}

\source{
 Leinhardt, S. and Wasserman, S. S. (1979)
 Exploratory data analysis: An introduction to selected methods.
 In Schuessler, K. (Ed.) 
 \emph{Sociological Methodology 1979.} Jossey-Bass.
  
 \emph{The New York Times}, 28 September 1975, p. E-3, Table 3.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Leinhardt)
}
\keyword{datasets}

\eof
\name{Mandel}
\alias{Mandel}
\non_function{}

\title{Contrived Collinear Data}

\description{
 The \code{Mandel} data frame has 8 rows and 3 columns.

}

\format{
  This data frame contains the following columns:
  \describe{
    \item{x1}{
      first predictor.
      }
      \item{x2}{
      second predictor.
      }
      \item{y}{
      response.
      }
    }
  }

\source{
 Mandel, J. (1982)
 Use of the singular value decomposition in regression analysis.
 \emph{The American Statistician} \bold{36}, 15--24.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Mandel)
}

\keyword{datasets}

\eof
\name{Migration}
\alias{Migration}
\non_function{}

\title{Canadian Interprovincial Migration Data}

\description{
The \code{Migration} data frame has 90 rows and 8 columns.

}

\format{
  This data frame contains the following columns:
  \describe{
      \item{source}{
        Province of origin (source).
        A factor with levels:
        \code{ALTA}, Alberta;
        \code{BC}, British Columbia;
        \code{MAN}, Manitoba;
        \code{NB}, New Brunswick;
        \code{NFLD}, Newfoundland;
        \code{NS}, Nova Scotia;
        \code{ONT}, Ontario;
        \code{PEI}, Prince Edward Island;
        \code{QUE}, Quebec;
        \code{SASK}, Saskatchewan.
        }
      \item{destination}{
        Province of destination (1971 residence).    
        A factor with levels:
        \code{ALTA}, Alberta;
        \code{BC}, British Columbia;
        \code{MAN}, Manitoba;
        \code{NB}, New Brunswick;
        \code{NFLD}, Newfoundland;
        \code{NS}, Nova Scotia;
        \code{ONT}, Ontario;
        \code{PEI}, Prince Edward Island;
        \code{QUE}, Quebec;
        \code{SASK}, Saskatchewan.
        }
      \item{migrants}{
        Number of migrants (from source to destination) in the
        period 1966--1971.
        }
      \item{distance}{
        Distance (between principal cities of provinces):
        NFLD, St. John's;
        PEI, Charlottetown;
        NS, Halifax;
        NB, Fredericton;
        QUE, Montreal;
        ONT, Toronto;
        MAN, Winnipeg;
        SASK, Regina;
        ALTA, Edmonton;
        BC, Vancouver.
        }
      \item{pops66}{
        1966 population of source province.
        }
      \item{pops71}{
        1971 population of source province.
        }
      \item{popd66}{
        1966 population of destination province.
        }
      \item{popd71}{
        1971 population of destination province.
        }
      }
    }

\details{
There is one record in the data file for each migration stream. You can average the 1966 
and 1971 population figures for each of the source and destination 
provinces.
}

\source{
 Canada (1962)
 \emph{Map}. Department of Mines and Technical Surveys.
 
 Canada (1971)
 \emph{Census of Canada}.
  Statistics Canada,
  Vol. 1, Part 2 [Table 32]. 
  
  Canada (1972)
  \emph{Canada Year Book}. Statistics Canada [p. 1369].
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Migration)
}
\keyword{datasets}

\eof
\name{Moore}
\alias{Moore}
\non_function{}

\title{Status, Authoritarianism, and Conformity}

\description{
The \code{Moore} data frame has 45 rows and 4 columns.
The data are for subjects in a social-psychological experiment,
who were faced with manipulated disagreement from a partner of either
of low or high status. The subjects could either conform to the
partner's judgment or stick with their own judgment.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{partner.status}{
      Partner's status.  A factor with levels:
      \code{high},
      \code{low}.
      }
    \item{conformity}{
      Number of conforming responses in 40 critical trials.
      }
    \item{fcategory}{
      F-Scale Categorized.
      A factor with levels (note levels out of order):
      \code{high},
      \code{low},
      \code{medium}.
      }
    \item{fscore}{
      Authoritarianism: F-Scale score.
      }
    }
  }
 
\source{
 Moore, J. C., Jr. and Krupat, E. (1971) 
 Relationship between source status, authoritarianism and conformity in a
 social setting. \emph{Sociometry} \bold{34}, 122--134.  
 
 Personal communication
 from J. Moore, Department of Sociology, York University.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Moore)
}
\keyword{datasets}

\eof
\name{Mroz}
\alias{Mroz}
\non_function{}

\title{U.S. Women's Labor-Force Participation}
\usage{data(Mroz)}
\description{
The \code{Mroz} data frame has 753 rows and 8 columns.
The observations, from the Panel Study of Income Dynamics (PSID),
are married women.

}
\format{
  This data frame contains the following columns:
  \describe{
    \item{lfp}{labor-force participation; a factor with levels:
        \code{no};
        \code{yes}.
    }
    \item{k5}{number of children 5 years old or younger.}
    \item{k618}{number of children 6 to 18 years old.}
    \item{age}{in years.}
    \item{wc}{wife's college attendance; a factor with levels:
        \code{no};
        \code{yes}.
    }
    \item{hc}{husband's college attendance; a factor with levels:
        \code{no};
        \code{yes}.
    }
    \item{lwg}{log expected wage rate; for women in the labor force, the actual
      wage rate; for women not in the labor force, an imputed value based on the
      regression of \code{lwg} on the other variables.}
    \item{inc}{family income exclusive of wife's income.}
  }
}
 
\source{
  Mroz, T. A. (1987)
  The sensitivity of an empirical model of married women's hours of work to
  economic and statistical assumptions. 
  \emph{Econometrica} \bold{55}, 765--799.
}

\references{
  Fox, J. (2000)
  \emph{Multiple and Generalized Nonparametric Regression.} Sage.
  
  Long, J. S. (1997)
  \emph{Regression Models for Categorical and Limited Dependent Variables.}
  Sage.
}
 
\keyword{datasets}

\eof
\name{Ornstein}
\alias{Ornstein}
\non_function{}
\title{Interlocking Directorates Among Major Canadian Firms}

\description{
The \code{Ornstein} data frame has 248 rows and 4 columns.
The observations are the 248 largest Canadian firms with
publicly available information in the mid-1970s. The names
of the firms were not available.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{assets}{
      Assets in millions of dollars.
      }
    \item{sector}{
      Industrial sector.
      A factor with levels:
      \code{AGR}, agriculture, food, light industry;
      \code{BNK}, banking; 
      \code{CON}, construction; 
      \code{FIN}, other financial;
      \code{HLD}, holding companies; 
      \code{MAN}, heavy manufacturing; 
      \code{MER}, merchandizing;
      \code{MIN}, mining, metals, etc.;
      \code{TRN}, transport;
      \code{WOD}, wood and paper.
      }
    \item{nation}{
      Nation of control.
      A factor with levels:
      \code{CAN}, Canada;
      \code{OTH}, other foreign;
      \code{UK}, Britain;
      \code{US}, United States.
      }
    \item{interlocks}{
      Number of interlocking director and executive positions shared with
      other major firms.
      }
    }
  }

\source{
 Ornstein, M. (1976) 
 The boards and executives of the largest Canadian corporations. 
 \emph{Canadian Journal of Sociology} \bold{1}, 411--437.
 
 Personal communication from M. Ornstein, Department of Sociology,
 York University.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Ornstein)
}

\keyword{datasets}

\eof
\name{Prestige}
\alias{Prestige}
\non_function{}

\title{Prestige of Canadian Occupations}

\description{
The \code{Prestige} data frame has 102 rows and 6 columns.
The observations are occupations.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{education}{
      Average education of occupational incumbents, years, in 1971. 
      }
    \item{income}{
      Average income of incumbents, dollars, in 1971.
      }
    \item{women}{
      Percentage of incumbents who are women.
      }
    \item{prestige}{
      Pineo-Porter prestige score for occupation, from a social survey
      conducted in the mid-1960s.
      }
    \item{census}{
      Canadian Census occupational code.
      }
    \item{type}{
      Type of occupation.
      A factor with levels (note: out of order):
      \code{bc}, Blue Collar;
      \code{prof}, Professional, Managerial, and Technical;
      \code{wc}, White Collar. 
      }
    }
  }

\source{
 Canada (1971)
 \emph{Census of Canada}. Vol. 3, Part 6. Statistics Canada [pp. 19-1--19-21].
 
 Personal communication from B. Blishen, W. Carroll, and
 C. Moore, Departments of Sociology, York University and
 University of Victoria.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Prestige)
}

\keyword{datasets}

\eof
\name{Quartet}
\alias{Quartet}
\non_function{}

\title{Four Regression Datasets}

\description{
The \code{Quartet} data frame has 11 rows and 6 columns.
These are contrived data.

}

\format{
  This data frame contains the following columns:
  \describe{
    \item{x}{
      X-values for datasets 1--3.
      }
    \item{y1}{
      Y-values for dataset 1.
      }
    \item{y2}{
      Y-values for dataset 2.
      }
    \item{y3}{
      Y-values for dataset 3.
      }
    \item{x4}{
      X-values for dataset 4.
      }
    \item{y4}{
      Y-values for dataset 4.
      }
    }
  }

\source{
 Anscombe, F. J. (1973)
 Graphs in statistical analysis.
 \emph{American Statistician} \bold{27}, 17--21.
}

\usage{
data(Quartet)
}
\keyword{datasets}

\eof
\name{Robey}
\alias{Robey}
\non_function{}

\title{Fertility and Contraception}

\description{
The \code{Robey} data frame has 50 rows and 3 columns.
The observations are developing nations around 1990.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{region}{
      A factor with levels:
      \code{Africa};
      \code{Asia}, Asia and Pacific;
      \code{Latin.Amer}, Latin America and Caribbean;
      \code{Near.East}, Near East and North Africa.
      }
    \item{tfr}{
      Total fertility rate (children per woman).
      }
    \item{contraceptors}{
      Percent of contraceptors among married women of childbearing age.
      }
    }
  }

\source{
 Robey, B., Shea, M. A., Rutstein, O. and Morris, L. (1992) 
 The reproductive revolution: New survey findings. 
 \emph{Population Reports}. Technical Report M-11. 
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Robey)
}
\keyword{datasets}

\eof
\name{SLID}
\alias{SLID}
\non_function{}
\title{Survey of Labour and Income Dynamics}
\description{
The \code{SLID} data frame has 7425 rows and 5 columns.
The data are from the 1994 wave of the Canadian Survey of Labour and Income Dynamics,
for the province of Ontario.
There are missing data, particularly for wages.
}
\format{
This data frame contains the following columns:
\describe{
\item{wages}{
Composite hourly wage rate from all jobs.
}
\item{education}{
Number of years of schooling.
}
\item{age}{
in years.
}
\item{sex}{
A factor with levels:
\code{Female},
\code{Male}.
}
\item{language}{
A factor with levels:
\code{English},
\code{French},
\code{Other}.
}
}
}
\source{
 The data are taken from the public-use dataset made available by
 Statistics Canada, and prepared by the Institute for Social Research,
 York University.
}
\usage{
data(SLID)
}
\keyword{datasets}

\eof
\name{Sahlins}
\alias{Sahlins}
\non_function{}

\title{Agricultural Production in Mazulu Village}

\description{
The \code{Sahlins} data frame has 20 rows and 2 columns.
The observations are households in a Central African village.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{consumers}{
      Consumers/Gardener, ratio of consumers to productive individuals.
      }
    \item{acres}{
      Acres/Gardener, amount of land cultivated per gardener.
      }
    }
  }

\source{
 Sahlins, M. (1972)
 \emph{Stone Age Economics.} 
 Aldine
 [Table 3.1]. 
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Sahlins)
}

\keyword{datasets}

\eof
\name{States}
\alias{States}
\non_function{}

\title{Education and Related Statistics for the U.S. States}

\description{
  The \code{States} data frame has 51 rows and 7 columns.
  The observations are the U. S. states and Washington, D. C.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{region}{
      U. S. Census regions.
      A factor with levels:
      \code{ENC}, East North Central;
      \code{ESC}, East South Central;
      \code{MA}, Mid-Atlantic;
      \code{MTN}, Mountain; 
      \code{NE}, New England;
      \code{PAC}, Pacific; 
      \code{SA}, South Atlantic;
      \code{WNC}, West North Central; 
      \code{WSC}, West South Central. 
      }
    \item{pop}{
      Population: in 1,000s.
      }
    \item{SATV}{
      Average score of graduating high-school 
      students in the state on the \emph{verbal} component of the 
      Scholastic Aptitude Test 
      (a standard university admission exam).
      }
    \item{SATM}{
      Average score of graduating high-school 
      students in the state on the \emph{math} component of the 
      Scholastic Aptitude Test.
      }
    \item{percent}{
       Percentage of graduating high-school 
      students in the state who took the SAT exam.
      }
    \item{dollars}{
       State spending on public education, 
      in \$1000s per student.
      }
    \item{pay}{
       Average teacher's salary in the state, 
      in \$1000s.
      }
    }
  }

\source{
 United States (1992)
 \emph{Statistical Abstract of the United States.} Bureau of the Census.
}

\references{
 Moore, D. (1995)
 \emph{The Basic Practice of Statistics}. Freeman [Table 2.1].
}

\usage{
data(States)
}

\keyword{datasets}

\eof
\name{Transformation Axes}
\alias{power.axis}
\alias{box.cox.axis}
\alias{prob.axis}

\title{Axes for Transformed Variables}
\description{
  These functions produce axes for the original scale of 
  transformed variables. Typically these would appear as additional
  axes to the right or
  at the top of the plot, but if the plot is produced with 
  \code{axes=FALSE}, then these functions could be used for axes below or to
  the left of the plot as well.
}
\usage{
power.axis(power, base=exp(1), side=c("right", "above", "left", "below"), 
  at, grid=FALSE, grid.col=gray(0.5), grid.lty=3, 
  axis.title="Untransformed Data", cex=1, las=par("las"))

box.cox.axis(power, side=c("right", "above", "left", "below"), 
  at, grid=FALSE, grid.col=gray(0.5), grid.lty=3, 
  axis.title="Untransformed Data", cex=1, las=par("las"))

prob.axis(at, side=c("right", "above", "left", "below"), grid=FALSE, grid.lty=3, 
  grid.col=gray(0.5), axis.title="Probability", interval=0.1, cex=1, las=par("las"))
}

\arguments{
  \item{power}{power for Box-Cox or power transformation.}
  \item{side}{side at which the axis is to be drawn; numeric
   codes are also permitted: \code{side = 1} for the bottom of the plot,
   \code{side=2} for the left side, 
   \code{side = 3} for the top, \code{side = 4} for the right side.}
  \item{at}{numeric vector giving location of tick marks on
    original scale; if missing, the function will try to pick
    nice locations for the ticks.}
  \item{grid}{if \code{TRUE} grid lines for the axis will be drawn.}
  \item{grid.col}{color of grid lines.}
  \item{grid.lty}{line type for grid lines.}
  \item{axis.title}{title for axis.}
  \item{cex}{relative character expansion for axis label.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{base}{base of log transformation for \code{power.axis}
    when \code{power = 0}.}
  \item{interval}{desired interval between tick marks on the probability
    scale.}
}

\details{
  The transformations corresponding to the three functions are as follows:
  \describe{
    \item{\code{power.axis}:}{ \eqn{x^{\prime }=x^{p}}{x' = x^p} for \eqn{p\neq 0}{p != 0}
      and \eqn{x^{\prime }=\log x}{x' = log x} for \eqn{p=0}{p = 0}.}
    \item{\code{box.cox.axis}:}{\eqn{x^{\prime }=(x^{\lambda }-1)/\lambda}{x' = (x^p - 1)/p} 
      for \eqn{\lambda \neq 0}{p != 0} and \eqn{x^{\prime }=\log x}{x' = log(x)} 
      for \eqn{\lambda =0}{p = 0}.}
    \item{\code{prob.axis}:}{logit \eqn{=\log [p/(1-p)]}{= log[p/(1 - p)]}.}
  }
  
  These functions will try to place tick marks at reasonable locations, but
  producing a good-looking graph sometimes requires some fiddling with the 
  \code{at} argument.
}
\value{
  These functions are used for their side effects: to draw axes.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{ \code{\link{box.cox}}, \code{\link{logit}}}

\examples{
data(UN)
UN<-na.omit(UN)
attach(UN)
par(mar=c(5, 4, 4, 4)+.1)

plot(log(gdp, 10), log(infant.mortality, 10))
power.axis(0, base=10, side="above", 
  at=c(50,200,500,2000,5000,20000),grid=TRUE, axis.title="GDP per capita")
power.axis(0, base=10, side="right",
  at=c(5,10,20,50,100), grid=TRUE, axis.title="infant mortality rate per 1000")

plot(box.cox(gdp, 0), box.cox(infant.mortality, 0))
box.cox.axis(0, side="above", 
  grid=TRUE, axis.title="GDP per capita")
box.cox.axis(0, side="right",
  grid=TRUE, axis.title="infant mortality rate per 1000")

qq.plot(logit(infant.mortality/1000))
prob.axis()

qq.plot(logit(infant.mortality/1000))
prob.axis(c(.005, .01, .02, .04, .08, .16))
}
\keyword{aplot}

\eof
\name{UN}
\alias{UN}
\non_function{}
\title{GDP and Infant Mortality}
\description{
The \code{UN} data frame has 207 rows and 2 columns.
The data are for 1998 and are from the United Nations; the observations
are nations of the world.
There are some missing data.
}
\format{
This data frame contains the following columns:
\describe{
\item{infant.mortality}{
Infant mortality rate, infant deaths per 1000 live births.
}
\item{gdp}{
GDP per capita, in US dollars.
}
}
}

\source{
 United Nations (1998) 
 Social indicators.  
 \url{http://www.un.org/Depts/unsd/social/main.htm}.
}
\usage{
data(UN)
}
\keyword{datasets}

\eof
\name{US.pop}
\alias{US.pop}
\non_function{}

\title{Population of the United States}

\description{
  The \code{US.pop} data frame has 21 rows and 2 columns.
  This is a decennial time-series, from 1790 to 1990.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{year}{
        census year.
        }
    \item{population}{
      Population in millions.
      }
    }
  }

\source{
 United States (1994)
 \emph{Statistical Abstract of the United States.}
 Bureau of the Census.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(US.pop)
}

\keyword{datasets}

\eof
\name{Var}
\alias{Var}
\alias{Var.default}
\alias{Var.lm}
\alias{Var.glm}

\title{Variance-Covariance Matrices (deprecated)}
\description{
  Computes variance-covariance matrices or variances for model objects or data.
  The default method uses the function \code{var}. 
  
  These functions are now deprecated; instead, use
  the \code{vcov} function, now in the base package. Note that \code{vcov} has no \code{diagonal}
  argument and no default method.
}

\usage{
Var(object, ...)

\method{Var}{default}(object, diagonal=FALSE, ...)

\method{Var}{lm}(object, diagonal=FALSE, ...)

\method{Var}{glm}(object, diagonal=FALSE, ...)
}

\arguments{
  \item{object}{an object for which the covariance matrix is desired.}
  \item{\dots}{arguments to be passed to \code{var} (e.g., \code{na.rm}).}
  \item{diagonal}{if \code{TRUE}, return only the variances.}
}

\value{
  A variance-covariance matrix or a vector of variances.
  }

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{var}}}

\examples{
data(Davis)
attach(Davis)
Var(cbind(weight, repwt), na.rm=TRUE)
##          weight    repwt
## weight 233.8781 176.1014
## repwt  176.1014 189.7966

Var(lm(weight~repwt))
##              (Intercept)        repwt
##  (Intercept)   9.2228211 -0.134640952
##  repwt        -0.1346410  0.002051736
}

\keyword{misc}

\eof
\name{Vocab}
\alias{Vocab}
\non_function{}

\title{Vocabulary and Education}

\description{
The \code{Vocab} data frame has 968 rows and 2 columns.
The observations are respondents to the 1989 U. S. General Social Survey.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{education}{
      Education, in years.
      }
    \item{vocabulary}{
      Vocabulary test score: number correct on a 10-word test.
      }
    }
  }

\source{
 National Opinion Research Center (1989)
 \emph{General Social Survey.}
 Distributed by the Inter-University Consortium for Political and Social
 Research.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}


\usage{
data(Vocab)
}

\keyword{datasets}

\eof
\name{Womenlf}
\alias{Womenlf}
\non_function{}

\title{Canadian Women's Labour-Force Participation}

\description{
  The \code{Womenlf} data frame has 263 rows and 4 columns.
  The data are from a 1977 survey of the Canadian population.
}

\format{
  This data frame contains the following columns:
  \describe{
    \item{partic}{
      Labour-Force Participation.
      A factor with levels (note: out of order):
      \code{fulltime}, Working full-time;
      \code{not.work}, Not working outside the home;
      \code{parttime}, Working part-time.
      }
    \item{hincome}{
      Husband's income, \$1000s.
      }
    \item{children}{
      Presence of children in the household.
      A factor with levels:
      \code{absent}, 
      \code{present}.
      }
    \item{region}{
      A factor with levels:
      \code{Atlantic}, Atlantic Canada;
      \code{BC}, British Columbia;
      \code{Ontario}; 
      \code{Prairie}, Prairie provinces;
      \code{Quebec}.
      }
    }
  }

\source{
 \emph{Social Change in Canada Project.} York Institute for
 Social Research.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\usage{
data(Womenlf)
}

\keyword{datasets}

\eof
\name{av.plots}
\alias{av.plots}
\alias{avp}
\alias{av.plot}
\alias{av.plot.lm}
\alias{av.plot.glm}

\title{Added-Variable Plots}
\description{
  These functions construct added-variable (also called 
  partial-regression) plots for  linear and generalized linear
  models.
}

\usage{
av.plots(model, variable, ask=missing(variable), one.page=!ask, ...)

avp(...)

av.plot(model, ...)

\method{av.plot}{lm}(model, variable, 
  labels=names(residuals(model)[!is.na(residuals(model))]), 
  identify.points=TRUE, las=par("las"), col=palette()[2], pch=1, lwd=2,
  main="Added-Variable Plot", ...)

\method{av.plot}{glm}(model, variable, 
    labels=names(residuals(model)[!is.na(residuals(model))]), 
    identify.points=TRUE, las=par("las"), col=palette()[2], pch=1, lwd=2, 
    main="Added-Variable Plot", type=c("Wang", "Weisberg"), ...)
}

\arguments{
  \item{model}{model object produced by \code{lm} or \code{glm}.}
  \item{variable}{variable (if it exists in the search path) or
    name of variable. This argument usually is omitted for \code{avp}
    or \code{av.plots}.}
  \item{ask}{if \code{TRUE}, a menu is provided in the R Console for the
    user to select the term(s) to plot.}
  \item{one.page}{if \code{TRUE} (and \code{ask=FALSE}), put all plots on one
    graph.}
  \item{labels}{observation names.}
  \item{identify.points}{if \code{TRUE}, then identify points interactively.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{pch}{plotting character for points; default is \code{1} 
    (a circle, see \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).}
  \item{main}{title for plot.}
  \item{type}{if \code{"Wang"} use the method of Wang (1985);
    if \code{"Weisberg"} use the method in the Arc software associated with
    Cook and Weisberg (1999).}
  \item{\dots}{arguments to be passed down to \code{av.plot.lm} or
    \code{av.plot.glm}.}
}

\details{
  The function intended for direct use is \code{av.plots} (for which \code{avp}
  is an abbreviation). By default, these functions are used interactively
  through a text menu.
  
  The model can contain factors and interactions. An added-variable plot can be
  drawn for each column of the model matrix, including the constant.
}

\value{
  \code{NULL}. These functions are used for their side effect: producing
  plots.
}

\references{
  Cook, R. D. and Weisberg, S. (1999)
  \emph{Applied Regression, Including Computing and Graphics.} Wiley.

  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
  
  Wang, P. C. (1985)
  Adding a variable in generalized linear models.
  \emph{Technometrics} \bold{27}, 273--276.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{cr.plots}}, \code{\link{ceres.plots}}, \code{\link{leverage.plots}}}

\examples{
    \dontrun{
data(Duncan)
av.plots(lm(prestige~income+education+type, data=Duncan))

data(Womenlf)
av.plots(glm(partic != "not.work" ~ hincome + children, 
  data=Womenlf, family=binomial))
    }
}

\keyword{hplot}
\keyword{regression}

\eof
\name{box.cox}
\alias{box.cox}
\alias{bc}

\title{Box-Cox Family of Transformations}
\description{
  Compute the Box-Cox power transformation of a variable.
}
\usage{
box.cox(x, p, start=0)

bc(x, p)
}

\arguments{
  \item{x}{numeric vector to transform.}
  \item{p}{power (0 = log).}
  \item{start}{constant to be added to each value of \code{x} prior
    to transformation.}
}
\details{
  Computes \eqn{x^{\prime }=(x^p-1)/p}{x' = (x^p - 1)/p} 
  for \eqn{p \neq 0}{p != 0} and \eqn{x^{\prime }=\log x}{x' = log(x)} 
  for \eqn{p=0}{p = 0}.
  
  The values of \code{x} must all be positive; if not, a \code{start} should
  be added to each value to make all the values positive. The function will
  automatically compute the \code{start} and print a warning, if necessary. 
  
  \code{bc} is just an abbreviation for \code{box.cox}.
}
\value{
  a vector of transformed values.
}
\references{
  Atkinson, A. C. (1985)
  \emph{Plots, Transformations, and Regression.} Oxford.
  
  Box, G. E. P. and Cox, D. R. (1964)
  An analysis of transformations.
  \emph{JRSS B} \bold{26}, 211--246.
  
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage. 
}

\author{John Fox \email{jfox@mcmaster.ca}}

\section{Warning}{These functions \emph{do not} compute the maximum-likelihood estimate for a Box-Cox
normalizing transformation. See \code{\link{box.cox.powers}} for estimating unconditional
univariate and multivariate Box-Cox transformations, and \code{boxcox} in the
\code{MASS} package for estimating the Box-Cox transformation of the response in a
linear model.}

\seealso{\code{\link[MASS]{boxcox}}, \code{\link{box.cox.var}}, 
  \code{\link{box.cox.powers}}, \code{\link{box.cox.axis}}}

\examples{
box.cox(1:10, 2)
## [1]  0.0  1.5  4.0  7.5 12.0 17.5 24.0 31.5 40.0 49.5
box.cox(-5:5, 2)
##  [1]  0.0  1.5  4.0  7.5 12.0 17.5 24.0 31.5 40.0 49.5 60.0
## Warning message: 
## start =  6 added to data prior to transformation in: box.cox(-5:5, 2)
options(digits=4)
box.cox(-5:5, 0, start=6)
##  [1] 0.0000 0.6931 1.0986 1.3863 1.6094 1.7918 1.9459 2.0794 2.1972
## [10] 2.3026 2.3979
}

\keyword{manip}
\keyword{regression}

\eof
\name{box.cox.powers}
\alias{box.cox.powers}
\alias{print.box.cox.powers}
\alias{summary.box.cox.powers}

\title{Multivariate Unconditional Box-Cox Transformations}
\description{
  Estimates multivariate unconditional power transformations
  to multinormality
  by the method of maximum likelihood. The univariate case is
  obtained when only one variable is specified.
}
\usage{
box.cox.powers(X, start=NULL, hypotheses=NULL, ...)

\method{print}{box.cox.powers}(x, ...)

\method{summary}{box.cox.powers}(object, digits=4, ...)
}

\arguments{
  \item{X}{a numeric matrix of variables (or a vector for one variable)
    to be transformed.}
  \item{start}{start values for the power transformation parameters;
    if \code{NULL} (the default), univariate Box-Cox transformations will
    be computed and used as the start values.}
  \item{hypotheses}{if non-\code{NULL}, a list of hypotheses to be tested;
    each hypothesis should be a vector of values giving the power for each 
    column of \code{X}. Note that the hypotheses that all powers are 1 and
    that all powers are 0 (log) are always tested.}
  \item{...}{optional arguments to be passed to the \code{optim} function.}
  \item{digits}{number of places to round result.}
  \item{x, object}{\code{box.cox.powers} object.}
}
\details{
  Note that this is \emph{unconditional} Box-Cox. That is, there is
  no regression model, and there are no predictors. The object is to
  make the distribution of the variable(s) as (multi)normal as possible.
  For Box-Cox regression, see the \code{boxcox} function in the 
  \code{MASS} package.
  
  The function estimates the Box-Cox powers, 
  \eqn{x_{j}^{\prime }=(x_{j}^{\lambda _{j}}-1)/\lambda _{j}}{x' = (x^p - 1)/p} 
  for \eqn{\lambda _{j} \neq 0}{p != 0} and \eqn{x_{j}^{\prime }=\log x_{j}}{x' = log(x)} 
  for \eqn{\lambda _{j}=0}{p = 0}. Subsequently using ordinary power
  transformations (i.e., \eqn{x^p} for \eqn{p \neq 0}{p != 0}) 
  achieves the same result.
}
\value{
  returns an object of class \code{box.cox.powers}, which may be printed
  or summarized.
}

\references{
  Box, G. E. P. and Cox, D. R. (1964)
  An analysis of transformations.
  \emph{JRSS B} \bold{26}, 211--246.
  
  Cook, R. D. and Weisberg, S. (1999)
  \emph{Applied Regression, Including Computing and Graphics.} Wiley.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[MASS:boxcox]{boxcox}}, \code{\link{box.cox}}, \code{\link{box.cox.var}}, 
    \code{\link{box.cox.axis}}}

\examples{
data(Prestige)
attach(Prestige)
summary(box.cox.powers(cbind(income, education)))
## Box-Cox Transformations to Multinormality  
## 
##           Est.Power Std.Err. Wald(Power=0) Wald(Power=1) 
## income       0.2617   0.1014         2.580        -7.280 
## education    0.4242   0.4033         1.052        -1.428 
## 
## L.R. test, all powers = 0:  7.694   df = 2   p = 0.0213 
## L.R. test, all powers = 1:  48.8727   df = 2   p = 0  
plot(income, education)
plot(box.cox(income, .26), box.cox(education, .42))

summary(box.cox.powers(income))
## Box-Cox Transformation to Normality 
## 
##  Est.Power Std.Err. Wald(Power=0) Wald(Power=1)
##     0.1793   0.1108         1.618        -7.406
## 
## L.R. test, power = 0:  2.7103   df = 1   p = 0.0997
## L.R. test, power = 1:  47.261   df = 1   p = 0 
qq.plot(income)
qq.plot(income^.18)
}

\keyword{multivariate}
\keyword{models}

\eof
\name{box.cox.var}
\alias{box.cox.var}

\title{Constructed Variable for Box-Cox Transformation}
\description{
  Computes a constructed variable for the Box-Cox transformation of the
  response variable in a linear model.
}
\usage{
box.cox.var(y)
}

\arguments{
  \item{y}{response variable.}
}
\details{
  The constructed variable is defined as \eqn{y[\log (y/\widetilde{y})-1]}{y[log(y/y') -1]}, where 
  \eqn{\widetilde{y}}{y'} is the geometric mean of \code{y}. 
  
  The constructed variable is meant to be
  added to the right-hand-side of the linear model. The t-test for the
  coefficient of the constructed variable is an approximate score test for whether a
  transformation is required. 
  
  If \eqn{b} is the coefficient of the constructed variable,
  then an estimate of the normalizing power transformation based on the score statistic
  is \eqn{1-b}{1 - b}. An added-variable plot for the constructed
  variable shows leverage and influence on the decision to transform \code{y}. 
}
\value{
  a numeric vector of the same length as \code{y}.
}
\references{
  Atkinson, A. C. (1985)
  \emph{Plots, Transformations, and Regression}. Oxford.
  
  Box, G. E. P. and Cox, D. R. (1964)
  An analysis of transformations.
  \emph{JRSS B} \bold{26}, 211--246.
  
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage. 
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[MASS]{boxcox}}, \code{\link{box.cox}}, \code{\link{box.cox.powers}}, 
  \code{\link{box.cox.axis}}, \code{\link{av.plots}}}

\examples{
data(Ornstein)
mod<-lm(interlocks+1~assets, data=Ornstein)
mod.aux<-update(mod, .~.+box.cox.var(interlocks+1))
summary(mod.aux)
## Call:
## lm(formula = interlocks + 1 ~ assets + box.cox.var(interlocks + 
##     1), data = Ornstein)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.189  -6.701   0.541   6.773  12.051 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                  1.461e+01  5.426e-01  26.920   <2e-16
## assets                      -7.142e-05  5.119e-05  -1.395    0.164
## box.cox.var(interlocks + 1)  7.427e-01  4.136e-02  17.956   <2e-16
## 
## Residual standard error: 7.247 on 245 degrees of freedom
## Multiple R-Squared: 0.7986,     Adjusted R-squared: 0.797 
## F-statistic: 485.7 on 2 and 245 degrees of freedom,     p-value:     0 
av.plots(mod.aux, "box.cox.var(interlocks + 1)")
}
\keyword{manip}
\keyword{regression}

\eof
\name{box.tidwell}
\alias{box.tidwell}
\alias{box.tidwell.formula}
\alias{box.tidwell.default}
\alias{print.box.tidwell}

\title{Box-Tidwell Transformations}

\description{
  Computes the Box-Tidwell power transformations of the predictors in a
  linear model.
}

\usage{
box.tidwell(y, ...)

\method{box.tidwell}{formula}(formula, other.x=NULL, data=NULL, subset, 
  na.action=options()$na.action, verbose=FALSE, tol=0.001, 
  max.iter=25, ...)

\method{box.tidwell}{default}(y, x1, x2=NULL, max.iter=25, tol=0.001, 
  verbose=FALSE, ...)
  
\method{print}{box.tidwell}(x, digits, ...)
}

\arguments{
  \item{formula}{two-sided formula, the right-hand-side of which gives the
    predictors to be transformed.}
  \item{other.x}{one-sided formula giving the predictors that are \emph{not}
    candidates for transformation, including (e.g.) factors.}
  \item{data}{an optional data frame containing the variables in the model. 
    By default the variables are taken from the environment from which 
    \code{box.tidwell} is called.}
  \item{subset}{an optional vector specifying a subset of observations to be used.}
  \item{na.action}{a function that indicates what should happen when the data contain \code{NA}s. 
    The default is set by the \code{na.action} setting of \code{options}.}
  \item{verbose}{if \code{TRUE} a record of iterations is printed.}
  \item{tol}{if maximum relative change in coefficients is less than \code{tol} then
    convergence is declared.}
  \item{max.iter}{maximum number of iterations.}
  \item{y}{response variable.}
  \item{x1}{matrix of predictors to transform.}
  \item{x2}{matrix of predictors that are \emph{not} candidates for transformation.}
  \item{\dots}{not for the user.}
  \item{x}{\code{box.tidwell} object.}
  \item{digits}{number of digits for rounding.}
}

\details{
  The maximum-likelihood estimates of the transformation parameters are computed
  by Box and Tidwell's (1962) method, which is usually more efficient than using
  a general nonlinear least-squares routine for this problem. Score tests for the
  transformations are also reported.
}

\value{
  an object of class \code{box.tidwell}, which is normally just printed.
}

\references{ 
  Box, G. E. P. and Tidwell, P. W. (1962)
  Transformation of the independent variables.
  \emph{Technometrics} \bold{4}, 531--550.

  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage. 
}

\author{John Fox \email{jfox@mcmaster.ca}}

\examples{
data(Prestige)
box.tidwell(prestige~income+education, ~ poly(women,2), data=Prestige)
##                   income education
## Initial Power   -0.91030   2.24354
## Score Statistic -5.30129   2.40556
## p-value          0.00000   0.01615
## MLE of Power    -0.03777   2.19283
}

\keyword{regression}

\eof
\name{car-internal}
\alias{inv}
\alias{has.intercept}
\alias{has.intercept.default}
\alias{term.names}
\alias{term.names.default}
\alias{predictor.names}
\alias{predictor.names.default}
\alias{responseName}
\alias{responseName.default}
\alias{response}
\alias{response.default}
\alias{is.aliased}
\alias{df.terms}
\alias{df.terms.default}
\alias{mfrow}
\alias{nice}
\alias{relatives}

\title{Internal car functions}

\description{
Internal functions for package car.
}

\usage{
df.terms(model, term, ...)
\method{df.terms}{default}(model, term, ...)
has.intercept(model, ...)
\method{has.intercept}{default}(model, ...)
inv(x)
is.aliased(model)
mfrow(n, max.plots=0)
nice(x, direction=c("round", "down", "up"))
predictor.names(model, ...)
\method{predictor.names}{default}(model, ...)
relatives(term, names, factors)
responseName(model, ...) 
\method{responseName}{default}(model, ...)
response(model, ...)
\method{response}{default}(model, ...)
term.names(model, ...)
\method{term.names}{default}(model, ...)
}

\arguments{
  \item{model}{}
  \item{term}{}
  \item{...}{}
  \item{x}{}
  \item{n}{}
  \item{max.plots}{}
  \item{direction}{}
  \item{names}{}
  \item{factors}{}
}

\details{
These functions are not intended to be called by the user.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\keyword{misc}

\eof
\name{ceres.plots}
\alias{ceres.plots}
\alias{ceres.plot}
\alias{ceres.plot.lm}
\alias{ceres.plot.glm}

\title{Ceres Plots}
\description{
  These functions calculate Ceres plots for linear and generalized
  linear models.
}
\usage{
ceres.plots(model, variable, ask=missing(variable), one.page=!ask, 
  span=0.5, ...)

ceres.plot(model, ...)

\method{ceres.plot}{lm}(model, variable, line=TRUE, smooth=TRUE, span=0.5, iter,
  las=par('las'), col=palette()[2], pch=1, lwd=2, main="Ceres Plot", ...)

\method{ceres.plot}{glm}(model, ...)
}

\arguments{
  \item{model}{model object produced by \code{lm} or \code{glm}.}
  \item{variable}{variable (if it exists in the search path) or
    name of variable. This argument usually is omitted for
    \code{ceres.plots}.}
  \item{ask}{if \code{TRUE}, a menu is provided in the R Console for the
    user to select the variable(s) to plot, and to modify the span for the smoother
    used to draw a nonparametric-regression line on the plot.}
  \item{one.page}{if \code{TRUE} (and \code{ask=FALSE}), put all plots on one
    graph.}
  \item{span}{span for lowess smoother.}
  \item{iter}{number of robustness iterations for nonparametric-regression smooth;
    defaults to 3 for a linear model and to 0 for a non-Gaussian glm.}
  \item{line}{\code{TRUE} to plot least-squares line.}
  \item{smooth}{\code{TRUE} to plot nonparametric-regression (lowess) line.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{pch}{plotting character for points; default is \code{1} 
    (a circle, see \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).}
  \item{main}{title for plot.}
  \item{\dots}{pass arguments down.}
}
\details{
  Ceres plots are a generalization of component+residual (partial
  residual) plots that are less prone to leakage of nonlinearity
  among the predictors.
  
  The function intended for direct use is \code{ceres.plots}. 
  By default, this function is used interactively
  through a text menu.
  
  The model cannot contain interactions. Factors may be present in the
  model, but Ceres plots cannot be drawn for them.
}
\value{
  \code{NULL}. These functions are used for their side effect: producing
  plots.
}
\references{ 
  Cook, R. D. and Weisberg, S. (1999)
  \emph{Applied Regression, Including Computing and Graphics.} Wiley.

  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}
\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{cr.plots}}, \code{\link{av.plots}}}

\examples{
  \dontrun{
data(Prestige)
ceres.plots(lm(prestige~income+education+type, data=Prestige))
  }
}

\keyword{hplot}
\keyword{regression}

\eof
\name{Cook's Distances}

\alias{cookd}

\title{Cook's Distances for Linear and Generalized Linear Models}
\description{
  This function now simply calls \code{cooks.distance} in the \code{base} package.
}

\usage{
cookd(model, ...)
}

\arguments{
  \item{model}{\code{lm} or \code{glm} model object.}
  \item{...}{other arguments to be passed to \code{cooks.distance}.}
}

\details{
  Cook's distances for generalized linear
  models are approximations, as described in Williams (1987) (except that the Cook's distances are
  scaled as \emph{F} rather than as chi-square values).
  
  This function is retained primarily for consistency with \emph{An R and S-PLUS Companion
  to Applied Regression.} Other deletion diagnostics formerly in the \code{car} package have
  been rewritten and moved to the \code{base} package; these include \code{influence}, 
  \code{rstudent}, \code{hatvalues}, \code{dfbeta}, and \code{dfbetas}.
}

\value{
 \code{cookd} returns a vector with one entry for each observation.
}

\references{
  Cook, R. D. and Weisberg, S. (1984)
  \emph{Residuals and Influence in Regression.} Wiley.
  
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
  
  Williams, D. A. (1987)
  Generalized linear model diagnostics using the deviance and single
  case deletions. \emph{Applied Statistics} \bold{36}, 181--191.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base:cooks.distance]{cooks.distance}}, \code{\link[base:influence]{influence}}}

\examples{
data(Duncan)
attach(Duncan)
mod <- lm(prestige ~ income + education)
plot(cookd(mod))
}

\keyword{regression}
\keyword{models}

\eof
\name{cr.plots}
\alias{cr.plots}
\alias{crp}
\alias{cr.plot}
\alias{cr.plot.lm}
\alias{cr.plot.glm}

\title{Component+Residual (Partial Residual) Plots}
\description{
  These functions construct component+residual plots (also called
  partial-residual plots) for linear and generalized linear models.
}
\usage{
cr.plots(model, variable, ask=missing(variable), one.page=!ask, 
  span=0.5, ...)

crp(...)

cr.plot(model, ...)

\method{cr.plot}{lm}(model, variable, order=1, line=TRUE, smooth=TRUE, iter, 
  span=0.5, las=par('las'), col=palette()[2], pch=1, lwd=2,
  main="Component+Residual Plot", ...)

\method{cr.plot}{glm}(model, ...)
}

\arguments{
  \item{model}{model object produced by \code{lm} or \code{glm}.}
  \item{variable}{variable (if it exists in the search path) or
    name of variable. This argument usually is omitted for \code{crp}
    or \code{cr.plots}.}
  \item{ask}{if \code{TRUE}, a menu is provided in the R Console for the
    user to select the variable(s) to plot, and to modify the span for the smoother
    used to draw a nonparametric-regression line on the plot.}
  \item{one.page}{if \code{TRUE} (and \code{ask=FALSE}), put all plots on one
    graph.}
  \item{order}{order of polynomial regression performed for predictor to be plotted.}
  \item{line}{\code{TRUE} to plot least-squares line.}
  \item{smooth}{\code{TRUE} to plot nonparametric-regression (lowess) line.}
  \item{iter}{number of robustness iterations for nonparametric-regression smooth;
    defaults to 3 for a linear model and to 0 for a non-Gaussian glm.}
  \item{span}{span for lowess smoother.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{pch}{plotting character for points; default is \code{1} 
    (a circle, see \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).}
  \item{main}{title for plot.}
  \item{\dots}{pass arguments down.}
}
\details{
  The function intended for direct use is \code{cr.plots} (for which \code{crp}
  is an abbreviation). By default, these functions are used interactively
  through a text menu.
  
  The model cannot contain interactions, but can contain factors. 
  Parallel boxplots of the partial residuals are drawn for the levels
  of a factor.
}
\value{
  \code{NULL}. These functions are used for their side effect: producing
  plots.
}
\references{ 
  Cook, R. D. and Weisberg, S. (1999)
  \emph{Applied Regression, Including Computing and Graphics.} Wiley.

  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}
\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{ceres.plots}}, \code{\link{av.plots}}}

\examples{
  \dontrun{
data(Womenlf)
cr.plots(glm(partic != "not.work" ~ hincome + children, 
  data=Womenlf, family=binomial))

data(Prestige)
cr.plots(lm(prestige~income+education, data=Prestige), 
  variable="income")
  }
}

\keyword{hplot}
\keyword{regression}

\eof
\name{durbin.watson}
\alias{durbin.watson}
\alias{durbin.watson.lm}
\alias{durbin.watson.default}
\alias{print.durbin.watson}

\title{Durbin-Watson Test for Autocorrelated Errors}
\description{
  Computes residual autocorrelations and generalized Durbin-Watson
  statistics and their bootstrapped p-values.
}
\usage{
durbin.watson(model, ...)

\method{durbin.watson}{lm}(model, max.lag=1, simulate=TRUE, reps=1000,
    method=c("resample","normal"),
    alternative=c("two.sided", "positive", "negative"), ...)

\method{durbin.watson}{default}(model, max.lag=1, ...)

\method{print}{durbin.watson}(x, ...)
}

\arguments{
  \item{model}{a linear-model object, or a vector of residuals from a linear model.}
  \item{max.lag}{maximum lag to which to compute residual autocorrelations
    and Durbin-Watson statistics.}
  \item{simulate}{if \code{TRUE} p-values will be estimated by bootstrapping.}
  \item{reps}{number of bootstrap replications.}
  \item{method}{bootstrap method: \code{"resample"} to resample from the observed
    residuals; \code{"normal"} to sample normally distributed errors with 0 mean
    and standard deviation equal to the standard error of the regression.}
  \item{alternative}{sign of autocorrelation in alternative hypothesis; specify
    only if \code{max.lag = 1}; if \code{max.lag > 1}, then \code{alternative} is
    taken to be \code{"two.sided"}.}
  \item{\dots}{arguments to be passed down to method functions.}
  \item{x}{\code{durbin.watson} object.}
}

\value{
    Returns an object of type \code{"durbin.watson"}.
}

\references{ 
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\examples{
data(Hartnagel)
durbin.watson(lm(fconvict ~ tfr + partic + degrees + mconvict, data=Hartnagel))
##  lag Autocorrelation D-W Statistic p-value 
##    1        0.688345     0.6168636       0 
##  Alternative hypothesis: rho != 0
}

\keyword{regression}
\keyword{ts}

\eof
\name{hccm}
\alias{hccm}
\alias{hccm.lm}
\alias{hccm.default}

\title{Heteroscedasticity-Corrected Covariance Matrices}
\description{
  Calculates heteroscedasticity-corrected covariance matrices for
  unweighted linear models. These are also called ``White-corrected''
  covariance matrices.
}
\usage{
hccm(model, ...)

\method{hccm}{lm}(model, type=c("hc3", "hc0", "hc1", "hc2", "hc4"), ...)

\method{hccm}{default}(model, ...)
}

\arguments{
  \item{model}{an unweighted linear model, produced by \code{lm}.}
  \item{type}{one of \code{"hc0"}, \code{"hc1"}, \code{"hc2"}, \code{"hc3"}, or \code{"hc4"}; the
    first of these gives the classic White correction. The \code{"hc1"}, \code{"hc2"}, and \code{"hc3"}
    corrections are described in Long and Ervin (2000); \code{"hc4"} is described in Cribari-Neto (in press).}
  \item{...}{arguments to pass to \code{hccm.lm}.}
}

\details{
  The classical White-corrected coefficient covariance matrix (\code{"hc0"}) is
  \deqn{V(b)=(X^{\prime }X)^{-1}X^{\prime }diag(e_{i}^{2})X(X^{\prime }X)^{-1}}{V(b) = inv(X'X) X' diag(e^2) X inv(X'X)}
  where \eqn{e_{i}^{2}}{e^2} are the squared residuals, and \eqn{X} is the model
  matrix. The other methods represent adjustments to this formula.
  
  The function \code{hccm.default} simply catches non-\code{lm} objects.
}
\value{
  The heteroscedasticity-corrected covariance matrix for the model.
}
\references{
  Cribari-Neto, F. (in press)
  Asymptotic inference under heteroskedasticity of unknown form.
  \emph{Computational Statistics and Data Analysis}.
  
  Long, J. S. and Ervin, L. H. (2000) 
  Using heteroscedasticity consistent standard errors in the linear regression model.
  \emph{The American Statistician} \bold{54}, 217--224.
  
  White, H. (1980)
  A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity.
  \emph{Econometrica} \bold{48}, 817--838.
  }
\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{ncv.test}}, \code{\link{spread.level.plot}} }

\examples{
options(digits=4)
data(Ornstein)
mod<-lm(interlocks~assets+nation, data=Ornstein)
Var(mod)
##             (Intercept)     assets  nationOTH   nationUK   nationUS
## (Intercept)   1.079e+00 -1.588e-05 -1.037e+00 -1.057e+00 -1.032e+00
## assets       -1.588e-05  1.642e-09  1.155e-05  1.362e-05  1.109e-05
## nationOTH    -1.037e+00  1.155e-05  7.019e+00  1.021e+00  1.003e+00
## nationUK     -1.057e+00  1.362e-05  1.021e+00  7.405e+00  1.017e+00
## nationUS     -1.032e+00  1.109e-05  1.003e+00  1.017e+00  2.128e+00
hccm(mod)             
##             (Intercept)     assets  nationOTH   nationUK   nationUS
## (Intercept)   1.664e+00 -3.957e-05 -1.569e+00 -1.611e+00 -1.572e+00
## assets       -3.957e-05  6.752e-09  2.275e-05  3.051e-05  2.231e-05
## nationOTH    -1.569e+00  2.275e-05  8.209e+00  1.539e+00  1.520e+00
## nationUK     -1.611e+00  3.051e-05  1.539e+00  4.476e+00  1.543e+00
## nationUS     -1.572e+00  2.231e-05  1.520e+00  1.543e+00  1.946e+00
}

\keyword{regression}

\eof
\name{leverage.plots}
\alias{leverage.plots}
\alias{leverage.plot}
\alias{leverage.plot.lm}
\alias{leverage.plot.glm}

\title{Regression Leverage Plots}
\description{
  These functions display a generalization, due to Sall (1990), 
  of added-variable plots to multiple-df terms in a linear model. 
  When a term has just 1 df, the leverage plot is a rescaled version
  of the usual added-variable (partial-regression) plot.
}
\usage{
leverage.plots(model, term.name, ask=missing(term.name), ...)

leverage.plot(model, ...)

\method{leverage.plot}{lm}(model, term.name, 
  labels=names(residuals(model)[!is.na(residuals(model))]),  
  identify.points=TRUE, las=par('las'), col=palette()[2], pch=1, lwd=2, 
  main="Leverage Plot", ...)

\method{leverage.plot}{glm}(model, ...)
}

\arguments{
  \item{model}{model object produced by \code{lm}.}
  \item{term.name}{name of term in the model to be plotted; this argument is usually
    omitted for \code{leverage.plots}.}
  \item{ask}{if \code{TRUE}, a menu is provided in the R Console for the
    user to select the term(s) to plot.}
  \item{labels}{observation names.}
  \item{identify.points}{if \code{TRUE}, then identify points interactively.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{pch}{plotting character for points; default is \code{1} 
    (a circle, see \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).}
  \item{main}{title for plot.}
  \item{\dots}{arguments passed down to method functions.}
}
\details{
  The function intended for direct use is \code{leverage.plots}. By default, this
  function is used interactively through a text menu.
  
  The model can contain factors and interactions. A leverage plot can be
  drawn for each term in the model, including the constant.
  
  \code{leverage.plot.glm} is a dummy function, which generates an error message.
}

\value{
  \code{NULL}. These functions are used for their side effect: producing
  plots.
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
  
  Sall, J. (1990)
  Leverage plots for general linear hypotheses.
  \emph{American Statistician} \bold{44}, 308--315.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{av.plots}}}

\examples{
  \dontrun{
data(Duncan)
leverage.plots(lm(prestige~(income+education)*type, data=Duncan))
  }
}

\keyword{hplot}
\keyword{regression}

\eof
\name{linear.hypothesis}
\alias{linear.hypothesis}
\alias{lht}
\alias{linear.hypothesis.lm}
\alias{linear.hypothesis.glm}
\alias{print.F.test}
\alias{print.chisq.test}

\title{Test Linear Hypothesis}
\description{
Test a linear hypothesis for a linear or generalized linear model.
}
\usage{
linear.hypothesis(model, ...)

lht(...)

\method{linear.hypothesis}{lm}(model, hypothesis.matrix, rhs=0, 
  summary.model=summary(model, corr = FALSE), 
  white.adjust=FALSE, error.SS, error.df, ...)

\method{linear.hypothesis}{glm}(model, hypothesis.matrix, rhs=0, 
  summary.model=summary(model, corr = FALSE), ...)
  
\method{print}{chisq.test}(x, ...)

\method{print}{F.test}(x, ...)
}
\arguments{
  \item{model}{model object produced by \code{lm} or \code{glm}.}
  \item{hypothesis.matrix}{matrix (or vector) giving linear combinations
    of coefficients by rows.}
  \item{rhs}{right-hand-side vector for hypothesis, with as many entries as
    rows in \code{hypothesis.matrix}.}
  \item{summary.model}{a \code{summary} object for the model; usually specified
    only when \code{linear.hypothesis} is called from another function that has
    already computed the summary.}
  \item{white.adjust}{if \code{TRUE} use heteroscedasticity-corrected covariance matrix.}
  \item{error.SS}{error sum of squares for the hypothesis; if not specified, will be
    taken from \code{model}.}
  \item{error.df}{error degrees of freedom for the hypothesis; if not specified,
    will be taken from \code{model}.}
  \item{x}{\code{chisq.test} or \code{F.test} object.}
  \item{...}{arguments to pass down.}
}
\details{
  Computes an F-test for the hypothesis in a linear model, or a Wald test in a
  generalized linear model.
}
\value{
  Returns an \code{F.test} or \code{chisq.test} object, with components:
  \item{SSH}{sum of squares for hypothesis (for a linear model).}
  \item{SSE}{error sum of squares (for a linear model).}
  \item{f}{F-statistic for the hypothesis (for a linear model).}
  \item{Df}{degrees of freedom for F or chisquare.}
  \item{p}{p-value for the hypothesis.}
  \item{ChiSquare}{chisquare statistic for the hypothesis (for a generalized linear model).}
}

\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}
\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{anova}}, \code{\link{Anova}}, \code{\link{hccm}}}

\examples{
data(Davis)
mod<-lm(weight~repwt, data=Davis)
linear.hypothesis(mod, diag(2), c(0,1))
## F-Test 
## SS = 245.9738     SSE = 12828.03     F = 1.735312  Df = 2 and 181     p = 0.179266 
}

\keyword{htest}
\keyword{models}
\keyword{regression}

\eof
\name{logit}
\alias{logit}

\title{Logit Transformation}
\description{
  Compute the logit transformation of proportions or percentages.
}
\usage{
logit(p, percents=max(p, na.rm = TRUE) > 1, adjust)
}

\arguments{
  \item{p}{numeric vector or array of proportions or percentages.}
  \item{percents}{\code{TRUE} for percentages.}
  \item{adjust}{adjustment factor to avoid proportions of 0 or 1;
    defaults to \code{0} if there are no such proportions in the data,
    and to \code{.025} if there are.}
}
\details{
  Computes the logit transformation logit \eqn{=\log [p/(1-p)]}{= log[p/(1 - p)]}
  for the proportion \eqn{p}. 
  
  If \eqn{p=0}{p = 0} or \eqn{1}, then the logit
  is undefined. \code{logit} can remap the proportions to the interval 
  \code{(adjust, 1 - adjust)} prior to the transformation. If it adjusts the data
  automatically, \code{logit} will print a warning message.
}
\value{
  a numeric vector or array of the same shape and size as \code{p}.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{prob.axis}}}

\examples{
options(digits=4)
logit(.1*0:10)
##  [1] -3.6636 -1.9924 -1.2950 -0.8001 -0.3847  0.0000  0.3847
##  [8]  0.8001  1.2950  1.9924  3.6636
## Warning message: 
## Proportions remapped to (0.025,0.975) in: logit(0.1 * 0:10) 

logit(.1*0:10, adjust=0)
##  [1]    -Inf -2.1972 -1.3863 -0.8473 -0.4055  0.0000  0.4055
##  [8]  0.8473  1.3863  2.1972     Inf
}

\keyword{manip}

\eof
\name{n.bins}
\alias{n.bins}

\title{Number of Bins for Histogram}
\description{
  Several rules for calculating the number of bins to use for
  a histogram.
}
\usage{
n.bins(x, rule=c("freedman.diaconis", "sturges", "scott", "simple"))
}

\arguments{
  \item{x}{numeric vector, variable for histogram}
  \item{rule}{see below.}
}
\details{
  \describe{
    \item{\code{"freedman.diaconis"}:}{\eqn{n^{1/3}range/(2\times IQR)}{[n^(1/3) * range]/(2 * IQR)}.}
    \item{\code{"sturges"}:}{\eqn{ceiling(\log _{2}n+1)}{ceiling[log2(n) + 1]}.}
    \item{\code{"scott"}:}{\eqn{ceiling(n^{1/3}range/3.5s)}{ceiling[n^(1/3) * range/(3.5 * s)]}.}
    \item{\code{"simple"}:}{\eqn{floor(10\log _{10}n)}{floor[10 * log10(n)]} for \eqn{n>100}{n > 100},
     or \eqn{floor(2\sqrt{n})}{floor[2 * sqrt(n)]} for \eqn{n\leq 100}{n <= 100}.}
    }
  where \eqn{n} is the number of observations, \eqn{range} is the range of \code{x},
  \eqn{IQR} is the inter-quartile range of \code{x}, and \eqn{s} is the standard
  deviation of \code{x}.  
}

\value{
  the number of bins.
}

\references{
  Freedman, D. and Diaconis, P. (1981)
  On the histogram as a density estimator.
  \emph{Zeitschrift fur Wahrscheinlichkeitstheorie und verwandte Gebiete}
  \bold{57}, 453--476.
  
  Scott, D. W. (1979)
  On optimal and data-based histograms.
  \emph{Biometrika} \bold{66}, 605--610.
  
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS, Third Edition}, Springer.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{hist}}}

\examples{
data(UN)
attach(UN)
n.bins(gdp)
## [1] 19
n.bins(gdp, "sturges")
## [1] 9
n.bins(gdp, "scott")
## [1] 8
n.bins(gdp, "simple")
## [1] 22
hist(gdp)
hist(gdp, nclass=n.bins(gdp))
}

\keyword{dplot}

\eof
\name{ncv.test}
\alias{ncv.test}
\alias{ncv.test.lm}
\alias{ncv.test.glm}

\title{Score Test for Non-Constant Error Variance}
\description{
  Computes a score test of the hypothesis of constant error variance
  against the alternative that the error variance changes with the
  level of the response (fitted values), or with a linear combination
  of predictors.
}
\usage{
ncv.test(model, ...)

\method{ncv.test}{lm}(model, var.formula, data=NULL, subset, na.action, ...)

\method{ncv.test}{glm}(model, ...)
}

\arguments{
  \item{model}{an unweighted linear model, produced by \code{lm}.}
  \item{var.formula}{a one-sided formula for the error variance; if omitted,
    the error variance depends on the fitted values.}
  \item{data}{an optional data frame containing the variables in the model. 
    By default the variables are taken from the environment from which 
    \code{ncv.test} is called.}
  \item{subset}{an optional vector specifying a subset of observations to be used.}
  \item{na.action}{a function that indicates what should happen when the data contain \code{NA}s. 
    The default is set by the \code{na.action} setting of \code{options}.}
  \item{\dots}{arguments passed down to method functions.}
}
\details{
  This test is often called the Breusch-Pagan test; it was independently
  suggested by Cook and Weisberg (1983).
  
  \code{ncv.test.glm} is a dummy function to generate an error when a \code{glm}
  model is used.
}
\value{
  The function returns a \code{chisq.test} object, which is usually just printed.
}
\references{
  Breusch, T. S. and Pagan, A. R. (1979)
  A simple test for heteroscedasticity and random coefficient variation.
  \emph{Econometrica} \bold{47}, 1287--1294.
  
  Cook, R. D. and Weisberg, S. (1983)
  Diagnostics for heteroscedasticity in regression.
  \emph{Biometrika} \bold{70}, 1--10.
  
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage. 
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{hccm}}, \code{\link{spread.level.plot}} }

\examples{
data(Ornstein)
mod<-lm(interlocks~assets+sector+nation, data=Ornstein)
ncv.test(mod)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 46.98537    Df = 1     p = 7.151835e-12 
ncv.test(mod, ~ assets+sector+nation, data=Ornstein)
## Non-constant Variance Score Test 
## Variance formula: ~ assets + sector + nation 
## Chisquare = 74.73535    Df = 13     p = 1.066320e-10 
}

\keyword{htest}
\keyword{regression}

\eof
\name{outlier.test}
\alias{outlier.test}
\alias{outlier.test.lm}
\alias{outlier.test.glm}
\alias{print.outlier.test}

\title{Bonferroni Outlier Test}

\description{
  Reports the Bonferroni p-value for the most extreme observation.
  At present, there are methods for studentized residuals in
  linear and generalized linear models.
}

\usage{
outlier.test(model, ...)

\method{outlier.test}{lm}(model, labels=names(rstud), ...)

\method{outlier.test}{glm}(model, labels=names(rstud), ...)

\method{print}{outlier.test}(x, digits=options("digits")[[1]], ...)
}

\arguments{
  \item{model}{a suitable model object.}
  \item{labels}{an optional vector of observation names.}
  \item{...}{arguments passed down to method functions.}
  \item{x}{\code{outlier.test} object.}
  \item{digits}{number of digits for printed output.}
}

\details{
  For a linear model, the p-value reported is for the largest absolute
  studentized residual, using the \eqn{t} distribution with degrees of
  freedom one less than the residual df for the model. For a generalized
  linear model, the largest absolute studentized residual is also used,
  but with the standard-normal distribution. The Bonferroni
  adjustment multiplies the usual two-sided p-value by the number of 
  observations.
}

\value{
  an object of class \code{outlier.test}, which is normally just
  printed.
}

\references{
  Cook, R. D. and Weisberg, S. (1984)
  \emph{Residuals and Influence in Regression.} Wiley.
  
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
  
  Williams, D. A. (1987)
  Generalized linear model diagnostics using the deviance and single
  case deletions. \emph{Applied Statistics} \bold{36}, 181--191.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\examples{
data(Duncan)
outlier.test(lm(prestige~income+education, data=Duncan))
##  max|rstudent| df unadjusted p Bonferroni p
##       3.134519 41  0.003177202    0.1429741
## 
## Observation: minister 
}

\keyword{regression}
\keyword{htest}

\eof
\name{panel.car}
\alias{panel.car}

\title{Panel Function for Coplots}
\description{
  a panel function for use with \code{coplot} that plots points, a lowess line,
  and a regression line.
}
\usage{
panel.car(x, y, col, pch, cex=1, span=0.5, lwd=2, 
  regression.line=lm, lowess.line=TRUE, ...)
}

\arguments{
  \item{x}{vector giving horizontal coordinates.}
  \item{y}{vector giving vertical coordinates.}
  \item{col}{point color.}
  \item{pch}{plotting character for points.}
  \item{cex}{character expansion factor for points.}
  \item{span}{span for lowess smoother.}
  \item{lwd}{line width, default is \code{2}.}
  \item{regression.line}{function to compute coefficients of regression line,
    or \code{FALSE} for no line.}
  \item{lowess.line}{if \code{TRUE} plot lowess smooth.}
  \item{\dots}{other arguments to pass to functions \code{lines}
    and \code{reg.line}.}
}

\value{
  \code{NULL}. This function is used for its side effect: producing
  a panel in a coplot.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{coplot}}, \code{\link{reg.line}}}

\examples{
data(Prestige)
coplot(prestige~income|education, panel=panel.car, 
  col="red", data=Prestige)
}

\keyword{aplot}

\eof
\name{qq.plot}
\alias{qq.plot}
\alias{qqp}
\alias{qq.plot.default}
\alias{qq.plot.lm}
\alias{qq.plot.glm}

\title{Quantile-Comparison Plots}
\description{
 Plots empirical quantiles of a variable, or of studentized residuals from
 a linear model, against theoretical quantiles of a comparison distribution.
}
\usage{
qq.plot(x, ...)

qqp(...)

\method{qq.plot}{default}(x, distribution="norm", 
  ylab=deparse(substitute(x)), 
  xlab=paste(distribution, "quantiles"), main="", las=par('las'),
  envelope=0.95, labels=FALSE, col=palette()[2], lwd=2, pch=1,
  line=c("quartiles", "robust", "none"), ...)

\method{qq.plot}{lm}(x, main="", xlab=paste(distribution, "Quantiles"), 
  ylab=paste("Studentized Residuals(", deparse(substitute(x)), ")",
    sep = ""), 
  distribution=c("t", "norm"), line=c("quartiles", "robust", "none"), 
  las=par('las'), simulate=FALSE, envelope=0.95, labels=names(rstudent), 
  reps=100, col=palette()[2], lwd=2, pch=1, ...)
}
\arguments{
  \item{x}{vector of numeric values or \code{lm} object.}
  \item{distribution}{root name of comparison distribution -- e.g., \code{norm} for the
    normal distribution; \code{t} for the t-distribution.}
  \item{ylab}{label for vertical (empirical quantiles) axis.}
  \item{xlab}{label for horizontal (comparison quantiles) axis.}
  \item{main}{label for plot.}
  \item{envelope}{confidence level for point-wise confidence envelope, or 
    \code{FALSE} for no envelope.}
  \item{labels}{vector of point labels for interactive point identification,
    or \code{FALSE} for no labels.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{pch}{plotting character for points; default is \code{1} 
    (a circle, see \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).
    Confidence envelopes are drawn at half this line width.}
  \item{line}{\code{"quartiles"} to pass a line through the quartile-pairs, or
    \code{"robust"} for a robust-regression line; the latter uses the \code{rlm}
    function in the \code{MASS} package. Specifying \code{line = "none"} suppresses the line.}
  \item{simulate}{if \code{TRUE} calculate confidence envelope by parametric bootstrap;
    for \code{lm} object only. The method is due to Atkinson (1985).}
  \item{reps}{integer; number of bootstrap replications for confidence envelope.}
  \item{\dots}{arguments such as \code{df} to be passed to the appropriate quantile function.}
}

\details{
  Draws theoretical quantile-comparison plots for variables and for studentized residuals
  from a linear model. A comparison line is drawn on the plot either through the quartiles
  of the two distributions, or by robust regression. 
  
  Any distribution for which quantile and
  density functions exist in R (with prefixes \code{q} and \code{d}, respectively) may be used. 
  Studentized residuals are plotted against the
  appropriate t-distribution.
  
  The function \code{qqp} is an abbreviation for \code{qq.plot}.
}
\value{
  \code{NULL}. These functions are used only for their side effect (to make a graph).
}
\references{
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.

  Atkinson, A. C. (1985)
  \emph{Plots, Transformations, and Regression.} Oxford.
}
\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{qqplot}}, \code{\link[base]{qqnorm}},
 \code{\link[base]{qqline}}}

\examples{
x<-rchisq(100, df=2)
qq.plot(x)
qq.plot(x, dist="chisq", df=2)

data(Ornstein)
mod<-lm(interlocks~assets+sector+nation, data=Ornstein)
qq.plot(mod, sim=TRUE)
}

\keyword{distribution}
\keyword{univar}
\keyword{regression}

\eof
\name{recode}
\alias{recode}
\title{Recode a Variable}
\description{
 Recodes a numeric vector, character vector, or factor 
 according to simple recode specifications.
}
\usage{
recode(var, recodes, as.factor.result)
}

\arguments{
  \item{var}{numeric vector, character vector, or factor.}
  \item{recodes}{character string of recode specifications:
    see below.}
  \item{as.factor.result}{return a factor; default is \code{TRUE} if 
    \code{var} is a factor, \code{FALSE} otherwise.}
}
\details{
  Recode specifications appear in a character string, separated by
  semicolons (see the examples below), of the form \code{input=output}.
  If an input value satisfies more than one specification, 
  then the first (from left to right) applies.
  If no specification is satisfied, then the input value is carried
  over to the result. \code{NA} is allowed on input and output.
  Several recode specifications are supported:
  \describe{
    \item{single value}{For example, \code{0=NA}.}
    \item{vector of values}{For example, \code{c(7,8,9)='high'}.}
    \item{range of values}{For example, \code{7:9='C'}. The special values \code{lo}
      and \code{hi} may appear in a range. For example, \code{lo:10=1}.}
    \item{\code{else}}{everything that does not fit a previous specification.
      For example, \code{else=NA}. Note that \code{else} matches \emph{all} otherwise
      unspecified values on input, including \code{NA}.}
    }
  If all of the output values are numeric, and if \code{as.factor.result} is 
  \code{FALSE}, then a numeric result is returned.
}

\value{
  a recoded vector of the same length as \code{var}; if \code{var} is a factor,
  then so is the result.
}
 
\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{cut}}, \code{\link[base]{factor}}}

\examples{
x<-rep(1:3,3)
x
## [1] 1 2 3 1 2 3 1 2 3
recode(x, "c(1,2)='A'; else='B'")
## [1] "A" "A" "B" "A" "A" "B" "A" "A" "B"
recode(x, "1:2='A'; 3='B'")
## [1] "A" "A" "B" "A" "A" "B" "A" "A" "B"
}

\keyword{manip}

\eof
\name{reg.line}
\alias{reg.line}

\title{Plot Regression Line}
\description{
  Plots a regression line on a scatterplot; the line is plotted 
  between the minimum and maximum x-values.
}
\usage{
reg.line(mod, col=palette()[2], lwd=2, lty=1,...)
}

\arguments{
  \item{mod}{a model, such as produced by \code{lm},
    that responds to the \code{coefficients} function
    by returning a 2-element vector, whose elements are
    interpreted respectively as the
    intercept and slope of a regression line.}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).}
  \item{lty}{line type; default is \code{1}, a solid line (see \code{\link[base]{par}}).}
  \item{\dots}{optional arguments to be passed to the
    \code{lines} plotting function.}
}
\details{
  In contrast to \code{abline}, this function plots only over
  the range of the observed x-values. The x-values are extracted from
  \code{mod} as the second column of the model matrix. 
}
\value{
  \code{NULL}. This function is used for its side effect: adding
  a line to the plot.
}

\author{John Fox \email{jfox@mcmaster.ca}}


\seealso{\code{\link[base]{abline}}, \code{\link[base]{lines}}}

\examples{
data(Davis)
attach(Davis)
mod.M<-lm(repwt~weight, subset=sex=="M")
mod.F<-lm(repwt~weight, subset=sex=="F")
plot(weight, repwt, pch=c(1,2)[sex])
reg.line(mod.M)
reg.line(mod.F, lty=2)
}

\keyword{aplot}

\eof
\name{scatterplot}
\alias{scatterplot}
\alias{scatterplot.formula}
\alias{scatterplot.default}
\alias{sp}

\title{Scatterplots with Boxplots}
\description{
  Makes fancy scatterplots, with boxplots in the margins, a lowess
  smooth, and a regression line; \code{sp} is an abbreviation for
  \code{scatterplot}.
}
\usage{
scatterplot(x, ...)

\method{scatterplot}{formula}(formula, data, xlab, ylab, subset, labels=FALSE, ...)

\method{scatterplot}{default}(x, y, smooth=TRUE, span=0.5, reg.line=lm, 
  boxplots="xy", xlab=deparse(substitute(x)), ylab=deparse(substitute(y)), 
  las=par('las'), lwd=1, labels=FALSE, log="", groups=FALSE, by.groups=!(groups[1]==FALSE),
  ellipse=FALSE, levels=c(.5, .9), robust=FALSE, 
  col=palette(), pch=1:n.groups, legend.plot=length(levels(groups)) > 1, 
  reset.par=TRUE, ...)
  
sp(...)
}

\arguments{
  \item{formula}{``model'' formula, of the form \code{y ~ x} or 
    (to plot by groups) \code{y ~ x | z}, where \code{z} evaluates to a factor 
    or other variable dividing the data into groups.}
  \item{data}{data frame within which to evaluate the formula.}
  \item{subset}{expression defining a subset of observations.}
  \item{x}{vector of horizontal coordinates.}
  \item{y}{vector of vertical coordinates.}
  \item{smooth}{if \code{TRUE} a lowess nonparametric regression line is
    drawn on the plot.}
  \item{span}{span for the lowess smooth.}
  \item{reg.line}{function to draw a regression line on the plot
    or \code{FALSE} not to plot a regression line.}
  \item{boxplots}{if \code{"x"} a boxplot for \code{x} is drawn above the plot;
    if \code{"y"} a boxplot for \code{y} is drawn to the right of the plot;
    if \code{"xy"} both boxplots are drawn.}
  \item{xlab}{label for horizontal axis.}
  \item{ylab}{label for vertical axis.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{lwd}{width of plotted lines.}
  \item{labels}{if not \code{FALSE} a vector of point labels, to be used interactively
    to identify points on the plot.}
  \item{log}{same as the \code{log} argument to \code{plot}, to produce log axes.}
  \item{groups}{a factor or other variable dividing the data into groups; groups are
    plotted with different colors and plotting characters.}
  \item{by.groups}{if \code{TRUE}, regression lines are fit by groups.}
  \item{ellipse}{if \code{TRUE} data-concentration ellipses are plotted.}
  \item{levels}{level or levels at which concentration ellipses are plotted;
    the default is \code{c(.5, .9)}.}
  \item{robust}{if \code{TRUE} use the \code{cov.trob} function in the \code{MASS} package
    to calculate the center and covariance matrix for the data ellipse.}
  \item{col}{colors for points and lines; the default is the
    current color palette, starting at the \emph{second} entry
    (see \code{\link[base]{palette}} and \code{\link[base]{par}}).}
  \item{pch}{plotting characters for points; default is the plotting characters in
    order (see \code{\link[base]{par}}).}
  \item{legend.plot}{if \code{TRUE} then a legend for the groups is plotted, and positioned
    interactively with a mouse click; if points are to be labelled, then the legend is 
    positioned first.}
  \item{reset.par}{if \code{TRUE} then plotting parameters are reset to their previous values
    when \code{scatterplot} exits; if \code{FALSE} then the \code{mar} and \code{mfcol} parameters are
    altered for the current plotting device. Set to \code{FALSE} if you want to add graphical elements
    (such as lines) to the plot.}
  \item{\dots}{other arguments passed to \code{plot}.}
}

\value{
  \code{NULL}. This function is used for its side effect: producing
  a plot.
}

\author{John Fox \email{jfox@mcmaster.ca}}


\seealso{\code{\link[base]{plot}}, \code{\link[base]{boxplot}}, 
  \code{\link{scatterplot.matrix}}, \code{\link{data.ellipse}},
  \code{\link[base]{par}}, \code{\link[MASS]{cov.trob}}.}

\examples{
data(Prestige)
scatterplot(prestige~income|type, data=Prestige, span=1, legend=FALSE)
  \dontrun{
data(UN)
scatterplot(infant.mortality~gdp, labels=row.names(UN), data=UN)

scatterplot(infant.mortality~gdp, log="xy", labels=row.names(UN), data=UN)
  }
}

\keyword{hplot}

\eof
\name{scatterplot.matrix}
\alias{scatterplot.matrix}
\alias{scatterplot.matrix.formula}
\alias{scatterplot.matrix.default}
\alias{spm}
\title{Scatterplot Matrices}
\description{
  Scatterplot matrices with univariate displays down the diagonal;
  \code{spm} is an abbreviation for \code{scatterplot.matrix}.
  This function just sets up a call to \code{pairs}.
}
\usage{

scatterplot.matrix(x, ...)

\method{scatterplot.matrix}{formula}(formula, data=NULL, subset, ...)

\method{scatterplot.matrix}{default}(x, labels=colnames(x), 
    diagonal=c("density", "boxplot", "histogram", "qqplot", "none"), 
    adjust=1, nclass, plot.points=TRUE, smooth=TRUE, span=0.5, reg.line=lm, 
    transform=FALSE, ellipse=FALSE, levels=c(.5, .9), robust=FALSE,
    groups=FALSE, by.groups=FALSE, col=palette(), 
    pch=1:n.groups, lwd=1, legend.plot=length(levels(groups)) > 1, ...)

spm(x, ...)
}

\arguments{
  \item{x}{a data matrix, numeric data frame, or formula.}
  \item{formula}{a one-sided ``model'' formula, of the form 
    \code{ ~ x1 + x2 + ... + xk} or \code{ ~ x1 + x2 + ... + xk | z} where \code{z}
    evaluates to a factor or other variable to divide the data into groups.}
  \item{data}{for \code{scatterplot.matrix.formula},
    a data frame within which to evaluate the formula.}
  \item{subset}{expression defining a subset of observations.}
  \item{labels}{variable labels (for the diagonal of the plot).}
  \item{diagonal}{contents of the diagonal panels of the plot.}
  \item{adjust}{relative bandwidth for density estimate, passed to 
    \code{density} function.}
  \item{nclass}{number of bins for histogram, passed to \code{hist}
    function.}
  \item{plot.points}{if \code{TRUE} the points are plotted in each 
    off-diagonal panel.}
  \item{smooth}{if \code{TRUE} a lowess smooth is plotted in each
    off-diagonal panel.}
  \item{span}{span for lowess smoother.}
  \item{reg.line}{if not \code{FALSE} a line is plotted using the
    function given by this argument; e.g., using \code{rlm} in
    package \code{MASS} plots a robust-regression line.}
  \item{transform}{if \code{TRUE}, multivariate normalizing Box-Cox transformations
    are computed and plotted; if a vector of powers, one for each variable, these are applied
    as Box-Cox power transformations prior to plotting.}
  \item{ellipse}{if \code{TRUE} data-concentration ellipses are plotted in
    the off-diagonal panels.}
  \item{levels}{level or levels at which concentration ellipses are plotted;
    the default is \code{c(.5, .9)}.}
  \item{robust}{if \code{TRUE} use the \code{cov.trob} function in the \code{MASS} package
    to calculate the center and covariance matrix for the data ellipse.}
  \item{groups}{a factor or other variable dividing the data into groups; groups are
    plotted with different colors and plotting characters.}
  \item{by.groups}{if \code{TRUE}, regression lines are fit by groups.}
  \item{pch}{plotting characters for points; default is the plotting characters in
    order (see \code{\link[base]{par}}).}
  \item{col}{colors for points and lines; the default is the
    current color palette, starting at the \emph{second} entry
    (see \code{\link[base]{palette}} and \code{\link[base]{par}}).}
  \item{lwd}{width for lines.}
  \item{legend.plot}{if \code{TRUE} then a legend for the groups is plotted
    in the bottom-right cell.}
  \item{...}{arguments to pass down.}
}

\value{
  \code{NULL}. This function is used for its side effect: producing
  a plot.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link[base]{pairs}}, \code{\link{scatterplot}},
  \code{\link{data.ellipse}}, \code{\link{box.cox.powers}}, 
  \code{\link{box.cox}}, \code{\link[MASS]{cov.trob}}.}

\examples{
data(Duncan)
scatterplot.matrix(~income + education + prestige | type, data=Duncan)
scatterplot.matrix(~income + education + prestige, 
    transform=TRUE, data=Duncan)
}

\keyword{hplot}

\eof
\name{spread.level.plot}
\alias{spread.level.plot}
\alias{slp}
\alias{spread.level.plot.formula}
\alias{spread.level.plot.default}
\alias{spread.level.plot.lm}
\alias{print.spread.level.plot}

\title{Spread-Level Plots}
\description{
  Creates plots for examining the possible dependence of
  spread on level, or an extension of these plots to the studentized residuals
  from linear models.
}
\usage{
spread.level.plot(x, ...)

slp(x, ...)

\method{spread.level.plot}{formula}(formula, data=NULL, subset, na.action, 
    main=paste("Spread-Level Plot for", varnames[response], 
    "by", varnames[-response]), ...)

\method{spread.level.plot}{default}(x, by, 
  robust.line=any("MASS"==.packages(all=TRUE)), 
  start=0, xlab="Median", ylab="Hinge-Spread", las=par("las"),
  main=paste("Spread-Level Plot for", deparse(substitute(x)), 
  "by", deparse(substitute(by))), col=palette()[2], pch=1, lwd=2, ...)

\method{spread.level.plot}{lm}(x, start=0, 
  robust.line=any("MASS"==.packages(all=TRUE)), 
  xlab="Fitted Values",
  ylab="Absolute Studentized Residuals", las=par("las"),
  main=paste("Spread-Level Plot for", deparse(substitute(x))),
  pch=1, col=palette()[2], lwd=2, ...)
  
\method{print}{spread.level.plot}(x, ...)
}

\arguments{
  \item{x}{a formula or an \code{lm} object to be plotted; alternatively a numeric vector.}
  \item{formula}{a formula of the form \code{y~x}, where \code{y} is a numeric vector
    and \code{x} is a factor.}
  \item{data}{an optional data frame containing the variables to be plotted. 
    By default the variables are taken from the environment from which 
    \code{spread.level.plot} is called.}
  \item{subset}{an optional vector specifying a subset of observations to be used.}
  \item{na.action}{a function that indicates what should happen when the data contain \code{NA}s. 
    The default is set by the \code{na.action} setting of \code{options}.}
  \item{by}{a factor, numeric or character vector defining groups.}
  \item{robust.line}{if \code{TRUE} a robust line is fit using the \code{rlm} function in
    the \code{MASS} package; if \code{FALSE} a line is fit using \code{lm}.}
  \item{start}{add the constant \code{start} to each data value.}
  \item{main}{title for the plot.}
  \item{xlab}{label for horizontal axis.}
  \item{ylab}{label for vertical axis.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{col}{color for points and lines; the default is the \emph{second} entry
    in the current color palette (see \code{\link[base]{palette}}
    and \code{\link[base]{par}}).}
  \item{pch}{plotting character for points; default is \code{1} 
    (a circle, see \code{\link[base]{par}}).}
  \item{lwd}{line width; default is \code{2} (see \code{\link[base]{par}}).}
  \item{...}{arguments passed to plotting functions.}
}
\details{
  Except for linear models, computes the statistics for, and plots, a Tukey spread-level plot
  of log(hinge-spread) vs. log(median) for the groups; fits a line to the plot; and calculates a
  spread-stabilizing transformation from the slope of the line. 
  
  For linear models, plots log(abs(studentized residuals)) vs. log(fitted values).
  
  The function \code{slp} is an abbreviation for \code{spread.level.plot}.
}
\value{
  A list containing:
  \item{Statistics}{a matrix with the lower-hinge, median, upper-hinge, and hinge-spread
    for each group. (Not for an \code{lm} object.)}
  \item{PowerTransformation}{spread-stabilizing power transformation, calculated as 1 minus the slope
    of the line fit to the plot.}
}
\references{ 
  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
  
  Hoaglin, D. C., Mosteller, F. and Tukey, J. W. (Eds.) (1983)
  \emph{Understanding Robust and Exploratory Data Analysis.} Wiley.
}
\author{John Fox \email{jfox@mcmaster.ca}}

\seealso{\code{\link{hccm}}, \code{\link{ncv.test}} }

\examples{
data(Ornstein)
attach(Ornstein)
spread.level.plot(interlocks+1~nation)
## $Statistics
##     LowerHinge Median UpperHinge Hinge-Spread
## US           2    6.0         13           11
## UK           4    9.0         14           10
## CAN          6   13.0         30           24
## OTH          4   15.5         24           20
## 
## $PowerTransformation
## [1] 0.1534487
mod<-lm(interlocks ~ assets + sector + nation)
slp(mod)
## $PowerTransformation
## [1] 0.3222165
## 
## Warning message: 
## Start =  3 added to fitted values to avoid 0 or negative values. in: spread.level.plot.lm(x, ...) 
}

\keyword{hplot}
\keyword{regression}

\eof
\name{subsets}
\alias{subsets}
\alias{subsets.regsubsets}
\title{Plot Output from regsubsets Function in leaps package}
\description{
  The \code{\link[leaps]{regsubsets}} function in the \code{leaps} package finds
  optimal subsets of predictors. This function plots a measure of fit
  (see the \code{statistic} argument below) against subset size.
}
\usage{
subsets(object, ...)

\method{subsets}{regsubsets}(object, 
    names=abbreviate(object$xnames, minlength = abbrev), 
    abbrev=1, min.size=1, max.size=length(names), legend, 
    statistic=c("bic", "cp", "adjr2", "rsq", "rss"), 
    las=par('las'), cex.subsets=1, ...)
}

\arguments{
  \item{object}{a \code{regsubsets} object produced by the \code{regsubsets} function
    in the \code{leaps} package.}
  \item{names}{a vector of (short) names for the predictors, excluding the
    regression intercept, if one is present; if missing, these are
    derived from the predictor names in \code{object}.}
  \item{abbrev}{minimum number of characters to use in abbreviating predictor names.}
  \item{min.size}{minimum size subset to plot; default is 1.}
  \item{max.size}{maximum size subset to plot; default is number of predictors.}
  \item{legend}{\code{TRUE} to plot a legend of predictor names; defaults to \code{TRUE} if
    abbreviations are computed for predictor names. The legend is placed on the
    plot interactively with the mouse.}
  \item{statistic}{statistic to plot for each predictor subset; one of: 
    \code{"bic"}, Bayes Information Criterion; 
    \code{"cp"}, Mallows's \eqn{C_{p}}{Cp};
    \code{"adjr2"}, \eqn{R^{2}}{R^2} adjusted for degrees of freedom;
    \code{"rsq"}, unadjusted \eqn{R^{2}}{R^2};
    \code{"rss"}, residual sum of squares.}
  \item{las}{if \code{0}, tick labels are drawn parallel to the
    axis; set to \code{1} for horizontal labels (see \code{\link[base]{par}}).}
  \item{cex.subsets}{can be used to change the relative size of the characters used to
    plot the regression subsets; default is \code{1}.}
  \item{\dots}{arguments to be passed down to 
    \code{subsets.regsubsets} and \code{plot}.}
}

\value{
  \code{NULL}. This function is used for its side effect --
  to create a plot.
}
 
\author{John Fox}

\seealso{\code{\link[leaps]{regsubsets}}}

\examples{
    \dontrun{
library(leaps)
data(Ericksen)
subsets(regsubsets(undercount ~ ., data=Ericksen))
    }
}

\keyword{hplot}
\keyword{regression}

\eof
\name{vif}
\alias{vif}
\alias{vif.lm}
\alias{vif.default}

\title{Variance Inflation Factors}
\description{
  Calculates variance-inflation and generalized variance-inflation factors
  for linear models.
}
\usage{
vif(mod)

\method{vif}{lm}(mod)

\method{vif}{default}(mod)
}
 
\arguments{
  \item{mod}{an unweighted \code{lm} object.}
}
\details{
  If all terms in the model have 1 df, then the usual variance-inflation
  factors are calculated.
  
  If any terms have more than 1 df, then generalized variance-inflation factors
  (Fox and Monette, 1992) are calculated. These are interpretable as the inflation
  in size of the confidence ellipse or ellipsoid for the coefficients of the term in
  comparison with what would be obtained for orthogonal data. 
  
  The generalized vifs
  are invariant with respect to the coding of the terms in the model (as long as
  the subspace of the columns of the model matrix pertaining to each term is
  invariant). To adjust for the dimension of the confidence ellipsoid, the function
  also prints \eqn{GVIF^{1/(2\times df)}}{GVIF^(1/(2*df))}.
  
  Currently, \code{vif} is only defined for linear models; \code{vif.default} is
  a dummy function that generates an error.
}
\value{
  A vector of vifs, or a matrix containing one row for each term in the model, and
  columns for the GVIF, df, and \eqn{GVIF^{1/(2\times df)}}{GVIF^(1/(2*df))}.
}
\references{ 
  Fox, J. and Monette, G. (1992)
  Generalized collinearity diagnostics.
  \emph{JASA}, \bold{87}, 178--183.

  Fox, J. (1997)
  \emph{Applied Regression, Linear Models, and Related Methods.} Sage.
}
\author{John Fox \email{jfox@mcmaster.ca}}

\examples{
data(Duncan)
vif(lm(prestige~income+education, data=Duncan))
##    income education 
##  2.104900  2.104900 
vif(lm(prestige~income+education+type, data=Duncan))
##               GVIF Df GVIF^(1/2Df)
## income    2.209178  1     1.486330
## education 5.297584  1     2.301648
## type      5.098592  2     1.502666
}

\keyword{regression}

\eof
\name{which.names}
\alias{which.names}

\title{Position of Row Names}
\description{
  This function returns the indices of row names in a data frame
  or a vector of names.
}
\usage{
which.names(names, object)
}

\arguments{
  \item{names}{a name or character vector of names.}
  \item{object}{a data frame or character vector of (row) names.}
}

\value{
Returns the index or indices of \code{names} within \code{object}.
}

\author{John Fox \email{jfox@mcmaster.ca}}

\examples{
data(Duncan)
which.names(c('minister', 'conductor'), Duncan)
## [1]  6 16
}
\keyword{utilities}

\eof
