\name{api}
\alias{api}
\alias{apipop}
\alias{apiclus1}
\alias{apiclus2}
\alias{apistrat}
\docType{data}
\title{Student performance in California schools}
\description{
The Academic Performance Index is computed for all California schools
based on standardised testing of students. The data sets contain
information for all schools with at least 100 students and for various
probability samples of the data.
}
\usage{
data(apipop)
data(apistrat)
data(apiclus1)
data(apiclus2)
}
\format{
  The full population data in \code{apipop} are a data frame with 6194 observations on the following 37 variables.
  \describe{
    \item{cds}{Unique identifier}
    \item{stype}{Elementary/Middle/High School}
    \item{name}{School name (15 characters)}
    \item{sname}{School name (40 characters)}
    \item{snum}{School number}
    \item{dname}{District name}
    \item{dnum}{District number}
    \item{cname}{County name}
    \item{cnum}{County number}
    \item{flag}{reason for missing data}
    \item{pcttest}{percentage of students tested}
    \item{api00}{API in 2000}
    \item{api99}{API in 1999}
    \item{target}{target for change in API}
    \item{growth}{Change in API}
    \item{sch.wide}{Met school-wide growth target?}
    \item{comp.imp}{Met Comparable Improvement target}
    \item{both}{Met both targets}
    \item{awards}{Eligible for awards program}
    \item{meals}{Percentage of students eligible for subsidized meals}
    \item{ell}{`English Language Learners' (percent)}
    \item{yr.rnd}{Year-round school}
    \item{mobility}{percentage of students for whom this is the first
      year at the school}
    \item{acs.k3}{average class size years K-3}
    \item{acs.46}{average class size years 4-6}
    \item{acs.core}{Number of core academic courses}
    \item{pct.resp}{percent where parental education level is known}
    \item{not.hsg}{percent parents not high-school graduates}
    \item{hsg}{percent parents who are high-school graduates}
    \item{some.col}{percent parents with some college}
    \item{col.grad}{percent parents with college degree}
    \item{grad.sch}{percent parents with postgraduate education}
    \item{avg.ed}{average parental education level}
    \item{full}{percent fully qualified teachers}
    \item{emer}{percent teachers with emergency qualifications}
    \item{enroll}{number of students enrolled}
    \item{api.stu}{number of students tested.}
  }
  The other data sets contain additional variables \code{pw} for
  sampling weights and \code{fpc} to compute finite population
  corrections to variance.
}
\details{
  \code{apipop} is the entire population,
  \code{apiclus1} is a cluster sample of school districts, \code{apistrat} is
  a sample stratified by \code{stype}, and \code{apiclus2} is a two-stage
  cluster sample of schools within districts.
}
\source{
  Data were obtained from the survey sampling help pages of UCLA
  Academic Technology Services, at \url{http://www.ats.ucla.edu/stat/stata/Library/svy_survey.htm}.
}
\references{
  The API program and original data files are at  \url{http://api.cde.ca.gov/}
}
\examples{
library(survey)
data(api)
mean(apipop$api00)
sum(apipop$enroll)

#stratified sample
dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
summary(dstrat)
svymean(~api00, dstrat)
svytotal(~enroll, dstrat)

# one-stage cluster sample
dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
summary(dclus1)
svymean(~api00, dclus1)
svytotal(~enroll, dclus1)

# two-stage cluster sample
dclus2<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
summary(dclus2)
svymean(~api00, dclus2)
svytotal(~enroll, dclus2)

# convert to replicate weights
rclus1<-as.svrepdesign(dclus1)
summary(rclus1)
svrepmean(~api00, rclus1)
svreptotal(~enroll, rclus1)

# post-stratify on school type
pop.types<-xtabs(~stype, data=apipop)

rclus1p<-postStratify(rclus1, ~stype, pop.types)
summary(rclus1p)
svrepmean(~api00, rclus1p)
svreptotal(~enroll, rclus1p)

}
\keyword{datasets}

\eof
\name{as.svrepdesign}
\alias{as.svrepdesign}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Convert a survey design to use replicate weights}
\description{
  Creates a replicate-weights survey design object from a traditional
  strata/cluster survey design object. 
}
\usage{
as.svrepdesign(design, type=c("auto", "JK1", "JKn", "BRR", "Fay"),
fay.rho = 0, ...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{design}{Object of class \code{survey.design} }
  \item{type}{Type of replicate weights. \code{"auto"} uses JKn for
    stratified, JK1 for unstratified designs}
  \item{fay.rho}{Tuning parameter for Fay's variance method }
  \item{\dots}{Other arguments to \code{\link{brrweights}}, controlling
    the handling of strata that do not have exactly 2 clusters.}
}

\value{
  Object of class \code{svyrep.design}.
}

\seealso{\code{\link{brrweights}}, \code{\link{svydesign}}, \code{\link{svrepdesign}}}

\examples{
data(scd)
scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
nest=TRUE, fpc=rep(5,6))
scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
nest=TRUE)

# convert to BRR replicate weights
scd2brr <- as.svrepdesign(scdnofpc, type="BRR")
# convert to JKn weights 
scd2jkn <- as.svrepdesign(scdnofpc, type="JKn")

# convert to JKn weights with finite population correction
scd2jknf <- as.svrepdesign(scddes, type="JKn")

svrepratio(~alive, ~arrests, design=scd2brr)
svrepratio(~alive, ~arrests, design=scd2jkn)
svrepratio(~alive, ~arrests, design=scd2jknf)

data(api)
dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
rclus1<-as.svrepdesign(dclus1)

svrepmean(~api00, rclus1)
svreptotal(~enroll, rclus1)

}
\keyword{survey}% at least one, from doc/KEYWORDS


\eof
\name{brrweights}
\alias{jk1weights}
\alias{jknweights}
\alias{brrweights}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Compute replicate weights }
\description{
  Compute replicate weights from a survey design. These functions are
  usually called from \code{\link{as.svrepdesign}} rather than directly
  by the user.
}
\usage{
brrweights(strata, psu, match = NULL, small = c("fail", "split",
"merge"), large = c("split", "merge", "fail"))
jk1weights(psu,fpc=NULL, fpctype=c("population","fraction","correction"))
jknweights(strata,psu, fpc=NULL, fpctype=c("population","fraction","correction"))
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{strata}{Stratum identifiers }
  \item{psu}{PSU (cluster) identifier }
  \item{match}{Optional variable to use in matching. }
  \item{small}{How to handle strata with only one PSU}
  \item{large}{How to handle strata with more than two PSUs}
  \item{fpc}{Optional population (stratum) size or finite population correction }
  \item{fpctype}{How \code{fpc} is coded.}
}
\details{
  JK1 and JKn are  jackknife schemes for unstratified and stratified
  designs respectively.  The finite population correction may be
  specified as a single number, a vector with one entry per stratum, or
  a vector with one entry per observation (constant within strata). 
  When \code{fpc} is a vector with one entry per stratum it may not have
  names that differ from the stratum identifiers (it may have no names,
  in which case it must be in the same order as
  \code{unique(strata)}). To specify population stratum sizes use
  \code{fpctype="population"}, to specify sampling fractions use
  \code{fpctype="fraction"} and to specify the correction directly use
  \code{fpctype="correction"}.
  
  In BRR variance estimation each stratum is split in two to give
  half-samples. Balanced replicated weights are needed, where
  observations in two different strata end up in the same half stratum
  as often as in different half-strata. BRR, strictly speaking, is
  defined only when each stratum has exactly
  two PSUs.  A stratum with one PSU can be merged with another such
  stratum, or can be split to appear in both half samples with half
  weight.  The latter approach is appropriate for a PSU that was
  deterministically sampled.
  
  A stratum with more than two PSUs can be split into multiple smaller
  strata each with two PSUs or the PSUs can be merged to give two
  superclusters within the stratum.
  
  When merging small strata or grouping PSUs in large strata the
  \code{match} variable is used to sort PSUs before merging, to give
  approximate matching on this variable.
  

}
\value{
For \code{brrweights} a list with elements
  \item{weights}{two-column matrix indicating the weight for each
    half-stratum in one particular set  of split samples}
  \item{wstrata}{New stratum variable incorporating merged or split strata}
  \item{strata}{Original strata for distinct PSUs}
  \item{psu}{Distinct PSUs}
  \item{npairs}{Dimension of Hadamard matrix used in BRR construction}
  \item{sampler}{function returning replicate weights}
  For \code{jk1weights} and \code{jknweights} a data frame of replicate
  weights and the \code{scale} and \code{rscales} arguments to \code{\link{svrVar}}.
}
\references{Levy and Lemeshow "Sampling of Populations". Wiley.}

\seealso{\code{\link{hadamard}}, \code{\link{as.svrepdesign}},
  \code{\link{svrVar}}}

\examples{

 

}
\keyword{survey}% at least one, from doc/KEYWORDS


\eof
\name{fpc}
\alias{fpc}
\non_function{}
\title{Small survey example}
\usage{data(fpc)}
\description{
The \code{fpc} data frame has 8 rows and 6 columns. It is artificial
data to illustrate survey sampling estimators.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{stratid}{Stratum ids}
    \item{psuid}{Sampling unit ids}
    \item{weight}{Sampling weights}
    \item{nh}{number sampled per stratum}
    \item{Nh}{population size per stratum}
    \item{x}{data}
  }
}

\source{
\url{http://www.stata-press.com/data/r7/fpc.dta}
}

\examples{
data(fpc)
fpc


withoutfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid, variables=~x, data=fpc, nest=TRUE)

withoutfpc
svymean(~x, withoutfpc)

withfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid,
fpc=~Nh, variables=~x, data=fpc, nest=TRUE)

withfpc
svymean(~x, withfpc)

## Other equivalent forms 
withfpc<-svydesign(prob=~I(1/weight), ids=~psuid, strata=~stratid,
fpc=~Nh, variables=~x, data=fpc, nest=TRUE)

svymean(~x, withfpc)

withfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid,
fpc=~I(nh/Nh), variables=~x, data=fpc, nest=TRUE)

svymean(~x, withfpc)

withfpc<-svydesign(weights=~weight, ids=~interaction(stratid,psuid), strata=~stratid,
fpc=~I(nh/Nh), variables=~x, data=fpc)

svymean(~x, withfpc)

fpctbl<-data.frame(strata=c(1,2), sizes=c(15,12))

withfpc<-svydesign(weights=~weight, ids=~psuid, strata=~stratid, fpc=fpctbl, variables=~x, data=fpc, nest=TRUE)

svymean(~x, withfpc)

}
\keyword{datasets}

\eof
\name{hadamard}
\alias{hadamard}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Hadamard matrices }
\description{
Returns a Hadamard matrix of dimension larger than the argument.
}
\usage{
hadamard(n)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{n}{lower bound for size }
}

\value{
A Hadamard matrix
}

\seealso{\code{\link{brrweights}}}
\examples{
image(hadamard(30))
}
\keyword{survey}


\eof
\name{hospital}
\alias{hospital}
\non_function{}
\title{Sample of obstetric hospitals }
\usage{data(hospital)}
\description{
The \code{hospital} data frame has 15 rows and 5 columns. 
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{hospno}{a numeric vector}
    \item{oblevel}{a numeric vector}
    \item{weighta}{a numeric vector}
    \item{tothosp}{a numeric vector}
    \item{births}{a numeric vector}
  }
}
\source{
\url{http://www.ats.ucla.edu/stat/books/sop/hospsamp.dta}
}
\references{
Levy and Lemeshow. "Sampling of Populations" (3rd edition). Wiley.
}
\examples{
data(hospital)
hospdes<-svydesign(strata=~oblevel, id=~hospno, weights=~weighta,
fpc=~tothosp, data=hospital)
hosprep<-as.svrepdesign(hospdes)

svytotal(~births, design=hospdes)
svreptotal(~births, design=hosprep)
}
\keyword{datasets}

\eof
\name{postStratify}
\alias{postStratify}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Post-stratify a replicate weight survey }
\description{
 Post-stratification adjusts the sampling and replicate weights so that
 the joint distribution of a set of post-stratifying variables matches
 the known population joint distribution. The advantage of
 post-stratification is that sampling frames need not be available for
 the strata.
}
\usage{
postStratify(design, strata, population, partial = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{design}{A survey design with replicate weights}
  \item{strata}{A formula or data frame of post-stratifying variables }
  \item{population}{A \code{\link{table}}, \code{\link{xtabs}} or \code{data.frame}
    with population frequencies }
  \item{partial}{if \code{TRUE}, ignore population strata not present in
  the sample}
}
\details{
  The \code{population} totals can be specified as a table with the
  strata variables in the margins, or as a data frame where one column
  lists frequencies and the other columns list the unique combinations
  of strata variables (the format produced by \code{as.data.frame}
  acting on a \code{table} object).
  
  A table must have named dimnames to indicate the variable
  names. 
}
\value{
 A new survey design object.
}
\references{ }

\seealso{\code{\link{as.svrepdesign}}, \code{\link{svrepdesign}}, \code{\link{rake}} }
\examples{
data(api)
dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
rclus1<-as.svrepdesign(dclus1)

svrepmean(~api00, rclus1)
svreptotal(~enroll, rclus1)

# post-stratify on school type
pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
#or: pop.types <- xtabs(~stype, data=apipop)
#or: pop.types <- table(stype=apipop$stype)

rclus1p<-postStratify(rclus1, ~stype, pop.types)
summary(rclus1p)
svrepmean(~api00, rclus1p)
svreptotal(~enroll, rclus1p)

}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{manip}

\eof
\name{rake}
\alias{rake}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Raking of survey design}
\description{
  Raking uses iterative post-stratification to match marginal
  distributions of a survey sample to known population margins. 
}
\usage{
rake(design, sample.margins, population.margins, control = list(maxit =
10, epsilon = 1, verbose=FALSE))
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{design}{A survey design with replicate weights }
  \item{sample.margins}{list of formulas or data frames describing
    sample margins}
  \item{population.margins}{list of tables or data frames
    describing corresponding population margins }
  \item{control}{\code{maxit} controls the number of
    iterations. Convergence is declared if the maximum change in a table
    entry is less than  \code{epsilon}. If \code{epsilon<1} it is
    taken to be a fraction of the total sampling weight. }
}
\details{
  The \code{sample.margins} should be in a format suitable for \code{\link{postStratify}}.
  
  Raking is known to converge for any table without zeros, and for any
  table with zeros for which there is a joint distribution with the
  given margins and the same pattern of zeros.   The `margins' need not
  be one-dimensional.
  
  The algorithm works by repeated calls to \code{\link{postStratify}},
  perhaps the least efficient possible implementation.
}
\value{
 A raked survey design. 
}

\seealso{\code{\link{as.svrepdesign}}, \code{\link{svrepdesign}}, \code{\link{postStratify}} }
\examples{
data(api)
dclus1 <- svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
rclus1 <- as.svrepdesign(dclus1)

svrepmean(~api00, rclus1)
svreptotal(~enroll, rclus1)

## population marginal totals for each stratum
pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))

rclus1r <- rake(rclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))

svrepmean(~api00, rclus1r)
svreptotal(~enroll, rclus1r)

## marginal totals correspond to population
xtabs(~stype, apipop)
svreptable(~stype, rclus1r, round=TRUE)
xtabs(~sch.wide, apipop)
svreptable(~sch.wide, rclus1r, round=TRUE)

## joint totals don't correspond 
xtabs(~stype+sch.wide, apipop)
svreptable(~stype+sch.wide, rclus1r, round=TRUE)

## compare to joint post-stratification
## (only possible if joint population table is known)
##
pop.table <- xtabs(~stype+sch.wide,apipop)
rclus1ps <- postStratify(rclus1, ~stype+sch.wide, pop.table)
svreptable(~stype+sch.wide, rclus1ps, round=TRUE)

svrepmean(~api00, rclus1ps)
svreptotal(~enroll, rclus1ps)

}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{manip}

\eof
\name{regTermTest}
\alias{regTermTest}
\alias{print.regTermTest}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Wald test for a term in a regression model}
\description{
  Provides a Wald test of the hypothesis that all coefficients
  associated with a particular regression term are zero (or have some
  other specified values). Particularly useful as a substitute for
  \code{\link{anova}} when not fitting by maximum likelihood.
}
\usage{
regTermTest(model, test.terms, null=NULL)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{model}{A model object with \code{\link{coef}} and \code{\link{vcov}} methods}
  \item{test.terms}{Character string or one-sided formula giving name of
    term or terms to test}
  \item{null}{Null hypothesis values for parameters. Default is zeros}
}
\value{
  An object of class \code{regTermTest}.
}

\seealso{\code{\link{anova}}, \code{\link{vcov}}, \code{\link{contrasts}}}
\examples{
 data(esoph)
 model1 <- glm(cbind(ncases, ncontrols) ~ agegp + tobgp * 
     alcgp, data = esoph, family = binomial())
 anova(model1)

 regTermTest(model1,"tobgp")
 regTermTest(model1,"tobgp:alcgp")
 regTermTest(model1, ~alcgp+tobgp:alcgp)
}
\keyword{regression}% at least one, from doc/KEYWORDS


\eof
\name{scd}
\alias{scd}
\non_function{}
\title{Survival in cardiac arrest}
\usage{data(scd)}
\description{
These data are from Section 12.2 of Levy and Lemeshow.  They describe
(a possibly apocryphal) study of survival in out-of-hospital cardiac
arrest. Two out of five ambulance stations were sampled from each of
three emergency service areas.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{ESA}{Emergency Service Area (strata)}
    \item{ambulance}{Ambulance station (PSU)}
    \item{arrests}{estimated number of cardiac arrests}
    \item{alive}{number reaching hospital alive}
  }
}
\source{
Levy and Lemeshow. "Sampling of Populations" (3rd edition). Wiley.
}
\examples{
data(scd)

## survey design objects
scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
nest=TRUE, fpc=rep(5,6))
scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
nest=TRUE)

# convert to BRR replicate weights
scd2brr <- as.svrepdesign(scdnofpc, type="BRR")

# use BRR replicate weights from Levy and Lemeshow
repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
c(0,1,0,1,1,0))
scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)

# ratio estimates
svyratio(~alive, ~arrests, design=scddes)
svyratio(~alive, ~arrests, design=scdnofpc)
svrepratio(~alive, ~arrests, design=scd2brr)
svrepratio(~alive, ~arrests, design=scdrep)

# or a logistic regression
summary(svyglm(cbind(alive,arrests-alive)~1, family=binomial, design=scdnofpc))
summary(svrepglm(cbind(alive,arrests-alive)~1, family=binomial, design=scdrep))

}
\keyword{datasets}

\eof
\name{subset.survey.design}
\alias{subset.survey.design}
\alias{subset.svyrep.design}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Subset of survey}
\description{
Restrict a survey design to a subpopulation, keeping the original design information about number of clusters, strata. 
}
\usage{
\method{subset}{survey.design}(x, subset, ...)
\method{subset}{svyrep.design}(x, subset, ...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{x}{A survey design object}
  \item{subset}{An expression specifying the subpopulation}
  \item{\dots}{Arguments not used by this method}
}
\value{
  A new survey design object
}

\seealso{\code{\link{svydesign}}}

\examples{
data(fpc)
dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
dsub<-subset(dfpc,x>4)
summary(dsub)
## These should give the same estimates and variances
svymean(~x,dsub)
svyglm(x~I(x>4)+0,design=dfpc)

data(api)
dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
rclus1<-as.svrepdesign(dclus1)
svymean(~enroll, subset(dclus1, sch.wide=="Yes" & comp.imp=="Yes"))
svrepmean(~enroll, subset(rclus1, sch.wide=="Yes" & comp.imp=="Yes"))

}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{manip}% __ONLY ONE__ keyword per line

\eof
\name{surveysummary}
\alias{svymean}
\alias{svrepmean}
\alias{svytotal}
\alias{print.svystat}
\alias{print.svrepstat}
\alias{svreptotal}
\alias{svyvar}
\alias{svrepvar}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Summary statistics for sample surveys}
\description{
Compute means, variances, ratios and totals for data from complex surveys.
}
\usage{
svymean(x, design, na.rm=FALSE,deff=FALSE) 
svrepmean(x, design, na.rm=FALSE, rho=NULL, return.replicates=FALSE, deff=FALSE) 
svyvar(x, design, na.rm=FALSE) 
svrepvar(x, design, na.rm=FALSE, rho=NULL, return.replicates=FALSE) 
svytotal(x, design, na.rm=FALSE,deff=FALSE) 
svreptotal(x, design, na.rm=FALSE, rho=NULL, return.replicates=FALSE, deff=FALSE) 
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{x}{A formula, vector or matrix}
  \item{design}{\code{survey.design} or \code{svyrep.design} object}
  \item{na.rm}{Should missing values be removed?}
  \item{rho}{parameter for Fay's variance estimator in a BRR design}
  \item{return.replicates}{Return the replicate means?}
  \item{deff}{Return the design effect}
}
\details{
  
These functions perform weighted estimation, with each observation being
weighted by the inverse of its sampling probability.  Except for the
table functions, these also give precision estimates that incorporate
the effects of stratification and clustering.

The \code{svytotal} and \code{svreptotal} functions estimate a
population total.  Use \code{predict} on \code{\link{svyratio}},
\code{\link{svrepratio}}, \code{\link{svyglm}}, \code{\link{svrepglm}}
to get ratio or regression estimates of totals.

The design effect compares the variance of a mean or total to the
variance of a simple random sample of the same size.  Although strictly
speaking this should be a simple random sample without replacement, we
compute as if it were a simple random sample with replacement.

}
\value{
  Objects of class \code{"svystat"} or \code{"svrepstat"},
    which are vectors with a \code{"var"} attribute giving the variance
    and a \code{"statistic"} attribute giving the name of the statistic.
    
}

\author{Thomas Lumley}

\seealso{ \code{\link{svydesign}}, \code{\link{as.svrepdesign}},
  \code{\link{svrepdesign}}, \code{\link{svyCprod}}, \code{\link{mean}},
  \code{\link{var}}, \code{\link{svyquantile}} }

\examples{

  data(api)
  ## population
  mean(apipop$api00)
  quantile(apipop$api00,c(.25,.5,.75))
  var(apipop$api00)
  sum(apipop$enroll)
  sum(apipop$api.stu)/sum(apipop$enroll)

  ## one-stage cluster sample
  dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
  summary(dclus1)
  svymean(~api00, dclus1, deff=TRUE)
  svyquantile(~api00, dclus1, c(.25,.5,.75))
  svyvar(~api00, dclus1)
  svytotal(~enroll, dclus1, deff=TRUE)
  svyratio(~api.stu, ~enroll, dclus1)

  #stratified sample
  dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
  summary(dstrat)
  svymean(~api00, dstrat)
  svyquantile(~api00, dstrat, c(.25,.5,.75))
  svyvar(~api00, dstrat)
  svytotal(~enroll, dstrat)
  svyratio(~api.stu, ~enroll, dstrat)

  # replicate weights - jackknife (this is slow)
  jkstrat<-as.svrepdesign(dstrat)
  summary(jkstrat)
  svrepmean(~api00, jkstrat)
  svrepvar(~api00,jkstrat)
  svrepquantile(~api00, jkstrat, c(.25,.5,.75))
  svreptotal(~enroll, jkstrat)
  svrepratio(~api.stu, ~enroll, jkstrat)

  # BRR method
  data(scd)
  repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
              c(0,1,0,1,1,0))
  scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
  svrepmean(~arrests+alive, design=scdrep)

}
\keyword{univar}% at least one, from doc/KEYWORDS
\keyword{survey}% __ONLY ONE__ keyword per line

\eof
\name{svrVar}
\alias{svrVar}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Compute variance from replicates }
\description{
Compute an appropriately scaled empirical variance estimate from
replicates.  
}
\usage{
svrVar(thetas, scale, rscales, na.action=getOption("na.action"))
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{thetas}{matrix whose rows are replicates (or a vector of replicates)}
  \item{scale}{Overall scaling factor}
  \item{rscales}{Scaling factor for each squared deviation }
  \item{na.action}{How to handle replicates where the statistic could
    not be estimated}
}
\value{
covariance matrix.  
}
\seealso{\code{\link{svrepdesign}}, \code{\link{as.svrepdesign}},
  \code{\link{brrweights}},
  \code{\link{jk1weights}}, \code{\link{jknweights}}}
\examples{

}
\keyword{survey}% at least one, from doc/KEYWORDS


\eof
\name{svrepdesign}
\alias{svrepdesign}
\alias{[.svyrep.design}
\alias{image.svyrep.design}
\alias{print.svyrep.design}
\alias{summary.svyrep.design}
\alias{print.summary.svyrep.design}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Specify survey design with replicate weights}
\description{
Some recent large-scale surveys specify replication weights rather than
the sampling design (partly for privacy reasons).  This function specifies the
data structure for such a survey. 
}
\usage{
svrepdesign(variables = NULL, repweights = NULL, weights = NULL, data =
NULL, type = c("BRR", "Fay", "JK1","JKn","other"),
combined.weights=FALSE, rho = NULL, 
scale=NULL, rscales=NULL,fpc=NULL, fpctype=c("fraction","correction"))
\method{image}{svyrep.design}(x, ..., col=grey(seq(.5,1,length=30)), type.=c("rep","total"))
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{variables}{formula or data frame specifying variables to include in the design (default is all) }
  \item{repweights}{formula or data frame specifying replication weights }
  \item{weights}{sampling weights }
  \item{data}{data frame to look up variables in formulas}
  \item{type}{Type of replication weights}
  \item{combined.weights}{\code{TRUE} if the \code{repweights} already
    include the sampling weights}
  \item{rho}{Shrinkage factor for weights in Fay's method}
  \item{scale, rscales}{Scaling constant for variance, see Details
below}
\item{fpc,fpctype}{Finite population correction information}
\item{x}{survey design with replicate weights}
\item{\dots}{Other arguments to \code{\link{image}}}
\item{col}{Colors}
\item{type.}{\code{"rep"} for only the replicate weights, \code{"total"} for the replicate and sampling weights combined.}
}
\details{
  In the BRR method, the dataset is split into halves, and the
  difference between halves is used to estimate the variance. In Fay's
  method, rather than removing observations from half the sample they
  are given weight \code{rho} in one half-sample and \code{2-rho} in the
  other.  The ideal BRR analysis is restricted to a design where each
  stratum has two PSUs; however, it has been used in a much wider class
  of surveys.
  
  The JK1 and JKn types are both jackknife estimators deleting one
  cluster at a time. JKn is designed for stratified and JK1 for
  unstratified designs. 
  
  The variance is computed as the sum of squared deviations of the
  replicates from their mean.  This may be rescaled: \code{scale} is an
  overall multiplier and \code{rscales} is a vector of
  replicate-specific multipliers for the squared deviations.  If the
  replication weights incorporate the sampling weights
  (\code{combined.weights=TRUE}) or for \code{type="other"} these must
  be specified, otherwise they can be guessed from the weights.


A finite population correction may be specified for \code{type="other"},
\code{type="JK1"} and \code{type="JKn"}.  \code{fpc} must be a vector
with one entry for each replicate. To specify sampling fractions use
\code{fpctype="fraction"} and to specify the correction directly use
\code{fpctype="correction"}.

To generate your own replicate weights either use
\code{\link{as.svrepdesign}} on a \code{survey.design} object, or see
 \code{\link{brrweights}}, \code{\link{jk1weights}} and \code{\link{jknweights}}


}
\value{
  Object of class \code{svyrep.design}, with methods for \code{print},
  \code{summary}, \code{weights}, \code{image}.
}
\references{Levy and Lemeshow. "Sampling of Populations". Wiley. }
\note{To use replication-weight analyses on a survey specified by
  sampling design, use \code{as.svrepdesign} to convert it. }

\seealso{\code{\link{as.svrepdesign}}, \code{\link{svydesign}}, \code{\link{brrweights}} }

\examples{
data(scd)
# use BRR replicate weights from Levy and Lemeshow
repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
c(0,1,0,1,1,0))
scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
svrepratio(~alive, ~arrests, scdrep)
}
\keyword{survey}% at least one, from doc/KEYWORDS


\eof
\name{svy.varcoef}
\alias{svy.varcoef}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Sandwich variance estimator for glms}
\description{
Computes the sandwich variance estimator for a generalised linear model fitted to data from a complex sample survey. Designed to be used internally by \code{\link{svyglm}}.
}
\usage{
svy.varcoef(glm.object, design)
}
\arguments{
  \item{glm.object}{A \code{\link{glm}} object}
  \item{design}{A \code{survey.design} object }
}
\value{
 A variance matrix
}
\author{ Thomas Lumley}

% ~Make other sections like WARNING with \section{WARNING }{....} ~

\seealso{\code{\link{svyglm}},\code{\link{svydesign}}, \code{\link{svyCprod}} }

\examples{

}
\keyword{regression}% at least one, from doc/KEYWORDS
\keyword{survey}% __ONLY ONE__ keyword per line

\eof
\name{svyCprod}
\alias{svyCprod}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Computations for survey variances}
\description{
Computes the sum of products needed for the variance of survey sample estimators.    
}
\usage{
svyCprod(x, strata, psu, fpc, nPSU,
      lonely.psu=getOption("survey.lonely.psu"))
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{x}{A vector or matrix}
  \item{strata}{A vector of stratum indicators, or \code{NULL}}
  \item{psu}{A vector of cluster indicators or \code{NULL}}
  \item{fpc}{A data frame of population stratum sizes or \code{NULL}}
  \item{nPSU}{Table of original sample stratum sizes (or \code{NULL})}
  \item{lonely.psu}{One of \code{"remove"}, \code{"adjust"},
    \code{"fail"}, \code{"certainty"}. See Details below}

}
\details{
 The observations for each cluster are added, then centred within each
 stratum and the outer product is taken of the row vector resulting for
 each cluster.  This is added within strata, multiplied by a
 degrees-of-freedom correction and by a finite population correction (if
 supplied) and added across strata.  

 If there are fewer clusters (PSUs) in a stratum than in the original
 design extra rows of zeroes are added to \code{x} to allow the correct
 subpopulation variance to be computed.

The variance formula gives 0/0 if a stratum contains only one sampling
unit. The options to handle this are \code{"fail"} to give an error,
\code{"remove"} or \code{"certainty"} to give a variance contribution of
0 for the stratum, and \code{"adjust"} to center the stratum at the
grand mean rather than the stratum mean.  The choice is controlled by
setting \code{options(survey.lonely.psu)}. If this is not done the
factory default is \code{"fail"}. Using \code{"adjust"} is conservative,
and it would often be better to combine strata in some intelligent
way.

The \code{"remove"} and \code{"certainty"} options give the same result,
but \code{"certainty"} is intended for situations where there is only
one PSU in the population stratum, which is sampled with certainty (also
called `self-representing' PSUs or strata). With \code{"certainty"} no
warning is generated for strata with only one PSU.  The factory default
is \code{"fail"}.

}
\value{
  A covariance matrix
}
\author{Thomas Lumley}


\seealso{\code{\link{svydesign}}, \code{\link{svy.varcoef}} }

\examples{
}
\keyword{utilities}% at least one, from doc/KEYWORDS
\keyword{survey}% __ONLY ONE__ keyword per line

\eof
\name{svyby}
\alias{svyby}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Survey statistics on subsets}
\description{
Compute survey statistics on subsets of a survey defined by factors. 
}
\usage{
svyby(formula, by, design, FUN, ..., keep.var = FALSE, keep.names = TRUE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{formula}{A formula specifying the variables to pass to \code{FUN} }
  \item{by}{A formula specifying factors that define subsets, or a list
    of factors.}
  \item{design}{A \code{svydesign} or \code{svrepdesign} object}
  \item{FUN}{A function taking a formula and survey design object as its
  first two arguments.}
  \item{\dots}{Other arguments to \code{FUN}}
  \item{keep.var}{If \code{FUN} returns a \code{svystat} object, extract
  standard errors from it}
  \item{keep.names}{Define row names based on the subsets}
}

\value{
  A data frame showing the factors and the results of \code{FUN}
}
\seealso{\code{\link{svytable}} }
\examples{
data(api)
dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)

svyby(~api99, ~stype, dclus1, svymean)
svyby(~api99, ~stype, dclus1, svyquantile, quantiles=0.5)
svyby(~api99, list(school.type=apiclus1$stype), dclus1, svymean)
svyby(~api99+api00, ~stype+sch.wide, dclus1, svymean, keep.var=TRUE)

rclus1<-as.svrepdesign(dclus1)

svyby(~api99, ~stype, rclus1, svrepmean)
svyby(~api99, ~stype, rclus1, svrepquantile, quantiles=0.5)
svyby(~api99, list(school.type=apiclus1$stype), rclus1, svrepmean)
svyby(~api99+api00, ~stype+sch.wide, rclus1, svrepmean, keep.var=TRUE)


}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{manip}% __ONLY ONE__ keyword per line

\eof
\name{svytable}
\alias{svreptable}
\alias{svytable}
\alias{svychisq}
\alias{summary.svytable}
\alias{print.summary.svytable}
\alias{summary.svreptable}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Contingency tables for survey data}
\description{
  Contingency tables and chisquared tests of association for survey data.
}
\usage{
svytable(formula, design, Ntotal = design$fpc, round = FALSE)
svreptable(formula, design, Ntotal = sum(weights(design, "sampling")), round = FALSE)
svychisq(formula, design, statistic = c("F",  "Chisq","Wald","adjWald"))
\method{summary}{svytable}(object, statistic = c("F",  "Chisq","Wald","adjWald"),...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{formula}{Model formula specifying margins for the table (using \code{+} only)}
  \item{design}{survey object}
  \item{statistic}{See Details below}
  \item{Ntotal}{A population total or set of population stratum totals
    to normalise to.}
  \item{round}{Should the table entries be rounded to the nearest
    integer?}
  \item{object}{Output from \code{svytable}}
  \item{\dots}{Other arguments to \code{summary}, not used here}
 }
\details{
 
The \code{svytable} and \code{svreptable} functions compute a weighted
crosstabulation.  If the sampling probabilities supplied to
\code{svydesign} were actual probabilities (rather than relative
probabilities) this estimates a full population crosstabulation.
Otherwise it estimates only relative proportions and should be
normalised to some convenient total such as 100 or 1.0 by specifying the
\code{Ntotal} argument.  If the formula has a left-hand side the mean or sum of this
 variable rather than the frequency is tabulated.

The \code{Ntotal} argument can be either a single number or a data frame
whose first column is the sampling strata and second column the
population size in each stratum.  In this second case the
\code{svytable} command performs `post-stratification': tabulating
and scaling to the population within strata and then adding up the
strata.

As with other \code{xtabs} objects, the output of \code{svytable} can be
processed by \code{ftable} for more attractive display. The
\code{summary} method for \code{svytable} objects calls \code{svychisq}
for a test of independence.

\code{svychisq} computes first and second-order Rao-Scott corrections to
the Pearson chisquared test, and two Wald-type tests.

The default (\code{statistic="F"}) is the Rao-Scott second-order correction.  The
p-values are computed with a Satterthwaite approximation to the
distribution.  The alternative \code{statistic="Chisq"} adjusts the
Pearson chisquared statistic by a design effect estimate and then
compares it to the chisquared distribution it would have under simple
random sampling.

The \code{statistic="Wald"} test is that proposed by Koch et al (1975)
and used by the SUDAAN software package. It is a Wald test based on the
differences between the observed cell counts and those expected under
independence. The adjustment given by \code{statistic="adjWald"} reduces
the statistic when the number of PSUs is small compared to the number of
degrees of freedom of the test. Rao and Thomas (1990) compare these
tests and find the adjustment beneficial.


At the moment, \code{svychisq} works only for 2-dimensional tables.

}
\value{
  The table commands return an \code{xtabs} object, \code{svychisq}
  returns a \code{htest} object.
}
\references{
  Koch, GG, Freeman, DH, Freeman, JL (1975) "Strategies in the
  multivariate analysis of data from complex surveys" International
  Statistical Review 43: 59-78
  
Rao, JNK, Scott, AJ (1984) "On Chi-squared Tests For Multiway
Contingency Tables with Proportions Estimated From Survey Data"  Annals
of Statistics 12:46-60.

Sribney WM (1998) "Two-way contingency tables for survey or clustered
data" Stata Technical Bulletin 45:33-49.

Thomas, DR, Rao, JNK (1990) "Small-sample comparison of level and power
for simple goodness-of-fit statistics under cluster sampling" JASA 82:630-636
}

\note{Rao and Scott (1984) leave open one computational issue. In
  computing `generalised design effects' for these tests, should the
  variance under simple random sampling be estimated using the observed
  proportions or the predicted proportions under the null
  hypothesis? \code{svychisq} uses the observed proportions, following
  simulations by Sribney (1998).}


\seealso{\code{\link{xtabs}}, \code{\link{svyby}} for tables of means,
  medians, etc. }
\examples{
  data(api)
  xtabs(~sch.wide+stype, data=apipop)

  dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
  summary(dclus1)

  svytable(~sch.wide+stype, dclus1)
  svychisq(~sch.wide+stype, dclus1)
  svychisq(~sch.wide+stype, dclus1, statistic="Chisq")
 svychisq(~sch.wide+stype, dclus1, statistic="adjWald")

  rclus1 <- as.svrepdesign(dclus1)
  svreptable(~sch.wide+stype, rclus1, round=TRUE)

}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{category}% __ONLY ONE__ keyword per line
\keyword{htest}% __ONLY ONE__ keyword per line

\eof
\name{svycoxph}
\alias{svycoxph}
\alias{print.svycoxph}
\alias{summary.svycoxph}
\alias{anova.svycoxph}
\alias{extractAIC.svycoxph}
\alias{survfit.svycoxph}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Survey-weighted Cox models.}
\description{
Fit a proportional hazards model to data from a complex survey design, with inverse-probability weighting and with standard errors corrected for cluster sampling.
}
\usage{
svycoxph(formula, design,subset=NULL, ...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{formula}{Model formula. Any \code{cluster()} terms will be ignored.}
  \item{design}{ \code{survey.design} object. Must contain all variables
    in the formula}
  \item{subset}{Expression to select a subpopulation}
  \item{\dots}{Other arguments passed to \code{coxph}. }
}
\details{
The main difference between this function and \code{\link{coxph}} in the
survival package is that this function accounts for the reduction in
variance from stratified sampling and the increase in variance from
having only a small number of clusters.

Note that \code{strata} terms in the model formula describe subsets that
have a separate baseline hazard function and need not have anything to
do with the stratification of the sampling.
  }
\value{
  An object of class \code{svycoxph}.
}

\author{Thomas Lumley}


\seealso{ \code{\link{svydesign}}, \code{\link{coxph}},
  \code{\link{svyCprod}}}

\examples{

 }
\keyword{regression}% at least one, from doc/KEYWORDS
\keyword{survival}% at least one, from doc/KEYWORDS
\keyword{survey}% at least one, from doc/KEYWORDS

\eof
\name{svydesign}
\alias{svydesign}
\alias{summary.survey.design}
\alias{print.summary.survey.design}
\alias{print.survey.design}
\alias{[.survey.design}
\alias{[<-.survey.design}
\alias{na.omit.survey.design}
\alias{na.exclude.survey.design}
\alias{na.fail.survey.design}
\alias{dim.survey.design}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Survey sample analysis.}
\description{
  Specify a complex survey design.
}
\usage{
svydesign(ids, probs=NULL, strata = NULL, variables = NULL, fpc=NULL,
data = NULL, nest = FALSE, check.strata = !nest, weights=NULL) 
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{ids}{Formula or data frame specifying cluster ids from largest
    level to smallest level, \code{~0} or \code{~1} is a formula for no clusters.}
  \item{probs}{Formula or data frame specifying cluster sampling probabilities}
  \item{strata}{Formula or vector specifying strata, use \code{NULL} for no strata}
  \item{variables}{Formula or data frame specifying the variables
    measured in the survey. If \code{NULL}, the \code{data} argument is
    used.}
  \item{fpc}{Finite population correction: see Details below}
  \item{weights}{Formula or vector specifying sampling weights as an
    alternative to \code{probs}}
  \item{data}{Data frame to look up variables in the formula arguments}
  \item{nest}{If \code{TRUE}, relabel cluster ids to enforce nesting, eg
  if ids at second level of sampling are reused within first-level
  units}
  \item{check.strata}{If \code{TRUE}, check that clusters are nested in strata.}
}
\details{
   When analysing data from a complex survey, observations must be
   weighted inversely to their sampling probabilities, and the effects
   of stratification and of correlation induced by cluster sampling must
   be incorporated in standard errors.

   The \code{svydesign} object combines a data frame and all the survey
   design information needed to analyse it.  These objects are used by
   the survey modelling and summary functions.

   The finite population correction is used to reduce the variance when
   a substantial fraction of the total population of interest has been
   sampled. It may not be appropriate if the target of inference is the
   process generating the data rather than the statistics of a
   particular finite population.

   The finite population correction can be specified either as the total
   population size in each stratum or as the fraction of the total
   population that has been sampled. In either case the relevant
   population size is measured in `primary sampling units', the largest clusters.
   That is, sampling 100 units from a population stratum of size 500 can
   be specified as 500 or as 100/500=0.2.  The finite population
   correction can be specified by a vector with one element for each
   individual (in which case it is an error for it to vary within a
   stratum) or as a data frame with one row per stratum.  The first
   column of the data frame should be a factor with the same levels as
   \code{strata} and the second column the finite population correction.
   
   If population sizes are specified but not sampling probabilities or
   weights, the sampling probabilities will be computed from the
   population sizes assuming simple random sampling within strata.

The \code{dim}, \code{"["}, \code{"[<-"} and na.action methods for
\code{survey.design} objects operate on the dataframe specified by
\code{variables} and ensure that the design information is properly
updated to correspond to the new data frame.  With the \code{"[<-"}
method the new value can be a \code{survey.design} object instead of a
data frame, but only the data frame is used. See also
\code{\link{subset.survey.design}} for a simple way to select
subpopulations.

The value of \code{options("survey.lonely.psu")} controls what happens
to strata containing only one cluster (PSU). See \code{\link{svyCprod}}
for details, especially if you have self-representing ("certainty") PSUs.

}
\value{
An object of class \code{survey.design}.
}
\author{Thomas Lumley}


\seealso{ \code{\link{svyglm}}, \code{\link{svymean}}, \code{\link{svyvar}}, \code{\link{svytable}}, \code{\link{svyquantile}},
\code{\link{subset.survey.design}},
\code{\link{update.survey.design}}
}

\examples{
  data(api)
# stratified sample
dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
# one-stage cluster sample
dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
# two-stage cluster sample
dclus2<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)

## syntax for stratified cluster sample
##(though the data weren't really sampled this way)
svydesign(id=~dnum, strata=~stype, weights=~pw, data=apistrat, nest=TRUE)

}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{univar}% __ONLY ONE__ keyword per line
\keyword{manip}% __ONLY ONE__ keyword per line

\eof
\name{svyglm}
\alias{svyglm}
\alias{svrepglm}
\alias{print.svyglm}
\alias{summary.svyglm}
\alias{summary.svrepglm}
\alias{print.summary.svyglm}
\alias{vcov.svyglm}
\alias{residuals.svyglm}
\alias{residuals.svrepglm}
\alias{coef.svyglm}
\alias{extractAIC.svyglm}
\alias{extractAIC.svrepglm}
\alias{logLik.svyglm}
\alias{logLik.svrepglm}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Survey-weighted generalised linear models.}
\description{
Fit a generalised linear model to data from a complex survey design, with inverse-probability weighting and with standard errors corrected for cluster sampling.
}
\usage{
svyglm(formula, design, subset=NULL, ...)
svrepglm(formula, design, subset=NULL, ..., rho=NULL,
return.replicates=FALSE, na.action)
\method{summary}{svyglm}(object, correlation = FALSE,  ...) 
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{formula}{Model formula}
  \item{design}{Survey design from \code{\link{svydesign}} or \code{\link{svrepdesign}}. Must contain all variables
    in the formula}
  \item{subset}{Expression to select a subpopulation}
  \item{\dots}{Other arguments passed to \code{glm} or
    \code{summary.glm} }
  \item{rho}{For replicate BRR designs, to specify the parameter for
    Fay's variance method}
  \item{return.replicates}{Return the replicates as a component of the result?}
  \item{object}{A \code{svyglm} object}
  \item{correlation}{Include the correlation matrix of parameters?}
  \item{na.action}{Handling of NAs}
}
\details{
In \code{svyglm}, standard errors for cluster-sampled designs are computed using a
linearisation estimate (in the absence of strata this is equivalent to
the Huber/White sandwich formula in GEEs).  Most of these computations
are done in \code{\link{svyCprod}}.  In \code{svrepglm}, replicate
weight methods are used.

There is no \code{anova} method for \code{svyglm} as the models are not
fitted by maximum likelihood. The function \code{\link{regTermTest}} may
be useful for testing sets of regression terms.
}
\value{
  An object of class \code{svyglm}.
}

\author{Thomas Lumley}


\seealso{ \code{\link{svydesign}},
  \code{\link{svrepdesign}},\code{\link{as.svrepdesign}}, \code{\link{glm}},
  \code{\link{svyCprod}}, \code{\link{svy.varcoef}},\code{\link{regTermTest}}}

\examples{

  data(api)

  glm(api00~ell+meals+mobility, data=apipop)

  dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
  dclus2<-svydesign(id=~dnum+snum, weights=~pw, data=apiclus2)
  rstrat<-as.svrepdesign(dstrat)
  rclus2<-as.svrepdesign(dclus2)

  summary(svyglm(api00~ell+meals+mobility, design=dstrat))
  summary(svyglm(api00~ell+meals+mobility, design=dclus2))
  summary(svrepglm(api00~ell+meals+mobility, design=rstrat))
  summary(svrepglm(api00~ell+meals+mobility, design=rclus2))

  ## use quasibinomial, quasipoisson to avoid warning messages
  summary(svyglm(sch.wide~ell+meals+mobility, design=dstrat, family=quasibinomial()))

 }
\keyword{regression}% at least one, from doc/KEYWORDS
\keyword{survey}% at least one, from doc/KEYWORDS

\eof
\name{svymle}
\alias{svymle}
\alias{print.svymle}
\alias{coef.svymle}
\alias{summary.svymle}
\alias{vcov.svymle}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Maximum pseudolikelihood estimation in complex surveys}
\description{
Fits a user-specified likelihood parametrised by multiple linear
predictors to data from a complex sample survey and computes the
sandwich variance estimator of the coefficients. Note that this function
maximises an estimated population likelihood, it is not the sample MLE.
}
\usage{
svymle(loglike, gradient = NULL, design, formulas, start = NULL, control
= list(maxit=1000), na.action="na.fail", ...)
\method{summary}{svymle}(object, stderr=c("robust", "model"),...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{loglike}{vectorised loglikelihood function}
  \item{gradient}{Derivative of \code{loglike}. Required for variance computation and helpful for fitting}
  \item{design}{ a \code{survey.design} object }
  \item{formulas}{A list of formulas specifying the variable and linear predictors: see Details below}
  \item{start}{Starting values for parameters}
  \item{control}{control options for \code{\link{optim}}}
  \item{na.action}{Handling of \code{NA}s}
  \item{\dots}{Arguments to \code{loglike} and \code{gradient} that are
    not to be optimised over.}
  \item{object}{\code{svymle} object}
  \item{stderr}{Choice of standard error estimator. The default is a
    standard sandwich estimator. See Details below.}
}
\details{
  The \code{design} object contains all the data and design information
from the survey, so all the formulas refer to variables in this object.
The \code{formulas} argument needs to specify the response variable and
a linear predictor for each freely varying argument of \code{loglike}.

Consider for example the \code{\link{dnorm}} function, with arguments
\code{x}, \code{mean}, \code{sd} and \code{log}, and suppose we want to
estimate the mean of \code{y} as a linear function of a variable
\code{z}, and to estimate a constant standard deviation.  The \code{log}
argument must be fixed at \code{TRUE} to get the loglikelihood.  A
\code{formulas} argument would be \code{list(~y, mean=~z, sd=~1)}. Note
that the data variable \code{y} must be the first argument to
\code{dnorm} and the first formula and that all the other formulas are
labelled.  It is also permitted to have the data variable as the
left-hand side of one of the formulas: eg \code{list( mean=y~z, sd=~1)}.


The usual variance estimator for MLEs in a survey sample is a `sandwich'
variance that requires the score vector and the information matrix. It
requires only sampling assumptions to be valid (though some model
assumptions are required for it to be useful). This is the
\code{stderr="robust"} option, which is available only when the \code{gradient}
argument was specified.

If the model is correctly specified and the sampling is at random
conditional on variables in the model then standard errors based on just
the information matrix will be approximately valid.  In particular, for
independent sampling where weights and strata depend on variables in the
model the \code{stderr="model"} option should work fairly well.

}
\value{
  An object of class \code{svymle}
}
\author{Thomas Lumley}

\seealso{\code{\link{svydesign}}, \code{\link{svyglm}}}

\examples{

 data(api)

 dstrat<-svydesign(id=~1, strata=~stype, weight=~pw, fpc=~fpc, data=apistrat)

 ## fit with glm
 m0 <- svyglm(api00~api99+ell,family="gaussian",design=dstrat)
 ## fit as mle (without gradient)
 m1 <- svymle(loglike=dnorm,gradient=NULL, design=dstrat, formulas=list(mean=api00~api99+ell, sd=~1),start=list(c(80,1,0),c(20)), log=TRUE)
 ## with gradient
 gr<- function(x,mean,sd,log){
	 dm<-2*(x - mean)/(2*sd^2)
	 ds<-(x-mean)^2*(2*(2 * sd))/(2*sd^2)^2 - sqrt(2*pi)/(sd*sqrt(2*pi))
         cbind(dm,ds)
      }
 m2 <- svymle(loglike=dnorm,gradient=gr, design=dstrat,
 formulas=list(mean=api00~api99+ell, sd=~1), start=list(c(80,1,0),c(20)), log=TRUE)

 summary(m0)
 summary(m1,stderr="model")
 summary(m2)

 

}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{models}% __ONLY ONE__ keyword per line
\keyword{optimize}% __ONLY ONE__ keyword per line

\eof
\name{svyplot}
\alias{svyplot}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Plots for survey data }
\description{
  Because observations in survey samples may represent very different
  numbers of units in the population, ordinary plots can be misleading.
  The \code{svyplot} function produces plots adjusted in various ways
  for sampling weights.
}
\usage{
svyplot(formula, design, style = c("bubble", "hex", "grayhex","subsample"),
sample.size = 500, subset = NULL, legend = 1, inches = 0.05, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{formula}{A model formula}
  \item{design}{ A survey object (svydesign or svrepdesign)}
  \item{style}{See Details below}
  \item{sample.size}{For \code{style="subsample"}}
  \item{subset}{expression using variables in the design object}
  \item{legend}{For \code{style="hex"} or \code{"grayhex"}}
  \item{inches}{Scale for bubble plots}
  \item{\dots}{Passed to \code{plot} methods}
}
\details{
  Bubble plots are scatterplots with circles whose area is proportional
  to the sampling weight.  The two "hex" styles produce hexagonal
  binning scatterplots, and require the \code{hexbin} package from
  Bioconductor.
  
  The \code{subsample} method uses the sampling weights to create a
  sample from approximately the population distribution and passes this to \code{\link{plot}}.
  
  Bubble plots are suited to small surveys, hexagonal binning and
  subsampling to large surveys where plotting all the points would
  result in too much overlap.
  
}
\value{
  None
}

\seealso{ }
\examples{
data(api)
dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)

svyplot(api00~api99, design=dstrat, style="bubble")
\dontrun{
## these two require the hexbin package from Bioconductor
svyplot(api00~api99, design=dstrat, style="hex", xlab="1999 API",ylab="2000 API")
svyplot(api00~api99, design=dstrat, style="grayhex",legend=0)
}
## Subsampling doesn't really make sense for such a small survey
svyplot(api00~api99, design=dstrat, style="subsample")
svyplot(api00~stype, design=dstrat, style="subsample")

}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{hplot}% __ONLY ONE__ keyword per line

\eof
\name{svyquantile}
\alias{svyquantile}
\alias{svrepquantile}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Quantiles for sample surveys}
\description{
Compute quantiles for data from complex surveys.
}
\usage{
svyquantile(x, design, quantiles, alpha=0.05, ci=FALSE,method = "linear", f = 1)
svrepquantile(x, design, quantiles, method = "linear", f = 1, return.replicates=FALSE)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{x}{A formula, vector or matrix}
  \item{design}{\code{survey.design} or \code{svyrep.design} object}
  \item{quantiles}{Quantiles to estimate}
  \item{method}{see \code{\link{approxfun}}}
  \item{f}{see \code{\link{approxfun}}}
  \item{ci}{Compute a confidence interval (relatively slow)?}
  \item{alpha}{Level for confidence interval}
  \item{return.replicates}{Return the replicate means?}
}
\details{
  Interval estimation for quantiles is complicated, because the
  influence function is not continuous.  Linearisation cannot be used,
  and only some replication weight designs give valid results.
  
  For \code{svyquantile} we use the method of Francisco-Fuller, which
  corresponds to inverting a robust score test.  At the upper and lower
  limits of the confidence interval, a test of the null hypothesis that
  the cumulative distribution function is equal to the target quantile
  just rejects.
  
  For \code{svrepquantile} ordinary replication-based standard errors
  are computed.  These are not valid for the JK1 and JKn jackknife
  designs. They are valid for BRR and Fay's method, and for some
  bootstrap-based designs.

  
}
\value{
  \code{svyquantile} returns a list whose first component is the
  quantiles and second component is the confidence
  intervals. \code{svrepquantile} returns an object of class \code{svyrepstat}.
}

\author{Thomas Lumley}

\seealso{ \code{\link{svydesign}}, \code{\link{svymean}},
  \code{\link{as.svrepdesign}}, \code{\link{svrepdesign}}}
\references{
  Binder DA (1991) Use of estimating functions for interval estimation
  from complex surveys. \emph{Journal of Official Statistics}  1991: 34-42.

  Shao J, Tu D (1995) \emph{The Jackknife and Bootstrap}. Springer.  
  }
\examples{

  data(api)
  ## population
  quantile(apipop$api00,c(.25,.5,.75))

  ## one-stage cluster sample
  dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
  svyquantile(~api00, dclus1, c(.25,.5,.75),ci=TRUE)

  #stratified sample
  dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
  svyquantile(~api00, dstrat, c(.25,.5,.75),ci=TRUE)


  # BRR method
  data(scd)
  repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
              c(0,1,0,1,1,0))
  scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)
  svrepquantile(~arrests+alive, design=scdrep, quantile=0.5)

}
\keyword{univar}% at least one, from doc/KEYWORDS
\keyword{survey}% __ONLY ONE__ keyword per line

\eof
\name{svyratio}
\alias{svyratio}
\alias{svrepratio}
\alias{print.svyratio}
\alias{predict.svyratio}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Ratio estimation}
\description{
Ratio estimation and estimates of totals based on ratios for complex
survey samples.
}
\usage{
svyratio(numerator, denominator, design)
svrepratio(numerator, denominator, design)
\method{predict}{svyratio}(object, total, se=TRUE,...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{numerator}{formula, expression, or data frame giving numerator variable(s)}
  \item{denominator}{formula, expression, or data frame giving
    denominator variable(s) }
  \item{design}{from \code{svydesign} for \code{svyratio}, from
    \code{svrepdesign} for \code{svrepratio}}
  \item{object}{result of \code{svyratio}}
  \item{total}{vector of population totals for the denominator variables in
    \code{object}}
  \item{se}{Return standard errors?}
  \item{\dots}{Other unused arguments to \code{predict}}
}

\value{
\code{svyratio} returns an object of class \code{svyratio}. The
\code{predict} method returns a matrix of population totals and
optionally a matrix of standard errors.
}
\references{Levy and Lemeshow. "Sampling of Populations" (3rd edition). Wiley}
\author{Thomas Lumley}

\seealso{\code{\link{svydesign}} }

\examples{
data(scd)

## survey design objects
scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
nest=TRUE, fpc=rep(5,6))
scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
nest=TRUE)

# convert to BRR replicate weights
scd2brr <- as.svrepdesign(scdnofpc, type="BRR")

# use BRR replicate weights from Levy and Lemeshow
repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
c(0,1,0,1,1,0))
scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)

# ratio estimates
svyratio(~alive, ~arrests, design=scddes)
svyratio(~alive, ~arrests, design=scdnofpc)
svrepratio(~alive, ~arrests, design=scd2brr)
svrepratio(~alive, ~arrests, design=scdrep)

}
\keyword{survey}% at least one, from doc/KEYWORDS


\eof
\name{update.survey.design}
\alias{update.survey.design}
\alias{update.svyrep.design}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Add variables to a survey design}
\description{
Update the data variables in a survey design, either with a formula for a new set of variables or with an expression for variables to be added.
}
\usage{
\method{update}{survey.design}(object, ...)
\method{update}{svyrep.design}(object, ...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{object}{a \code{survey.design} or \code{svyrep.design} object}
  \item{\dots}{Arguments \code{tag=expr} add a new variable \code{tag}
    computed by evaluating \code{expr} in the survey data.}
}

\value{
A \code{survey.design} or \code{svyrep.design} object  
}

\seealso{\code{\link{svydesign}}, \code{\link{svrepdesign}}}

\examples{
data(api)
dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat,
fpc=~fpc)
dstrat<-update(dstrat, apidiff=api00-api99)
svymean(~api99+api00+apidiff, dstrat)
}
\keyword{survey}% at least one, from doc/KEYWORDS
\keyword{manip}% __ONLY ONE__ keyword per line

\eof
\name{weights.survey.design}
\alias{weights.survey.design}
\alias{weights.svyrep.design}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Survey design weights}
\description{
Extract weights from a survey design object
}
\usage{
\method{weights}{survey.design}(object, ...)
\method{weights}{svyrep.design}(object, type=c("replication","sampling","analysis"), ...)
} 
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{object}{Survey design object}
  \item{type}{Type of weights: \code{"analysis"} combines sampling and
    replication weights.}
  \item{\dots}{Other arguments ignored }
}

\value{
  vector or matrix of weights
}

\seealso{\code{\link{svydesign}}, \code{\link{svrepdesign}} }

\examples{
data(scd)


scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,
                 nest=TRUE, fpc=rep(5,6))
repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1), c(0,1,0,1,1,0))
scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)

weights(scdrep)
weights(scdrep, type="sampling")
weights(scdrep, type="analysis")
weights(scddes)

}
\keyword{survey}% at least one, from doc/KEYWORDS


\eof
\name{withReplicates}
\alias{withReplicates}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Compute variances by replicate weighting}
\description{
Given a function or expression computing a statistic based on sampling
weights, \code{withReplicates} evaluates the statistic and produces a
replicate-based estimate of variance.
}
\usage{
withReplicates(design, theta, rho = NULL, ..., scale.weights=FALSE, return.replicates=FALSE)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{design}{A survey design with replicate weights (eg from \code{\link{svrepdesign}})}
  \item{theta}{A function or expression: see Details below}
  \item{rho}{If \code{design} uses BRR weights, \code{rho} optionally
    specifies the parameter for Fay's variance estimator.}
  \item{\dots}{Other arguments to \code{theta}}
  \item{scale.weights}{Divide the probability weights by their sum (can
    help with overflow problems)}
  \item{return.replicates}{Return the replicate estimates as well as
    the variance?}
}
\details{
  If \code{theta} is a function its first argument will be a vector of
  weights and the second argument will be a data frame containing the
  variables from the design object. 
  
  If it is an expression, the sampling weights will be available as the
  variable \code{.weights}.  Variables in the design object will also
  be in scope.  It is possible to use global variables in the
  expression, but unwise, as they may be masked by local variables
  inside \code{withReplicates}.
}
\value{
  If \code{return.replicates=FALSE},  the weighted statistic, with the
  variance matrix as the \code{"var"} attribute. If
  \code{return.replicates=TRUE}, a list with elements \code{theta} for
  the usual return value and \code{replicates} for the replicates.
}
\seealso{ \code{\link{svrepdesign}}, \code{\link{as.svrepdesign}}, \code{\link{svrVar}}}

\examples{
data(scd)
repweights<-2*cbind(c(1,0,1,0,1,0), c(1,0,0,1,0,1), c(0,1,1,0,0,1),
c(0,1,0,1,1,0))
scdrep<-svrepdesign(data=scd, type="BRR", repweights=repweights)

a<-svrepratio(~alive, ~arrests, design=scdrep)
print(a$ratio)
print(a$var)
withReplicates(scdrep, quote(sum(.weights*alive)/sum(.weights*arrests)))
withReplicates(scdrep, function(w,data) sum(w*data$alive)/sum(w*data$arrests))
}
\keyword{survey}% at least one, from doc/KEYWORDS



\eof
