\name{agnes}
\alias{agnes}
\title{Agglomerative Nesting}
\description{
  Computes agglomerative hierarchical clustering of the dataset.
}
\usage{
agnes(x, diss = inherits(x, "dist"),
      metric = "euclidean", stand = FALSE, method = "average",
      keep.diss = n < 100, keep.data = !diss)
}
\arguments{
  \item{x}{
    data matrix or data frame, or dissimilarity matrix, depending on the
    value of the \code{diss} argument.

    In case of a matrix or data frame, each row corresponds to an observation,
    and each column corresponds to a variable. All variables must be numeric.
    Missing values (NAs) are allowed.

    In case of a dissimilarity matrix, \code{x} is typically the output of
    \code{\link{daisy}} or \code{\link[mva]{dist}}.
    Also a vector with length n*(n-1)/2 is allowed (where n is the number
    of observations), and will be interpreted in the same way as the
    output of the above-mentioned functions. Missing values (NAs) are not
    allowed.
  }
  \item{diss}{
    logical flag: if TRUE (default for \code{dist} or
    \code{dissimilarity} objects), then \code{x} is assumed to be a
    dissimilarity matrix.  If FALSE, then \code{x} is treated as
    a matrix of observations by variables.
  }
  \item{metric}{
    character string specifying the metric to be used for calculating
    dissimilarities between observations.
    The currently available options are "euclidean" and "manhattan".
    Euclidean distances are root sum-of-squares of differences, and
    manhattan distances are the sum of absolute differences.
    If \code{x} is already a dissimilarity matrix, then this argument will
    be ignored.
  }
  \item{stand}{
    logical flag: if TRUE, then the measurements in \code{x} are
    standardized before calculating the dissimilarities. Measurements
    are standardized for each variable (column), by subtracting the
    variable's mean value and dividing by the variable's mean absolute
    deviation.  If \code{x} is already a dissimilarity matrix, then this
    argument will be ignored.
  }
  \item{method}{
    character string defining the clustering method. The five methods
    implemented are "average" (group average method),
    "single" (single linkage), "complete" (complete linkage),
    "ward" (Ward's method), and "weighted" (weighted average linkage).
    Default is "average".
  }
  \item{keep.diss, keep.data}{logicals indicating if the dissimilarities
    and/or input data \code{x} should be kept in the result.  Setting
    these to \code{FALSE} can give much smaller results and hence even save
    memory allocation \emph{time}.}
}
\value{
  an object of class \code{"agnes"} representing the clustering.
  See \code{\link{agnes.object}} for details.
}
\details{
  \code{agnes} is fully described in chapter 5 of Kaufman and Rousseeuw (1990).
  Compared to other agglomerative clustering methods such as \code{hclust},
  \code{agnes} has the following features: (a) it yields the
  agglomerative coefficient (see \code{agnes.object})
  which measures the amount of clustering structure found; and (b)
  apart from the usual tree it also provides the banner, a novel
  graphical display (see \code{\link{plot.agnes}}).

  The \code{agnes}-algorithm constructs a hierarchy of clusterings.\cr
  At first, each observation is a small cluster by itself.  Clusters are
  merged until only one large cluster remains which contains all the
  observations.  At each stage the two \emph{nearest} clusters are combined
  to form one larger cluster.

  For \code{method="average"}, the distance between two clusters is the
  average of the dissimilarities between the points in one cluster and the
  points in the other cluster.
  \cr
  In \code{method="single"}, we use the smallest dissimilarity between a
  point in the first cluster and a point in the second cluster (nearest
  neighbor method).
  \cr
  When \code{method="complete"}, we use the largest dissimilarity
  between a point in the first cluster and a point in the second cluster
  (furthest neighbor method).
}
\section{BACKGROUND}{
  Cluster analysis divides a dataset into groups (clusters) of
  observations that are similar to each other.
  \describe{
    \item{Hierarchical methods}{like
      \code{agnes}, \code{\link{diana}}, and \code{\link{mona}}
      construct a hierarchy of clusterings, with the number of clusters
      ranging from one to the number of observations.}
    \item{Partitioning methods}{like
      \code{\link{pam}}, \code{\link{clara}}, and \code{\link{fanny}}
      require that the number of clusters be given by the user.}
    }
}
\references{
  Kaufman, L. and Rousseeuw, P.J. (1990).
  \emph{Finding Groups in Data: An Introduction to Cluster Analysis}.
  Wiley, New York.

  Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996):
  Clustering in an Object-Oriented Environment.
  \emph{Journal of Statistical Software}, \bold{1}.
  \url{http://www.stat.ucla.edu/journals/jss/}

  Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating
  Robust Clustering Techniques in S-PLUS,
  \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37.
}
\seealso{
  \code{\link{agnes.object}}, \code{\link{daisy}}, \code{\link{diana}},
  \code{\link{dist}}, \code{\link{hclust}}, \code{\link{plot.agnes}},
  \code{\link{twins.object}}.
}
\examples{
data(votes.repub)
agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE)
agn1
plot(agn1)

agn2 <- agnes(daisy(votes.repub), diss = TRUE, method = "complete")
plot(agn2)

data(agriculture)
## Plot similar to Figure 7 in Struyf et al (1996)
\dontrun{ plot(agnes(agriculture), ask = TRUE)}
\testonly{plot(agnes(agriculture))}
}
\keyword{cluster}
% Converted by Sd2Rd version 0.3-2.

\eof
\name{agnes.object}
\alias{agnes.object}
\title{Agglomerative Nesting (AGNES) Object}
\description{
  The objects of class \code{"agnes"}
  represent an agglomerative hierarchical clustering of a dataset.
}
\section{GENERATION}{
  This class of objects is returned from \code{\link{agnes}}.
}
\section{METHODS}{
  The \code{"agnes"} class has methods for the following generic functions:
  \code{print}, \code{summary}, \code{plot}.
}
\section{INHERITANCE}{
  The class \code{"agnes"} inherits from \code{"twins"}.
  Therefore, the generic functions \code{\link{pltree}} and
  \code{\link[mva]{as.hclust}} are available for \code{agnes} objects.
  After applying \code{as.hclust()}, all \emph{its} methods are
  available, of course.
}
\value{
  A legitimate \code{agnes} object is a list with the following components:
  \item{order}{
    a vector giving a permutation of the original observations to allow
    for plotting, in the sense that the branches of a clustering tree
    will not cross.}
  \item{order.lab}{
    a vector similar to \code{order}, but containing observation labels
    instead of observation numbers. This component is only available if
    the original observations were labelled.
  }
  \item{height}{
    a vector with the distances between merging clusters at the successive
    stages.
  }
  \item{ac}{
    the agglomerative coefficient, measuring the clustering structure of the
    dataset.

    For each observation i, denote by m(i) its dissimilarity to the
    first cluster it is merged with, divided by the dissimilarity of the
    merger in the final step of the algorithm.  The \code{ac} is the
    average of all 1 - m(i). It can also be seen as the average width
    (or the percentage filled) of the banner plot.  Because \code{ac}
    grows with the number of observations, this measure should not
    be used to compare datasets of very different sizes.
  }
  \item{merge}{
    an (n-1) by 2 matrix, where n is the number of observations.  Row i
    of \code{merge} describes the merging of clusters at step i of the
    clustering.  If a number j in the row is negative, then the single
    observation |j| is merged at this stage.  If j is positive, then the
    merger is with the cluster formed at stage j of the algorithm.
  }
  \item{diss}{
    an object of class \code{"dissimilarity"} (see
    \code{\link{dissimilarity.object}}), representing the total
    dissimilarity matrix of the dataset.
  }
  \item{data}{
    a matrix containing the original or standardized measurements, depending
    on the \code{stand} option of the function \code{agnes}. If a
    dissimilarity matrix was given as input structure, then this
    component is not available.
  }
}
\seealso{
  \code{\link{agnes}}, \code{\link{diana}},
  \code{\link[mva]{as.hclust}}, \code{\link[mva]{hclust}},
  \code{\link{plot.agnes}}, \code{\link{twins.object}}.
}
\keyword{cluster}

\eof
\name{agriculture}
\alias{agriculture}
\title{European Union Agricultural Workforces}
\usage{data(agriculture)}
\description{
  Gross National Product (GNP) per capita and percentage of the
  population working in agriculture for each country belonging to the
  European Union in 1993.
}
\format{
  A data frame with 12 observations on 2 variables:
  \tabular{rlll}{
    [ , 1] \tab \code{x} \tab numeric \tab per capita GNP \cr
    [ , 2] \tab \code{y} \tab numeric \tab percentage in agriculture
  }
  The row names of the data frame indicate the countries.
}
\source{
  Eurostat (European Statistical Agency, 1994):
  \emph{Cijfers en feiten: Een statistisch portret van de Europese Unie}.
}
\details{
  The data seem to show two clusters, the ``more agricultural'' one
  consisting of Greece, Portugal, Spain, and Ireland.
}
\seealso{\code{\link{agnes}}, \code{\link{daisy}}, \code{\link{diana}}.
}
\references{
  Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996):
  Clustering in an Object-Oriented Environment.
  \emph{Journal of Statistical Software}, \bold{1}.
  \url{http://www.stat.ucla.edu/journals/jss/}
}
\examples{
data(agriculture)

## Compute the dissimilarities using Euclidean metric and without
## standardization
daisy(agriculture, metric = "euclidean", stand = FALSE)

## 2nd plot is similar to Figure 3 in Struyf et al (1996)
plot(pam(agriculture, 2))

## Plot similar to Figure 7 in Struyf et al (1996)
\dontrun{plot(agnes(agriculture), ask = TRUE)}
\testonly{plot(agnes(agriculture))}

## Plot similar to Figure 8 in Struyf et al (1996)
\dontrun{plot(diana(agriculture), ask = TRUE)}
\testonly{plot(diana(agriculture))}
}
\keyword{datasets}

\eof
\name{animals}
\alias{animals}
\title{Attributes of Animals}
\usage{data(animals)}
\description{
  This data set considers 6 binary attributes for 20 animals.
}
\format{
  A data frame with 20 observations on 6 variables:
  \tabular{rll}{
    [ , 1] \tab war \tab warm-blooded \cr
    [ , 2] \tab fly \tab can fly \cr
    [ , 3] \tab ver \tab vertebrate \cr
    [ , 4] \tab end \tab endangered \cr
    [ , 5] \tab gro \tab live in groups \cr
    [ , 6] \tab hai \tab have hair \cr
  }
  All variables are encoded as 1 = `no', 2 = `yes'.
}
\source{
  Leonard Kaufman and Peter J. Rousseeuw (1990):
  \emph{Finding Groups in Data}
  (pp 297ff).
  New York: Wiley.
}
\details{
  This dataset is useful for illustrating monothetic (only a single
  variable is used for each split) hierarchical clustering.
}
\references{
  Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996):
  Clustering in an Object-Oriented Environment.
  \emph{Journal of Statistical Software}, \bold{1}.
  \url{http://www.stat.ucla.edu/journals/jss/}
}
\examples{
data(animals)
apply(animals,2, table) # simple overview

ma <- mona(animals)
ma
## Plot similar to Figure 10 in Struyf et al (1996)
plot(ma)
}
\keyword{datasets}

\eof
\name{bannerplot}
\alias{bannerplot}
\title{Plot Banner (of Hierarchical Clustering)}
\description{
  Draws a ``banner'', i.e. basically a horizontal \code{\link{barplot}}
  visualizing the (agglomerative or divisive) hierarchical clustering or
  another binary dendrogram structure.
}
\usage{
bannerplot(x, w = rev(x$height), fromLeft = TRUE,
           main, sub, xlab = "Height",  adj = 0,
           col = c(2, 0), border = 0, axes = TRUE, frame.plot = axes,
           rev.xax = !fromLeft, xax.pretty = TRUE,
           labels = NULL, nmax.lab = 35, max.strlen = 5,
           yax.do = axes && length(x$order) <= nmax.lab,
           yaxRight = fromLeft, y.mar = 2.4 + max.strlen/2.5, \dots)
}
\arguments{
  \item{x}{a list with components \code{order}, \code{order.lab} and
    \code{height} when \code{w}, the next argument, is not specified.}
  \item{w}{non-negative numeric vector of bar widths.}
  \item{fromLeft}{logical, indicating if the banner is from the left or not.}
  \item{main,sub}{main and sub titles, see \code{\link{title}}.}
  \item{xlab}{x axis label (with `correct' default e.g. for \code{plot.agnes}).}
  \item{adj}{passed to \code{\link{title}(main,sub)} for string adjustment.}
  \item{col}{vector of length 2, for two horizontal segments.}
  \item{border}{color for bar border; now defaults to background (no border).}
  \item{axes}{logical indicating if axes (and labels) should be drawn at all.}
  \item{frame.plot}{logical indicating if the banner should be framed;
    mainly used when \code{border = 0} (as per default).}
  \item{rev.xax}{logical indicating if the x axis should be reversed (as
    in \code{plot.diana}).}
  \item{xax.pretty}{logical or integer indicating if
    \code{\link{pretty}()} should be used for the x axis.
    \code{xax.pretty = FALSE} is mainly for back compatibility.}
  \item{labels}{labels to use on y-axis; the default is constructed from
    \code{x}.}
  \item{nmax.lab}{integer indicating the number of labels which is
    considered too large for single-name labelling the banner plot.}
  \item{max.strlen}{positive integer giving the length to which
    strings are truncated in banner plot labeling.}
  \item{yax.do}{logical indicating if a y axis and banner labels should
    be drawn.}
  \item{yaxRight}{logical indicating if the y axis is on the right or left.}
  \item{y.mar}{positive number specifying the margin width to use when
    banners are labeled (along a y-axis).  The default adapts to the
    string width and optimally would also depend on the font.}
  \item{\dots}{graphical parameters (see \code{\link{par}}) may also
    be supplied as arguments to this function.}
}
\author{Martin Maechler (from original code of Kaufman and Rousseeuw).}
\note{This is mainly a utility called from \code{\link{plot.agnes}},
  \code{\link{plot.diana}} and \code{\link{plot.mona}}.
}% also serves as \seealso{*}
\keyword{hplot}
\keyword{cluster}
\keyword{utilities}

\eof
\name{clara}
\alias{clara}
\title{Clustering Large Applications}
\description{
  Computes a \code{"clara"} object, a list representing a clustering of
  the data into \code{k} clusters.
}
\usage{
clara(x, k, metric = "euclidean", stand = FALSE, samples = 5,
      sampsize = min(n, 40 + 2 * k), trace = 0, keep.data = TRUE, keepdata,
      rngR = FALSE)
}
\arguments{
  \item{x}{
    data matrix or data frame, each row corresponds to an observation,
    and each column corresponds to a variable.  All variables must be numeric.
    Missing values (NAs) are allowed. }
  \item{k}{integer, the number of clusters.
    It is required that \eqn{0 < k < n} where \eqn{n} is the number of
    observations (i.e., n = \code{nrow(x)}).}
  \item{metric}{
    character string specifying the metric to be used for calculating
    dissimilarities between observations.
    The currently available options are "euclidean" and "manhattan".
    Euclidean distances are root sum-of-squares of differences, and
    manhattan distances are the sum of absolute differences.
  }
  \item{stand}{logical, indicating if the measurements in \code{x} are
    standardized before calculating the dissimilarities.  Measurements
    are standardized for each variable (column), by subtracting the
    variable's mean value and dividing by the variable's mean absolute
    deviation.
  }
  \item{samples}{integer, number of samples to be drawn from the dataset.}
  \item{sampsize}{integer, number of observations in each
    sample. \code{sampsize} should be higher than the number of clusters
    (\code{k}) and at most the number of observations (n = \code{nrow(x)}).}
  \item{trace}{integer indicating a \emph{trace level} for diagnostic
    output during the algorithm.}
  \item{keep.data,keepdata}{logical indicating if the (\emph{scaled} if
    \code{stand} is true) data should be kept in the result.
    (\code{keepdata} is equivalent to \code{keep.data} where the former
    is deprecated.)
    Setting this to \code{FALSE} saves memory (and hence time), but
    disables \code{\link{clusplot}()}ing of the result.}
  \item{rngR}{logical indicating if \R's random number generator should
    be used instead of the primitive clara()-builtin one.  If true, this
    also means that each call to \code{clara()} returns a different result
    -- though only slightly different in good situations.}
}
\value{
  an object of class \code{"clara"} representing the clustering.  See
  \code{\link{clara.object}} for details.
}
\details{
  \code{clara} is fully described in chapter 3 of Kaufman and Rousseeuw (1990).
  Compared to other partitioning methods such as \code{pam}, it can deal with
  much larger datasets.  Internally, this is achieved by considering
  sub-datasets of fixed size (\code{sampsize}) such that the time and
  storage requirements become linear in \eqn{n} rather than quadratic.

  Each sub-dataset is partitioned into \code{k} clusters using the same
  algorithm as in \code{\link{pam}}.\cr
  Once \code{k} representative objects have been selected from the
  sub-dataset, each observation of the entire dataset is assigned
  to the nearest medoid.

  The sum of the dissimilarities of the observations to their closest
  medoid is used as a measure of the quality of the clustering.  The
  sub-dataset for which the sum is minimal, is retained.  A further
  analysis is carried out on the final partition.

  Each sub-dataset is forced to contain the medoids obtained from the
  best sub-dataset until then.  Randomly drawn observations are added to
  this set until \code{sampsize} has been reached.
}
\note{
%% mostly by Martin Maechler :
  By default, the random sampling is implemented with a \emph{very}
  simple scheme (with period \eqn{2^{16} = 65536}) inside the Fortran
  code, independently of \R's random number generation, and as a matter of
  fact, deterministically.  Alternatively, we recommend setting
  \code{rngR = TRUE} so that \R's random number generator is used instead.

  The storage requirement of \code{clara} computation (for small
  \code{k}) is about
  \eqn{O(n \times p) + O(j^2)}{O(n * p) + O(j^2)} where
  \eqn{j = \code{sampsize}}, and \eqn{(n,p) = \code{dim(x)}}.
  The CPU computing time (again neglecting small \code{k}) is about
  \eqn{O(n \times p \times j^2 \times N)}{O(n * p * j^2 * N)}, where
  \eqn{N = \code{samples}}.

  For ``small'' datasets, the function \code{\link{pam}} can be used
  directly.  What can be considered \emph{small}, is really a function
  of available computing power, both memory (RAM) and speed.
  Originally (1990), ``small'' meant less than 100 observations;
  later, the authors said \emph{``small (say with fewer than 200
  observations)''}.
}
\author{
  Kaufman and Rousseeuw, originally.
  All arguments from \code{trace} on, and most \R documentation and all
  tests by Martin Maechler.
}
\seealso{
  \code{\link{agnes}} for background and references;
  \code{\link{clara.object}}, \code{\link{pam}},
  \code{\link{partition.object}}, \code{\link{plot.partition}}.
}
\examples{
## generate 500 objects, divided into 2 clusters.
x <- rbind(cbind(rnorm(200,0,8), rnorm(200,0,8)),
           cbind(rnorm(300,50,8), rnorm(300,50,8)))
clarax <- clara(x, 2)
clarax
clarax$clusinfo
plot(clarax)

## `xclara' is an artificial data set with 3 clusters of 1000 bivariate
## objects each.
data(xclara)
(clx3 <- clara(xclara, 3))
## Plot similar to Figure 5 in Struyf et al (1996)
\dontrun{plot(clx3, ask = TRUE)}
\testonly{plot(clx3)}

## Try 100 times *different* random samples -- for reliability:
nSim <- 100
nCl <- 3 # = no.classes
set.seed(421)# (reproducibility)
cl <- matrix(NA,nrow(xclara), nSim)
for(i in 1:nSim) cl[,i] <- clara(xclara, nCl, rngR = TRUE)$cluster
tcl <- apply(cl,1, tabulate, nbins = nCl)
## those that are not always in same cluster (5 out of 3000 for this seed):
(iDoubt <- which(apply(tcl,2, function(n) all(n < nSim))))
if(length(iDoubt)) { # (not for all seeds)
  tabD <- tcl[,iDoubt, drop=FALSE]
  dimnames(tabD) <- list(cluster = paste(1:nCl), obs = format(iDoubt))
  t(tabD) # how many times in which clusters
}

}
\keyword{cluster}


\eof
\name{clara.object}
\alias{clara.object}
\title{Clustering Large Applications (CLARA) Object}
\description{
  The objects of class \code{"clara"} represent a partitioning of a large
  dataset into clusters and are typically returned from \code{\link{clara}}.
}
\section{Methods, Inheritance}{
  The \code{"clara"} class has methods for the following generic functions:
  \code{print}, \code{summary}.

  The class \code{"clara"} inherits from \code{"partition"}.
  Therefore, the generic functions \code{plot} and \code{clusplot} can
  be used on a \code{clara} object.
}
\value{
  A legitimate \code{clara} object is a list with the following components:

  \item{sample}{
    labels or case numbers of the observations in the best sample, that is,
    the sample used by the \code{clara} algorithm for the final partition.}
  \item{medoids}{
    the medoids or representative objects of the clusters.
    It is a matrix in which each row holds the coordinates of one medoid.}
  \item{clustering}{the clustering vector, see \code{\link{partition.object}}.}
  \item{objective}{the objective function for the final clustering of
    the entire dataset.}
  \item{clusinfo}{
    matrix, each row gives numerical information for one cluster. These
    are the cardinality of the cluster (number of observations), the
    maximal and average dissimilarity between the observations in the
    cluster and the cluster's medoid.  %% FIXME: Now differs from pam.object.Rd:
    The last column is the maximal
    dissimilarity between the observations in the cluster and the
    cluster's medoid, divided by the minimal dissimilarity between the
    cluster's medoid and the medoid of any other cluster. If this ratio
    is small, the cluster is well-separated from the other clusters.
  }
  \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.}
  \item{silinfo}{list with silhouette width information for the best sample, see
    \code{\link{partition.object}}.}
  \item{call}{generating call, see \code{\link{partition.object}}.}
  \item{data}{matrix, possibly standardized, or NULL, see
    \code{\link{partition.object}}.}
}
\seealso{
  \code{\link{clara}}, \code{\link{dissimilarity.object}},
  \code{\link{partition.object}}, \code{\link{plot.partition}}.
}
\keyword{cluster}


\eof
\name{clusplot}
\alias{clusplot}
\title{Cluster Plot - Generic Function}
\description{
  Draws a 2-dimensional ``clusplot'' on the current graphics device.
  This is a generic function with a default and \code{partition} method.
}
\usage{
clusplot(x, \dots)
}
\arguments{
  \item{x}{an \R object.}
  \item{\dots}{additional arguments for \code{\link{methods}}.
    Graphical parameters (see \code{\link{par}}) may also
    be supplied as arguments to this function.}
}
\section{Side Effects}{
  a 2-dimensional clusplot is created on the current graphics device.
}
\seealso{(for references and examples)
  \code{\link{clusplot.default}}, \code{\link{clusplot.partition}},
  \code{\link{partition.object}}, \code{\link{pam}}, \code{\link{fanny}},
  \code{\link{clara}}.
}
\keyword{cluster}
\keyword{hplot}


\eof
\name{clusplot.default}
\alias{clusplot.default}
\title{Bivariate Cluster Plot (clusplot) Default Method}
\description{
  Creates a bivariate plot visualizing a partition (clustering) of the data. All
  observations are represented by points in the plot, using principal
  components or multidimensional scaling. Around each cluster an ellipse
  is drawn.
}
\usage{
\method{clusplot}{default}(x, clus, diss = FALSE, cor = TRUE, stand = FALSE,
          lines = 2, shade = FALSE, color = FALSE,
          labels= 0, plotchar = TRUE,
          col.p = "dark green", col.txt = col.p,
          col.clus = if(color) c(2, 4, 6, 3) else 5,
          span = TRUE, xlim = NULL, ylim = NULL,
          main = paste("CLUSPLOT(", deparse(substitute(x)),")"),
          verbose = getOption("verbose"),
          \dots)
}
\arguments{
  \item{x}{matrix or data frame, or dissimilarity matrix, depending on
    the value of the \code{diss} argument.

    In case of a matrix (alike), each row corresponds to an observation,
    and each column corresponds to a variable.  All variables must be
    numeric. Missing values (\code{\link{NA}}s) are allowed.  They are
    replaced by the median of the corresponding variable.  When some
    variables or some observations contain only missing values, the
    function stops with a warning message.

    In case of a dissimilarity matrix, \code{x} is the output of
    \code{\link{daisy}} or \code{\link[mva]{dist}} or a symmetric matrix.  Also,
    a vector of length \eqn{n*(n-1)/2} is allowed (where \eqn{n} is the
    number of observations), and will be interpreted in the same way as
    the output of the above-mentioned functions.  Missing values (NAs)
    are not allowed.
  }
  \item{clus}{
    a vector of length n representing a clustering of \code{x}.  For
    each observation the vector lists the number or name of the cluster
    to which it has been assigned. \code{clus} is often the clustering
    component of the output of \code{\link{pam}}, \code{\link{fanny}} or
    \code{\link{clara}}.}
  \item{diss}{
    logical indicating if \code{x} will be considered as a dissimilarity
    matrix or a matrix of observations by variables (see \code{x}
    argument above).}
  \item{cor}{
    logical flag, only used when working with a data matrix (\code{diss
      = FALSE}). If TRUE, then the variables are scaled to unit variance.}
  \item{stand}{
    logical flag: if true, then the representations of the n observations in the
    2-dimensional plot are standardized.
  }
  \item{lines}{
    integer out of \code{0, 1, 2}, used to obtain an idea of the
    distances between ellipses.  The distance between two ellipses E1
    and E2 is measured along the line connecting the centers \eqn{m1}
    and \eqn{m2} of the two ellipses.

    In case E1 and E2 overlap on the line through \eqn{m1} and \eqn{m2},
    no line is drawn.  Otherwise, the result depends on the value of
    \code{lines}: If
    \describe{
      \item{lines = 0,}{no distance lines will appear on the plot;}
      \item{lines = 1,}{the line segment between \eqn{m1} and \eqn{m2} is drawn;}
      \item{lines = 2,}{a line segment between the boundaries of E1 and
	E2 is drawn (along the line connecting \eqn{m1} and \eqn{m2}).}
    }
  }
  \item{shade}{
    logical flag: if TRUE, then the ellipses are shaded in relation to their
    density. The density is the number of points in the cluster divided by the
    area of the ellipse.
  }
  \item{color}{
    logical flag: if TRUE, then the ellipses are colored with respect to their
    density. With increasing density, the colors are light blue, light
    green, red and purple.  To see these colors on the graphics device, an
    appropriate color scheme should be selected (we recommend a white
    background).}
  \item{labels}{
    integer code, currently one of 0,1,2,3,4 and 5.  If
    \describe{
      \item{labels= 0,}{no labels are placed in the plot;}
      \item{labels= 1,}{points and ellipses can be identified in the plot (see
	\code{\link{identify}});}
      \item{labels= 2,}{all points and ellipses are labelled in the plot;}
      \item{labels= 3,}{only the points are labelled in the plot;}
      \item{labels= 4,}{only the ellipses are labelled in the plot;}
      \item{labels= 5,}{the ellipses are labelled in the plot, and
	points can be identified.}
    }
    The levels of the vector \code{clus} are taken as labels for the
    clusters.  The labels
    of the points are the rownames of \code{x} if \code{x} is matrix-like.
    Otherwise (\code{diss = TRUE}), \code{x} is a vector, and point labels
    can be attached to \code{x} as a "Labels" attribute
    (\code{attr(x,"Labels")}), as is done for the output of
    \code{\link{daisy}}.

    A possible \code{\link{names}} attribute of \code{clus} will not
    be taken into account.
  }
  \item{plotchar}{
    logical flag: if TRUE, then the plotting symbols differ for points belonging
    to different clusters.
  }
  \item{span}{
    logical flag: if TRUE, then each cluster is represented by the ellipse with
    smallest area containing all its points. (This is a special case of the
    minimum volume ellipsoid.)\cr
    If FALSE, the ellipse is based on the mean and covariance matrix of the
    same points.  While this is faster to compute, it often yields a much
    larger ellipse.

    There are also some special cases:  When a cluster consists of only
    one point, a tiny circle is drawn around it.  When the points of a
    cluster fall on a straight line, \code{span=FALSE} draws a narrow
    ellipse around it and \code{span=TRUE} gives the exact line segment.
  }
  \item{col.p}{color code(s) used for the observation points.}
  \item{col.txt}{color code(s) used for the labels (if \code{labels >= 2}).}
  \item{col.clus}{color code for the ellipses (and their labels);
    only one if color is false (as per default).}

  \item{xlim, ylim}{numeric vectors of length 2, giving the x- and y-
    ranges as in \code{\link{plot.default}}.}
  \item{main}{main title for the plot; by default, one is constructed.}
  \item{verbose}{a logical indicating, if there should be extra
    diagnostic output; mainly for `debugging'.}
  \item{\dots}{Further graphical parameters may also be supplied, see
    \code{\link{par}}.}
}% End Arguments

\value{
  An invisible list with components:
  \item{Distances}{
    When \code{lines} is 1 or 2 we obtain a k by k matrix (k is the number of
    clusters).  The element in \code{[i,j]} is the distance between ellipse
    i and ellipse j.\cr
    If \code{lines = 0}, then the value of this component is \code{NA}.
  }
  \item{Shading}{
    A vector of length k (where k is the number of clusters), containing the
    amount of shading per cluster. Let y be a vector where element i is the
    ratio between the number of points in cluster i and the area of ellipse i.
    When the cluster i is a line segment, y[i] and the density of the cluster are
    set to \code{NA}. Let z be the sum of all the elements of y without the NAs.
    Then we put shading = y/z *37 + 3 .
  }
}

\section{Side Effects}{
  a visual display of the clustering is plotted on the current graphics device.
}
\details{
  \code{clusplot} uses the functions \code{\link[mva]{princomp}} and
  \code{\link[mva]{cmdscale}}.  These functions are
  data reduction techniques. They will represent the data in a bivariate plot.
  Ellipses are then drawn to indicate the clusters.  The further layout of the
  plot is determined by the optional arguments.
}
\note{
  When we have 4 or fewer clusters, then \code{color=TRUE} gives
  every cluster a different color.  When there are more than 4 clusters,
  clusplot uses the function \code{\link{pam}} to cluster the
  densities into 4 groups such that ellipses with nearly the same
  density get the same color.  \code{col.clus} specifies the colors used.

  The \code{col.p} and \code{col.txt} arguments, added for \R,
  are recycled to have length the number of observations.
  If \code{col.p} has more than one value, using \code{color = TRUE} can
  be confusing because of a mix of point and ellipse colors.
}
\references{
  Pison, G., Struyf, A. and Rousseeuw, P.J. (1999)
  Displaying a Clustering with CLUSPLOT,
  \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr
  A version of this is available as technical report from
  \url{http://win-www.uia.ac.be/u/statis/abstract/Disclu99.htm}

  Kaufman, L. and Rousseeuw, P.J. (1990).
  \emph{Finding Groups in Data: An Introduction to Cluster Analysis.}
  Wiley, New York.

  Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997).
  Integrating Robust Clustering Techniques in S-PLUS,
  \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37.
}
\seealso{
  \code{\link[mva]{princomp}}, \code{\link[mva]{cmdscale}}, \code{\link{pam}},
  \code{\link{clara}}, \code{\link{daisy}}, \code{\link{par}},
  \code{\link{identify}}, \code{\link[lqs]{cov.mve}},
  \code{\link{clusplot.partition}}.
}
\examples{
## plotting votes.diss(dissimilarity) in a bivariate plot and
## partitioning into 2 clusters
data(votes.repub)
votes.diss <- daisy(votes.repub)
votes.clus <- pam(votes.diss, 2, diss = TRUE)$clustering
clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE)
clusplot(votes.diss, votes.clus, diss = TRUE,
         col.p = votes.clus, labels = 4)# color points and label ellipses
%% FIXME: should have a  clusellipses(..) for just adding ellipses
clusplot(votes.diss, votes.clus, diss = TRUE, span = FALSE)# simple ellipses

if(interactive()) { #  uses identify() *interactively* :
  clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE, labels = 1)
  clusplot(votes.diss, votes.clus, diss = TRUE, labels = 5)# ident. only points
}

## plotting iris (data frame) in a 2-dimensional plot and partitioning
## into 3 clusters.
data(iris)
iris.x <- iris[, 1:4]
cl3 <- pam(iris.x, 3)$clustering
op <- par(mfrow= c(2,2))
clusplot(iris.x, cl3, color = TRUE)
U <- par("usr")
## zoom in :
rect(0,-1, 2,1, border = "orange", lwd=2)
clusplot(iris.x, cl3, color = TRUE, xlim = c(0,2), ylim = c(-1,1))
box(col="orange",lwd=2); mtext("sub region", font = 4, cex = 2)
##  or zoom out :
clusplot(iris.x, cl3, color = TRUE, xlim = c(-4,4), ylim = c(-4,4))
mtext("`super' region", font = 4, cex = 2)
rect(U[1],U[3], U[2],U[4], lwd=2, lty = 3)

# reset graphics
par(op)
}
\keyword{cluster}
\keyword{hplot}

\eof
\name{clusplot.partition}
\alias{clusplot.partition}
\title{Bivariate Clusplot of a Partitioning Object}
\description{
  Clusplot (Clustering Plot) method for an object of class \code{partition}.
}
\usage{
\method{clusplot}{partition}(x, main = NULL, dist = NULL, \dots)
}
\arguments{
  \item{x}{
    an object of class \code{"partition"}, e.g. created by the functions
    \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.}
  \item{main}{title for the plot; when \code{NULL} (by default), a title
    is constructed, using \code{x$call}.}
  \item{dist}{when \code{x} does not have a \code{diss} nor a
    \code{data} component, e.g., for \code{\link{pam}(dist(*),
      keep.diss=FALSE)}, \code{dist} must specify the dissimilarity for the
    clusplot.}
  \item{\dots}{all optional arguments available for the
    \code{\link{clusplot.default}} function (except for the \code{diss}
    one) may also be supplied to this function.  Graphical parameters
    (see \code{\link{par}}) may also be supplied as arguments to this
    function.}
}
\value{
  An invisible list with components
  \item{Distances}{
    When option lines is 1 or 2 we obtain a k by k matrix (k is the number of
    clusters). The element at row j and column s is the distance between
    ellipse j and ellipse s.  If lines=0, then the value of this
    component is NA.
  }
  \item{Shading}{
    A vector of length k (where k is the number of clusters), containing
    the amount of shading per cluster.  Let y be a vector where element
    i is the ratio between the number of objects in cluster i and the
    area of ellipse i.  When the cluster i is a line segment, y[i] and
    the density of the cluster are set to NA.  Let z be the sum of all
    the elements of y without the NAs.  Then we put shading = y/z *37 + 3.
  }
}
\details{
  This \code{clusplot.partition()} method relies on
  \code{\link{clusplot.default}}.

  If the clustering algorithms \code{pam}, \code{fanny} and \code{clara}
  are applied to a data matrix of observations-by-variables then a
  clusplot of the resulting clustering can always be drawn.  When the
  data matrix contains missing values and the clustering is performed
  with \code{\link{pam}} or \code{\link{fanny}}, the dissimilarity
  matrix will be given as input to \code{clusplot}.  When the clustering
  algorithm \code{\link{clara}} was applied to a data matrix with NAs
  then clusplot will replace the missing values as described in
  \code{\link{clusplot.default}}, because a dissimilarity matrix is not
  available.
}
\seealso{\code{\link{clusplot.default}} for references;
  \code{\link{partition.object}}, \code{\link{pam}},
  \code{\link{pam.object}}, \code{\link{clara}},
  \code{\link{clara.object}}, \code{\link{fanny}},
  \code{\link{fanny.object}}, \code{\link{par}}.
}
\examples{
## generate 25 objects, divided into 2 clusters.
x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)),
           cbind(rnorm(15,5,0.5), rnorm(15,5,0.5)))
clusplot(pam(x, 2))
## add noise, and try again :
x4 <- cbind(x, rnorm(25), rnorm(25))
clusplot(pam(x4, 2))
}
\keyword{cluster}
\keyword{hplot}

\eof
\name{cluster-internal}
\alias{meanabsdev}
\title{Internal cluster functions}
\description{
  Internal cluster functions.
}
\usage{
meanabsdev(y)
}
\details{
  These are not to be called by the user.
}
\keyword{internal}

\eof
\name{daisy}
\alias{daisy}
\title{Dissimilarity Matrix Calculation}
\description{
  Compute all the pairwise dissimilarities (distances) between observations
  in the dataset.  The original variables may be of mixed types.
}
\usage{
daisy(x, metric = c("euclidean","manhattan"), stand = FALSE, type = list())
}
\arguments{
  \item{x}{
    numeric matrix or data frame.  Dissimilarities will be computed
    between the rows of \code{x}.  Columns of mode \code{numeric}
    (i.e. all columns when \code{x} is a matrix) will be recognized as
    interval scaled variables, columns of class \code{factor} will be
    recognized as nominal variables, and columns of class \code{ordered}
    will be recognized as ordinal variables.  Other variable types
    should be specified with the \code{type} argument.  Missing values
    (\code{\link{NA}}s) are allowed.
  }
  \item{metric}{
    character string specifying the metric to be used.
    The currently available options are \code{"euclidean"} (the default)
    and \code{"manhattan"}.\cr
    Euclidean distances are root sum-of-squares of differences, and
    manhattan distances are the sum of absolute differences.

    If not all columns of \code{x} are numeric, then this argument
    will be ignored.
  }
  \item{stand}{logical flag: if TRUE, then the measurements in \code{x}
    are standardized before calculating the
    dissimilarities.  Measurements are standardized for each variable
    (column), by subtracting the variable's mean value and dividing by
    the variable's mean absolute deviation.

    If not all columns of
    \code{x} are numeric, then this argument will be ignored.
  }
  \item{type}{list for specifying some (or all) of the types of the
    variables (columns) in \code{x}.  The list may contain the following
    components: \code{"ordratio"} (ratio scaled variables to be treated as
    ordinal variables), \code{"logratio"} (ratio scaled variables that
    must be logarithmically transformed), \code{"asymm"} (asymmetric
    binary) and \code{"symm"} (symmetric binary variables).  Each
    component's value is a vector, containing the names or the numbers
    of the corresponding columns of \code{x}.
    Variables not mentioned in the \code{type} list are interpreted as
    usual (see argument \code{x}).
  }
}% end{arg..}

\value{
  an object of class \code{"dissimilarity"} containing the dissimilarities among
  the rows of x.  This is typically the input for the functions \code{pam},
  \code{fanny}, \code{agnes} or \code{diana}.  See
  \code{\link{dissimilarity.object}} for details.
}
\details{
  \code{daisy} is fully described in chapter 1 of Kaufman and Rousseeuw (1990).
  Compared to \code{\link[mva]{dist}} whose input must be numeric
  variables, the main feature of \code{daisy} is its ability to handle
  other variable types as well (e.g. nominal, ordinal, (a)symmetric
  binary) even when different types occur in the same dataset.

  Note that setting the type to \code{symm} (symmetric binary) gives the
  same dissimilarities as using \emph{nominal} (which is chosen for
  non-ordered factors) only when no missing values are present, and more
  efficiently.

  Note that \code{daisy} now gives a warning when 2-valued numerical
  variables don't have an explicit \code{type} specified, because the
  reference authors recommend to consider using \code{"asymm"}.

  In the \code{daisy} algorithm, missing values in a row of x are not
  included in the dissimilarities involving that row.  There are two
  main cases,
  \enumerate{
    \item If all variables are interval scaled,
      the metric is "euclidean", and ng is the number of columns in which
      neither row i nor row j has NAs, then the dissimilarity d(i,j) returned is
      sqrt(ncol(x)/ng) times the Euclidean distance between the two vectors
      of length ng shortened to exclude NAs.  The rule is similar for the
      "manhattan" metric, except that the coefficient is ncol(x)/ng.
      If ng is zero, the dissimilarity is NA.

    \item When some variables have a type other than interval scaled, the
      dissimilarity between two rows is the weighted sum of the contributions of
      each variable.\cr
      The weight becomes zero when that variable is missing in either or both
      rows, or when the variable is asymmetric binary and both values are
      zero.  In all other situations, the weight of the variable is 1.

      The contribution of a nominal or binary variable to the total
      dissimilarity is 0 if both values are equal, 1 otherwise.  The
      contribution of other variables is the absolute difference of both
      values, divided by the total range of that variable.  Ordinal
      variables are first converted to ranks.

      If \code{nok} is the number of nonzero weights, the dissimilarity is
      multiplied by the factor \code{1/nok} and thus ranges between 0 and 1.
      If \code{nok = 0}, the dissimilarity is set to \code{\link{NA}}.
  }
}
\section{BACKGROUND}{
  Dissimilarities are used as inputs to cluster analysis and
  multidimensional scaling.  The choice of metric may have a
  large impact.
}
\references{
  Kaufman, L. and Rousseeuw, P.J. (1990).
  \emph{Finding Groups in Data: An Introduction to Cluster Analysis.}
  Wiley, New York.

  Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating Robust
  Clustering Techniques in S-PLUS,
  \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37.
}
\seealso{
  \code{\link{dissimilarity.object}}, \code{\link[mva]{dist}},
  \code{\link{pam}}, \code{\link{fanny}}, \code{\link{clara}},
  \code{\link{agnes}}, \code{\link{diana}}.
}
\examples{
data(agriculture)
## Example 1 in ref:
##  Dissimilarities using Euclidean metric and without standardization
d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE)
d.agr
as.matrix(d.agr)[,"DK"] # via as.matrix.dist(.)

data(flower)
## Example 2 in ref
summary(dfl1 <- daisy(flower, type = list(asymm = 3)))
summary(dfl2 <- daisy(flower, type = list(asymm = c(1, 3), ordratio = 7)))
## this failed earlier:
summary(dfl3 <- daisy(flower,
        type = list(asymm = c("V1", "V3"), symm= 2, ordratio= 7, logratio= 8)))
}
\keyword{cluster}

\eof
\name{diana}
\alias{diana}
\title{DIvisive ANAlysis Clustering}
\description{
  Computes a divisive hierarchical clustering of the dataset
  returning an object of class \code{diana}.
}
\usage{
diana(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE,
      keep.diss = n < 100, keep.data = !diss)
}
\arguments{
  \item{x}{
    data matrix or data frame, or dissimilarity matrix or object,
    depending on the value of the \code{diss} argument.

    In case of a matrix or data frame, each row corresponds to an observation,
    and each column corresponds to a variable.  All variables must be numeric.
    Missing values (\code{\link{NA}}s) \emph{are} allowed.

    In case of a dissimilarity matrix, \code{x} is typically the output
    of \code{\link{daisy}} or \code{\link{dist}}.  Also a vector of
    length n*(n-1)/2 is allowed (where n is the number of observations),
    and will be interpreted in the same way as the output of the
    above-mentioned functions. Missing values (NAs) are \emph{not} allowed.
  }
  \item{diss}{
    logical flag: if TRUE (default for \code{dist} or
    \code{dissimilarity} objects), then \code{x} will be considered as a
    dissimilarity matrix.  If FALSE, then \code{x} will be considered as
    a matrix of observations by variables.
  }
  \item{metric}{
    character string specifying the metric to be used for calculating
    dissimilarities between observations.\cr
    The currently available options are "euclidean" and
    "manhattan".  Euclidean distances are root sum-of-squares of
    differences, and manhattan distances are the sum of absolute
    differences.  If \code{x} is already a dissimilarity matrix, then
    this argument will be ignored.
  }
  \item{stand}{logical; if true, the measurements in \code{x} are
    standardized before calculating the dissimilarities.  Measurements
    are standardized for each variable (column), by subtracting the
    variable's mean value and dividing by the variable's mean absolute
    deviation.  If \code{x} is already a dissimilarity matrix, then this
    argument will be ignored.}
  \item{keep.diss, keep.data}{logicals indicating if the dissimilarities
    and/or input data \code{x} should be kept in the result.  Setting
    these to \code{FALSE} can give much smaller results and hence even save
    memory allocation \emph{time}.}
}
\value{
  an object of class \code{"diana"} representing the clustering.  See
  \code{?\link{diana.object}} for details.
}
\details{
\code{diana} is fully described in chapter 6 of Kaufman and Rousseeuw (1990).
It is probably unique in computing a divisive hierarchy, whereas most
other software for hierarchical clustering is agglomerative.
Moreover, \code{diana} provides (a) the divisive coefficient
(see \code{diana.object}) which measures the amount of clustering structure
found; and (b) the banner, a novel graphical display
(see \code{plot.diana}).

The \code{diana}-algorithm constructs a hierarchy of clusterings,
starting with one large
cluster containing all n observations. Clusters are divided until each cluster
contains only a single observation.\cr
At each stage, the cluster with the largest diameter is selected.
(The diameter of a cluster is the largest dissimilarity between any
two of its observations.)\cr
To divide the selected cluster, the algorithm first looks for its most
disparate observation (i.e., which has the largest average dissimilarity to the
other observations of the selected cluster). This observation initiates the
"splinter group". In subsequent steps, the algorithm reassigns observations
that are closer to the "splinter group" than to the "old party". The result
is a division of the selected cluster into two new clusters.
}
\seealso{
  \code{\link{agnes}} also for background and references;
  \code{\link{diana.object}}, \code{\link{daisy}}, \code{\link{dist}},
  \code{\link{plot.diana}}, \code{\link{twins.object}}.
}
\examples{
data(votes.repub)
dv <- diana(votes.repub, metric = "manhattan", stand = TRUE)
print(dv)
plot(dv)

data(agriculture)
## Plot similar to Figure 8 in ref
\dontrun{plot(diana(agriculture), ask = TRUE)}
\testonly{plot(diana(agriculture))}
}
\keyword{cluster}
% Converted by Sd2Rd version 0.3-2.

\eof
% %Z%%M% version %I% created %G%
% %Z%%Q%
\name{diana.object}
\alias{diana.object}
\title{Divisive Analysis (DIANA) Object}
\description{
  The objects of class \code{"diana"}
  represent a divisive hierarchical clustering of a dataset.
}
\section{GENERATION}{
  This class of objects is returned from \code{\link{diana}}.
}
\section{METHODS}{
  The \code{"diana"} class has methods for the following generic functions:
  \code{print}, \code{summary}, \code{plot}.
}
\section{INHERITANCE}{
  The class \code{"diana"} inherits from \code{"twins"}.
  Therefore, the generic function \code{pltree} can be used on a
  \code{diana} object, and an \code{\link{as.hclust}} method is
    available.
}
\value{
  A legitimate \code{diana} object is a list with the following components:
  \item{order}{
    a vector giving a permutation of the original observations to allow for
    plotting, in the sense that the branches of a clustering tree will
    not cross.
  }
  \item{order.lab}{
    a vector similar to \code{order}, but containing observation labels
    instead of observation numbers.  This component is only available if
    the original observations were labelled.
  }
  \item{height}{a vector with the diameters of the clusters prior to splitting.
  }
  \item{dc}{
    the divisive coefficient, measuring the clustering structure of the
    dataset.  For each observation i, denote by \eqn{d(i)} the diameter of the
    last cluster to which it belongs (before being split off as a single
    observation), divided by the diameter of the whole dataset.  The
    \code{dc} is the average of all \eqn{1 - d(i)}.  It can also be seen
    as the average width (or the percentage filled) of the banner plot.
    Because \code{dc} grows with the number of observations, this
    measure should not be used to compare datasets of very different
    sizes.
  }
  \item{merge}{
    an (n-1) by 2 matrix, where n is the number of
    observations. Row i of \code{merge} describes the split at step n-i of
    the clustering. If a number \eqn{j} in row r is negative, then the single
    observation \eqn{|j|} is split off at stage n-r. If j is positive, then the
    cluster that will be split at stage n-j (described by row j), is
    split off at stage n-r.
  }
  \item{diss}{
    an object of class \code{"dissimilarity"}, representing the total
    dissimilarity matrix of the dataset.
  }
  \item{data}{
    a matrix containing the original or standardized measurements, depending
    on the \code{stand} option of the function \code{diana}.  If a
    dissimilarity matrix was given as input structure, then this component
    is not available.
  }
}
\seealso{
  \code{\link{agnes}}, \code{\link{diana}}, \code{\link{plot.diana}},
  \code{\link{twins.object}}.
}
\examples{
## really see example(diana) !   Additionally:
data(votes.repub)
dv0 <- diana(votes.repub, stand = TRUE)
## Cut into 2 groups:
dv2 <- cutree(as.hclust(dv0), k = 2)
table(dv2)
rownames(votes.repub)[dv2 == 1]
}
\keyword{cluster}
% Converted by Sd2Rd version 0.3-2.

\eof
\name{dissimilarity.object}
\alias{dissimilarity.object}
\title{Dissimilarity Matrix Object}
\description{
  Objects of class \code{"dissimilarity"} representing the dissimilarity
  matrix of a dataset.
}
\section{GENERATION}{
  \code{\link{daisy}} returns this class of objects.
  Also the functions \code{pam}, \code{clara}, \code{fanny},
  \code{agnes}, and \code{diana} return a \code{dissimilarity} object,
  as one component of their return objects.
}
\section{METHODS}{
  The \code{"dissimilarity"} class has methods for the following generic
  functions: \code{print}, \code{summary}.
}
\value{
  The dissimilarity matrix is symmetric, and hence its lower triangle
  (column wise) is represented as a vector to save storage space.
  If the object is called \code{do}, and \code{n} the number of
  observations, i.e., \code{n <- attr(do, "Size")}, then
  for \eqn{i < j <= n}, the dissimilarity between (row) i and j is
  \code{do[n*(i-1) - i*(i-1)/2 + j-i]}.
  The length of the vector is \eqn{n*(n-1)/2}, i.e., of order \eqn{n^2}.

  \code{"dissimilarity"} objects also inherit from class
  \code{\link[mva]{dist}} and can use \code{dist} methods, in
  particular, \code{\link{as.matrix}}, such that \eqn{d_{ij}}{d(i,j)}
  from above is just \code{as.matrix(do)[i,j]}.

  The object has the following attributes:
  \item{Size}{the number of observations in the dataset.}
  \item{Metric}{the metric used for calculating the
    dissimilarities.  Possible values are "euclidean", "manhattan",
    "mixed" (if variables of different types were present in the
    dataset), and "unspecified".}
  \item{Labels}{optionally, contains the labels, if any, of the
    observations of the dataset.}
  \item{NA.message}{optionally, if a dissimilarity could not be
    computed, because of too many missing values for some observations
    of the dataset.}
  \item{Types}{when a mixed metric was used, the types for each
    variable as one-letter codes (as in the book, e.g. p.54):
    \describe{
      \item{A}{Asymmetric binary}
      \item{S}{Symmetric  binary}
      \item{N}{Nominal (factor)}
      \item{O}{Ordinal (ordered factor)}
      \item{I}{Interval scaled (numeric)}
      \item{T}{raTio to be log transformed (positive numeric)}
      }.}
}
\seealso{
  \code{\link{daisy}}, \code{\link{dist}},
  \code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}},
  \code{\link{agnes}}, \code{\link{diana}}.
}
%\examples{} --> ./daisy.Rd
\keyword{cluster}


\eof
\name{ellipsoidhull}
\alias{ellipsoidhull}
\alias{print.ellipsoid}
\title{Compute the Ellipsoid Hull or Spanning Ellipsoid of a Point Set}
\description{
  Compute the ``ellipsoid hull'' or ``spanning ellipsoid'', i.e. the
  ellipsoid of minimal volume (`area' in 2D) such that all given points
  lie just inside or on the boundary of the ellipsoid.
}
\usage{
ellipsoidhull(x, tol=0.01, maxit=5000,
              ret.wt = FALSE, ret.sqdist = FALSE, ret.pr = FALSE)
\method{print}{ellipsoid}(x, digits = max(1, getOption("digits") - 2), \dots)
}
\arguments{
  \item{x}{the \eqn{n} \eqn{p}-dimensional points as a numeric
    \eqn{n\times p}{n x p} matrix.}
  \item{tol}{convergence tolerance for Titterington's algorithm.
    Setting this to much smaller values may drastically increase the number of
    iterations needed, and you may want to increase \code{maxit} as well.}
  \item{maxit}{integer giving the maximal number of iteration steps for
    the algorithm.}
  \item{ret.wt, ret.sqdist, ret.pr}{logicals indicating if additional
    information should be returned, \code{ret.wt} specifying the
    \emph{weights}, \code{ret.sqdist} the \emph{\bold{sq}uared
      \bold{dist}ances} and \code{ret.pr} the final \bold{pr}obabilities
    in the algorithms.}
  \item{digits,\dots}{the usual arguments to \code{\link{print}} methods.}
}
\details{
  The ``spanning ellipsoid'' algorithm is said to stem from
  Titterington (1976), in Pison et al. (1999) who use it for
  \code{\link{clusplot.default}}.\cr
  The problem can be seen as a special case of the ``Min.Vol.''
  ellipsoid of which a more flexible and general implementation is
  \code{\link[lqs]{cov.mve}} in the \code{lqs} package.
}
\value{
  an object of class \code{"ellipsoid"}, basically a \code{\link{list}}
  with several components, comprising at least
  \item{cov}{\eqn{p\times p}{p x p} \emph{covariance} matrix describing
    the ellipsoid.}
  \item{loc}{\eqn{p}-dimensional location of the ellipsoid center.}
  \item{d2}{average squared radius.}
  \item{wt}{the vector of weights iff \code{ret.wt} was true.}
  \item{sqdist}{the vector of squared distances iff \code{ret.sqdist} was true.}
  \item{prob}{the vector of algorithm probabilities iff \code{ret.pr} was true.}
  \item{it}{number of iterations used.}
  \item{tol, maxit}{just the input argument, see above.}
  \item{eps}{the achieved tolerance which is the maximal squared radius
    minus \eqn{p}.}
  \item{ierr}{error code as from the algorithm; \code{0} means \emph{ok}.}
  \item{conv}{logical indicating if the algorithm converged.  This is defined as
    \code{it < maxit && ierr == 0}.}
}
\references{
  Pison, G., Struyf, A. and Rousseeuw, P.J. (1999)
  Displaying a Clustering with CLUSPLOT,
  \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr
  A version of this is available as technical report from
  \url{http://win-www.uia.ac.be/u/statis/abstract/Disclu99.htm}

  D.N. Titterington. (1976)
  Algorithms for computing {D}-optimal design on finite design spaces.  In
  \emph{Proc.\ of the 1976 Conf.\ on Information Science and Systems},
  213--216; Johns Hopkins University.
}

\author{Martin Maechler did the present class implementation; Rousseeuw
  et al did the underlying code.}
\seealso{\code{\link[base]{chull}} for the convex hull,
  \code{\link{clusplot}} which makes use of this; \code{\link[lqs]{cov.mve}}.}
\examples{
x <- rnorm(100)
xy <- unname(cbind(x, rnorm(100) + 2*x + 10))
exy <- ellipsoidhull(xy)
exy # >> calling print.ellipsoid()

plot(xy)
lines(predict(exy))
points(rbind(exy$loc), col = "red", cex = 3, pch = 13)

exy <- ellipsoidhull(xy, tol = 1e-7, ret.wt = TRUE, ret.sq = TRUE)
str(exy) # had small `tol', hence many iterations
(ii <- which(zapsmall(exy $ wt) > 1e-6)) # only about 4 to 6 points
round(exy$wt[ii],3); sum(exy$wt[ii]) # sum to 1
}
\keyword{dplot}
\keyword{hplot}% << ? chull has "hplot" as well.

\eof
\name{fanny}
\alias{fanny}
\title{Fuzzy Analysis Clustering}
\description{
  Computes a fuzzy clustering of the data into \code{k} clusters.
}
\usage{
fanny(x, k, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE)
}
\arguments{
  \item{x}{
    data matrix or data frame, or dissimilarity matrix, depending on the
    value of the \code{diss} argument.

    In case of a matrix or data frame, each row corresponds to an observation,
    and each column corresponds to a variable. All variables must be numeric.
    Missing values (NAs) are allowed.

    In case of a dissimilarity matrix, \code{x} is typically the output
    of \code{\link{daisy}} or \code{\link[mva]{dist}}.  Also a vector of
    length n*(n-1)/2 is allowed (where n is the number of observations),
    and will be interpreted in the same way as the output of the
    above-mentioned functions.  Missing values (NAs) are not allowed.
  }
  \item{k}{integer giving the desired number of clusters.  It is
    required that \eqn{0 < k < n/2} where \eqn{n} is the number of
    observations.}
  \item{diss}{
    logical flag: if TRUE (default for \code{dist} or
    \code{dissimilarity} objects), then \code{x} is assumed to be a
    dissimilarity matrix.  If FALSE, then \code{x} is treated as
    a matrix of observations by variables.
  }
  \item{metric}{
    character string specifying the metric to be used for calculating
    dissimilarities between observations.
    The currently available options are "euclidean" and "manhattan".
    Euclidean distances are root sum-of-squares of differences, and
    manhattan distances are the sum of absolute differences.
    If \code{x} is already a dissimilarity matrix, then this argument will
    be ignored.
  }
  \item{stand}{logical; if true, the measurements in \code{x} are
    standardized before calculating the dissimilarities.  Measurements
    are standardized for each variable (column), by subtracting the
    variable's mean value and dividing by the variable's mean absolute
    deviation.  If \code{x} is already a dissimilarity matrix, then this
    argument will be ignored.}
}
\value{
  an object of class \code{"fanny"} representing the clustering.
  See \code{\link{fanny.object}} for details.
}
\details{
  In a fuzzy clustering, each observation is ``spread out'' over the various
  clusters. Denote by u(i,v) the membership of observation i to cluster v.
  The memberships are nonnegative, and for a fixed observation i they sum to 1.
  The particular method \code{fanny} stems from chapter 4 of
  Kaufman and Rousseeuw (1990).\cr
  Compared to other fuzzy clustering methods, \code{fanny} has the following
  features: (a) it also accepts a dissimilarity matrix; (b) it is
  more robust to the \code{spherical cluster} assumption; (c) it provides
  a novel graphical display, the silhouette plot (see
  \code{\link{plot.partition}}).

  Fanny aims to minimize the objective function
  \deqn{\sum_{v=1}^k
    \frac{\sum_{i=1}^n\sum_{j=1}^n u_{iv}^2 u_{jv}^2 d(i,j)}{
      2 \sum_{j=1}^n u_{jv}^2}}{%
    SUM_[v=1..k] (SUM_(i,j) u(i,v)^2 u(j,v)^2 d(i,j)) / (2 SUM_j u(j,v)^2)}
  where \eqn{n} is the number of observations, \eqn{k} is the number of
  clusters and \eqn{d(i,j)} is the dissimilarity between observations
  \eqn{i} and \eqn{j}.
}
\seealso{
  \code{\link{agnes}} for background and references;
  \code{\link{fanny.object}}, \code{\link{partition.object}},
  \code{\link{plot.partition}}, \code{\link{daisy}}, \code{\link{dist}}.
}
\examples{
## generate 25 objects, divided into two clusters, and 3 objects lying
## between those clusters.
x <- rbind(cbind(rnorm(10, 0, 0.5), rnorm(10, 0, 0.5)),
           cbind(rnorm(15, 5, 0.5), rnorm(15, 5, 0.5)),
           cbind(rnorm( 3,3.5,0.5), rnorm( 3,3.5,0.5)))
fannyx <- fanny(x, 2)
fannyx
summary(fannyx)
plot(fannyx)

data(ruspini)
## Plot similar to Figure 6 in Struyf et al (1996)
plot(fanny(ruspini, 5))
}
\keyword{cluster}


\eof
\name{fanny.object}
\alias{fanny.object}
\title{Fuzzy Analysis (FANNY) Object}
\description{
  The objects of class \code{"fanny"} represent a fuzzy clustering of a
  dataset.
}
\section{GENERATION}{
  These objects are returned from \code{\link{fanny}}.
}
\section{METHODS}{
  The \code{"fanny"} class has methods for the following generic functions:
  \code{print}, \code{summary}.
}
\section{INHERITANCE}{
  The class \code{"fanny"} inherits from \code{"partition"}.
  Therefore, the generic functions \code{plot} and \code{clusplot} can
  be used on a \code{fanny} object.
}
\value{
  A legitimate \code{fanny} object is a list with the following components:
  \item{membership}{
    matrix containing the memberships for each pair consisting of an
    observation and a cluster.
  }
  \item{coeff}{
    Dunn's partition coefficient \eqn{F(k)} of the clustering, where
    \eqn{k} is the number of clusters. \eqn{F(k)} is the sum of all
    squared membership coefficients, divided by the number of
    observations.  Its value is always between \eqn{1/k} and 1.

    The normalized form of the coefficient is also given. It is defined as
    \eqn{(F(k) - 1/k) / (1 - 1/k)}, and ranges between 0 and 1.
    A low value of Dunn's coefficient indicates a very fuzzy clustering,
    whereas a value close to 1 indicates a near-crisp clustering.
  }
  \item{clustering}{
    the clustering vector of the nearest crisp clustering, see
    \code{\link{partition.object}}.}
  \item{objective}{
    the objective function and the number of iterations the \code{fanny}
    algorithm needed to reach this minimal value.
  }
  \item{diss}{
    an object of class \code{"dissimilarity"}, see
    \code{\link{partition.object}}.}
  \item{call}{generating call, see \code{\link{partition.object}}.}
  \item{silinfo}{
    list with silhouette information of the nearest crisp clustering, see
    \code{\link{partition.object}}.}
  \item{data}{matrix, possibly standardized, or NULL, see
    \code{\link{partition.object}}.}
}
\seealso{
  \code{\link{fanny}}, \code{\link{dissimilarity.object}},
  \code{\link{partition.object}}, \code{\link{plot.partition}}.
}
\keyword{cluster}


\eof
\name{flower}
\alias{flower}
\title{Flower Characteristics}
\usage{data(flower)}
\description{8 characteristics for 18 popular flowers.}
\format{
  A data frame with 18 observations on 8 variables:
  \tabular{rll}{
    [ , "V1"] \tab factor  \tab winters \cr
    [ , "V2"] \tab factor  \tab shadow \cr
    [ , "V3"] \tab factor  \tab tubers \cr
    [ , "V4"] \tab factor  \tab color \cr
    [ , "V5"] \tab ordered \tab soil \cr
    [ , "V6"] \tab ordered \tab preference \cr
    [ , "V7"] \tab numeric \tab height \cr
    [ , "V8"] \tab numeric \tab distance
  }

  \describe{
    \item{V1}{winters, is binary and indicates whether the plant may be left
      in the garden when it freezes.}

    \item{V2}{shadow, is binary and shows whether the plant needs to stand
      in the shadow.}

    \item{V3}{tubers, is asymmetric binary and distinguishes between plants
      with tubers and plants that grow in any other way.}

    \item{V4}{color, is nominal and specifies the flower's color (1 = white,
      2 = yellow, 3 = pink, 4 = red, 5 = blue).}

    \item{V5}{soil, is ordinal and indicates whether the plant grows in dry
      (1), normal (2), or wet (3) soil.}

    \item{V6}{preference, is ordinal and gives someone's preference ranking
      going from 1 to 18.}

    \item{V7}{height, is interval scaled, the plant's height in centimeters.}

    \item{V8}{distance, is interval scaled, the distance in centimeters that
      should be left between the plants.}
  }
}
\source{The reference below.}
\references{
  Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996):
  Clustering in an Object-Oriented Environment.
  \emph{Journal of Statistical Software}, \bold{1}.
  \url{http://www.stat.ucla.edu/journals/jss/}
}
\examples{
data(flower)
## Example 2 in ref
daisy(flower, type = list(asymm = 3))
daisy(flower, type = list(asymm = c(1, 3), ordratio = 7))
}
\keyword{datasets}

\eof
\name{lower.to.upper.tri.inds}
\alias{lower.to.upper.tri.inds}
\alias{upper.to.lower.tri.inds}
\title{Permute Indices for Triangular Matrices}
\description{
  Compute index vectors for extracting or reordering of lower or upper
  triangular matrices that are stored as contiguous vectors.
}
\usage{
lower.to.upper.tri.inds(n)
upper.to.lower.tri.inds(n)
}
\arguments{
  \item{n}{integer larger than 1.}
}
\value{
  integer vector containing a permutation of \code{1:N} where
  \eqn{N = n(n-1)/2}.
}
\note{
  These functions are mainly for internal use in the cluster package,
  and may not remain available (unless we see a good reason).
}
\seealso{\code{\link{upper.tri}}, \code{\link{lower.tri}} with a related
  purpose.}
\examples{
m5 <- matrix(NA,5,5)
m <- m5; m[lower.tri(m)] <- upper.to.lower.tri.inds(5); m
m <- m5; m[upper.tri(m)] <- lower.to.upper.tri.inds(5); m

stopifnot(lower.to.upper.tri.inds(2) == 1,
          lower.to.upper.tri.inds(3) == 1:3,
          upper.to.lower.tri.inds(3) == 1:3,
     sort(upper.to.lower.tri.inds(5)) == 1:10,
     sort(lower.to.upper.tri.inds(6)) == 1:15)
}
\keyword{array}
\keyword{utilities}

\eof
\name{mona}
\alias{mona}
\title{MONothetic Analysis Clustering of Binary Variables}
\description{
  Returns a list representing a divisive hierarchical clustering of
  a dataset with binary variables only.
}
\usage{
mona(x)
}
\arguments{
  \item{x}{
    data matrix or data frame in which each row corresponds to an
    observation, and each column corresponds to a variable.  All
    variables must be binary.  A limited number of missing values (NAs)
    is allowed. Every observation must have at least one value different
    from NA.  No variable should have half of its values missing. There
    must be at least one variable which has no missing values.  A
    variable with all its non-missing values identical, is not allowed.
  }
}
\value{
  an object of class \code{"mona"} representing the clustering.
  See \code{\link{mona.object}} for details.
}
\details{
\code{mona} is fully described in chapter 7 of Kaufman and Rousseeuw (1990).
It is "monothetic" in the sense that each division is based on a
single (well-chosen) variable, whereas most other hierarchical methods
(including \code{agnes} and \code{diana}) are "polythetic", i.e. they use
all variables together.

The \code{mona}-algorithm constructs a hierarchy of clusterings,
starting with one large
cluster. Clusters are divided until all observations in the same cluster have
identical values for all variables.\cr
At each stage, all clusters are divided according to the values of one
variable. A cluster is divided into one cluster with all observations having
value 1 for that variable, and another cluster with all observations having
value 0 for that variable.

The variable used for splitting a cluster is the variable with the maximal
total association to the other variables, according to the observations in the
cluster to be split. The association between variables f and g
is given by a(f,g)*d(f,g) - b(f,g)*c(f,g), where a(f,g), b(f,g), c(f,g),
and d(f,g) are the numbers in the contingency table of f and g.
[That is, a(f,g) (resp. d(f,g)) is the number of observations for which f and g
both have value 0 (resp. value 1); b(f,g) (resp. c(f,g)) is the number of
observations for which f has value 0 (resp. 1) and g has value 1 (resp. 0).]
The total association of a variable f is the sum of its associations to all
variables.

This algorithm does not work with missing values, therefore the data are
revised, i.e., all missing values are filled in. To do this, the same measure
of association between variables is used as in the algorithm. When variable
f has missing values, the variable g with the largest absolute association
to f is looked up. When the association between f and g is positive,
any missing value of f is replaced by the value of g for the same
observation. If the association between f and g is negative, then any missing
value of f is replaced by the value of 1-g for the same
observation.
}
\seealso{
  \code{\link{agnes}} for background and references;
  \code{\link{mona.object}}, \code{\link{plot.mona}}.
}
\examples{
data(animals)
ma <- mona(animals)
ma
## Plot similar to Figure 10 in Struyf et al (1996)
plot(ma)
}
\keyword{cluster}


\eof
\name{mona.object}
\alias{mona.object}
\title{Monothetic Analysis (MONA) Object}
\description{
  The objects of class \code{"mona"} represent the divisive
  hierarchical clustering of a dataset with only binary variables
  (measurements).   This class of objects is returned from
  \code{\link{mona}}.
}
\section{METHODS}{
  The \code{"mona"} class has methods for the following generic functions:
  \code{print}, \code{summary}, \code{plot}.
}
\value{
  A legitimate \code{mona} object is a list with the following components:

  \item{data}{
    matrix with the same dimensions as the original data matrix,
    but with factors coded as 0 and 1, and all missing values replaced.
  }
  \item{order}{
    a vector giving a permutation of the original observations to allow
    for plotting, in the sense that the branches of a clustering tree
    will not cross.
  }
  \item{order.lab}{
    a vector similar to \code{order}, but containing observation labels
    instead of observation numbers. This component is only available if
    the original observations were labelled.
  }
  \item{variable}{
    vector of length n-1 where n is the number of observations,
    specifying the variables used to separate the observations of \code{order}.
  }
  \item{step}{
    vector of length n-1 where n is the number of observations,
    specifying the separation steps at which the observations of
    \code{order} are separated.
  }
}
\seealso{\code{\link{mona}} for examples etc, \code{\link{plot.mona}}.
}
\keyword{cluster}


\eof
%-*- mode: Rd; kept-old-versions: 12;  kept-new-versions: 20; -*-
\name{pam}
\alias{pam}
\title{Partitioning Around Medoids}
\description{
  Partitioning (clustering) of the data into \code{k} clusters ``around
  medoids'', a more robust version of K-means.
}
\usage{
pam(x, k, diss = inherits(x, "dist"),
    metric = "euclidean", stand = FALSE, cluster.only = FALSE,
    keep.diss = !diss && !cluster.only && n < 100,
    keep.data = !diss && !cluster.only)
}
\arguments{
  \item{x}{
    data matrix or data frame, or dissimilarity matrix or object,
    depending on the value of the \code{diss} argument.

    In case of a matrix or data frame, each row corresponds to an
    observation, and each column corresponds to a variable.  All
    variables must be numeric.  Missing values (\code{\link{NA}}s)
    \emph{are} allowed---as long as every pair of observations has at
    least one case not missing.

    In case of a dissimilarity matrix, \code{x} is typically the output
    of \code{\link{daisy}} or \code{\link{dist}}.  Also a vector of
    length n*(n-1)/2 is allowed (where n is the number of observations),
    and will be interpreted in the same way as the output of the
    above-mentioned functions. Missing values (NAs) are \emph{not}
    allowed.
  }
  \item{k}{positive integer specifying the number of clusters, less than
    the number of observations.}
  \item{diss}{
    logical flag: if TRUE (default for \code{dist} or
    \code{dissimilarity} objects), then \code{x} will be considered as a
    dissimilarity matrix.  If FALSE, then \code{x} will be considered as
    a matrix of observations by variables.
  }
  \item{metric}{
    character string specifying the metric to be used for calculating
    dissimilarities between observations.\cr
    The currently available options are "euclidean" and
    "manhattan".  Euclidean distances are root sum-of-squares of
    differences, and manhattan distances are the sum of absolute
    differences.  If \code{x} is already a dissimilarity matrix, then
    this argument will be ignored.
  }
  \item{stand}{logical; if true, the measurements in \code{x} are
    standardized before calculating the dissimilarities.  Measurements
    are standardized for each variable (column), by subtracting the
    variable's mean value and dividing by the variable's mean absolute
    deviation.  If \code{x} is already a dissimilarity matrix, then this
    argument will be ignored.}
  \item{cluster.only}{logical; if true, only the clustering will be
    computed and returned, see details.}
  \item{keep.diss, keep.data}{logicals indicating if the dissimilarities
    and/or input data \code{x} should be kept in the result.  Setting
    these to \code{FALSE} can give much smaller results and hence even save
    memory allocation \emph{time}.}
}
\value{
  an object of class \code{"pam"} representing the clustering.  See
  \code{?\link{pam.object}} for details.
}
\details{
  \code{pam} is fully described in chapter 2 of Kaufman and Rousseeuw
  (1990).  Compared to the k-means approach in \code{kmeans}, the
  function \code{pam} has the following features: (a) it also accepts a
  dissimilarity matrix; (b) it is more robust because it minimizes a sum
  of dissimilarities instead of a sum of squared euclidean distances;
  (c) it provides a novel graphical display, the silhouette plot (see
  \code{plot.partition}); (d) it allows selecting the number of clusters
  using \code{mean(\link{silhouette}(pr))} on the result
  \code{pr <- pam(..)}, or directly its component
  \code{pr$silinfo$avg.width}, see also \code{\link{pam.object}}.

  When \code{cluster.only} is true, the result is simply a (possibly
  named) integer vector specifying the clustering, i.e.,\cr
  \code{pam(x,k, cluster.only=TRUE)} is the same as \cr
  \code{pam(x,k)$clustering} but computed more efficiently.

  The \code{pam}-algorithm is based on the search for \code{k}
  representative objects or medoids among the observations of the
  dataset.  These observations should represent the structure of the
  data.  After finding a set of \code{k} medoids, \code{k} clusters are
  constructed by assigning each observation to the nearest medoid.  The
  goal is to find \code{k} representative objects which minimize the sum
  of the dissimilarities of the observations to their closest
  representative object.
  \cr
  The algorithm first looks for a good initial set of medoids (this is
  called the \bold{build} phase).  Then it finds a local minimum for the
  objective function, that is, a solution such that there is no single
  switch of an observation with a medoid that will decrease the
  objective (this is called the \bold{swap} phase).
}
\note{
  For datasets larger than (say) 200 observations, \code{pam} will take a lot of
  computation time.  Then the function \code{\link{clara}} is preferable.
}
\seealso{
  \code{\link{agnes}} for background and references;
  \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{daisy}},
  \code{\link{partition.object}}, \code{\link{plot.partition}},
  \code{\link{dist}}.
}
\examples{
## generate 25 objects, divided into 2 clusters.
x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)),
           cbind(rnorm(15,5,0.5), rnorm(15,5,0.5)))
pamx <- pam(x, 2)
pamx
summary(pamx)
plot(pamx)

pam(daisy(x, metric = "manhattan"), 2, diss = TRUE)

data(ruspini)
## Plot similar to Figure 4 in Struyf et al (1996)
\dontrun{plot(pam(ruspini, 4), ask = TRUE)}
\testonly{plot(pam(ruspini, 4))}
}
\keyword{cluster}

\eof
\name{pam.object}
\alias{pam.object}
\title{Partitioning Around Medoids (PAM) Object}
\description{
  The objects of class \code{"pam"} represent a partitioning of a
  dataset into clusters.
}
\section{GENERATION}{
  These objects are returned from \code{\link{pam}}.}
\section{METHODS}{
  The \code{"pam"} class has methods for the following generic functions:
  \code{print}, \code{summary}.
}
\section{INHERITANCE}{
  The class \code{"pam"} inherits from \code{"partition"}.
  Therefore, the generic functions \code{plot} and \code{clusplot} can
  be used on a \code{pam} object.
}
\value{
  A legitimate \code{pam} object is a list with the following components:
  \item{medoids}{
    the medoids or representative objects of the
    clusters. If a dissimilarity matrix was given as input to
    \code{pam}, then a vector of numbers or labels of observations is
    given, else \code{medoids} is a matrix with in each row the
    coordinates of one medoid.}
  \item{clustering}{the clustering vector, see \code{\link{partition.object}}.}
  \item{objective}{the objective function after the first and second
    step of the \code{pam} algorithm.}
  \item{isolation}{
    vector with length equal to the number of clusters, specifying which
    clusters are isolated clusters (L- or L*-clusters) and which clusters are
    not isolated.\cr
    A cluster is an L*-cluster iff its diameter is smaller than its
    separation.  A cluster is an L-cluster iff for each observation i
    the maximal dissimilarity between i and any other observation of the
    cluster is smaller than the minimal dissimilarity between i and any
    observation of another cluster.  Clearly each L*-cluster is also an
    L-cluster.
  }
  \item{clusinfo}{
    matrix, each row gives numerical information for one cluster. These
    are the cardinality of the cluster (number of observations), the
    maximal and average dissimilarity between the observations in the
    cluster and the cluster's medoid, %% FIXME: Now differs from clara.object.Rd:
    the diameter of the cluster
    (maximal dissimilarity between two observations of the cluster), and
    the separation of the cluster (minimal dissimilarity between an
    observation of the cluster and an observation of another cluster).
  }
  \item{silinfo}{list with silhouette width information, see
    \code{\link{partition.object}}.}
  \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.}
  \item{call}{generating call, see \code{\link{partition.object}}.}
  \item{data}{(possibly standardized) see \code{\link{partition.object}}.}
}
\seealso{
  \code{\link{pam}}, \code{\link{dissimilarity.object}},
  \code{\link{partition.object}}, \code{\link{plot.partition}}.
}
\examples{
## Use the silhouette widths for assessing the best number of clusters,
## following a one-dimensional example from Christian Hennig :
##
x <- c(rnorm(50), rnorm(50,mean=5), rnorm(30,mean=15))
asw <- numeric(20)
## Note that "k=1" won't work!
for (k in 2:20)
  asw[k] <- pam(x, k) $ silinfo $ avg.width
k.best <- which.max(asw)
cat("silhouette-optimal number of clusters:", k.best, "\n")

plot(1:20, asw, type= "h", main = "pam() clustering assessment",
     xlab= "k  (# clusters)", ylab = "average silhouette width")
axis(1, k.best, paste("best",k.best,sep="\n"), col = "red", col.axis = "red")
}
\keyword{cluster}


\eof
\name{partition.object}
\alias{partition}% == class
\alias{partition.object}
\title{Partitioning Object}
\description{
  The objects of class \code{"partition"} represent a partitioning of a
  dataset into clusters.
}
\section{GENERATION}{
  These objects are returned from \code{pam}, \code{clara} or \code{fanny}.
}
\section{METHODS}{
  The \code{"partition"} class has methods for the following generic functions:
  \code{plot}, \code{clusplot}.
}
\section{INHERITANCE}{
  The following classes inherit from class \code{"partition"} :
  \code{"pam"}, \code{"clara"} and \code{"fanny"}.

  See \code{\link{pam.object}}, \code{\link{clara.object}} and
  \code{\link{fanny.object}} for details.
}
\value{a \code{"partition"} object is a list with the following
  (and typically more) components:
  \item{clustering}{
    the clustering vector.  An integer vector of length \eqn{n}, the number of
    observations, giving for each observation the number (`id') of the
    cluster to which it belongs.}
  \item{call}{the matched \code{\link{call}} generating the object.}
  \item{silinfo}{
    a list with all \emph{silhouette} information, only available when
    the number of clusters is non-trivial, i.e., \eqn{1 < k < n} and
    then has the following components, see \code{\link{silhouette}}
    \describe{
      \item{widths}{an (n x 3) matrix, as returned by
	\code{\link{silhouette}()}, with for each observation i the
	cluster to which i belongs, as well as the neighbor cluster of i
	(the cluster, not containing i, for which the average
	dissimilarity between its observations and i is minimal), and
	the silhouette width \eqn{s(i)} of the observation.
      }
      \item{clus.avg.widths}{the average silhouette width per cluster.}
      \item{avg.width}{the average silhouette width for the dataset, i.e.,
	simply the average of \eqn{s(i)} over all observations \eqn{i}.}
    }% describe
    This information is also needed to construct a \emph{silhouette plot} of
    the clustering, see \code{\link{plot.partition}}.

    Note that \code{avg.width} can be maximized over different
    clusterings (e.g. with varying number of clusters) to choose an
    \emph{optimal} clustering.%% see an example or a demo << FIXME >>
  }
  \item{objective}{value of criterion optimized during the
    partitioning algorithm, may have more than one entry for different stages.}
  \item{diss}{
    an object of class \code{"dissimilarity"}, representing the total
    dissimilarity matrix of the dataset (or relevant subset, e.g. for
    \code{clara}).
  }
  \item{data}{
    a matrix containing the original or standardized data.  This might
    be missing to save memory or when a dissimilarity matrix was given
    as input structure to the clustering method.
  }
}
\seealso{\code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}}.
}
\keyword{cluster}

\eof
\name{plot.agnes}
%% almost identical to ./plot.diana.Rd and quite similar to ./plot.mona.Rd
\alias{plot.agnes}
\title{Plots of an Agglomerative Hierarchical Clustering}
\description{
  Creates plots for visualizing an \code{agnes} object.
}
\usage{
\method{plot}{agnes}(x, ask = FALSE, which.plots = NULL, main = NULL,
           sub = paste("Agglomerative Coefficient = ",round(x$ac, digits = 2)),
           adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots)
}
\arguments{
  \item{x}{an object of class \code{"agnes"}, typically created by
    \code{\link{agnes}(.)}.}
  \item{ask}{logical; if true and \code{which.plots} is \code{NULL},
    \code{plot.agnes} operates in interactive mode, via \code{\link{menu}}.}
  \item{which.plots}{integer vector or NULL (default), the latter
    producing both plots.  Otherwise, \code{which.plots}
    must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a
    dendrogram or ``clustering tree''.}
  \item{main, sub}{main and sub title for the plot, with convenient
    defaults.  See documentation for these arguments in \code{\link{plot.default}}.}
  \item{adj}{for label adjustment in \code{\link{bannerplot}()}.}
  \item{nmax.lab}{integer indicating the number of labels which is
    considered too large for single-name labelling the banner plot.}
  \item{max.strlen}{positive integer giving the length to which
    strings are truncated in banner plot labeling.}
  \item{xax.pretty}{logical or integer indicating if
    \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis.
    \code{xax.pretty = FALSE} is for back compatibility.}
  \item{\dots}{graphical parameters (see \code{\link{par}}) may also
    be supplied and are passed to \code{\link{bannerplot}()} or
    \code{\link{pltree}()}, respectively.}
}
\section{Side Effects}{
  Appropriate plots are produced on the current graphics device. This can
  be one or both of the following choices:
  \cr Banner
  \cr Clustering tree
}
\details{
  When \code{ask = TRUE}, rather than producing each plot sequentially,
  \code{plot.agnes} displays a menu listing all the plots that can be produced.
  If the menu is not desired but a pause between plots is still wanted
  one must set \code{par(ask= TRUE)} before invoking the plot command.

  The banner displays the hierarchy of clusters, and is equivalent to a tree.
  See Rousseeuw (1986) or chapter 5 of Kaufman and Rousseeuw (1990).
  The banner plots distances at which observations and clusters are merged.
  The observations are listed in the order found by the \code{agnes} algorithm,
  and the numbers in the \code{height} vector are represented as bars
  between the observations.

  The leaves of the clustering tree are the original observations.  Two
  branches come together at the distance between the two clusters being merged.

  For more customization of the plots, rather call
  \code{\link{bannerplot}} and \code{\link{pltree}} directly with
  corresponding arguments, e.g., \code{xlab} or \code{ylab}.
}
\note{
  In the banner plot, observation labels are only printed when the
  number of observations is less than \code{nmax.lab} (35, by
  default), for readability.  Moreover, observation labels are truncated
  to maximally \code{max.strlen} (5) characters.
}
\references{
  Kaufman, L. and Rousseeuw, P.J. (1990)
  \emph{Finding Groups in Data: An Introduction to Cluster Analysis}.
  Wiley, New York.

  Rousseeuw, P.J. (1986). A visual display for hierarchical classification,
  in \emph{Data Analysis and Informatics 4}; edited by E. Diday,
  Y. Escoufier, L. Lebart, J. Pages, Y. Schektman, and R. Tomassone.
  North-Holland, Amsterdam, 743--748.

  Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997)
  Integrating Robust Clustering Techniques in S-PLUS,
  \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37.
}
\seealso{
  \code{\link{agnes}} and \code{\link{agnes.object}};
  \code{\link{bannerplot}}, \code{\link{pltree}}, and \code{\link{par}}.
}
\examples{
## Can also pass `labels' to pltree() and bannerplot():
data(iris)
cS <- as.character(Sp <- iris$Species)
cS[Sp == "setosa"] <- "S"
cS[Sp == "versicolor"] <- "V"
cS[Sp == "virginica"] <- "g"
ai <- agnes(iris[, 1:4])
plot(ai, labels = cS, nmax = 150)# bannerplot labels are a mess
}
\keyword{cluster}
\keyword{hplot}

\eof
\name{plot.diana}
%% almost identical to ./plot.agnes.Rd and quite similar to ./plot.mona.Rd
\alias{plot.diana}
\title{Plots of a Divisive Hierarchical Clustering}
\description{
  Creates plots for visualizing a \code{diana} object.
}
\usage{
\method{plot}{diana}(x, ask = FALSE, which.plots = NULL, main = NULL,
           sub = paste("Divisive Coefficient = ", round(x$dc, digits = 2)),
           adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots)
}
\arguments{
  \item{x}{an object of class \code{"diana"}, typically created by
    \code{\link{diana}(.)}.}
  \item{ask}{logical; if true and \code{which.plots} is \code{NULL},
    \code{plot.diana} operates in interactive mode, via \code{\link{menu}}.}
  \item{which.plots}{integer vector or NULL (default), the latter
    producing both plots.  Otherwise, \code{which.plots}
    must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a
    dendrogram or ``clustering tree''.}
  \item{main, sub}{main and sub title for the plot, each with a convenient
    default.  See documentation for these arguments in
    \code{\link{plot.default}}.}
  \item{adj}{for label adjustment in \code{\link{bannerplot}()}.}
  \item{nmax.lab}{integer indicating the number of labels which is
    considered too large for single-name labelling the banner plot.}
  \item{max.strlen}{positive integer giving the length to which
    strings are truncated in banner plot labeling.}
  \item{xax.pretty}{logical or integer indicating if
    \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis.
    \code{xax.pretty = FALSE} is for back compatibility.}
  \item{\dots}{graphical parameters (see \code{\link{par}}) may also
    be supplied and are passed to \code{\link{bannerplot}()} or
    \code{\link{pltree}()}, respectively.}
}
\section{Side Effects}{
  An appropriate plot is produced on the current graphics device. This can
  be one or both of the following choices:
  \cr Banner
  \cr Clustering tree
}
\details{
When \code{ask = TRUE}, rather than producing each plot sequentially,
\code{plot.diana} displays a menu listing all the plots that can be produced.
If the menu is not desired but a pause between plots is still wanted
one must set \code{par(ask= TRUE)} before invoking the plot command.

The banner displays the hierarchy of clusters, and is equivalent to a tree.
See Rousseeuw (1986) or chapter 6 of Kaufman and Rousseeuw (1990).
The banner plots the diameter of each cluster being split.
The observations are listed in the order found by the \code{diana}
algorithm, and the numbers in the \code{height} vector are represented
as bars between the observations.

The leaves of the clustering tree are the original observations.
A branch splits up at the diameter of the cluster being split.
}
\note{
  In the banner plot,
  observation labels are only printed when the number of observations is
  less than \code{nmax.lab} (35, by default), for readability.
  Moreover, observation labels are truncated to maximally
  \code{max.strlen} (5) characters.
}
\references{see those in \code{\link{plot.agnes}}.}
\seealso{
  \code{\link{diana}}, \code{\link{diana.object}},
  \code{\link{twins.object}}, \code{\link{par}}.
}
\examples{
example(diana)# -> dv <- diana(....)

plot(dv, which = 1, nmax.lab = 100)

## wider labels :
op <- par(mar = par("mar") + c(0, 2, 0,0))
plot(dv, which = 1, nmax.lab = 100, max.strlen = 12)
par(op)
}
\keyword{cluster}
\keyword{hplot}


\eof
\name{plot.mona}
\alias{plot.mona}
\title{Banner of Monothetic Divisive Hierarchical Clusterings}
\description{
  Creates the banner of a \code{mona} object.
}
\usage{
\method{plot}{mona}(x, main = paste("Banner of ", deparse(x$call)),
          sub = NULL, xlab = "Separation step",
	  col = c(2,0), axes = TRUE, adj = 0,
          nmax.lab = 35, max.strlen = 5, \dots)
}
\arguments{
  \item{x}{an object of class \code{"mona"}, typically created by
    \code{\link{mona}(.)}.}
  \item{main,sub}{main and sub titles for the plot, with convenient
    defaults.  See documentation in \code{\link{plot.default}}.}
  \item{xlab}{x axis label, see \code{\link{title}}.}
  \item{col,adj}{graphical parameters passed to \code{\link{bannerplot}()}.}
  \item{axes}{logical, indicating if (labeled) axes should be drawn.}
  \item{nmax.lab}{integer indicating the number of labels which is
    considered too large for labeling.}
  \item{max.strlen}{positive integer giving the length to which
    strings are truncated in labeling.}
  \item{\dots}{further graphical arguments are passed to
    \code{\link{bannerplot}()} and \code{\link{text}}.}
}
\section{Side Effects}{
  A banner is plotted on the current graphics device.
}
\details{
  Plots the separation step at which clusters are split.  The
  observations are given in the order found by the \code{mona}
  algorithm, and the numbers in the \code{step} vector are represented as
  bars between the observations.

  When a long bar is drawn between two observations,
  those observations have the same value for each variable.
  See chapter 7 of Kaufman and Rousseeuw (1990).
}
\note{
  In the banner plot,
  observation labels are only printed when the number of observations is
  less than \code{nmax.lab} (35, by default), for readability.
  Moreover, observation labels are truncated to maximally
  \code{max.strlen} (5) characters.
}
\references{see those in \code{\link{plot.agnes}}.}
\seealso{
  \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{par}}.
}
\keyword{cluster}
\keyword{hplot}
% Converted by Sd2Rd version 0.3-2.

\eof
\name{plot.partition}
\alias{plot.partition}
\title{Plot of a Partition of the Data Set}
\description{Creates plots for visualizing a \code{partition} object.}
\usage{
\method{plot}{partition}(x, ask = FALSE, which.plots = NULL,
               nmax.lab = 40, max.strlen = 5, data = x$data, dist = NULL,
	       cor = TRUE, stand = FALSE, lines = 2,
	       shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE,
	       span = TRUE, xlim = NULL, ylim = NULL, main = NULL, \dots)
}
\arguments{
  \item{x}{an object of class \code{"partition"}, typically created by the
    functions \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.}
  \item{ask}{logical; if true and \code{which.plots} is \code{NULL},
    \code{plot.partition} operates in interactive mode, via \code{\link{menu}}.}
  \item{which.plots}{integer vector or NULL (default), the latter
    producing both plots.  Otherwise, \code{which.plots} must contain
    integers of \code{1} for a \emph{clusplot} or \code{2} for
    \emph{silhouette}.}
  \item{nmax.lab}{integer indicating the number of labels which is
    considered too large for single-name labeling the silhouette plot.}
  \item{max.strlen}{positive integer giving the length to which
    strings are truncated in silhouette plot labeling.}
  \item{data}{numeric matrix with the scaled data; per default taken
    from the partition object \code{x}, but can be specified explicitly.}
  \item{dist}{when \code{x} does not have a \code{diss} component as for
    \code{\link{pam}(*, keep.diss=FALSE)}, \code{dist} must be the
    dissimilarity if a clusplot is desired.}
  \item{cor,stand,lines,shade,color,labels,plotchar,span,xlim,ylim,main, \dots}{
    All optional arguments available for the \code{\link{clusplot.default}}
    function (except for the \code{diss} one) and graphical parameters
    (see \code{\link{par}}) may also be supplied as arguments to this function.}
}
\section{Side Effects}{
  An appropriate plot is produced on the current graphics device.  This
  can be one or both of the following choices:
 \cr Clusplot
 \cr Silhouette plot
}
\details{
  When \code{ask= TRUE}, rather than producing each plot sequentially,
  \code{plot.partition} displays a menu listing all the plots that can
  be produced.
  If the menu is not desired but a pause between plots is still wanted,
  call \code{par(ask= TRUE)} before invoking the plot command.

  The \emph{clusplot} of a cluster partition consists of a two-dimensional
  representation of the observations, in which the clusters are
  indicated by ellipses (see \code{\link{clusplot.partition}} for more
  details).

  The \emph{silhouette plot} of a nonhierarchical clustering is fully
  described in Rousseeuw (1987) and in chapter 2 of Kaufman and
  Rousseeuw (1990).
  For each observation i, a bar is drawn, representing its silhouette
  width s(i), see \code{\link{silhouette}} for details.
  Observations are grouped per cluster, starting with cluster 1 at the
  top.  Observations with a large s(i) (almost 1) are very well
  clustered, a small s(i) (around 0) means that the observation lies
  between two clusters, and observations with a negative s(i) are
  probably placed in the wrong cluster.

  A clustering can be performed for several values of \code{k} (the number of
  clusters).  Finally, choose the value of \code{k} with the largest overall
  average silhouette width.
}
\note{
  In the silhouette plot, observation labels are only printed when the
  number of observations is less than \code{nmax.lab} (40, by default),
  for readability.  Moreover, observation labels are truncated to
  maximally \code{max.strlen} (5) characters.  \cr
  For more flexibility, use \code{plot(silhouette(x), ...)}, see
  \code{\link{plot.silhouette}}.
}
\references{
  Rousseeuw, P.J. (1987)
  Silhouettes: A graphical aid to the interpretation and validation of
  cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65.

  Further, the references in \code{\link{plot.agnes}}.
}
\seealso{
  \code{\link{partition.object}}, \code{\link{clusplot.partition}},
  \code{\link{clusplot.default}}, \code{\link{pam}},
  \code{\link{pam.object}}, \code{\link{clara}},
  \code{\link{clara.object}}, \code{\link{fanny}},
  \code{\link{fanny.object}}, \code{\link{par}}.
}
\examples{
## generate 25 objects, divided into 2 clusters.
x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)),
           cbind(rnorm(15,5,0.5), rnorm(15,5,0.5)))
plot(pam(x, 2))

## Save space not keeping data in clus.object, and still clusplot() it:
data(xclara)
cx <- clara(xclara, 3, keep.data = FALSE)
cx$data # is NULL
plot(cx, data = xclara)
}
\keyword{cluster}
\keyword{hplot}
% Converted by Sd2Rd version 0.3-2.

\eof
\name{pltree}
\alias{pltree}
\title{Clustering Trees - Generic Function}
\description{
  Generic function drawing a clustering tree (``dendrogram'') on the
  current graphics device.
  Functions with names beginning in \code{pltree.} will be
  methods for this function.
}
\usage{
pltree(x, \dots)
}
\arguments{
  \item{x}{an \R object (for which a \code{pltree} method is defined).}
  \item{\dots}{methods may have additional arguments.  Graphical
    parameters (see \code{\link{par}}) may also be supplied as arguments
    to this function.}
}
\section{Side Effects}{
  a clustering tree is created on the current graphics device.
}
\seealso{
  \code{\link{twins.object}}, \code{\link{agnes}}, \code{\link{diana}},
  \code{\link{par}}.
}
\keyword{cluster}
\keyword{hplot}


\eof
\name{pltree.twins}
\alias{pltree.twins}
\title{Clustering Tree of a Hierarchical Clustering}
\description{
  Draws a clustering tree (dendrogram) of a \code{twins} object, i.e.,
  hierarchical clustering, typically resulting from \code{agnes} or
  \code{diana}.
}
\usage{
\method{pltree}{twins}(x, main = paste("Dendrogram of ", deparse(x$call)),
             labels = NULL, ylab = "Height", \dots)
}
\arguments{
  \item{x}{an object of class \code{"twins"}, typically created by either
    \code{\link{agnes}()} or \code{\link{diana}()}.}
  \item{main}{main title with a sensible default.}
  \item{labels}{labels to use; the default is constructed from \code{x}.}
  \item{ylab}{label for y-axis.}
  \item{\dots}{
    Graphical parameters (see \code{\link{par}}) may also
    be supplied as arguments to this function.}
}
\value{
  a NULL value is returned.
}
\details{
  Creates a plot of a clustering tree given a \code{twins} object.  The
  leaves of the tree are the original observations.  In case of an
  agglomerative clustering, two branches come together at the distance
  between the two clusters being merged.  For a divisive clustering, a
  branch splits up at the diameter of the cluster being split.
}
\seealso{
  \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{diana}},
  \code{\link{diana.object}}, \code{\link{hclust}}, \code{\link{par}},
  \code{\link{plot.agnes}}, \code{\link{plot.diana}}.
}
\examples{
}
\keyword{cluster}
\keyword{hplot}




\eof
\name{pluton}
\alias{pluton}
\title{Isotopic Composition Plutonium Batches}
\usage{data(pluton)}
\description{
  The \code{pluton} data frame has 45 rows and 4 columns,
  containing percentages of isotopic composition of 45 Plutonium
  batches.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Pu238}{the percentages of \eqn{\ ^{238}Pu}{(238)Pu},
      always less than 2 percent.}
    \item{Pu239}{the percentages of \eqn{\ ^{239}Pu}{(239)Pu},
      typically between 60 and 80 percent (from neutron capture of Uranium,
      \eqn{\ ^{238}U}{(238)U}).}
    \item{Pu240}{percentage of the plutonium 240 isotope.}
    \item{Pu241}{percentage of the plutonium 241 isotope.}
  }
}
\details{
  Note that the percentage of plutonium~242 can be computed from the
  other four percentages, see the examples.

  In the reference below it is explained why it is very desirable to
  combine these plutonium batches in three groups of similar size.
}
\source{
  Available as \file{pluton.dat} in the archive
  \url{http://win-www.uia.ac.be/u/statis/datasets/clusplot-examples.tar.gz}
}
\references{
  Rousseeuw, P.J. and Kaufman, L. and Trauwaert, E. (1996)
  Fuzzy clustering using scatter matrices,
  \emph{Computational Statistics and Data Analysis} \bold{23}(1), 135--151.
}
\examples{
data(pluton)

hist(apply(pluton,1,sum), col = "gray") # between 94\% and 100\%
pu5 <- pluton
pu5$Pu242 <- 100 - apply(pluton,1,sum) # the remaining isotope.
pairs(pu5)
}
\keyword{datasets}

\eof
\name{predict.ellipsoid}
\alias{predict.ellipsoid}
\alias{ellipsoidPoints}
\title{Predict Method for Ellipsoid Objects}
\description{
  Compute points on the ellipsoid boundary, mostly for drawing.
}
\usage{
\method{predict}{ellipsoid}(object, n.out=201, \dots)
ellipsoidPoints(A, d2, loc, n=201)
}
\arguments{
  \item{object}{an object of class \code{ellipsoid}, typically from
    \code{\link{ellipsoidhull}()}.}
  \item{n.out,n}{the number of points to create.}
  \item{A, d2, loc}{arguments of the auxiliary \code{ellipsoidPoints}, see below.}
  \item{\dots}{passed to and from methods.}
}
\details{
   Note \code{ellipsoidPoints} is the workhorse function of the
   \code{predict.ellipsoid} method; the class of \code{object} is not
   checked; it must solely have valid components \code{loc} (length \eqn{p}),
   the \eqn{p \times p}{p x p} matrix \code{cov} (corresponding to
   \code{A}) and \code{d2} for the center, the shape (``covariance'')
   matrix and the squared average radius or distance.
}
\value{
  a numeric matrix of dimension \code{n.out} times \eqn{p}.
}
\seealso{\code{\link{ellipsoidhull}}, also for examples.}
\keyword{dplot}
\keyword{utilities}

\eof
\name{print.agnes}
\alias{print.agnes}
\title{Print Method for AGNES Objects}
\description{
  Prints the call, agglomerative coefficient, ordering of objects and
  distances between merging clusters (`Height') of an \code{agnes} object.

  This is a method for the generic \code{\link{print}()} function for objects
  inheriting from class \code{agnes}, see \code{\link{agnes.object}}.
}
\usage{
\method{print}{agnes}(x, \dots)
}
\arguments{
  \item{x}{an agnes object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{
  \code{\link{summary.agnes}} producing more output;
  \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{print}},
  \code{\link{print.default}}.
}
\keyword{cluster}
\keyword{print}

\eof
\name{print.clara}
\alias{print.clara}
\title{Print Method for CLARA Objects}
\description{
  Prints the best sample, medoids, clustering vector and objective function
  of a \code{clara} object.

  This is a method for the function \code{\link{print}()} for objects
  inheriting from class \code{\link{clara}}.
}
\usage{
\method{print}{clara}(x, \dots)
}
\arguments{
  \item{x}{a clara object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{
  \code{\link{summary.clara}} producing more output;
  \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{print}},
  \code{\link{print.default}}.
}
\keyword{cluster}
\keyword{print}
% Converted by Sd2Rd version 0.3-2.

\eof
\name{print.diana}
\alias{print.diana}
\title{Print Method for DIANA Objects}
\description{
  Prints the ordering of objects, diameters of split clusters,
  and divisive coefficient of a \code{diana} object.

  This is a method for the function \code{\link{print}()} for objects
  inheriting from class \code{\link{diana}}.
}
\usage{
\method{print}{diana}(x, \dots)
}
\arguments{
  \item{x}{a diana object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{
  \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{print}},
  \code{\link{print.default}}.
}
\keyword{cluster}
\keyword{print}


\eof
\title{Print and Summary Methods for Dissimilarity Objects}
\name{print.dissimilarity}
\alias{print.dissimilarity}
\alias{summary.dissimilarity}
\alias{print.summary.dissimilarity}
\description{
  Print or summarize the distances and the attributes of a
  \code{dissimilarity} object.

  These are methods for the functions \code{print()} and \code{summary()} for
  \code{dissimilarity} objects.  See \code{print}, \code{print.default},
  or \code{summary} for the general behavior of these.
}
\usage{
\method{print}{dissimilarity}(x, \dots)
\method{summary}{dissimilarity}(object, \dots)
\method{print}{summary.dissimilarity}(x, \dots)
}
\arguments{
  \item{x, object}{a \code{dissimilarity} object or a
    \code{summary.dissimilarity} one for \code{print.summary.dissimilarity()}.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{
  \code{\link{daisy}}, \code{\link{dissimilarity.object}},
  \code{\link{print}}, \code{\link{print.default}}.
}
\examples{
 ## See  example(daisy)

 sd <- summary(daisy(matrix(rnorm(100), 20,5)))
 sd # -> print.summary.dissimilarity(.)
 str(sd)
}
\keyword{cluster}
\keyword{print}



\eof
\name{print.fanny}
\alias{print.fanny}
\title{Print Method for FANNY Objects}
\description{
  Prints the objective function, membership coefficients and clustering vector
  of a \code{fanny} object.

  This is a method for the function \code{\link{print}()} for objects
  inheriting from class \code{\link{fanny}}.
}
\usage{
\method{print}{fanny}(x, \dots)
}
\arguments{
  \item{x}{a fanny object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{
  \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{print}},
  \code{\link{print.default}}.
}
\keyword{cluster}
\keyword{print}

\eof
\name{print.mona}
\alias{print.mona}
\title{Print Method for MONA Objects}
\description{
  Prints the ordering of objects, separation steps, and used variables
  of a \code{mona} object.

  This is a method for the function \code{\link{print}()} for objects
  inheriting from class \code{\link{mona}}.
}
\usage{
\method{print}{mona}(x, \dots)
}
\arguments{
  \item{x}{a mona object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{
  \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{print}},
  \code{\link{print.default}}.
}
\keyword{cluster}
\keyword{print}

\eof
\name{print.pam}
\alias{print.pam}
\title{Print Method for PAM Objects}
\description{
  Prints the medoids, clustering vector and objective function
  of a \code{pam} object.

  This is a method for the function \code{\link{print}()} for objects
  inheriting from class \code{\link{pam}}.
}
\usage{
\method{print}{pam}(x, \dots)
}
\arguments{
  \item{x}{a pam object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{
  \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{print}},
  \code{\link{print.default}}.
}
\keyword{cluster}
\keyword{print}


\eof
\name{ruspini}
\alias{ruspini}
\title{Ruspini Data}
\usage{data(ruspini)}
\description{
  The Ruspini data set, consisting of 75 points in four groups that is
  popular for illustrating clustering techniques.
}
\format{
  A data frame with 75 observations on 2 variables giving the x and y
  coordinates of the points, respectively.
}
\source{
  E. H. Ruspini (1970):
  Numerical methods for fuzzy clustering.
  \emph{Inform. Sci.}, \bold{2}, 319--350.
}
\references{
  Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996):
  Clustering in an Object-Oriented Environment.
  \emph{Journal of Statistical Software}, \bold{1}.
  \url{http://www.stat.ucla.edu/journals/jss/}
}
\examples{
data(ruspini)

## Plot similar to Figure 4 in Struyf et al (1996)
\dontrun{plot(pam(ruspini, 4), ask = TRUE)}
\testonly{plot(pam(ruspini, 4))}

## Plot similar to Figure 6 in Struyf et al (1996)
plot(fanny(ruspini, 5))
}
\keyword{datasets}

\eof
\name{silhouette}
\alias{silhouette}
\alias{silhouette.default}
\alias{silhouette.partition}
\alias{sortSilhouette}
\alias{summary.silhouette}
\alias{print.summary.silhouette}
\alias{plot.silhouette}
\title{Compute or Extract Silhouette Information from Clustering}
\description{
  Compute silhouette information according to a given clustering in
  \eqn{k} clusters.
}
\usage{
silhouette(x, \dots)
\method{silhouette}{default}  (x, dist, dmatrix, \dots)
\method{silhouette}{partition}(x, \dots)

sortSilhouette(object, \dots)
\method{summary}{silhouette}(object, FUN = mean, \dots)
\method{plot}{silhouette}(x, nmax.lab = 40, max.strlen = 5,
     main = NULL, sub = NULL, xlab = expression("Silhouette width "* s[i]),
     col = "gray",  do.col.sort = length(col) > 1, border = 0,
     cex.names = par("cex.axis"), do.n.k = TRUE, do.clus.stat = TRUE, \dots)
}
\arguments{
  \item{x}{an object of appropriate class; for the \code{default}
    method an integer vector with \eqn{k} different integer cluster
    codes or a list with such an \code{x$clustering}
    component.  Note that silhouette statistics are only defined if
    \eqn{2 \le k \le n-1}{2 <= k <= n-1}.}
  \item{dist}{a dissimilarity object inheriting from class
    \code{\link{dist}} or coercible to one.  If not specified,
    \code{dmatrix} must be.}
  \item{dmatrix}{a symmetric dissimilarity matrix (\eqn{n \times n}{n * n}),
    specified instead of \code{dist}, which can be more efficient.}
  \item{object}{an object of class \code{silhouette}.}
  \item{\dots}{further arguments passed to and from methods.}
  \item{FUN}{function used to summarize silhouette widths.}
  \item{nmax.lab}{integer indicating the number of labels which is
    considered too large for single-name labeling the silhouette plot.}
  \item{max.strlen}{positive integer giving the length to which
    strings are truncated in silhouette plot labeling.}
  \item{main, sub, xlab}{arguments to \code{\link{title}}; have a
    sensible non-NULL default here.}
  \item{col, border, cex.names}{arguments passed to
    \code{\link{barplot}()}; note that the default used to be \code{col
      = heat.colors(n), border = par("fg")} instead.\cr
    \code{col} can also be a color vector of length \eqn{k} for
    clusterwise coloring, see also \code{do.col.sort}.
  }
  \item{do.col.sort}{logical indicating if the colors \code{col} should
    be sorted ``along'' the silhouette; this is useful for casewise or
    clusterwise coloring.}
  \item{do.n.k}{logical indicating if \eqn{n} and \eqn{k} ``title text''
    should be written.}
  \item{do.clus.stat}{logical indicating if cluster size and averages
    should be written to the right of the silhouettes.}
}
\details{
    For each observation i, the \emph{silhouette width} \eqn{s(i)} is
    defined as follows: \cr
    Put \eqn{a(i)} = average dissimilarity between i and all other points of the
    cluster to which i belongs.  For all \emph{other} clusters C, put
    \eqn{d(i,C)} = average dissimilarity of i to all observations of C.  The
    smallest of these \eqn{d(i,C)} is \eqn{b(i) := \min_C d(i,C)},
    and can be seen as the dissimilarity between i and its ``neighbor''
    cluster, i.e., the nearest one to which it does \emph{not} belong.
    Finally, \deqn{s(i) := \frac{b(i) - a(i)}{\max(a(i), b(i))}.}{%
      s(i) := ( b(i) - a(i) ) / max( a(i), b(i) ).}

    Observations with a large \eqn{s(i)} (almost 1) are very well
    clustered, a small \eqn{s(i)} (around 0) means that the observation
    lies between two clusters, and observations with a negative
    \eqn{s(i)} are probably placed in the wrong cluster.
}
\note{
  While \code{silhouette()} is \emph{intrinsic} to the
  \code{\link{partition}} clusterings, and hence has a (trivial) method
  for these, it is straightforward to get silhouettes from hierarchical
  clusterings from \code{silhouette.default()} with
  \code{\link{cutree}()} and distance as input.
}
\value{
  \code{silhouette()} returns an object, \code{sil}, of class
  \code{silhouette} which is an [n x 3] matrix with attributes.  For
  each observation i, \code{sil[i,]} contains the cluster to which i
  belongs as well as the neighbor cluster of i (the cluster, not
  containing i, for which the average dissimilarity between its
  observations and i is minimal), and the silhouette width \eqn{s(i)} of
  the observation.  The \code{\link{colnames}} correspondingly are
  \code{c("cluster", "neighbor", "sil_width")}.

  \code{summary(sil)} returns an object of class
  \code{summary.silhouette}, a list with components
%%Rd bug: fails inside \value{}!:\describe{
    \item{si.summary}{numerical \code{\link{summary}} of the individual
      silhouette widths \eqn{s(i)}.}
    \item{clus.avg.widths}{numeric (rank 1) array of clusterwise
      \emph{means} of silhouette widths where \code{mean = FUN} is used.}
    \item{avg.width}{the total mean \code{FUN(s)} where \code{s} are the
      individual silhouette widths.}
    \item{clus.sizes}{\code{\link{table}} of the \eqn{k} cluster sizes.}
    \item{call}{if available, the call creating \code{sil}.}
    \item{Ordered}{logical identical to \code{attr(sil, "Ordered")}, see
      below.}
%%Rd bug:   }

  \code{sortSilhouette(sil)} orders the rows of \code{sil} as in the
  silhouette plot, by cluster (increasingly) and decreasing silhouette
  width \eqn{s(i)}.
  \cr
  \code{attr(sil, "Ordered")} is a logical indicating if \code{sil} \emph{is}
  ordered as by \code{sortSilhouette()}. In that case,
  \code{rownames(sil)} will contain case labels or numbers, and \cr
  \code{attr(sil, "iOrd")} the ordering index vector.
}
\references{
  Rousseeuw, P.J. (1987)
  Silhouettes: A graphical aid to the interpretation and validation of
  cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65.

  chapter 2 of Kaufman, L. and Rousseeuw, P.J. (1990), see
  the references in \code{\link{plot.agnes}}.
}
\seealso{\code{\link{partition.object}}, \code{\link{plot.partition}}.
}
\examples{
 data(ruspini)
 pr4 <- pam(ruspini, 4)
 str(si <- silhouette(pr4))
 (ssi <- summary(si))
 plot(si) # silhouette plot

 si2 <- silhouette(pr4$clustering, dist(ruspini, "canberra"))
 summary(si2) # has small values: "canberra"'s fault
 plot(si2, nmax= 80, cex.names=0.6)

 par(mfrow = c(3,2), oma = c(0,0, 3, 0))
 for(k in 2:6)
    plot(silhouette(pam(ruspini, k=k)), main = paste("k = ",k), do.n.k=FALSE)
 mtext("PAM(Ruspini) as in Kaufman & Rousseeuw, p.101",
       outer = TRUE, font = par("font.main"), cex = par("cex.main"))

 ## Silhouette for a hierarchical clustering:
 ar <- agnes(ruspini)
 si3 <- silhouette(cutree(ar, k = 5), # k = 4 gave the same as pam() above
     	           daisy(ruspini))
 plot(si3, nmax = 80, cex.names = 0.5)
 ## 2 groups: Agnes() wasn't too good:
 si4 <- silhouette(cutree(ar, k = 2), daisy(ruspini))
 plot(si4, nmax = 80, cex.names = 0.5)
}
\keyword{cluster}

\eof
\name{sizeDiss}
\alias{sizeDiss}
\title{Sample Size of Dissimilarity Like Object}
\description{
  Returns the number of observations (\emph{sample size}) corresponding
  to a  dissimilarity like object, or equivalently,
  the number of rows or columns of a matrix
  when only the lower or upper triangular part (without diagonal) is given.

  It is nothing else but the inverse function of \eqn{f(n) = n(n-1)/2}.
}
\usage{
sizeDiss(d)
}
\arguments{
  \item{d}{any \R object with length (typically) \eqn{n(n-1)/2}.}
}
\value{
  a number; \eqn{n} if \code{length(d) == n(n-1)/2}, \code{NA} otherwise.
}
\seealso{\code{\link{dissimilarity.object}} and also
  \code{\link[mva]{as.dist}} for class \code{dissimilarity} and
  \code{dist} objects which have a \code{Size} attribute.}
\examples{
sizeDiss(1:10)# 5, since 10 == 5 * (5 - 1) / 2
sizeDiss(1:9) # NA

n <- 1:100
stopifnot(n == sapply( n*(n-1)/2, function(n) sizeDiss(logical(n))))
}
\keyword{utilities}
\keyword{arith}

\eof
\name{summary.agnes}
\alias{summary.agnes}
\alias{print.summary.agnes}
\title{Summary Method for `agnes' Objects}
\description{
  Returns (and prints) a summary list for an \code{agnes} object.
  Printing gives more output than the corresponding
  \code{\link{print.agnes}} method.
}
\usage{
\method{summary}{agnes}(object, \dots)
\method{print}{summary.agnes}(x, \dots)
}
\arguments{
  \item{x, object}{an \code{\link{agnes}} object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{\code{\link{agnes}}, \code{\link{agnes.object}}.}
\examples{
data(agriculture)
summary(agnes(agriculture))
}
\keyword{cluster}
\keyword{print}


\eof
\name{summary.clara}
\alias{summary.clara}
\alias{print.summary.clara}
\title{Summary Method for `clara' Objects}
\description{
  Returns (and prints) a summary list for a \code{clara} object.
  Printing gives more output than the corresponding
  \code{\link{print.clara}} method.
}
\usage{
\method{summary}{clara}(object, \dots)
\method{print}{summary.clara}(x, \dots)
}
\arguments{
  \item{x, object}{a \code{\link{clara}} object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{\code{\link{clara.object}}}
\examples{
## generate 2000 objects, divided into 5 clusters.
set.seed(47)
x <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)),
           cbind(rnorm(400,10,8), rnorm(400,40,6)),
           cbind(rnorm(400,30,4), rnorm(400, 0,4)),
           cbind(rnorm(400,40,4), rnorm(400,20,2)),
           cbind(rnorm(400,50,4), rnorm(400,50,4))
)
clx5 <- clara(x, 5)
## Mis`classification' table:
% R version >= 1.5 :
% table(rep(1:5, each = 400), clx5$clust) # -> 1 "error"
table(rep(1:5, rep(400,5)), clx5$clust) # -> 1 "error"
summary(clx5)

## Graphically:
par(mfrow = c(3,1), mgp = c(1.5, 0.6, 0), mar = par("mar") - c(0,0,2,0))
%>1.5: plot(x, col = rep(2:6, each = 400))
plot(x, col = rep(2:6, rep(400,5)))
plot(clx5)
}
\keyword{cluster}
\keyword{print}

\eof
\name{summary.diana}
\alias{summary.diana}
\alias{print.summary.diana}
\title{Summary Method for `diana' Objects}
\description{Returns (and prints) a summary list for a \code{diana} object.}
\usage{
\method{summary}{diana}(object, \dots)
\method{print}{summary.diana}(x, \dots)
}
\arguments{
  \item{x, object}{a \code{\link{diana}} object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{\code{\link{diana}}, \code{\link{diana.object}}.}
\keyword{cluster}
\keyword{print}


\eof
\name{summary.fanny}
\alias{summary.fanny}
\alias{print.summary.fanny}
\title{Summary Method for `fanny' Objects}
\description{Returns (and prints) a summary list for a \code{fanny} object.}
\usage{
\method{summary}{fanny}(object, \dots)
\method{print}{summary.fanny}(x, \dots)
}
\arguments{
  \item{x, object}{a \code{\link{fanny}} object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{\code{\link{fanny}}, \code{\link{fanny.object}}.}
\keyword{cluster}
\keyword{print}


\eof
\name{summary.mona}
\alias{summary.mona}
\alias{print.summary.mona}
\title{Summary Method for `mona' Objects}
\description{Returns (and prints) a summary list for a \code{mona} object.}
\usage{
\method{summary}{mona}(object, \dots)
\method{print}{summary.mona}(x, \dots)
}
\arguments{
  \item{x, object}{a \code{\link{mona}} object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{\code{\link{mona}}, \code{\link{mona.object}}.}
\keyword{cluster}
\keyword{print}

\eof
\name{summary.pam}
\alias{summary.pam}
\alias{print.summary.pam}
\title{Summary Method for PAM Objects}
\description{Summarize a \code{\link{pam}} object and return an object
  of class \code{summary.pam}.
  There's a \code{\link{print}} method for the latter.
}
\usage{
\method{summary}{pam}(object, \dots)
\method{print}{summary.pam}(x, \dots)
}
\arguments{
  \item{x, object}{a \code{\link{pam}} object.}
  \item{\dots}{potential further arguments (required by generic).}
}
\seealso{\code{\link{pam}}, \code{\link{pam.object}}.
}
\keyword{cluster}


\eof
\name{twins.object}
\alias{twins.object}
\alias{twins}% == class
\title{Hierarchical Clustering Object}
\description{
  The objects of class \code{"twins"} represent an agglomerative or
  divisive (polythetic) hierarchical clustering of a dataset.
}
\section{GENERATION}{
  This class of objects is returned from \code{agnes} or \code{diana}.
}
\section{METHODS}{
  The \code{"twins"} class has a method for the following generic function:
  \code{pltree}.
}
\section{INHERITANCE}{
  The following classes inherit from class \code{"twins"} :
  \code{"agnes"} and \code{"diana"}.
}
\value{
  See \code{\link{agnes.object}} and \code{\link{diana.object}} for details.
}
\seealso{\code{\link{agnes}},\code{\link{diana}}.
}
\keyword{cluster}

\eof
\name{volume.ellipsoid}
\alias{volume}
\alias{volume.ellipsoid}
\title{Compute the Volume of Planar Object}
\description{
  Compute the volume of a planar object.
  This is a generic function and a method for \code{ellipsoid} objects.
}
\usage{
\method{volume}{ellipsoid}(object)
}
\arguments{
  \item{object}{an \R object the volume of which is wanted.}
}
\value{
  a number, the volume of the given \code{object}.
}
\seealso{\code{\link{ellipsoidhull}} for spanning ellipsoid computation.}
\examples{
example(ellipsoidhull) # which defines `ellipsoid' object <namefoo>
%% FIXME
}
\keyword{utilities}

\eof
\name{votes.repub}
\alias{votes.repub}
\title{Votes for Republican Candidate in Presidential Elections}
\usage{data(votes.repub)}
\description{
  A data frame with the percentages of votes given to the Republican
  candidate in presidential elections from 1856 to 1976.  Rows
  represent the 50 states, and columns the 31 elections.
}
\source{
  S. Peterson (1973):
  \emph{A Statistical History of the American Presidential Elections}.
  New York: Frederick Ungar Publishing Co.
  
  Data from 1964 to 1976 is from R. M. Scammon, 
  \emph{American Votes 12}, Congressional Quarterly.
}
\keyword{datasets}


\eof
\name{xclara}
\alias{xclara}
\title{Bivariate Data Set with 3 Clusters}
\description{
  An artificial data set consisting of 3000 points in 3 well-separated
  clusters of size 1000 each.
}
\usage{data(xclara)}
\format{
  A data frame with 3000 observations on 2 numeric variables giving the
  \eqn{x} and \eqn{y} coordinates of the points, respectively.
}
\source{
  Sample data set accompanying the reference below, obtained from
  \url{http://www.stat.ucla.edu/journals/jss/v01/i04/data/}.
}
\references{
  Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996):
  Clustering in an Object-Oriented Environment.
  \emph{Journal of Statistical Software}, \bold{1}.
  \url{http://www.stat.ucla.edu/journals/jss/}
}
\keyword{datasets}

\eof
