\name{GenKern.internal}
\title{GenKern internal functions}
\alias{bandwidthselect}
\alias{rangeselect}
\alias{correlationselect}
\alias{getlims}
\description{
  These are internal functions and they should not be used directly.
}
\author{D.Lucy and R.G.Aykroyd}
\keyword{internal}
\eof
\name{KernSec}

\alias{KernSec}

\title{Univariate kernel density estimate}

\description{Computes univariate kernel density estimate using Gaussian kernels which can also use non-equally spaced ordinates and adaptive bandwidths}

\usage{KernSec(x, xgridsize=100, xbandwidth, range.x, na.rm=FALSE)}

\arguments{
\item{x}{vector of \code{x} values}
\item{xgridsize}{integer for number of ordinates at which to calculate the smoothed estimate: default=100}
\item{xbandwidth}{value of \code{x} window width, or vector of local window widths: default=\code{dpik(x)}}
\item{range.x}{total range of the estimate in the \code{x} dimension, or a vector giving the \code{x} ordinates: default=\code{range +- 1.5 * mean bandwidth}}
\item{na.rm}{NA behaviour: \code{TRUE} drops cases with NA's, \code{FALSE} stops function with a warning if NA's are detected: default=\code{FALSE}}
}

\value{returns two vectors:
\item{xvals}{vector of ordinates}
\item{yden}{vector of density estimates corresponding to each \code{x} ordinate}
}

\author{David Lucy <\email{dlucy@maths.ed.ac.uk}> 
\cr Robert Aykroyd <\email{robert@amsta.leeds.ac.uk}>\url{http://www.amsta.leeds.ac.uk/~robert/}}

\references{Robertson, I. Lucy, D. Baxter, L. Pollard, A.M. Aykroyd, R.G. Carter, A.H.C. Switsur, V.R. and Waterhouse, J.S.(1999) A kernel based Bayesian approach to climatic reconstruction. \emph{Holocene} \bold{9}(4): 495-500}

\note{Slow code suitable for visualisation and display of p.d.f where highly generalised k.p.d.fs are needed - \code{\link[KernSmooth]{bkde}} is faster when uniformly gridded, single bandwidth, k.p.d.fs are required, although in the single dimensional case you won't notice the difference.

This function doesn't use bins as such, it calculates the density at a set of points. These points can be thought of as 'bin centres' but in reality they're not.

From version 1.00 onwards a number of improvements have been made: NA's are now handled semi-convincingly by dropping if required. A multi-element vector of bandwidths associated with each case can be sent, so it is possible to accept the default, give a fixed bandwidth, or a bandwidth associated with each case.

It should be noted that if a multi-element vector is sent for bandwidth, it must be of the same length as the data vector. Furthermore, multi-element vectors which approximate the bin centres, can be sent rather than the extreme limits of the range; which means that the points at which the density is to be calculated need not be uniformly spaced.

If the default \code{xbandwidth} is to be used there \bold{must} be at least five unique values in the \code{x} vector. If not the function will return an error. If you don't have five unique values in the vector then send a value, or vector, for \code{xbandwidth}.

The number of ordinates defaults to the length of \code{range.x} if \code{range.x} is a vector of ordinates, otherwise it is \code{xgridsize}, or 100 if that isn't specified.

Finally, the various modes of sending parameters can be mixed, ie: the extremes of the range can be sent to define the range for \code{x} while a vector is sent to describe the bandwidth for each case in \code{x}, or, a multi-element vector could be sent to define the ordinates in \code{x} together with a single bandwidth.
}


\seealso{\code{\link{KernSur}} \code{\link{per}}  \code{\link[base]{density}} \code{\link[base]{hist}} \code{\link[KernSmooth]{bkde}} \code{\link[KernSmooth]{bkde2D}} \code{\link[KernSmooth]{dpik}}}

\section{Acknowledgements}{Written in collaboration with A.M.Pollard <\email{a.m.pollard@bradford.ac.uk}> with the financial support of the Natural Environment Research Council (NERC) grant GR3/11395}

\examples{
x <- c(2,4,6,8) 			# make up some x data
z <- KernSec(x, xbandwidth=2, range.x=c(0,10))
plot(z$xvals, z$yden, type="l") 
# use a defined vector for the ordinates and bandwidths
ords <- seq(from=0, to=10, length=100)
bands <- x/15
z <- KernSec(x, xbandwidth=bands, range.x=ords)
plot(z$xvals, z$yden, type="l") 	# should plot a wriggly line
}

\keyword{distribution}
\keyword{smooth}
\eof
\name{KernSur}

\alias{KernSur}

\title{Bivariate kernel density estimation}

\description{Computes bivariate kernel density estimate using five parameter Gaussian kernels which can also use non-equally spaced ordinates and adaptive bandwidths}

\usage{KernSur(x, y, xgridsize=100, ygridsize=100, correlation=0, xbandwidth,
ybandwidth, range.x, range.y, na.rm=FALSE)}

\arguments{
\item{x}{vector of \code{x} values}
\item{y}{vector of \code{y} values}
\item{xgridsize}{integer for number of ordinates at which to calculate the smoothed estimate: default=100}
\item{ygridsize}{integer for number of ordinates at which to calculate the smoothed estimate: default=100}
\item{correlation}{\code{x,y} correlation, or vector of local correlations: default=\code{cor(x,y)}}
\item{xbandwidth}{value of \code{x} window width, or vector of local window widths: default=\code{dpik(x)}}
\item{ybandwidth}{value of \code{y} window width, or vector of local window widths: default=\code{dpik(y)}}
\item{range.x}{total range of the estimate in the \code{x} dimension, or a vector giving the \code{x} ordinates: default=\code{range +- 1.5 * mean bandwidth}}
\item{range.y}{total range of the estimate in the \code{y} dimension, or a vector giving the \code{y} ordinates: default=\code{range +- 1.5 * mean bandwidth}}
\item{na.rm}{NA behaviour: \code{TRUE} drops cases with NA's, \code{FALSE} stops function with a warning if NA's are detected: default=\code{FALSE}}
}

\value{returns two vectors and a matrix:
\item{xvals}{vector of ordinates at which the density has been estimated in the \code{x} dimension}
\item{yvals}{vector of ordinates at which the density has been estimated in the \code{y} dimension}
\item{zden}{matrix of density for \code{f(x,y)} with dimensions \code{xgridsize}, \code{ygridsize}}
}

\author{David Lucy <\email{dlucy@maths.ed.ac.uk}>
\cr Robert Aykroyd <\email{robert@amsta.leeds.ac.uk}>\url{http://www.amsta.leeds.ac.uk/~robert/}}

\references{Robertson, I. Lucy, D. Baxter, L. Pollard, A.M. Aykroyd, R.G. Carter, A.H.C. Switsur, V.R. and Waterhouse, J.S.(1999) A kernel based Bayesian approach to climatic reconstruction. \emph{Holocene} \bold{9}(4): 495-500}

\note{Slow code suitable for visualisation and display of correlated p.d.f, where highly generalised k.p.d.fs are needed - \code{\link[KernSmooth]{bkde2D}} is much faster when uncorrelated, uniformly gridded, single bandwidth, k.p.d.fs are required.

This function doesn't use bins as such, it calculates the density at a set of points in each dimension. These points can be thought of as 'bin centres' but in reality they're not.

From version 1.00 onwards a number of improvements have been made: NA's are now handled semi-convincingly by dropping if required. A multi-element vector of bandwidths associated with each case can be sent for either dimension, so it is possible to accept the default, give a fixed bandwidth, or a bandwidth associated with each case. A multi-element vector of correlations can be sent, rather than a single correlation.

It should be noted that if a vector is sent for correlation, or either bandwidth, they must be of the same length as the data vectors. Furthermore, vectors which approximate the bin centres, can be sent rather than the extreme limits in the range; which means that the points at which the density is to be calculated need not be uniformly spaced.

If the default \code{bandwidth} is to be used there \bold{must} be at least five unique values in the \code{x} and \code{y} vectors. If not the function will return an error. If you don't have five unique values in the vector then send a value, or vector, for \code{bandwidth}.

The number of ordinates defaults to the length of \code{range.x} if \code{range.x} is a vector of ordinates, otherwise it is \code{xgridsize}, or 100 if that isn't specified.

Finally, the various modes of sending parameters can be mixed, ie: the extremes of the range can be sent to define the range for \code{x}, but a multi-element vector could be sent to define the ordinates in the \code{y} dimension, or, a vector could be sent to describe the bandwidth for each case in the \code{x} direction, and a single-element vector defines all bandwidths in the \code{y}.}

\seealso{\code{\link{KernSec}} \code{\link{per}}  \code{\link[base]{density}} \code{\link[base]{hist}} \code{\link[KernSmooth]{bkde}} \code{\link[KernSmooth]{bkde2D}} \code{\link[KernSmooth]{dpik}}}

\section{Acknowledgements}{Written in collaboration with A.M.Pollard <\email{a.m.pollard@bradford.ac.uk}> with the financial support of the Natural Environment Research Council (NERC) grant GR3/11395}

\examples{
x <- c(2,4,6,8) 			# make up some x-y data
y <- x 

# calculate and plot a surface with zero correlation based on above data
op <- KernSur(x,y, xgridsize=50, ygridsize=50, correlation=0, 
	xbandwidth=1, ybandwidth=1, range.x=c(0,10), range.y=c(0,10))
image(op$xvals, op$yvals, op$zden, col=terrain.colors(100), axes=TRUE)
contour(op$xvals, op$yvals, op$zden, add=TRUE)
box()

# re-calculate and re-plot the above using a 0.8 correlation
op <- KernSur(x,y, xgridsize=50, ygridsize=50, correlation=0.8,
	 xbandwidth=1, ybandwidth=1, range.x=c(0,10), range.y=c(0,10))
image(op$xvals, op$yvals, op$zden, col=terrain.colors(100), axes=TRUE)
contour(op$xvals, op$yvals, op$zden, add=TRUE)
box()

# calculate and plot a surface of the above data with an ascending
# correlation and bandwidths and a vector of equally spaced ordinates
bands <- c(1,1.1,1.2,1.3)
cors <- c(0,-0.2,-0.4,-0.6)
rnge.x <- seq(from=0, to=10, length=100)

op <- KernSur(x,y, xgridsize=50, ygridsize=50, correlation=cors, 
        xbandwidth=bands, ybandwidth=bands, range.x=rnge.x, range.y=c(0,10))
image(op$xvals, op$yvals, op$zden, col=terrain.colors(100), axes=TRUE)
contour(op$xvals, op$yvals, op$zden, add=TRUE)
box()


}
\keyword{distribution}
\keyword{smooth}
\eof
\name{nearest}

\alias{nearest}

\title{Index of a vector nearest in value to a supplied value}

\description{Returns the index of a vector which contains the value closest to an arbitrary value}

\usage{nearest(x, xval, outside=FALSE, na.rm=FALSE)}

\arguments{
\item{x}{vector of values}
\item{xval}{value to find the nearest value in \code{x} to}
\item{outside}{if not set to \code{TRUE} the function returns an error if \code{xval} is outside the range of \code{x} - default \code{FALSE}}
\item{na.rm}{NA behaviour: \code{TRUE} drops cases with NA's, \code{FALSE} stops function with a warning if NA's are detected: default=\code{FALSE}}
}

\value{returns an integer:
\item{index}{the index of \code{x} with the value nearest to \code{xval}}
}

\author{David Lucy <\email{dlucy@maths.ed.ac.uk}>
\cr Robert Aykroyd <\email{robert@amsta.leeds.ac.uk}>\url{http://www.amsta.leeds.ac.uk/~robert/}}

\note{The vector doesn't have to be in any particular order - this routine will just give the index of the nearest number. The only inconsistency is that if the value of \code{xval} is not strictly within the range of the vector the function will return an error. To prevent this call with the \code{outside=TRUE} flag enabled. If there are many values which match the 'nearest' value then the function will return a vector of their indices.}

\section{Acknowledgements}{Written in collaboration with A.M.Pollard <\email{a.m.pollard@bradford.ac.uk}> with the financial support of the Natural Environment Research Council (NERC) grant GR3/11395}

\examples{
# make up a vector
x <- c(1,2,2,2,2,2,3,4,5,6,7,8,9,10)
# conventional usage - xval within range should return 9
nearest(x, 4.7)
# xval - outside the range of x should return 14
nearest(x, 12.7, outside=TRUE)
# many 'nearest' values in x - should return - 2 3 4 5 6 
nearest(x, 1.7)
# make x[3] an NA
x[3] <- NA
# returns - 2 4 5 6 - by enabling na.rm
nearest(x, 1.7, na.rm=TRUE)
}

\keyword{arith}
\eof
\name{per}

\alias{per}

\title{Locate value for ith percentage point in a binned distribution}

\description{Calculates the value for the ith point in a binned distribution}

\usage{per(den, vals, point, na.rm=FALSE, neg.rm=FALSE)}

\arguments{
\item{den}{vector of frequency or density values}
\item{vals}{vector of values corresponding to the centres of the bins in \code{den}, or the bin break points}
\item{point}{percentage point of the distribution ie: 0.50 is median}
\item{na.rm}{behaviour for NA's in the vector of density values: \code{FALSE} (default) per() will fail with warning if NA's are detected, \code{TRUE} per() will assume that these values are really zeros}
\item{neg.rm}{per() will also fail if any member of the density vector is negative (which can happen occasionally from density functions based on FFT), set this to \code{TRUE} to treat these values as zeros}
}

\value{returns a value:
\item{x}{value of \code{vals} corresponding to the \code{point} position}
}

\author{David Lucy <\email{dlucy@maths.ed.ac.uk}>
\cr Robert Aykroyd <\email{robert@amsta.leeds.ac.uk}>\url{http://www.amsta.leeds.ac.uk/~robert/}}

\note{Not restricted to uniform bin widths but due to linear interpolation gets less accurate as bin widths deviate from uniformity. The vectors must be in
ascending order of bin centres or bin break points. The density can be a frequency in that it doesn't have to sum to unity.

Out of character for the rest of the GenKern package this function does assume proper bins rather than ordinates, although if a density estimate has been generated using \code{\link{KernSec}} then the ordinate vector can be used as a first order approximation to bin centres.}

\seealso{\code{\link{KernSur}} \code{\link{KernSec}}  \code{\link[base]{density}} \code{\link[base]{hist}} \code{\link[KernSmooth]{bkde}} \code{\link[KernSmooth]{bkde2D}} \code{\link[KernSmooth]{dpik}}}

\section{Acknowledgements}{Written in collaboration with A.M.Pollard <\email{a.m.pollard@bradford.ac.uk}> with the financial support of the Natural Environment Research Council (NERC) grant GR3/11395}

\examples{
# make up some x-y data
x <- seq(1,100)
y <- dnorm(x, mean=40, sd=10)
plot(x,y)                   
# mark the median, 0.1 and 0.9 positions with vertical lines
abline(v=per(y,x,0.5))
abline(v=per(y,x,0.9))
abline(v=per(y,x,0.1)) 
# for a bimodal distribution which doesn't sum to one
x <- c(1:5) 
y <- c(2,3,4,3,4)
per(y,x,0.5) # should return 3.25
# change the previous example to bin extremes
x <- c(1:6)
per(y,x,0.5) # should return 3.75
}

\keyword{nonparametric}
\eof
