\name{DWD}
\docType{data}
\alias{DWD}
\title{Phenological observations}
\description{
	Phenological observations of nine stations from 1951 to 1998.
	Data from the German Weather Service.
}
\usage{data(DWD)}
\format{Data frame containing three columns (day of year of observation, year, station-id)}
\source{German Weather Service}
\references{
		Schaber J, Badeck F-W (2002) 'Evaluation of methods for the combination of phenological time series and outlier detection'. Tree Physiology 22:973-982
}
\keyword{datasets}

\eof
\name{Searle}
\docType{data}
\alias{Searle}
\title{Example of a two-way classification table}
\description{
	Example of a two-way classification table where lacking data
	creates three distinct connected sets.
}
\usage{data(Searle)}
\format{R source file}
\references{
	Searle (1997) 'Linear Models'. Wiley. page 324.
}	
\keyword{datasets}

\eof
\name{Simple}
\docType{data}
\alias{Simple}
\title{Simple example of a two-way classification table}
\description{
	Simple example of a two-way classification table
	where missing data creates two distinct connected sets.
}
\usage{data(Simple)}
\format{R source file}
\keyword{datasets}

\eof
\name{connectedSets}
\alias{connectedSets}
\title{Connected sets in a matrix}
\description{
	Finds connected data sets, i.e. connected rows and columns
	of a numeric matrix M.
}
\usage{connectedSets(M)}
\arguments{
	\item{M}{Numeric matrix with 0-entries considered as missing values.}
}
\details{
	In a two-way classification of linear models sometimes independent
	sets of normal equations are obtained due to missing data in the
	experimental design, i.e. the complete design matrix is not of full rank
	and thus no solution can be found. However, solutions of the independent
	sets of normal equations can still exist.
	This phenomenon is called 'connectedness' of the data.
	Especially in phenological analysis experimental designs are almost
	always unbalanced because of missing data. Thus, when combined time
	series are to be estimated, it is worth checking for and finding
	connected data sets for which combined time series can then be estimated.
	Example (also see example data(Simple) and example in 'maxConnectedSet'):
	In the following matrix dots represent missing values, X represent observations
	and the lines join the connected sets:
	   
	:	X\_\_\_X   .   .	\cr
	:	    \eqn{\mid}{|}	\cr
	:	X\_\_\_X   .   .	\cr
	:						\cr
	:	.   .   X\_\_\_X	\cr
	   
	Thus, in this matrix observations in rows 1 and 2 or columns 1 and 2 form one connected
	set. Likewise row 3 (or columns 3 and 4) also forms one connected set.
}
\value{
	\item{rowclasses}{Vector of set numbers of rows of M.}
	\item{colclasses}{Vector of set numbers of columns of M.}
}
\references{
	Searle (1997) 'Linear Models'. Wiley. page 318.
}
\author{
	Joerg Schaber
}
\seealso{
	\code{\link{maxConnectedSet}}
}
\examples{
	data(Simple)
	connectedSets(Simple)
}
\keyword{design}
\keyword{models}

\eof
\name{daylength}
\alias{daylength}
\title{Daylength at julian day i on latitude l}
\description{
	Calculates daylength [h] and declination angle delta [radians]
	on day i [julian day of year] for latitude l [degrees].	
}
\usage{daylength(i,l)}
\arguments{
	\item{i}{Integer as julian day of year (1-365)}	
	\item{l}{Float as latitude [degrees]}
}
\value{
	\item{dl}{daylength [h]}
	\item{delta}{declination angle [degrees]}
}
\author{
	Joerg Schaber
}
\examples{
	daylength(120,63)
}
\keyword{misc}
\keyword{utilities}

\eof
\name{matrix2raw}
\alias{matrix2raw}
\title{Converts numeric matrix to data frame}
\description{
	Converts a numeric matrix M into a data frame D
	with three columns (x, factor 1, factor 2)
	where rows of M are ranks of factor 1 levels and columns of M are
	ranks of factor 2 levels, missing values are set to 0.
}
\usage{matrix2raw(M,l1,l2)}
\arguments{
	\item{M}{Numeric matrix}
	\item{l1}{\bold{Optional} numeric vector of level names of column 2 (factor 1)
		of returned data frame. If missing it is assigned row numbers of \code{M}.}
	\item{l2}{\bold{Optional} numeric vector of level names of column 3 (factor 2)
		of returned data frame. If missing it is assigned column numbers of \code{M}.}
}
\value{
	\item{D}{Data frame with three columns: (y,f1,f2). \code{y}: observations, i.e.
	non-zero entries, in matrix. \code{f1}: factor 1, i.e. row number of M or \code{l1}.
	\code{f2}: factor 2, i.e. column number of M or \code{l2}. D is ordered first by
	factor 2 and then factor 1.}
}
\author{
	Joerg Schaber
}
\examples{
	data(DWD)
	M <- raw2matrix(DWD)	# conversion to matrix
	D1 <- matrix2raw(M)	# back conversion, but with different level names
	D2 <- matrix2raw(M,c(1951:1998),c(1:9))	# with original level names
}
\keyword{utilities}
\keyword{misc}

\eof
\name{maxConnectedSet}
\alias{maxConnectedSet}
\title{Maximal connected set in a matrix}
\description{
	Finds connected data set, i.e. connected rows and columns
	of a numeric matrix M, that has the largest number of data entries.

}
\usage{maxConnectedSet(M)}
\arguments{
	\item{M}{Numeric matrix with missing values considered as 0, or
	a data frame. The data frame is internally converted to a matrix
   	and should have three columns (x, factor 1, factor 2) where
	x are considered the entries of the matrix, rows correspond to 
	levels of factor 2 and columns correspond to levels of factor 1.}
}
\details{
	In a two-way classification of linear models sometimes independent
	sets of normal equations are obtained due to missing data in the
	experimental design, i.e. the complete design matrix is not of full rank
	and thus no solution can be found. However, solutions of the independent
    sets of normal equations can still exist.
    This phenomenon is called 'connectedness' of the data.
    Especially in phenological analysis experimental designs are almost
    always unbalanced because of missing data. Thus, when combined time
    series are to be estimated, it is worth checking for and finding
    connected data sets for which combined time series can then be estimated.
	This can also be interpreted in the way that a prerequisite to obtain
	a combined time series is to have overlapping time series.
	Example (also see example data(Searle) from Searle (1997), page 324 and
	example in 'connectedSets'):
	In the following matrix dots represent missing values, X represent observations
	and the lines join the connected sets:

	:	X\_\_\_.\_\_\_.\_\_\_.\_\_\_X   .   .   . 		\cr
	:                       \eqn{\mid}{|}				\cr
	:	.   .   X\_\_\_.\_\_\_!\_\_\_.\_\_\_.\_\_\_X	\cr
	:                       \eqn{\mid}{|}           \eqn{\mid}{|}\cr
	:	.   X\_\_\_.\_\_\_.\_\_\_!\_\_\_X\_\_\_X   !\cr
	:                       \eqn{\mid}{|}       \eqn{\mid}{|}   \eqn{\mid}{|}\cr
	:	.   X\_\_\_.\_\_\_.\_\_\_!\_\_\_X\_\_\_X   !\cr
	:                       \eqn{\mid}{|}           \eqn{\mid}{|}\cr
	:	.   .   .   .   X   .   .   !					\cr
	:                       \eqn{\mid}{|}           \eqn{\mid}{|}\cr
	:	.   .   X\_\_\_.\_\_\_!\_\_\_.\_\_\_.\_\_\_X	\cr
	:                       \eqn{\mid}{|}				\cr
	:	.   .   .   X\_\_\_X   .   .   .				\cr

	Thus, in this matrix observations of rows 1, 5 and 7 or columns 1, 4 and 5 form
   	one connected set. Likewise observations of rows 2 and 6 (or columns 3 and 8)
   	and rows 3 and 4 (or columns 2, 6 and 7) form also connected sets, respectively.
}
\value{
	\item{ms}{maximal connected set as matrix or data frame, corresponding to the input.}
	\item{maxl}{Number of observations in the maximal connected data set.}
	\item{nsets}{Number of connected data sets.}
	\item{lsets}{Vector with number of observations in each connected data set, i.e. lsets[i] is the number of observations in connected data set i.}
}
\references{
	Searle (1997) 'Linear Models'. Wiley. page 318.
}
\author{
	Joerg Schaber
}
\seealso{
	\code{\link{connectedSets}}
}
\examples{
	data(Searle)
	maxConnectedSet(Searle)
}
\keyword{design}
\keyword{models}

\eof
\name{maxdaylength}
\alias{maxdaylength}
\title{Maximal day length on latitude l}
\description{
	Calculates maximal daylength maxdl [h] at a certain latitude l [degrees].	
}
\usage{maxdaylength(l)}
\arguments{
	\item{l}{Latitude in degrees.}
}
\value{
	\item{maxdl}{Maximal daylength [h] at a certain latitude l [degrees]}	
}
\author{
	Joerg Schaber
}
\examples{
	maxdaylength(60)
}
\keyword{misc}
\keyword{utilities}

\eof
\name{pheno.ddm}
\alias{pheno.ddm}
\title{Dense design matrix for phenological data}
\description{
	Creation of dense two-way classification design matrix
	for usage in robust parameter estimation with rq.fit.sfn (package nprq).
	The sum of the second factor is constrained to be zero. No general mean.
}
\usage{pheno.ddm(D)}
\arguments{
	\item{D}{Data frame with three columns: (observations, factor 1, factor 2).}
}
\details{
	In phenological applications observations should be the julian day
	of observation of a certain phase, factor 1 should be the observation year
	and factor 2 should be a station-id.
	Usually this is much easier created by:
	\code{y <- factor(f1)
	s <- factor(f2)
	ddm <- as.matrix.csr(model.matrix(~ y + s -1, contrasts=list(s=("contr.sum"))))}.
	However, this procedure can be quite memory demanding and might exceed storage
	capacity for large problems. 
	This procedure here is much less memory consuming.
}
\value{
	\item{ddm}{Dense roworder matrix, matrix.csr format (see matrix.csr in package SparseM)}
	\item{D}{Input data frame D sorted first by f2 then by f1.}
}
\author{
	Joerg Schaber
}
\seealso{
	\code{\link{model.matrix}}
	\code{\link[SparseM]{matrix.csr}}
}
\examples{
	data(DWD)
	ddm1 <- pheno.ddm(DWD)
	attach(DWD)
	y <- factor(DWD[[2]])
	s <- factor(DWD[[3]])
	ddm2 <- as.matrix.csr(model.matrix(~ y + s -1, contrasts=list(s=("contr.sum"))))
	identical(ddm1$ddm,ddm2)
}
\keyword{robust}
\keyword{design}
\keyword{models}

\eof
\name{pheno.lad.fit}
\alias{pheno.lad.fit}
\title{Fits a robust two-way linear model} 
\description{
	Fits a robust two-way linear model. 
	The model assumes both factors (f1 and f2) to be fixed.
	Errors are assumed to be i.i.d. No general mean and sum of
	f2 is constrained to be zero.
}
\usage{pheno.lad.fit(D)}
\arguments{
	\item{D}{Data frame with three columns (x, f1, f2) or a matrix 
	where rows are ranks of factor f1 levels and columns are ranks 
	of factor f2 levels and missing values are set to 0.}	
}
\details{
	The function minimizes the least absolute deviations (LAD or L1 norm)
	of the residuals of a two-way linear model.
	This function is basically a wrapper for the \code{rq.fit()} or \code{rq.fit.sfn()}
	functions of the \code{quantreg} and \code{nprq} packages, respectively,
	adapted for the estimation of combined phenological time series. 
	Depending on the size of the problem (length(x)<=1000) 
	either the \code{rq.fit()} function using the Barrodale-Roberts algorithm is used or 
	(length(x)>1000) the corresponding dense matrix implementation with 
	\code{rq.fit.sfn()} using the Interior-Point method.
	In phenological applications, x should be the julian day
	of observation of a certain phase, factor f1 should be the observation year
	and factor f2 should be a station-id. 
	Note that the input data frame is sorted before fitting, such that subsequent
	analyses using the input data should be done using the sorted output data frame.
}
\value{
	\item{p1}{Estimated parameters of factor f1, in phenology this is precisely the combined time series.}
	\item{p2}{Estimated parameters of factor f2, in phenology these are precisely the station effects.}
	\item{resid}{Residuals}
	\item{ierr}{For length(x) > 1000 this is the return error code of \code{rq.fit.sfn()}}
	\item{D}{The input as ordered data frame, ordered first by f2 then by f1}
}
\author{
	Joerg Schaber
}
\references{
	Rousseeuw PJ, Leroy AM (1987) 'Robust estimation and outlier detection'. Wiley.
	Schaber J, Badeck F-W (2002) 'Evaluation of methods for the combination of phenological time series and outlier detection'. Tree Physiology 22:973-982
}
\seealso{
	\code{\link[quantreg]{rq.fit}}
	\code{\link[nprq]{rq.fit.sfn}}
}
\examples{
	data(DWD)
	R <- pheno.lad.fit(DWD)							# robust parameter estimation
	plot(levels(factor(R$D[[2]])),R$p1,type="l")	# plot combined time series
	R$D[R$resid >= 30,]								# observation whose residuals
   													# are > 30 days (outliers)
}
\keyword{design}
\keyword{models}
\keyword{ts}

\eof
\name{pheno.mlm.fit}
\alias{pheno.mlm.fit}
\title{Fits a two-way linear mixed model} 
\description{
	Fits a two-way linear mixed model. 
	The model assumes the first factor f1 to be fixed and the second factor f2 to
	be random. Errors are assumed to be i.i.d. No general mean and sum of 
	f2 is constrained to be zero.
}
\usage{pheno.mlm.fit(D)}
\arguments{
	\item{D}{Data frame with three columns (x, f1, f2) or a matrix
	where rows are ranks of factor f1 levels and columns are ranks 
	of factor f2 levels and missing values are set to 0.}	
}
\details{
	This function is basically a wrapper for the \code{lme()} function of
	the \code{nlme} package, adapted for the estimation of combined
	phenological time series. Estimation method: restricted maximum likelihood (REML).
	In phenological applications, x should be the julian day
	of observation of a certain phase, factor f1 should be the observation year
	and factor f2 should be a station-id. 
}
\value{
	\item{fixed}{Estimated fixed effects, in phenology this is precisely the combined time series.}
	\item{random}{Estimated random effects, in phenology these are the station effects.}
	\item{SEf1}{Standard error group f1, i.e. square root of variance component fixed effect.}
	\item{SEf2}{Standard error group f2, i.e. square root of variance component random effect.}
	\item{lclf}{Lower 95 percent confidence limit of fixed effects.}
	\item{uclf}{Upper 95 percent confidence limit of fixed effects.}
}
\references{
	Searle (1997) 'Linear Models'. Wiley.
	Schaber J, Badeck F-W (2002) 'Evaluation of methods for the combination of phenological time series and outlier detection'. Tree Physiology 22:973-982
}
\author{
	Joerg Schaber
}
\seealso{
	\code{\link[nlme]{lme}}
}
\examples{
	data(DWD)
	R <- pheno.mlm.fit(DWD)								# parameter estimation
	plot(levels(factor(DWD[[2]])),R$fixed,type="l")		# plot combined time series
	tr <- lm(R$fixed~rank(levels(factor(DWD[[2]]))))	# trend estimation
	summary(tr)$coef[2]									# slope of trend
	summary(tr)$coef[4]									# standard error of trend
}
\keyword{design}
\keyword{models}
\keyword{ts}

\eof
\name{raw2matrix}
\alias{raw2matrix}
\title{Converts a numeric data frame to matrix}
\description{
	Converts a numeric data frame D with three columns (x, factor 1, factor 2)
	into a matrix M where rows are ranks of levels of factor 1 and columns are
	ranks of levels of factor 2, missing values are set to 0.	
}
\usage{raw2matrix(D)}
\arguments{
	\item{D}{Data frame with three columns (x, factor 1, factor 2)}
}
\value{
	\item{M}{Numeric matrix where rows are ranks of levels of factor 1 and columns are
	ranks of levels of factor 2, missing values are set to 0.}
}
\author{
	Joerg Schaber
}
\examples{
	data(DWD)
	raw2matrix(DWD)
}
\keyword{utilities}
\keyword{misc}

\eof
\name{seqMK}
\alias{seqMK}
\title{Sequential Mann-Kendall test for time series}
\description{
	The sequential Mann-Kendall test on time series x detects 
	approximate potential trend turning points in time series.
}
\usage{seqMK(x)}
\arguments{
	\item{x}{Numeric vector x.}
}
\details{
	Implicitly assumes an equidistant time series x. 
	Calculates a progressive and a retrograde series of Kendall normalized tau's. 
	Points where the two lines cross are considered as approximate potential 
	trend turning points. When either the progressive or retrograde row exceeds
	certain confidence limits before and after the crossing points, this trend
	turning point is considered significant at the corresponding level, 
	i.e. 1.96 for 95\% significance and 2.58 for 99\% significance etc.
}
\value{
	\item{prog}{Progressive row of Kendall's normalized tau's}
	\item{retr}{Retrograde row of Kendall's normalized tau's}
	\item{tp}{Boolean vector indicating at what indices of the original time series the prog and retr cross, i.e. TRUE at potential trend turning points.}
}
\author{
	Joerg Schaber
}
\references{
	Kendall M, Gibbons JD (1990) 'Rank correlation methods'. Arnold.
	Sneyers R (1990) 'On statistical analysis of series of observations'. Technical Note
		No 143. Geneva. Switzerland. World Meteorological Organization.
	Schaber J (2003) 'Phenology in Germany in the 20th Century: Methods, analyses and models'.
		Ph.D. Thesis. University of Potsdam. Germany.
		\url{http://pub.ub.uni-potsdam.de/2002meta/0022/door.htm}
}
\keyword{ts}
\keyword{utilities}

\eof
\name{tau}
\alias{tau}
\title{Kendall's normalized tau}
\description{
	Kendall's normalized tau for time series x
}
\usage{tau(x)}
\arguments{
	\item{x}{Numeric vector x.}
}
\details{
	Implicitly assumes an equidistant time series x. 
}
\value{
	\item{t}{Kendall's normalized tau.}
}
\author{
	Joerg Schaber
}
\references{
	Kendall M, Gibbons JD (1990) 'Rank correlation methods'. Arnold.
	Sneyers R (1990) 'On statistical analysis of series of observations'. Technical Note
		No 143. Geneva. Switzerland. World Meteorological Organization.
	Schaber J (2003) 'Phenology in Germany in the 20th Century: Methods, analyses and models'.
		Ph.D. Thesis. University of Potsdam. Germany.
		\url{http://pub.ub.uni-potsdam.de/2002meta/0022/door.htm}
}
\keyword{ts}
\keyword{utilities}

\eof
