.packageName <- "pheno"
.packageName <- "pheno"

.First.lib <- function(lib, pkg) {
        library.dynam("pheno", pkg, lib)
        print("pheno library loaded")
}
# Finds connected data sets of a numeric matrix M
# non-entries are considered 0
# Returns two vectors: 
# rowclasses[0..maxnr-1] : Class number of the respective rows
# colclasses[0..maxnc-1] : Class number of the respective cols
connectedSets <- function(M) {
	if(!is.matrix(M)) stop("connectedSets: first argument must be a matrix. Exiting ...")
	maxnr <- dim(M)[1]
	maxnc <- dim(M)[2]

	res <- .C("connectivity",M=as.vector(t(M),"numeric"),nrows=as.integer(maxnr),ncols=as.integer(maxnc),rowclasses=vector("integer",maxnr),colclasses=vector("integer",maxnc),PACKAGE="pheno")
	
	attach(res)
	return(rowclasses,colclasses)

}
# calculates daylength dl [h] and declination delta  [radians]
# on day i for latitude lat [degrees]
# declination: angle between sun rays and equatorial plane for 
# the whole earth (-23 degrees - + 23 degrees)
daylength <- function(i,l) {
		
	res <- .C("daylength",l=as.double(l),i=as.integer(i),dl=double(1),delta=double(1),PACKAGE="pheno")
	
	attach(res)
	return(dl,delta)

}
# Converts a matrix M into a dataframe D with three columns (x, factor 1, factor 2)
# where rows of M are ranks of factor 1 levels and columns of M are
# ranks of factor 2 levels, missing values are assumed to be 0.
# Returned data frame os ordered first by factor 2 and then factor 1.
# input: matrix M with two optional ordered numeric vectors for level names.
matrix2raw <- function(M,l1,l2) {
	if(!is.matrix(M)) stop("matrix2raw: first argument must be a matrix. Exiting ...")
	if(missing(l1)) { l1 <- as.vector(c(1:dim(M)[1]),"numeric") }
	else { l1 <- as.vector(l1,"numeric") }
	if(!is.vector(l1,"numeric") || length(l1) != dim(M)[1]) {
		stop("matric2raw: check argument l1, not numeric or wrong length. Exisiting ...")
	}
	if(missing(l2)) { l2 <- as.vector(c(1:dim(M)[2]),"numeric") }
	else { l2 <- as.vector(l2,"numeric") }
	if(!is.vector(l2,"numeric") || length(l2) != dim(M)[2])
		stop("matric2raw: check argument l2, not numeric or wrong length. Exisiting ...")

	n <- 0
	y <- f1 <- f2 <- vector("numeric",dim(M)[1]*dim(M)[2])
	for(j in 1:dim(M)[2]) {			# order first by factor 2 
		for(i in 1:dim(M)[1]) {		# then by factor 1
			if(M[i,j] != 0) {
				n <- n + 1 
				y[n] <- M[i,j]; f1[n] <- l1[i]; f2[n] <- l2[j]
			}
		}
	}
	y <- y[1:n]; f1 <- f1[1:n]; f2 <- f2[1:n]
	D <- data.frame(y,f1,f2)
	return(D)
}
# Returns largest connected data set of a either numeric data frame D 
# with three columns (x, factor 1, factor 2) or a n*m matrix M,
# where the n rows correspond to n levels of factor 2 and m columns
# correspond to m levels of factor 1.
# Output as data frame or matrix, depending on input, with number
# of data entries in the maximal connected set, and number of connectes sets
maxConnectedSet <- function(M) {
	if(!is.data.frame(M) && !is.matrix(M)) {
		stop("maxConnectedSet: argument must be data frame with 3 columns or matrix")
	}
	if(is.data.frame(M) && length(M)!=3) {
		stop("maxConnectedSet: argument must be data frame with 3 columns or matrix")
	}
	if(is.data.frame(M)) {
		f1 <- factor(M[[2]])
		f2 <- factor(M[[3]])
		M <- raw2matrix(M)
		out <- 1
	}
	else { out <- 0 }

	sets <- connectedSets(M)	# find connected sets

	nsets <- length(unique(sets$colclasses)) # number of connected sets

	lsets <- vector("numeric",nsets)

	# find sets with maximal numbers of data entries
	maxl <- 0
	for(i in unique(sets$colclasses)) {
		lsets[i] <- length(which(M[which(sets$rowclasses==i),which(sets$colclasses==i)]!=0))
		if(lsets[i] > maxl) { max <- i; maxl <- lsets[i] }
	}
	
	ms <- M[which(sets$rowclasses==max),which(sets$colclasses==max)]
	
	if(out == 0) { # return matrix
		return(ms,maxl,nsets,lsets)
	}
	else {			# return data frame
		ms <- matrix2raw(ms,as.vector(levels(f1),"numeric"), as.vector(levels(f2),"numeric"))
		return(ms,maxl,nsets,lsets)
	}
}
# calculates maximal daylength maxdl [h] at a certain latitude lat [degrees]
maxdaylength <- function(l) {

	res <- .C("maxdaylength",l=as.double(l),maxdl=double(1),PACKAGE="pheno")

	attach(res)
	return(maxdl)
}
# Automatic creation of dense two-way classification design matrix
# for usage of dense robust estimation with rq.fit.sfn (package nprq).
# The sum of the second factor is constrained to be zero. No general mean.
# Usually this is much easier created by:
# y <- factor(f1)
# s <- factor(f2)
# ddm <- as.matrix.csr(model.matrix(~ y + s -1, contrasts=list(s=("contr.sum"))))
# however, this procedure is quite memory demanding and might exceed storage
# capacity for large problems. 
# This procedure here is much less memory comsuming.
# input: data frame with three columns: (observations, factor 1, factor 2)
# (phenology: observation day of phase, year, station)
# output: dense roworder matrix, matrix.csr format (see matrix.csr in package SparseM)
# and the sorted data frame D (data frame is being sorted first by f2 then by f1 )
pheno.ddm <- function(D) {
	if(!is.data.frame(D) || length(D) != 3) 
		stop("my.ddm: argument must be data frame with 3 fields. Exiting ...")

	require(SparseM)
	# order first by factor 2 then by factor 1
	D <-  D[order(D[[3]],D[[2]]),]
	no <- length(D[[1]])	 	# number of observations
	f1 <- factor(D[[2]]) 		# factor 1: year
	n1 <- nlevels(f1) 			# number of levels factor 1 (phenology: years)
	f2 <- factor(D[[3]])		# factor 2: station
	n2 <- nlevels(f2)			#  number of levels factor 2 (phenology: station)
	
	# ra: Object of class numeric, a real array of nnz elements containing the 
	#	non-zero elements of A, stored in row order. Thus, if i<j, all elements 
	#	of row i precede elements from row j. The order of elements within the 
	#	rows is immaterial. 
	# ja: Object of class integer, an integer array of nnz elements containing 
	#	the column indices of the elements stored in  ra. 
	# ia: Object of class integer, an integer array of n+1 elements containing 
	#	pointers to the beginning of each row in the arrays  ra  and  ja . 
	#	Thus  ia[i]  indicates the position in the arrays  ra  and  ja  where 
	#	the ith row begins. The last, (n+1)st, element of  ia  indicates 
	# 	where the n+1 row would start, if it existed. 
	# dimension: Object of class integer, dimension of the matrix
	
	# number of observations in last level of factor 2
	nols <- length(split(D,D[[3]])[[n2]][[1]]) 
	# number of non-zero elements in ra and ja 
	nnz <- (no-nols)*2+nols*n2
	ra=numeric(nnz)
	ja=integer(nnz)
	ia=integer(no+1)
	dimension=integer(2)
	ra[1:((no-nols)*2)] <- 1
	ra[((no-nols)*2+1):nnz] <- rep(c(1,rep(-1,n2-1)),nols)

	ia[1] <- as.integer(1) 
	for(i in 1:(no-nols)) {
		ja[2*i-1] <- as.integer(f1[[i]])
		ja[2*i] <- as.integer(n1+f2[[i]])
		ia[i+1] <- as.integer(ia[i] + 2)
	}
	k <- 2*(no-nols)+1
	for(i in (no-nols+1):no) {
		ia[i+1] <- as.integer(ia[i] + n2)
		ja[k] <- as.integer(f1[[i]])
		ja[(k+1):(k+n2-1)] <- as.integer(c((n1+1):(n1+n2-1)))
		k <- k + n2
	}
	dim <- as.integer(c(no,n1+n2-1))
	ddm <- new("matrix.csr",ra=ra,ja=ja,ia=ia,dimension=dim)
	return(ddm,D=D)
}
# Robust (least absolute deviations LAD/L1) fit of a two-way linear model 
# given a data frame with three columns (x, factor 1, factor 2) or 
# a matrix M where rows of M are ranks of factor 1 levels
# and columns of M are ranks of factor 2 levels, missing values are assumed to be 0.
# No general mean and sum of f2 is constrained to be zero. 
# Estimation method: interior point method in case of dense implementation,
# else Barrodale-Roberts
# Depending on the size of the problem (n>1000) a dense matrix implementation is used.
# Some parameters of the dense algorithm are set according to my experience and should be OK
# for most cases. However, they are only tested for sparse design matrices up to the order
# ~ 90.000x2.900
# Output: p1 : parameter estimations of factor 1 (year effects)
# 		  p2 : parameter estimations of factor 2 (station effects)
#		  resid : residuals
#		  ierr : return code of the l1 estimation
#		  D  : the input as ordered data frame, ordered first after f2 then f1
pheno.lad.fit <- function(D) {
	if(!is.data.frame(D) && !is.matrix(D)) {
		stop("lad.fit: argument must be data frame with 3 columns or matrix")
	}
	if(is.data.frame(D) && length(D)!=3) {
		stop("lad.fit: argument must be data frame with 3 columns or matrix")
	}
	if(is.matrix(D)) {
		D <- matrix2raw(D)
	}

	require(quantreg)
	require(nprq)
	
	D <-  D[order(D[[3]],D[[2]]),]

	s <- factor(D[[3]])
	y <- factor(D[[2]])
	o <- as.vector(D[[1]],"numeric")
	nobs <- length(o)

	if(nobs > 1000) { # sparse matrix implementation
		S <- pheno.ddm(D)
		m <- S$ddm@dimension[2]
	    nnzdmax <- S$ddm@ia[nobs + 1] - 1
		l1fit <- rq.fit.sfn(S$ddm,o,tau=0.5,tmpmax=1000*m,nnzlmax=100*nnzdmax,small=1e-04)
		p1 <- as.vector(l1fit$coef[1:nlevels(y)],"numeric")
		p2 <- as.vector(contr.sum(nlevels(s)) %*% l1fit$coef[(nlevels(y)+1):(nlevels(y)+nlevels(s)-1)],"numeric")
		resid <- S$D[[1]] - l1fit$coef[match(y,levels(y))]
		ierr <- l1fit$ierr 
	}
	else {			# normal fit
		
		ddm <- as.matrix.csr(model.matrix(~ y + s -1, contrasts=list(s=("contr.sum"))))
		l1fit <- rq.fit(ddm,o,tau=0.5,method="br")
		p1 <-  as.vector(l1fit$coef[1:nlevels(y)],"numeric")
		p2 <- as.vector(contr.sum(nlevels(s)) %*% l1fit$coef[(nlevels(y)+1):(nlevels(y)+nlevels(s)-1)],"numeric")
		resid <- as.vector(residuals(l1fit),"numeric")
		ierr <- NA
	}
	return(p1,p2,resid,ierr,D)
}
# Fits a two-way linear mixed model given a data frame with three columns
# (x, factor 1, factor 2) or a matrix M where rows of M are ranks of factor 1 levels
# and columns of M are ranks of factor 2 levels, missing values are assumed to be 0.
# The model assumes the first factor f1 to be fixed and the second factor f2 to
# be random. Errors are assumed to be i.i.d. No general mean and sum of f2 is constrained
# to be zero. Estimation method: restricted maximum likelihood (REML)
# Output: fixed : fixed effects
#		  random: random effects
#		  resid : residuals
#		  SEf1  : standard error group f1 (square root of variance component fixed effect)
#		  SEf2  : standard error group f2 (square root of variance component random effect)
#		  lclf  : lower 95% confidence limit of fixed effects 
#		  uclf  : upper 95% confidence limit of fixed effects 
pheno.mlm.fit <- function(D) {
	if(!is.data.frame(D) && !is.matrix(D)) {
		stop("mlm.fit: argument must be data frame with 3 columns or matrix")
	}
	if(is.data.frame(D) && length(D)!=3) {
		stop("mlm.fit: argument must be data frame with 3 columns or matrix")
	}
	if(is.matrix(D)) {
		D <- matrix2raw(D)
	}

	require(nlme)

	s <- factor(D[[3]])
	y <- factor(D[[2]])
	o <- as.vector(D[[1]],"numeric")
	remlfit <- lme(o ~ y - 1 ,random = ~ 1 | s, method="REML", contrasts=list(s=("contr.sum")),na.action=na.exclude)

	fixed <- as.vector(fixed.effects(remlfit),"numeric")
	random <- as.vector(random.effects(remlfit)[[1]],"numeric")
	resid <- as.vector(residuals(remlfit),"numeric")
	SEf1 <-  summary(remlfit)$sigma
	SEf2 <-   attr(remlfit$apVar,"Pars")[[2]]
	lclf <- as.vector(intervals(remlfit)$fixed[,1],"numeric")
	uclf <- as.vector(intervals(remlfit)$fixed[,3],"numeric")
	return(fixed,random,resid,SEf1,lclf,uclf)
}
# Converts a numeric data frame D with three columns (x, factor 1, factor 2)
# in a matrix M where rows are ranks of levels of factor 1 and columns are
# ranks of levels of factor 2, missing values are set to 0.
raw2matrix <- function(D) {
	if(!is.data.frame(D) || length(D) != 3) 
		stop("raw2matrix: argument must be data frame with 3 fields. Exiting ...")

	n <- length(rownames(D))
	f1 <- factor(D[[2]])
	nrows <- nlevels(f1)
	f2 <- factor(D[[3]])
	ncols <- nlevels(f2)
	M <- matrix(nrow=nrows,ncol=ncols)
	for( k in 1:n) {
		i <- f1[[k]]; j <- f2[[k]]; M[i,j] <- D[k,1]
	}
	for(i in 1:nrows) {
		for(j in 1:ncols) { if(is.na(M[i,j])) M[i,j] <- 0 }
	}
	return(M)
}
# sequential Mann-Kendall test on rank time series (after Sneyers 1990)
# detects approximate potential trend turning points in time series
# returns the progressive and retrograde time series of Kendall's
# normalized tau.
# input: vector x of the time series, length n
# output : progressive/retrograde series, length n-1 + NA at the beginning/end,
# and a vector of indices of the original times where potential approximate
# trend turning points are situated
seqMK <- function(x) {
	n <- length(x)
	y <- rev(x)
	prog <- retr <- vector("numeric",n)
	tp <- vector("logical",n)
	prog[1] <- retr[1] <- NA
	tp[1] <- tp[n] <- FALSE
	if(n < 2)
	    stop("seqMK: not enough finite observations")
	# progressive and retrograde series
	for (i in 2:n) {
		prog[i] <- tau(x[1:i])
		retr[i] <- tau(y[1:i])
	}
	retr <- rev(retr)
	diff <- prog-retr
	# index vector of crossing points
	for (i in 2:(n-2)) {
		if(sign(diff[i])==sign(diff[i+1])) { tp[i+1] <- FALSE }
		else { tp[i+1] <- TRUE }
	}
	return(prog,retr,tp)
}
# Kendall's normalized tau for time series x
# modified after (Kendall and Gibbons 1990)
# input: vector x of the time series
# output : tau
tau <- function(x) {
	n <- length(x)
	if(n < 2)
	    stop("tau: not enough finite observations")
	rankx <- rank(x)
	S <- 0
	for (i in 2:n-1) {
		for (j in i:n) {
			S <- S + sign(rankx[j]-rankx[i])
		}
	}
	ntg <- length(unique(rankx)) # number of tied groups
	b <- rep(1,ntg)
	j <- 0
	for (i in 1:n) { # number of values in each tied group
		if(duplicated(sort(rankx))[i]) {  b[j] <- b[j]+1 }
		else { j <- j+1 }
	}
	tmp <- 0
	for (i in 1:ntg) { tmp <- tmp + b[i]*(b[i]-1)*(2*b[i]+5) }
	# variance of S
	var <- 1/18*(n*(n-1)*(2*n+5)-tmp)
	if(S == 0) { t <- 0 }
	else {
		if(S > 0) { t <- (S-1)/sqrt(var) }
		else  	  { t <- (S+1)/sqrt(var) }
	}
	return(t)
}
