.packageName <- "maxstat"

corrmsrs <- function(X, minprop=0.1, maxprop=0.9) {
  if (is.vector(X)) X <- as.matrix(X)
  if (!is.matrix(X) && !is.data.frame(X)) 
    stop("X is not a matrix nor data.frame")
  if (minprop < 0 || maxprop > 1 || minprop > maxprop) 
    stop("minprop/maxprop are not correct proportions")
  ilist <- vector(ncol(X), mode="list")
  for (i in 1:ncol(X))
    ilist[[i]] <- irank(as.numeric(X[,i]))
  a <- .Call("newcorr", ilist=ilist, as.double(c(minprop, maxprop)),
             PACKAGE="maxstat")
  corrm <- a[[1]]
  coldel <- a[[2]]
  rowdel <- a[[3]]
  corrm <- corrm[rowdel == 0, coldel == 0]
  corrm
}
# $Id: maxstat.R,v 1.51 2005/01/24 13:52:29 hothorn Exp $


print.maxtest <- function(x, digits = 4, ...) {
  x$stats <- NULL
  x$cuts <- NULL
  x$quant <- NULL
  x$method <- paste("Maximally selected", x$smethod,
                    "statistics using",
                    x$pmethod, collapse=" ")

  class(x) <- "htest"
  print(x, digits = digits, quote = TRUE, prefix = "", ...)
} 

print.mmaxtest <- function(x, digits = 4, ...) {
  cat("\n\t Optimally Selected Prognostic Factors \n\n")
  cat("Call: ")
  print(x$call)
  cat("\n")
  cat("\n Selected: \n")
  p.value <- x$p.value
  sx <- x$maxstats[[x$whichmin]]
  sx$method <- x$method
  sx$stats <- NULL
  sx$cuts <- NULL
  sx$quant <- NULL
  sx$method <- paste("Maximally selected", sx$smethod, "statistics using",
                      sx$pmethod, collapse=" ")
  class(sx) <- "htest"
  print(sx, digits = digits, quote = TRUE, prefix = "", ...)
  cat("Adjusted p.value: \n")
  cat(x$p.value, ", error: ", attr(x$p.value, "error"), "\n\n")
} 

plot.maxtest <- function(x, xlab=NULL, ylab=NULL, ...) {
  xname <- unlist(strsplit(x$data.name, "by"))[2]
  if (is.na(x$quant)) {
    smethod <- x$smethod
    if (smethod == "LogRank") smethod <- "log-rank"
    if (is.null(ylab)) ylab <- paste("Standardized", smethod, "statistic")
    if (is.null(xlab)) xlab <- xname
    plot(x$cuts, x$stats, type="b", xlab=xlab, ylab=ylab, ...)
    lines(c(x$estimate, x$estimate), c(0, x$statistic), lty=2)
  } else {
    smethod <- gsub("LogRank", "log-rank", x$smethod)
    ylim <- c(min(x$quant, min(x$stats)), max(x$quant, max(x$stats)))
    ylim <- c(ylim[1]*0.95, ylim[2]*1.05)
    xlength <- range(x$cuts)
    if (is.null(ylab)) ylab <- paste("Standardized", smethod,
                                     "statistic using", x$pmethod)
    if (is.null(xlab)) xlab <- xname
    plot(x$cuts, x$stats, type="b", xlab=xlab, ylab=ylab, ylim=ylim, ...)
    lines(c(x$estimate, x$estimate), c(0, x$statistic), lty=2)
    lines(xlength, c(x$quant, x$quant), col="red")
  }
}

plot.mmaxtest <- function(x, xlab=NULL, ylab=NULL, nrow=2, ...) {
  old.par <- par(no.readonly=TRUE)
  np <- length(x$maxstats)
  ncol <- ceiling(np/nrow)
  par(mfrow=c(nrow, ncol))
  ylim <- c()
  for (i in 1:np) ylim <- c(ylim, x$maxstats[[i]]$stats) 
  ylim <- range(ylim)
  for (i in 1:np)
    plot(x$maxstats[[i]], xlab=xlab, ylab=ylab, ylim=ylim, ...)
  par(old.par)
}

pLausen92 <- function(b, minprop=0.1, maxprop=0.9)
{
  db <- dnorm(b)
  p <- 4*db/b + db*(b - 1/b)*log((maxprop*(1 - minprop))/((1-maxprop)*minprop))
  max(p,0)
}

qLausen92 <- function(p, minprop=0.1, maxprop=0.9)
{
  test <- function(x)
    abs(pLausen92(x, minprop, maxprop) - p)

  return(optimize(test, interval=c(0,10))$minimum)
}

pLausen94 <- function(b, N, minprop=0.1, maxprop=0.9, m=NULL)
{
  if(is.null(m))
    m <- floor(N*minprop):floor(N*maxprop)
  m1 <- m[1:(length(m)-1)]
  m2 <- m[2:length(m)]
  t <- sqrt(1 - m1*(N-m2)/((N-m1)*m2))
  D <- sum(1/pi*exp(-b^2/2)*(t - (b^2/4 -1)*(t^3)/6))
  1 - (pnorm(b) - pnorm(-b)) + D
}

qLausen94 <- function(p, N, minprop=0.1, maxprop=0.9, m=NULL)
{
  test <- function(x)
    abs(pLausen94(x, N, minprop, maxprop, m) - p)

  return(optimize(test, interval=c(0,10))$minimum)
}

index <- function(x) {
  ux <- unique(x)
  ux <- ux[ux < max(ux)]
  lapply(ux, wh, x = x)
}

wh <- function(cut, x)
  which(x <= cut)

cmatrix <- function(X) {
  N <- nrow(X)
  lindx <- unlist(test <- apply(X, 2, index), recursive=FALSE)
  a <- .Call("corr", as.list(lindx), as.integer(N), PACKAGE="maxstat")
  a
}
  
pexactgauss <- function(b, x, minprop=0.1, maxprop=0.9, ...)
{
  if (!require(mvtnorm)) stop("package mvtnorm not loaded")
  if (length(x) > 1) {
    cm <- corrmsrs(x, minprop, maxprop) 
    if (is.null(dim(cm))) return(pnorm(-b)*2)
    p <- pmvnorm(mean=rep(0, nrow(cm)),
                 corr=cm, lower=rep(-b, nrow(cm)),
               upper=rep(b, nrow(cm)), ...)
    msg <- attr(p, "msg")
    if (msg != "Normal Completion") warning(msg)
    return(1 - p)
  }
  if (length(x) == 1) {
    return(pnorm(-b)*2)
  }
}



qexactgauss <- function(p, x, minprop=0.1, maxprop=0.9,...)
{
  test <- function(a)
    abs(pexactgauss(a, x, minprop=minprop, maxprop=maxprop, ...) - p)

  return(optimize(test, interval=c(0,10))$minimum)
}


pmaxstat <- function(b, scores,  msample, quant=FALSE)
{
  if (!require(exactRankTests)) stop("package exactRankTests not loaded")

  # for integers only

  if (!all(round(scores) == scores))
    stop("scores are not integers in pmaxstat")
  if (length(scores) < length(msample))
    stop("incorrect number of cutpoints in pmaxstat")
  if (length(b) != 1)
    stop("b is not a single number in pmaxstat")

  N <- length(scores)

  scores <- scores - min(scores)

  # sample sizes

  m <- 1:(N-1)

  # Expectation and Variance of a Linear Rank Test

  E <- m/N*sum(scores)
  V <- m*(N-m)/(N^2*(N-1))*(N*sum(scores^2) - sum(scores)^2)
  if (length(msample) == 1) {
    b <- b * sqrt(V[msample]) + E[msample]
    return(pperm(b, scores, m=msample, alternative="two.sided"))
  }

  #if(sum(scores) > sum(1:200)) { 
  #  warning("Cannot compute SR p-value. Sum of scores > 20100")
  #  p <- list(1, 1)
  #  names(p) <- c("upper", "lower")
  #  return(p)
  #}

  H <- rep(0, sum(scores)*N)

  totsum <- sum(scores)
  sc <- rep(1, N)

  # Streitberg / Roehmel in C, package "exactRankTest"

  if (!is.loaded("cpermdist2")) stop("Function cpermdist2 from package exactRankTests not found!")

#  H <- .C("cpermdist2", H = as.double(H), as.integer(N),
#                as.integer(totsum), as.integer(sc),
#                as.integer(scores), as.integer(N),
#                as.integer(length(H)), PACKAGE="exactRankTests")$H

  H <- .Call("cpermdist2", as.integer(N),
                as.integer(totsum), as.integer(sc),
                as.integer(scores), as.logical(FALSE), 
                PACKAGE="exactRankTests")

  # add last row, column for compatibility

  H <- matrix(H, nrow=N+1, byrow=TRUE)

  S <- rep(1:(ncol(H)-1), nrow(H) -2)
  S <- matrix(S, nrow(H) -2, ncol(H)-1, byrow=TRUE)
  EM <- matrix(rep(E, ncol(H) -1), nrow(H) -2, ncol(H) - 1)
  VM <- matrix(rep(V, ncol(H) -1), nrow(H) -2, ncol(H) - 1 )
  S <- (S- E)/sqrt(V)

  # remove technical parts

  H <- H[2:(nrow(H)-1), ]
  H <- H[, 2:(ncol(H))]

  # S is the matrix of the standardized values

  S <- abs(S)
  S[H == 0] <- 0

  # extend to number of permutations

  H <- H*gamma(m+1)*gamma(N -m +1)

  if (quant)
    return(list(scores=scores, H=H, E=E, S=S, msample=msample, N=N))

  # those are in general not needed

  H[S <= b] <- 0


  # delete those, which are in m+1 and + max(scores) still > b
  # well, that's the trick

  sH <- apply(H, 1, sum)

  for (i in min(msample):(nrow(H)-1)) {
    indx <- which(H[i,] > 0)
    if (length(indx) > 0) {
      indxmax <- indx[indx < E[i]]
      indxmax <- indxmax[S[i+1, indxmax + max(scores)] > b]
      if (length(indxmax) > 0 & all(!is.na(indxmax)))
        sH[i+1] <- sH[i+1] - sum(H[i, indxmax]) 
      indxmin <- indx[indx > E[i]]
      indxmin <- indxmin[S[i+1, indxmin + min(scores)] > b]
      if (length(indxmin) > 0 & all(!is.na(indxmin)))
        sH[i+1] <- sH[i+1] - sum(H[i, indxmin])
    }
  }

  # only meaningful sample sizes

  sH <- sH[msample]

  gaN <- gamma(N+1)   
  lower <- min(sum(sH)/gaN, 1)  # <- this is a better approx.
  #  upper <- max(apply(H, 1, sum))/gaN
  #  p <- list(upper, lower)
  #  names(p) <- c("upper", "lower")
  #  cat("hl working: ", all.equal(hl(scores, H, E, S, msample, N, b), p), "\n")
  lower
}

qmaxstat <- function(p, scores, msample)
{
  if (p > 1 | p < 0) stop("p not in [0,1]")
  sr <- pmaxstat(3, scores, msample, quant=TRUE)
  tr <- rev(sort(unique(round(sr$S,5))))
  i <- 1
  pw <- 0
  while (pw < p) {
    pw <- hl(sr$scores, sr$H, sr$E, sr$S, sr$msample, sr$N, tr[i])$lower
    i <- i+1
  }
  return(tr[i-1])
}


hl <- function(scores, H, E, S, msample, N, b)
{
  H[S <= b] <- 0

  # delete those, which are in m+1 and + max(scores) still > b
  # well, that's the trick

  sH <- apply(H, 1, sum)

  for (i in min(msample):(nrow(H)-1)) {
    indx <- which(H[i,] > 0)
    if (length(indx) > 0) {
      indxmax <- indx[indx < E[i]]
      indxmax <- indxmax[S[i+1, indxmax + max(scores)] > b]
      if (length(indxmax) > 0 & all(!is.na(indxmax)))
        sH[i+1] <- sH[i+1] - sum(H[i, indxmax]) 
      indxmin <- indx[indx > E[i]]
      indxmin <- indxmin[S[i+1, indxmin + min(scores)] > b]
      if (length(indxmin) > 0 & all(!is.na(indxmin)))
        sH[i+1] <- sH[i+1] - sum(H[i, indxmin])
    }
  }

  # only meaningful sample sizes

  sH <- sH[msample]

  gaN <- gamma(N+1)   
  lower <- min(sum(sH)/gaN, 1)  # <- this is a better approx.
  upper <- max(apply(H, 1, sum))/gaN
  p <- list(upper, lower)
  names(p) <- c("upper", "lower")
  p
}

### just internal functions, not exported (yet), so less sanity checking...

pmaxperm <- function(b, scores, msample, expect,
                     variance, B = 10000, ...) {
  N <- length(scores)
  if (any(msample) > N) stop("invalid split points in msample")
  p <- .Call("maxstatpermdist", scores = as.double(scores),
                           msample = as.integer(msample),
                           expect = as.double(expect),
                           variance = as.double(variance),
                           Nsim = as.integer(B),    
                           pvalonly = as.logical(TRUE),
                           ostat = as.double(b), PACKAGE = "maxstat")
  p
}

qmaxperm <- function(p, scores, msample, expect,
                     variance, B = 10000, ...) {
  N <- length(scores)
  if (length(p) > 1 || p > 1 || p < 0) stop("p must be in [0,1]")
  if (any(msample) > N) stop("invalid split points in msample")
  cp <- .Call("maxstatpermdist", scores = as.double(scores),
                           msample = as.integer(msample),
                           expect = as.double(expect),
                           variance = as.double(variance),
                           Nsim = as.integer(B),    
                           pvalonly = as.logical(FALSE),
                           ostat = NULL, PACKAGE = "maxstat")
  names(cp) <- c("T", "Prob")
  # class(cp) <- c("data.frame", "excondens")
  cs <- cumsum(cp$Prob) 
  quant <- max(cp$T[which(cs <= p)])
  RET <- list(quant = quant, exdens = cp)
  return(RET)
}
# $Id: maxstat.test.R,v 1.17 2003/06/15 15:50:56 hothorn Exp $

maxstat.test <- function(formula, data, ...) 
  UseMethod("maxstat.test", data)

maxstat.test.default <- function(formula, data, ...)
 stop(paste("Do not know how to handle objects of class", class(data)))

maxstat.test.data.frame <-
function(formula, data, subset, na.action, ...)
{
    cl <- match.call()
    if(missing(formula)
       || (length(formula) != 3)
       || (length(attr(terms(formula[-3]), "term.labels")) != 1))
        stop("formula missing or incorrect")
    if(missing(na.action))
        na.action <- getOption("na.action")
    m <- match.call(expand.dots = FALSE)
    mt <- terms(formula, data=data)
    if(is.matrix(eval(m$data, parent.frame())))
        m$data <- as.data.frame(data)
    m[[1]] <- as.name("model.frame")
    m$... <- NULL
    mf <- eval(m, parent.frame())
    response <- attr(attr(mf, "terms"), "response")
    progfact <- attr(attr(mf, "terms"), "term.labels")
    MULTIMAX <- (length(progfact) > 1)
    RNAME <- names(mf)[response]
    PNAMES <- names(mf)[-response]
    X <- mf[,progfact] 
    y <- mf[[response]]
    arg <- list(...)
    DATA <- list(y=y, x=X)
    names(DATA) <- c("y", "x")
    mod <- do.call("maxstat", c(DATA, arg))
    if (MULTIMAX) {
      mod$maxstats <- sapply(mod$maxstats, function(x) { 
                             x$data.name <- paste(c(RNAME, x$data.name), 
                               collapse=" by "); list(x); } )
      mod$data.name <- paste(RNAME, paste(PNAMES, collapse=" + "), collapse
                             = " by ")
    } else {
      mod$data.name <- paste(c(RNAME, PNAMES), collapse=" by ")
    }
    mod$call <- cl
    return(mod)
}


maxstat <- function(y, x=NULL, smethod=c("Wilcoxon", 
         "Median", "NormalQuantil","LogRank", "Data"), 
         pmethod=c("none", "Lau92", "Lau94", "exactGauss", "HL", "condMC", 
         "min"), 
         iscores=(pmethod == "HL"), minprop=0.1, maxprop=0.9,  
         alpha=NULL, keepxy=TRUE, ...) 
{

  if (is.null(x)) stop("no data given")
  MULTIMAX <- is.matrix(x) || is.data.frame(x)
  smethod <- match.arg(smethod)
  pmethod <- match.arg(pmethod)
  
  scores <- cscores(y, type=smethod, int=FALSE)
  if (iscores & sum(scores - floor(scores)) != 0) {
    # check for midranks (Wilcoxon z.B.)
    fscore <- scores - floor(scores)
    if (all(fscore[fscore != 0] == 0.5))
      scores <- 2*scores
    else {
      # and handle real scores the way Hothorn & Lausen 2002 suggest
      scores <- scores - min(scores)
      scores <- round(scores*length(scores)/max(scores))
    }
  }

  if (MULTIMAX) {
    if (pmethod=="none") 
      stop("pmethod not specified.")
    if (!is.null(alpha) & MULTIMAX) 
      warning("cannot compute quantiles for more than one variable")
    mmax <- vector(mode="list", length=ncol(x))
    pvalues <- rep(0, ncol(x))
    statistics <- rep(0, ncol(x))
    for (i in 1:ncol(x)) {
      mmax[[i]] <- cmaxstat(scores, x[,i], pmethod, minprop, 
                           maxprop, alpha, ...)
      mmax[[i]]$data.name <- colnames(x)[i]
      mmax[[i]]$smethod <- smethod
      mmax[[i]]$pmethod <- pmethod 
      pvalues[i] <- mmax[[i]]$p.value
      statistics[i] <- mmax[[i]]$statistic
    }
    STATISTIC <- max(statistics)
    # do not use pexactgauss, we need cm here
    cm <- corrmsrs(x, minprop, maxprop)
    p <- pmvnorm(lower=-STATISTIC, upper=STATISTIC, mean=rep(0,ncol(cm)),
                 corr=cm, ...)
    if (attr(p, "msg") != "Normal Completion") {
      msg <- paste("pvmnorm: ", attr(p, "msg"), collapse=" ")
      warning(msg)
    }
    RET <- list(maxstats=mmax, whichmin=which.min(pvalues), p.value=1 - p,
    cm=cm, univp.values=pvalues)
    class(RET) <- "mmaxtest"
  } else {
    RET <- cmaxstat(scores, x, pmethod, minprop, maxprop, alpha, ...)
  }
  RET$smethod <- smethod
  RET$pmethod <- pmethod
  if (keepxy) { 
    RET$x <- x
    RET$y <- y
  } 
  RET
}


cmaxstat <- function(y, x=NULL, pmethod=c("none", "Lau92", "Lau94",
          "exactGauss", "HL", "condMC", "min"), minprop = 0.1, 
          maxprop=0.9, alpha = NULL, ...)
{
  pmethod <- match.arg(pmethod)

  if (is.null(y) || is.null(x)) stop("no data given")

  if (!is.numeric(x)) {
    if (is.factor(x)) {
      if (!(is.ordered(x) || nlevels(x) == 2)) {
        warning("cannot order in x, returning NA")
        return(NA)
      }
    } else {
      warning("cannot order in x, returning NA")
      return(NA)
    }
  }

  xname <- deparse(substitute(x))
  yname <- deparse(substitute(y))  
  if (length(xname) == 1 & length(yname) == 1)
    DNAME <- paste(xname, "and", yname)
  else
    DNAME <- "y by x" 

  N <- length(y)

  y <- y[order(x)]
  x <- sort(x)
  ties <- duplicated(x)

  m <- which(!ties) - 1 
  if (minprop == 0 & maxprop==1) m <- m[2:(length(m)-1)] else {
    if (all(m < floor(N*minprop))) stop("minprop too large")
    if (all(m > floor(N*maxprop))) stop("maxprop too small")
    m <- m[m >= floor(N*minprop)]
    m <- m[m <= floor(N*maxprop)]
  }

  if(length(m) < 1) stop("no data between minprop, maxprop")

  ss <- sum(y)
  E <- m/N*ss
  V <- m*(N-m)/(N^2*(N-1))*(N*sum(y^2) - ss^2)

  Test <- abs((cumsum(y)[m] - E)/sqrt(V))

  STATISTIC <- max(Test)
  ESTIMATOR <- x[m[min(which(Test == STATISTIC))]]
  names(STATISTIC) <- "M"
  names(ESTIMATOR) <- c("estimated cutpoint")

  if (is.null(alpha)) QUANT <- NA

  if (pmethod == "none") {
    PVAL <- NA
    QUANT <- NA
  }
  if (pmethod == "Lau92") {
    PVAL <- pLausen92(STATISTIC, minprop, maxprop)
    if (!is.null(alpha))
      QUANT <- qLausen92(alpha, minprop, maxprop)
  }
  if (pmethod == "Lau94") {
    PVAL <- pLausen94(STATISTIC, N, minprop, maxprop, m=m)
    if (!is.null(alpha))
       QUANT <- qLausen94(alpha, N, minprop, maxprop, m=m)
  }
  if (pmethod == "exactGauss") {
    PVAL <- pexactgauss(STATISTIC, x, minprop, maxprop, ...)
    if (!is.null(alpha))
       QUANT <- qexactgauss(alpha, x, minprop, maxprop, ...)
  }
  if (pmethod == "HL") {
    PVAL <- pmaxstat(STATISTIC, y, m)
    if (!is.null(alpha))
       QUANT <- qmaxstat(alpha, y, m)
  }
  if (pmethod == "condMC") {
    if (!is.null(alpha)) {
       maxdens <- qmaxperm(alpha, y, m, E, V, ...)
       QUANT <- maxdens$quant
       PVAL <- sum(maxdens$exdens$Prob[maxdens$exdens$T > STATISTIC])
    } else { 
      PVAL <- pmaxperm(STATISTIC, y, m, E, V, ...)
    }
  }

  if (pmethod == "min") {
    PVAL <- min(pLausen92(STATISTIC, minprop, maxprop), 
                pLausen94(STATISTIC, N, minprop, maxprop, m=m),  
                pexactgauss(STATISTIC, x, minprop, maxprop, ...),
                pmaxstat(STATISTIC, y, m))
    if (!is.null(alpha))
       QUANT <- NA
  } 

  RVAL <- list(statistic = STATISTIC, p.value = PVAL,
               method = pmethod,
               estimate = ESTIMATOR, data.name = DNAME,
               stats = Test, cuts = x[m], quant = QUANT)
  class(RVAL) <- "maxtest"
  RVAL
}
.onLoad <- function(lib, pkg) {
    if(!require(survival))
        warning("Could not load package survival")
}
