.packageName <- "corpora"
binom.pval <- function (k, n, p=.5, alternative=c("two.sided", "less", "greater")) {
  alternative <- match.arg(alternative)
  if (any(k < 0) || any(k > n) || any(n < 1)) stop("arguments must be integer vectors with 0 <= k <= n")
  if (any(p < 0) || any(p > 1)) stop("null hypothesis proportion p must be in range [0,1]")
  
  pval <- switch(alternative,
                 two.sided = 2 * pmin(pbinom(k, n, p, lower.tail=TRUE), pbinom(k-1, n, p, lower.tail=FALSE)),
                 less      = pbinom(k, n, p, lower.tail=TRUE),
                 greater   = pbinom(k-1, n, p, lower.tail=FALSE))
  pval <- pmax(0, pmin(1, pval))        # clamp p-value to range [0,1] (may be > 1 in two-sided approximation)
  pval
}
chisq <- function (k1, n1, k2, n2, correct=TRUE, one.sided=FALSE) {
  if (any(k1 < 0) || any(k1 > n1) || any(n1 <= 0)) stop("k1 and n1 must be integers with 0 <= k1 <= n1")
  if (any(k2 < 0) || any(k2 > n2) || any(n2 <= 0)) stop("k2 and n2 must be integers with 0 <= k2 <= n2")
  if (any(k1 + k2 <= 0)) stop("either k1 or k2 must be non-zero")

  l <- max(length(k1), length(n1), length(k2), length(n2)) # ensure that all vectors have the same length
  if (length(k1) < l) k1 <- rep(k1, length.out=l)
  if (length(n1) < l) n1 <- rep(n1, length.out=l)
  if (length(k2) < l) k2 <- rep(k2, length.out=l)
  if (length(n2) < l) n2 <- rep(n2, length.out=l)

  k1 <- as.numeric(k1)                  # force integer -> float conversion to avoid overflow in multiplication below
  n1 <- as.numeric(n1)
  k2 <- as.numeric(k2)
  n2 <- as.numeric(n2)
  
  O11 <- k1                             # construct "observed" contingency table
  O21 <- n1 - k1
  O12 <- k2
  O22 <- n2 - k2

  R1 <- O11 + O12                       # compute row/column sums and sample size
  R2 <- O21 + O22
  C1 <- n1
  C2 <- n2
  N <- n1 + n2

  ## common form for homogeneity test with Yates' correction (Evert 2004, p.82)
  term <- abs(O11 * O22 - O12 * O21)
  if (correct) term <- pmax(term - N/2, 0)
  X2 <- (N * term^2) / (R1 * R2 * C1 * C2)

  # approximate one-sided chi-squared statistic as signed root of X2 (-> standard normal distribution)
  if (one.sided) {                      
    X2 <- sign(k1/n1 - k2/n2) * sqrt(X2)
  }

  X2
}
chisq.pval <- function (k1, n1, k2, n2, correct=TRUE,
                        alternative=c("two.sided", "less", "greater")) {
  alternative <- match.arg(alternative)
  
  if (alternative == "two.sided") {
    X2 <- chisq(k1, n1, k2, n2, correct=correct)
    pval <- pchisq(X2, df=1, lower.tail=FALSE)
  } else {
    z <- chisq(k1, n1, k2, n2, correct=correct, one.sided=TRUE)
    pval <- pnorm(z, lower.tail = (alternative == "less"))
  }

  pval
}
cont.table <- function (k1, n1, k2, n2) {
  if (max(length(k1), length(n1), length(k2), length(n2)) > 1) stop("this function does not accept vector arguments")
  if (any(k1 < 0) || any(k1 > n1) || any(n1 <= 0)) stop("k1 and n1 must be integers with 0 <= k1 <= n1")
  if (any(k2 < 0) || any(k2 > n2) || any(n2 <= 0)) stop("k2 and n2 must be integers with 0 <= k2 <= n2")
  if (any(k1 + k2 <= 0)) stop("either k1 or k2 must be non-zero")

  matrix(c(k1, n1-k1, k2, n2-k2), nrow=2, byrow=FALSE)
}
fisher.pval <- function (k1, n1, k2, n2,
                        alternative=c("two.sided", "less", "greater")) {
  alternative <- match.arg(alternative)
  
  if (any(k1 < 0) || any(k1 > n1) || any(n1 <= 0)) stop("k1 and n1 must be integers with 0 <= k1 <= n1")
  if (any(k2 < 0) || any(k2 > n2) || any(n2 <= 0)) stop("k2 and n2 must be integers with 0 <= k2 <= n2")
  if (any(k1 + k2 <= 0)) stop("either k1 or k2 must be non-zero")

  l <- max(length(k1), length(n1), length(k2), length(n2)) # ensure that all vectors have the same length
  if (length(k1) < l) k1 <- rep(k1, length.out=l)
  if (length(n1) < l) n1 <- rep(n1, length.out=l)
  if (length(k2) < l) k2 <- rep(k2, length.out=l)
  if (length(n2) < l) n2 <- rep(n2, length.out=l)

  k <- k1 + k2

  pval <- switch(alternative,
                 greater   = phyper(k1 - 1, n1, n2, k, lower.tail=FALSE),
                 less      = phyper(k1, n1, n2, k, lower.tail=TRUE),
                 two.sided = 2 * pmin(phyper(k1 - 1, n1, n2, k, lower.tail=FALSE), phyper(k1, n1, n2, k, lower.tail=TRUE)))
  pval <- pmax(0, pmin(1, pval))        # clamp p-value to range [0,1] (may be > 1 in two-sided approximation)
  pval
}
prop.cint <- function(k, n, method=c("binomial", "z.score"), correct=TRUE,
                      conf.level=0.95, alternative=c("two.sided", "less", "greater")) {
  method <- match.arg(method)
  alternative <- match.arg(alternative)
  if (any(k < 0) || any(k > n) || any(n < 1)) stop("arguments must be integer vectors with 0 <= k <= n")
  if (any(conf.level <= 0) || any(conf.level > 1)) stop("conf.level must be in range [0,1]")

  l <- max(length(k), length(n), length(conf.level)) # ensure that all vectors have the same length
  if (length(k) < l) k <- rep(k, length.out=l)
  if (length(n) < l) n <- rep(n, length.out=l)
  if (length(conf.level) < l) conf.level <- rep(conf.level, length.out=l)

  if (method == "binomial") {
    ## compute binomial confidence interval (using incomplete Beta function)
    alpha <- if (alternative == "two.sided") (1 - conf.level) / 2 else (1 - conf.level)
    lower <- qbeta(alpha, k, n - k + 1)
    upper <- qbeta(1 - alpha, k + 1, n - k)
    cint <- switch(alternative,
                   two.sided = data.frame(lower = lower, upper = upper),
                   less      = data.frame(lower = 0,     upper = upper),
                   greater   = data.frame(lower = lower, upper = 1))
  } else {
    ## compute z-score confidence interval (by solving quadratic z-test equation for p)
    alpha <- if (alternative == "two.sided") (1 - conf.level) / 2 else (1 - conf.level)
    z <- qnorm(alpha, lower.tail=FALSE) # z-score corresponding to desired confidence level
    yates <- if (correct) 0.5 else 0.0  # whether to apply Yates' correction
    
    k.star <- k - yates                 # lower boundary of confidence interval (solve implicit equation for z-score test)
    k.star <- pmax(0, k.star)           # Yates' correction cannot be satisfied at boundary of valid range for k
    A <- n + z^2                        # coefficients of quadratic equation that has to be solved
    B <- -2 * k.star - z^2
    C <- k.star^2 / n
    lower <- solve.quadratic(A, B, C, nan.lower=0)$lower

    k.star <- k + yates                 # upper boundary of confidence interval
    k.star <- pmin(n, k.star)
    A <- n + z^2
    B <- -2 * k.star - z^2
    C <- k.star^2 / n
    upper <- solve.quadratic(A, B, C, nan.upper=1)$upper

    cint <- switch(alternative,
                   two.sided = data.frame(lower = lower,    upper = upper),
                   less      = data.frame(lower = rep(0,l), upper = upper),
                   greater   = data.frame(lower = lower,    upper = rep(1,l)))
  }

  cint
}
##
## internal helper functions for quadratic equations
##

## solve quadratic equation a x^2 + b x + c = 0 for vectors of coefficients
solve.quadratic <- function (a, b, c, nan.upper=NA, nan.lower=NA) {
  d <- b * b - 4 * a * c                # discriminant
  data.frame(lower = ifelse(d < 0, rep(nan.upper, length(d)), (-b - sqrt(d)) / (2*a)),
             upper = ifelse(d < 0, rep(nan.lower, length(d)), (-b + sqrt(d)) / (2*a)))
}
rel.risk.cint <- function(k1, n1, k2, n2, 
                          conf.level=0.95, alternative=c("two.sided", "less", "greater"),
                          method=c("binomial", "z.score"), correct=TRUE) {
  method <- match.arg(method)
  alternative <- match.arg(alternative)
  if (any(k1 < 0) || any(k1 > n1) || any(n1 <= 0)) stop("k1 and n1 must be integers with 0 <= k1 <= n1")
  if (any(k2 < 0) || any(k2 > n2) || any(n2 <= 0)) stop("k2 and n2 must be integers with 0 <= k2 <= n2")
  if (any(k1 + k2 <= 0)) stop("either k1 or k2 must be non-zero")
  if (any(conf.level <= 0) || any(conf.level > 1)) stop("conf.level must be in range [0,1]")

  conf.level <- sqrt(conf.level)        # adjust conf.level to ensure relative risk ratio is in computed range

  ## compute individual confidence intervals for population proportions p1 and p2
  p1 <- prop.cint(k1, n1, method=method, correct=correct, conf.level=conf.level,
                  alternative=switch(alternative, two.sided="two.sided", less="less", greater="greater"))
  p2 <- prop.cint(k2, n2, method=method, correct=correct, conf.level=conf.level,
                  alternative=switch(alternative, two.sided="two.sided", less="greater", greater="less"))
  ## note that one-sided confidence intervals are swapped for p2 (because r <= A/B if p1 <= A and p2 >= B)
  l1 <- nrow(p1)
  l2 <- nrow(p2)
  if (l1 != l2) stop("(k1,n1) and (k2,n2) must have the same length")
  
  ## derive conservative confidence interval for the ratio r = p1/p2
  if (alternative == "two.sided") {
    upper <- ifelse(p2$lower >= 0, p1$upper / p2$lower, Inf)
    lower <- ifelse(p2$upper >= 0, p1$lower / p2$upper, Inf)
  }
  else if (alternative == "less") {
    upper <- ifelse(p2$lower >= 0, p1$upper / p2$lower, Inf)
    lower <- rep(0, l1)
  } else {
    upper <- rep(Inf, l1)
    lower <- ifelse(p2$upper >= 0, p1$lower / p2$upper, Inf)
  }
  
  data.frame(lower=lower, upper=upper)
}
z.score <- function (k, n, p=.5, correct=TRUE) {
  if (any(k < 0) || any(k > n) || any(n < 1)) stop("arguments must be integer vectors with 0 <= k <= n")
  if (any(p < 0) || any(p > 1)) stop("null hypothesis proportion p must be in range [0,1]")

  l <- max(length(k), length(n), length(p)) # ensure that all vectors have the same length
  if (length(k) < l) k <- rep(k, length.out=l)
  if (length(n) < l) n <- rep(n, length.out=l)
  if (length(p) < l) p <- rep(p, length.out=l)

  expected <- n * p                     # compute z-score (with optional Yates' correction)
  variance <- n * p * (1-p)
  d <- k - expected
  if (correct) d <- sign(d) * pmax(0, abs(d) - .5)
  z <- d / sqrt(variance)
  z
}
z.score.pval <- function (k, n, p=.5, correct=TRUE, alternative=c("two.sided", "less", "greater")) {
  alternative <- match.arg(alternative)
  
  z <- z.score(k, n, p=p, correct=correct)
  pval <- switch(alternative,
                 two.sided = 2 * pnorm(abs(z), lower.tail=FALSE),
                 less      = pnorm(z, lower.tail=TRUE),
                 greater   = pnorm(z, lower.tail=FALSE))
  pval
}
