.packageName <- "polycor"
# last modified 5 Dec 04 by J. Fox

"hetcor" <-
function(data, ..., ML=FALSE, std.err=TRUE, bins=4){
  UseMethod("hetcor")
  }
# last modified 12 Dec 04 by J. Fox

"hetcor.data.frame" <-
function(data, ML=FALSE, std.err=TRUE, use=c("complete.obs", "pairwise.complete.obs"), bins=4, ...){
  se.r <- function(r, n){
    rho <- r*(1 + (1 - r^2)/(2*(n - 3))) # approx. unbiased estimator
    v <- (((1 - rho^2)^2)/(n + 6))*(1 + (14 + 11*rho^2)/(2*(n + 6)))
    sqrt(v)
    }
  use <- match.arg(use)
  if (class(data) != "data.frame") stop("argument must be a data frame.")
  if (use == "complete.obs") data <- na.omit(data)
  p <- length(data)
  if (p < 2) stop("fewer than 2 variables.")
  R <- matrix(1, p, p)
  Type <- matrix("", p, p)
  SE <- matrix(0, p, p)
  N <- matrix(0, p, p)
  Test <- matrix(0, p, p)
  diag(N) <- if (use == "complete.obs") nrow(data)
             else sapply(data, function(x) sum(!is.na(x)))
  for (i in 2:p) {
    for (j in 1:(i-1)){
      x <- data[[i]]
      y <- data[[j]]
      if (inherits(x, c("numeric", "integer")) && inherits(y, c("numeric", "integer"))) {
         r <- cor(x, y, use="complete.obs")
         Type[i, j] <- Type[j, i] <- "Pearson"
         R[i, j] <- R[j, i] <- r
         if (std.err) {
           n <- sum(complete.cases(x, y))
           SE[i, j] <- SE[j, i] <- se.r(r, n)
           N[i, j] <- N[j, i] <- n
           Test[i, j] <- pchisq(chisq(x, y, r, bins=bins), bins^2 - 2, lower.tail=FALSE)
           }
         }
      else if (inherits(x, "factor") && inherits(y, "factor")) {
         Type[i, j] <- Type[j, i] <- "Polychoric"
         result <- polychor(x, y, ML=ML, std.err=std.err)
         if (std.err){
           n <- sum(complete.cases(x, y))
           R[i, j] <- R[j, i] <- result$rho
           SE[i, j] <- SE[j, i] <- sqrt(result$var[1,1])
           N[i, j] <- N[j, i] <- n
           Test[i, j] <- pchisq(result$chisq, result$df, lower.tail=FALSE)
           }
         else R[i, j] <- R[j, i] <- result
         }
       else {
         if (inherits(x, "factor") && inherits(y, c("numeric", "integer")))
           result <- polyserial(y, x, ML=ML, std.err=std.err, bins=bins)
         else if (inherits(x, c("numeric", "integer")) && inherits(y, "factor"))
           result <- polyserial(x, y, ML=ML, std.err=std.err, bins=bins)
         else {
             stop("columns must be numeric or factors.")
             }
         Type[i, j] <- Type[j, i] <- "Polyserial"
         if (std.err){
           n <- sum(complete.cases(x, y))
           R[i, j] <- R[j, i] <- result$rho
           SE[i, j] <- SE[j, i] <- sqrt(result$var[1,1])
           N[i, j] <- N[j, i] <- n
           Test[i, j] <- pchisq(result$chisq, result$df, lower.tail=FALSE)
           }
         else R[i, j] <- R[j, i] <- result
         }
       }
     }
   rownames(R) <- colnames(R) <- names(data)
   result <- list(correlations=R, type=Type, NA.method=use, ML=ML)
   if (std.err) {
     rownames(SE) <- colnames(SE) <- names(data)
     rownames(N) <- colnames(N) <- names(N)
     rownames(Test) <- colnames(Test) <- names(data)
     result$std.errors <- SE
     result$n <- if (use == "complete.obs") n else N
     result$tests <- Test
     }
   class(result) <- "hetcor"
   result
   }
# last modified 5 Dec 04 by J. Fox

"hetcor.default" <-
function(data, ..., ML=FALSE, std.err=TRUE, bins=4){
  dframe <- data.frame(data, ...)
  names(dframe)[1] <- deparse(substitute(data))
  hetcor(dframe, ML=ML, std.err=std.err, bins=bins)
  }
# last modified 12 Dec 04 by J. Fox

"polychor" <-
function (x, y, ML=FALSE, control=list(), std.err=FALSE, maxcor=.9999){
  f <- function(pars) {
    if (length(pars) == 1){
       rho <- pars
       if (abs(rho) > maxcor) rho <- sign(rho)*maxcor
       row.cuts <- rc
       col.cuts <- cc
       }
     else {
       rho <- pars[1]
       if (abs(rho) > maxcor) rho <- sign(rho)*maxcor
       row.cuts <- pars[2:r]
       col.cuts <- pars[(r+1):(r+c-1)]
       }
    P <- binBvn(rho, row.cuts, col.cuts)
     - sum(tab * log(P))
    }
  tab <- if (missing(y)) x else table(x, y)
  r <- nrow(tab)
  c <- ncol(tab)
  n <- sum(tab)
  rc <- qnorm(cumsum(rowSums(tab))/n)[-r]
  cc <- qnorm(cumsum(colSums(tab))/n)[-c]
  if (ML) {
    result <- optim(c(optimise(f, interval=c(-1, 1))$minimum, rc, cc), f,
      control=control, hessian=std.err)
    if (std.err) {
      chisq <- 2*(result$value + sum(tab * log((tab + 1e-6)/n)))
      df <- length(tab) - r - c
      result <- list(type="polychoric",
                     rho=result$par[1],
                     row.cuts=result$par[2:r],
                     col.cuts=result$par[(r+1):(r+c-1)],
                     var=solve(result$hessian),
                     n=n,
                     chisq=chisq,
                     df=df,
                     ML=TRUE)
      class(result) <- "polycor"
      return(result)
      }
    else return(as.vector(result$par[1]))
    }
  else if (std.err){
    result <- optim(0, f, control=control, hessian=TRUE, method="BFGS")
    chisq <- 2*(result$value + sum(tab *log((tab + 1e-6)/n)))
    df <- length(tab) - r - c 
    result <- list(type="polychoric",
                     rho=result$par,
                     var=1/result$hessian,
                     n=n,
                     chisq=chisq,
                     df=df,
                     ML=FALSE)
    class(result) <- "polycor"
    return(result)
    }
  else optimise(f, interval=c(-1, 1))$minimum
  }
# last modified 12 Dec 04 by J. Fox

"polyserial" <-
function(x, y, ML=FALSE, control=list(), std.err=FALSE, maxcor=.9999, bins=4){
  f <- function(pars){
    rho <- pars[1]
    if (abs(rho) > maxcor) rho <- sign(rho)*maxcor
    cts <- if (length(pars) == 1) c(-Inf, cuts, Inf)
           else c(-Inf, pars[-1], Inf)
    tau <- (matrix(cts, n, s+1, byrow=TRUE) - matrix(rho*z, n, s+1))/
             sqrt(1 - rho^2)
    - sum(log(dnorm(z)*(pnorm(tau[cbind(indices, y+1)]) - pnorm(tau[cbind(indices, y)]))))
    }
  if (!is.numeric(x)) stop("x must be numeric")
  valid <- complete.cases(x, y)
  x <- x[valid]
  y <- y[valid]
  z <- scale(x)
  tab <- table(y)
  n <- sum(tab)
  s <- length(tab)
  indices <- 1:n
  cuts <- qnorm(cumsum(tab)/n)[-s]
  y <- as.numeric(as.factor(y))
  rho <- sqrt((n - 1)/n)*sd(y)*cor(x, y)/sum(dnorm(cuts))
  if (ML) {
    result <- optim(c(rho, cuts), f, control=control, hessian=std.err)
    if (std.err){
        chisq <- chisq(y, z, result$par[1], result$par[-1], bins=bins)
        df <- s*bins - s  - 1
        result <- list(type="polyserial",
                    rho=result$par[1],
                    cuts=result$par[-1],
                    var=solve(result$hessian),
                    n=n,
                    chisq=chisq,
                    df=df,
                    ML=TRUE)
        class(result) <- "polycor"
        return(result)
        }
    else return(as.vector(result$par[1]))  
    }
  else if (std.err){
    result <- optim(rho, f, control=control, hessian=TRUE, method="BFGS")
    chisq <- chisq(y, z, rho, cuts, bins=bins)
    df <- s*bins - s  - 1
    result <- list(type="polyserial",
                rho=result$par,
                var=1/result$hessian,
                n=n,
                chisq=chisq,
                df=df,
                ML=FALSE)
    class(result) <- "polycor"
    return(result)
    }
  else rho
  }
# last modified 12 Dec 04 by J. Fox

"print.hetcor" <-
function(x, digits = max(3, getOption("digits") - 3), ...){
  R <- signif(x$correlations, digits=digits)
  R[upper.tri(R)] <- x$type[upper.tri(R)]
  R <- as.data.frame(R)
  if (x$ML) cat("\nMaximum-Likelihood Estimates\n")
  else cat("\nTwo-Step Estimates\n")
  cat("\nCorrelations/Type of Correlation:\n")
  print(R)
  if (!is.null(x$std.errors)){
    SE <- signif(x$std.errors, digits)
    diag(SE) <- ""
    if (x$NA.method == "complete.obs"){
      SE[upper.tri(SE)] <- ""
      cat("\nStandard Errors:\n")
      SE <- as.data.frame(SE)
      print(SE[,-ncol(SE)])
      cat(paste("\nn =", x$n, "\n"))
      }
    else {
      SE[upper.tri(SE)] <- x$n[upper.tri(SE)]
      diag(SE) <- diag(x$n)
      SE <- as.data.frame(SE)
      cat("\nStandard Errors/Numbers of Observations:\n")
      print(SE)
      }
    Test <- signif(x$tests, digits)
    Test[upper.tri(Test)] <- ""
    diag(Test) <- ""
    Test <- as.data.frame(Test)
    cat("\nP-values for Tests of Bivariate Normality:\n")
    print(Test[,-ncol(Test)])
    }
  invisible(x)
  }
# last modified 10 Dec 04 by J. Fox

"print.polycor" <-
function(x, digits = max(3, getOption("digits") - 3), ...){
  if (x$type == "polychoric"){
    se <- sqrt(diag(x$var))
    se.rho <- se[1]
    est <- if (x$ML) "ML est." else "2-step est."
    cat("\nPolychoric Correlation, ", est, " = ", signif(x$rho, digits),
      " (", signif(se.rho, digits), ")", sep="")
    cat("\nTest of bivariate normality: Chisquare = ", signif(x$chisq, digits),
      ", df = ", x$df, ", p = ", signif(pchisq(x$chisq, x$df, lower.tail=FALSE), digits),
      "\n", sep="")
    r <- length(x$row.cuts)
    c <- length(x$col.cuts)
    if (r == 0) return(invisible(x))
    row.cuts.se <- se[2:(r+1)]
    col.cuts.se <- se[(r+2):(r+c+1)]
    cat("\n  Row Thresholds\n")
    rowThresh <- signif(cbind(x$row.cuts, row.cuts.se), digits)
    colnames(rowThresh) <- c("Threshold", "Std.Err.")
    rownames(rowThresh) <- 1:r
    print(rowThresh)
    cat("\n\n  Column Thresholds\n")
    colThresh <- signif(cbind(x$col.cuts, col.cuts.se), digits)
    colnames(colThresh) <- c("Threshold", "Std.Err.")
    rownames(colThresh) <- 1:c
    print(colThresh)
    }
  else if (x$type == "polyserial"){
    se <- sqrt(diag(x$var))
    se.rho <- se[1]
    est <- if (x$ML) "ML est." else "2-step est."
    cat("\nPolyserial Correlation, ", est, " = ", signif(x$rho, digits),
      " (", signif(se.rho, digits), ")", sep="")
    cat("\nTest of bivariate normality: Chisquare = ", signif(x$chisq, digits),
      ", df = ", x$df, ", p = ", signif(pchisq(x$chisq, x$df, lower.tail=FALSE), digits),
      "\n\n", sep="")
    if (length(se) == 1) return(invisible(x))
    cuts.se <- se[-1]
    thresh <- signif(rbind(x$cuts, cuts.se), digits)
    colnames(thresh) <- 1:length(x$cuts)
    rownames(thresh) <- c("Threshold", "Std.Err.")
    print(thresh)
    }
  else print(unclass(x))
  invisible(x)
  }
# last modified 12 Dec 04 by J. Fox

binBvn <- function(rho, row.cuts, col.cuts, bins=4){  
    row.cuts <- if (missing(row.cuts)) c(-Inf, 1:(bins - 1)/bins, Inf) else  c(-Inf, row.cuts, Inf)
    col.cuts <- if (missing(col.cuts)) c(-Inf, 1:(bins - 1)/bins, Inf) else  c(-Inf, col.cuts, Inf)
    r <- length(row.cuts) - 1
    c <- length(col.cuts) - 1
    P <- matrix(0, r, c)
    R <- matrix(c(1, rho, rho, 1), 2, 2)
    for (i in 1:r){
        for (j in 1:c){
            P[i,j] <- pmvnorm(lower=c(row.cuts[i], col.cuts[j]),
                            upper=c(row.cuts[i+1], col.cuts[j+1]),
                            corr=R)
            }
        }
    P
    }
    
chisq <- function(x, y, rho, row.cuts, col.cuts, zerotol=1e-6, bins=4){  
    if (missing(row.cuts)) row.cuts <- qnorm(1:(bins - 1)/bins)
    if (missing(col.cuts)) col.cuts <- qnorm(1:(bins - 1)/bins)
    P <- binBvn(rho, row.cuts, col.cuts, bins=bins)
    if (!is.factor(x)) x <- cut(scale(x), c(-Inf, row.cuts, Inf))
    if (!is.factor(y)) y <- cut(scale(y), c(-Inf, col.cuts, Inf))
    tab <- table(x, y)
    n <- sum(tab)
    2*sum(tab*log((tab + zerotol)/(P*n)))
    }

as.matrix.hetcor <- function(x) x$correlations
