.packageName <- "genetics"
# $Id: HWE.chisq.R,v 1.2 2003/05/22 17:25:23 warnesgr Exp $


###
### Hardy-Weinberg Equilibrium Significance Test
###


HWE.chisq <- function(x, ...)
  UseMethod("HWE.chisq")

HWE.chisq.genotype <- function (x, simulate.p.value = TRUE, B = 10000, ...)
{
    observed.no <- table(factor(allele(x, 1), levels = allele.names(x)), 
        factor(allele(x, 2), levels = allele.names(x)))
    tab <- observed.no
    tab <- 0.5 * (tab + t(tab))
    k <- ncol(tab)
    if(simulate.p.value)
      {
        test <- chisq.test(tab, simulate.p.value = simulate.p.value, 
                           B = B, ...)
      }
    else
      {
        test <- chisq.test(tab, ...)
        test$parameter <- k*(k-1)/2
        test$p.value <- pchisq(test$statistic, test$parameter, lower = FALSE)
        names(test$statistic) <- "X-squared"
        names(test$parameter) <- "df"
      }
    return(test)
}

# $Id: HWE.exact.R,v 1.4 2003/05/22 17:25:23 warnesgr Exp $
#
# Based on code submitted by David Duffy <davidD@qimr.edu.au>
#
# Exact test for HWE: 2 alleles
#
# See eg Emigh TH.  Comparison of tests for Hardy-Weinberg Equilibrium.
#  Biometrics 1980; 36: 627-642
#

HWE.exact <- function(x)
{
  if(!is.genotype(x))
    stop("x must be of class 'genotype' or 'haplotype'")

  nallele <- length(na.omit(allele.names(x)))

  if(nallele != 2)
    stop("Exact HWE test can only be computed for 2 markers with 2 alleles")


  allele.tab <- table( factor(allele(x,1), levels=allele.names(x)),
                       factor(allele(x,2), levels=allele.names(x)) )

  n11 <- allele.tab[1,1]
  n12 <- allele.tab[1,2] + allele.tab[2,1]
  n22 <- allele.tab[2,2]
  
  n1 <- 2*n11+n12
  n2 <- 2*n22+n12

  
  dhwe2 <- function(n11, n12, n22) {
    f <- function(x) lgamma(x+1)
    n <- n11+n12+n22
    n1 <- 2*n11+n12
    n2 <- 2*n22+n12
    exp(log(2)*(n12) + f(n) - f(n11) - f(n12) - f(n22) - f(2*n) +
        f(n1) + f(n2))
  }


  x12 <- seq(n1 %% 2,min(n1,n2),2)
  x11 <- (n1-x12)/2
  x22 <- (n2-x12)/2
  dist <- data.frame(n11=x11,n12=x12,n22=x22,density=dhwe2(x11,x12,x22))
  dist <- dist[order(dist$density),]

  STATISTIC <- c("N11"=n11,"N12"=n12,"N22"=n22)
  PARAMETER <- c("N1"=n1,"N2"=n2)
  PVAL <- cumsum(dist$density)[dist$n11==n11 &
                               dist$n12==n12 &
                               dist$n22==n22] 
  METHOD <- "Exact Test for Hardy-Weinberg Equilibrium"
  DNAME <- deparse(substitute(x))
  
  retval <- list(statistic = STATISTIC, parameter = PARAMETER, 
        p.value = PVAL, method = METHOD, data.name = DNAME, observed = x)
  class(retval) = "htest"
  return(retval)

}

# $Id: HWE.test.R,v 1.16 2003/05/22 17:25:23 warnesgr Exp $

### Hardy-Weinberg Equilibrium Disequlibrium Estimates, Confidence
### Intervals, and P-values
###



HWE.test <- function(x, ...)
{
	UseMethod("HWE.test")
}


HWE.test.genotype <- function(x, exact=nallele(x)==2,
                              simulate.p.value=!exact, B=10000,
                              conf=0.95, ci.B=1000, ... )
  # future options "bootstrap","exact"
{

  retval <- list()

  # compute disequlibrium
  retval$diseq <- diseq(x)
  
  # compute confidence intervals
  retval$ci <- diseq.ci(x, R=ci.B, conf=conf)


  # compute p-value
  # compute exact p-value

  # do chisq test
  if(exact)
    retval$test  <- HWE.exact(x)
  else
    {
      tab <- retval$diseq$observed.no
      tab  <- 0.5 * (tab + t(tab))   # make symmetric for chisq.test
      retval$test  <- HWE.chisq(x, simulate.p.value=simulate.p.value,B=B,...)
    }

  
  retval$simulate.p.value <- simulate.p.value
  retval$B <- B
  retval$conf <- conf
  retval$ci.B <- ci.B
  retval$test$data.name  <- deparse(substitute(x))
  retval$call  <- match.call()
  class(retval)  <-  c("HWE.test")
  return(retval)
}



print.HWE.test  <-  function(x, show=c("D","D'","r"), ...)
  {

    cat("\n")
    cat("\t-----------------------------------\n")
    cat("\tTest for Hardy-Weinberg-Equilibrium\n")
    cat("\t-----------------------------------\n")
    cat("\n")
    if(!is.null(x$locus))
      {
        cat("\n")
        print( x$locus )
      }
    cat("Call: \n")
    print(x$call)
    cat("\n")
    if("D" %in% show)
      {
        cat("Raw Disequlibrium for each allele pair (D)\n")
        cat("\n") 
        print(x$diseq$D)
        cat("\n")
      }
    if("D'" %in% show)
      {
        cat("Scaled Disequlibrium for each allele pair (D')\n")
        cat("\n") 
        print(x$diseq$Dprime)
        cat("\n")
      }
    if("r" %in% show)
      {
        cat("Correlation coefficient for each allele pair (r)\n")
        cat("\n") 
        print(x$diseq$r)
        cat("\n")
      }

    if( ncol(x$diseq$r) <= 2 )
      cat("Overall Values\n")
    else
      cat("Overall Values (mean absolute-value weighted by expected allele frequency)\n")
    cat("\n")

    show.tab <- NULL
    
    if("D" %in% show)
      show.tab <- rbind(show.tab, "  D"=x$diseq$D.overall)
    if("D'" %in% show)
      show.tab <- rbind(show.tab, "  D'"=x$diseq$Dprime.overall)
    if("r" %in% show)
      show.tab <- rbind(show.tab, "  r"=x$diseq$r.overall)

    colnames(show.tab) <- "Value"

    print(show.tab)
    
    cat("\n") 

    whichvec <- c("D","D'","r") %in% show

    cat("Confidence intervals computed via bootstrap using", x$ci.B, "samples\n")
    cat("\n")

    if(!is.null(x$ci$warning.text))
      cat(strwrap(paste("WARNING:", x$ci$warning.text), prefix="    * "),"\n",
          sep="\n")
    
    show.tab <- matrix(ncol=4, nrow=3)
    tmp <- format(x$ci$ci[,1:3], digits=getOption("digits"))
    show.tab[,1] <- tmp[,1]  # Observed
    show.tab[,2] <- paste("(", tmp[,2], ", ", tmp[,3], ")", sep="" )
    show.tab[,3] <- x$ci$ci[,4]
    show.tab[,4] <- ifelse(x$ci$ci[,5],"YES","*NO*")

    colnames(show.tab) <- c("Observed", "95% CI", "NA's", "Contains Zero?")
    rownames(show.tab) <- paste("  ", rownames(tmp), sep="")
    
    print(show.tab[whichvec,], quote=FALSE)

    cat("\n")
    cat("Significance Test:\n")
    print(x$test)
    cat("\n")
    cat("\n")
  }


HWE.test.data.frame <- function(x, ..., do.Allele.Freq=TRUE,
                                do.HWE.test=TRUE)
{

  data <- makeGenotypes(x)
  names <- names(data)[sapply(data, is.genotype)]
  
  for(i in names)
  {
    gene   <- getlocus(i)
    genedata <- data[[i]]
    
    cat("\n")
    cat("+-------------------------------------\n");
    if(!is.null(gene))
      {
        cat("|\tMarker:\t ")
        print(gene)
      }
    else
      cat("|\tMarker: ", i, "\n")
    cat("+-------------------------------------\n");
  
    if(do.Allele.Freq)
      {
         # compute and print the allele and genotype frequencies
        sum  <-  summary(genedata)
        print(sum)
      }
  
    if(do.HWE.test)
      {
        if(length(allele.names(genedata))<2)
          {
            cat( '*** No variant alleles observed, unable to perform\n',
                 '*** test for Hardy-Wienburg Equilibrium. \n', sep='')
          }
        else
          {
            # now do and print the HWE test
            hwe  <- HWE.test(genedata, ...)
            print(hwe)
          }
  
      }
  }
  
}
# $Id: LD.R,v 1.6 2003/06/03 12:59:14 warnesgr Exp $

# R translation of Cathy Stack's SAS macro
# Assumes 2-alleles

LD <- function(g1,...)
  UseMethod("LD",g1)

LD.data.frame <- function(g1,...)
  {
    gvars <- sapply( g1, function(x) (is.genotype(x) && nallele(x)==2) )
    if(any(gvars==FALSE))
      {
        warning("Non-genotype variables or genotype variables ",
                "with more or less than two alleles detected. ",
                "These variables will be omitted: ",                
                paste( colnames(g1)[!gvars] , collapse=", " )
                )
        g1 <- g1[,gvars]
      }
    

    P <- matrix(nrow=ncol(g1),ncol=ncol(g1))
    rownames(P) <- colnames(g1)
    colnames(P) <- colnames(g1)

    P <- D <- Dprime <- nobs <- chisq <- p.value <- corr <- P

    for(i in 1:(ncol(g1)-1) )
      for(j in (i+1):ncol(g1) )
        {
          ld <- LD( g1[,i], g1[,j] )
          
          D      [i,j] <- ld$D
          Dprime [i,j] <- ld$"D'"
          corr    [i,j] <- ld$"r"
          nobs   [i,j] <- ld$"n"
          chisq  [i,j] <- ld$"X^2"
          p.value[i,j] <- ld$"P-value"
        }
    
    retval <- list(
                   call=match.call(),
                   "D"=D,
                   "D'"=Dprime,
                   "r" = corr,
                   "n"=nobs,
                   "X^2"=chisq,
                   "P-value"=p.value
           )

    class(retval) <- "LD.data.frame"
    
    retval
  }

LD.genotype <- function(g1,g2,...)
  {
    if(is.haplotype(g1) || is.haplotype(g2))
      stop("Haplotype options are not yet supported.")
    
    if(nallele(g1)!=2 || nallele(g2)!=2)
      stop("This function currently only supports 2-allele genotypes.")

    prop.A <- summary(g1)$allele.freq[,2]
    prop.B <- summary(g2)$allele.freq[,2]
    
    major.A <- names(prop.A)[which.max(prop.A)]
    major.B <- names(prop.B)[which.max(prop.B)]
    pA <- max(prop.A, na.rm=TRUE)
    pB <- max(prop.B, na.rm=TRUE)
    pa <- 1-pA
    pb <- 1-pB

    Dmin <- max(-pA*pB, -pa*pb)
    pmin <- pA*pB + Dmin;

    Dmax <- min(pA*pb, pB*pa);
    pmax <- pA*pB + Dmax;

    counts <- table(
                    allele.count(g1, major.A),
                    allele.count(g2, major.B)
                    )

    n3x3 <- matrix(0, nrow=3, ncol=3)
    colnames(n3x3) <- rownames(n3x3) <- 0:2

    # ensure the matrix is 3x3, with highest frequency values in upper left
    for(i in rownames(counts))
      for(j in colnames(counts))
        n3x3[3-as.numeric(i),3-as.numeric(j)] <- counts[i,j]

    
    loglik <- function(pAB,...)
      {
        (2*n3x3[1,1]+n3x3[1,2]+n3x3[2,1])*log(pAB) +
        (2*n3x3[1,3]+n3x3[1,2]+n3x3[2,3])*log(pA-pAB) +
        (2*n3x3[3,1]+n3x3[2,1]+n3x3[3,2])*log(pB-pAB) +
        (2*n3x3[3,3]+n3x3[3,2]+n3x3[2,3])*log(1-pA-pB+pAB) + 
        n3x3[2,2]*log(pAB*(1-pA-pB+pAB) + (pA-pAB)*(pB-pAB))
      }

    # SAS code uses:
    #
    #s <- seq(pmin+0.0001,pmax-0.0001,by=0.0001)
    #lldmx <- loglik(s)
    #maxi <- which.max(lldmx)
    #pAB <- s[maxi]

    # but this should be faster:
    solution <- optimize(
                         loglik,
                         lower=pmin+.Machine$double.eps,
                         upper=pmax-.Machine$double.eps,
                         maximum=TRUE
                         )
    pAB <- solution$maximum

    estD <- pAB - pA*pB
    if (estD>0)  
      estDp <- estD / Dmax
    else
      estDp <- estD / Dmin

    n <-  sum(n3x3)

    corr <- estD / sqrt( pA * pB * pa * pb )
    
    dchi <- (2*n*estD^2)/(pA * pa * pB* pb)
    dpval <- 1 - pchisq(dchi,1)

    retval <- list(
                   call=match.call(),
                   "D"=estD,
                   "D'"=estDp,
                   "r" = corr,
                   "n"=n,
                   "X^2"=dchi,
                   "P-value"=dpval
                   )

    class(retval) <- "LD"
    
    retval
    
  }


# $Id: binsearch.R,v 1.3 2003/05/22 17:25:23 warnesgr Exp $

binsearch <- function(fun, range, ..., target=0,
                      lower=ceiling(min(range)),upper=floor(max(range)),
                      maxiter=100, showiter=FALSE)
    {

      # initialize
      lo <- lower
      hi <- upper
      counter <- 0
      val.lo <- fun(lo,...)
      val.hi <- fun(hi,...)

      # check whether function is increasing or decreasing, & set sign
      # appropriately.
      if( val.lo > val.hi )
        sign <- -1
      else
        sign <- 1

      # check if value is outside specified range
      if(target * sign < val.lo * sign)
          outside.range <- TRUE
      else if(target * sign >  val.hi * sign)
          outside.range <- TRUE
      else
        outside.range <- FALSE

      # iteratively move lo & high closer together until we run out of
      # iterations, or they are adjacent, or they are identical
      while(counter < maxiter && !outside.range )
        {

          counter <- counter+1

          if(hi-lo<=1 || lo<lower || hi>upper) break;

          center <- round((hi - lo)/2 + lo ,0  )
          val <- fun(center)

          if(showiter)
            {
              cat("--------------\n")
              cat("Iteration #", counter, "\n")
              cat("lo=",lo,"\n")
              cat("hi=",hi,"\n")
              cat("center=",center,"\n")
              cat("fun(lo)=",val.lo,"\n")
              cat("fun(hi)=",val.hi,"\n")
              cat("fun(center)=",val,"\n")
            }

          
          if( val==target )
            {
              val.lo <- val.hi <- val
              lo <- hi <- center
              break;
            }
          else if( sign*val < sign*target )
            {
              lo <- center
              val.lo <- val
            }
          else #( val > target )
            {
              hi <- center
              val.hi <- val
            }

        if(showiter)
          {
            cat("new lo=",lo,"\n")
            cat("new hi=",hi,"\n")
            cat("--------------\n")
          }
          
        }
      
      # Create return value
      retval <- list()
      retval$call <- match.call()
      retval$numiter <- counter

      if( outside.range )
        {
          if(target * sign < val.lo * sign)
            {
              warning("Reached lower boundary")
              retval$flag="Lower Boundary"
              retval$where=lo
              retval$value=val.lo
            }
          else #(target * sign >  val.hi * sign)
          {
            warning("Reached upper boundary")
            retval$flag="Upper Boundary"
            retval$where=hi
            retval$value=val.hi
          }
        }
      else if( counter >= maxiter )
        {
          warning("Maximum number of iterations reached")
          retval$flag="Maximum number of iterations reached"
          retval$where=c(lo,hi)
          retval$value=c(fun.lo,fun.hi)
        }
      else if( val.lo==target )
        {
          retval$flag="Found"
          retval$where=lo
          retval$value=val.lo
        }
      else if( val.hi==target )
        {
          retval$flag="Found"
          retval$where=lo
          retval$value=val.lo
        }
      else
        {
          retval$flag="Between Elements"
          retval$where=c(lo, hi)
          retval$value=c(val.lo, val.hi)
        }
      
      return(retval)

    }
                 
                                             

# $Id: ci.balance.R,v 1.4 2004/03/25 17:15:06 warnesgr Exp $

ci.balance <- function(x, est, confidence=0.95, alpha=1-confidence,
                       minval, maxval, na.rm=TRUE)
  {
    if( any(is.na(x) ) )
      {
        if( na.rm)
          x <- na.omit(x)
        else
          stop("Missing values and NaN's not allowed if `na.rm' is FALSE.")
      }

    if(missing(minval))
      {
        minval <- min(x)
        minname <- "min(x)"
      }
    else
      minname <- "Lower Boundary"
    
    if(missing(maxval))
      {
        maxval <- max(x)
        maxname <- "max(x)"
      }
    else
      maxname <- "Upper Boundary"
    
    x <- sort(x)
    n <- length(x)
    half.window <- n * (1-alpha) / 2
    n.below <- sum( x < est ) + sum( x==est )/2
    n.above <- sum( x > est ) + sum( x==est )/2 

    overflow.upper <- max(0, half.window - n.above )
    overflow.lower <- max(0, half.window - n.below ) 
    
    lower.n <- max(1, floor  ( n.below - half.window - overflow.upper ) )
    upper.n <- min(n, ceiling( n - (n.above - half.window - overflow.lower ) ) )
    
    ci <- c( x[lower.n], x[upper.n] )
    names(ci) <- paste( format( c(lower.n, upper.n)/n*100,digits=3 ), "%", sep="")

    if(overflow.lower>0)
      {
        lower.n <- minname
        names(ci)[1] <- minname
        ci[1] <- minval
      }
    if(overflow.upper>0)
      {
        upper.n <- maxname
        names(ci)[2] <- maxname
        ci[2] <- maxval
      }
    

    return(
           ci=ci,
           overflow.upper=overflow.upper,
           overflow.lower=overflow.lower,
           n.above=n.above,
           n.below=n.below,
           lower.n=lower.n,
           upper.n=upper.n
           )
  }

#diseq.old <- function(x)
#{
#  if(!("genotype") %in% class(x) )
#    stop("x must inherit from class 'genotype'.")
  
#  # Estimates and tests per Wier (1996)  Genetic Data Analysis II

#  allele.names  <-  allele.names(x)
  
#  n <- length(na.omit(x))

#  require(ctest)

#  # specify levels so we get zeros where we need them
#  tab  <- table( factor(allele(x,1), levels=allele.names(x)),
#                 factor(allele(x,2), levels=allele.names(x)) )

#  allele.prob  <- table( allele(x) ) / (2 * n)

#  k  <-  length(allele.names)
#  D.hat  <-  matrix(NA, ncol=5, nrow= k * (k+1) / 2 + 1)
#  rnames  <- rep("",nrow(D.hat))
#  index  <- 1
#  for(i in 1:k)
#    for(j in i:k)
#      {
#        rnames[index]  <- paste( allele.names[i],
#                                allele.names[j], sep="/")

#        D.hat[index,1]  <- P  <-  tab[i,j]

#        if(i==j)
#          {
#            D.hat[index,2] <- pp <- n*allele.prob[allele.names[i]] ^ 2
#            D.hat[index,5] <- 1
#          }
#        else
#          {
#            D.hat[index,2] <- pp <- 2 * n * allele.prob[allele.names[i]] *
#                                            allele.prob[allele.names[j]]
#            D.hat[index,5] <- 2

#            D.hat[index,3] <- D  <- P - pp
#            D.hat[index,4] <- D/n / D.hat[index,5]
#          }
          
#        index  <-  index+1
#      }

#  rnames[nrow(D.hat)]  <- "Overall"
#  D.hat[nrow(D.hat),1]  <- n

#  ab  <- abs(D.hat[,3]/D.hat[,5])[-nrow(D.hat)]
##  ab  <- (D.hat[,3]/D.hat[,5])[-nrow(D.hat)]
#  nab  <- D.hat[,5][-nrow(D.hat)]
#  Dh  <- mean(rep(ab,nab),na.rm=TRUE)/n
#  D.hat[nrow(D.hat),4]  <-Dh

#  rownames(D.hat)  <- rnames
#  colnames(D.hat) <- c("Observed","Expected","Obs-Exp","D-hat","")
  
#  retval  <- list()
#  retval$data <-  tab
#  retval$D.hat  <- D.hat
#  retval$call  <- match.call()
#  class(retval)  <-  "diseq.old"
#  return(retval)
#}


#print.diseq.old  <-  function(x, show.table=TRUE, ...)
#  {

#    cat("\n")
#    if(!is.null(x$locus))
#      {
#        cat("\n")
#        print( x$locus )
#      }
#    cat("\n")
#    cat("Call: \n")
#    print(x$call)
#    cat("\n")
#    if(show.table)
#      {
#        cat("Disequlibrium (D-hat) Computation Table:\n")
#        cat("\n") 
#        print(x$D.hat[,-5] )
#        cat("\n")
#      }
#    cat("Overall Disequlibrium:\n")
#    cat("\n")
#    cat("\tD-hat :  ", x$D.hat[nrow(x$D.hat),4], "\n", sep="")
#    cat("\n")
#    cat("\n")
#  }

#diseq.ci.old <- function(x, R=1000, conf=0.95)
#{
#  if (!("genotype") %in% class(x) )
#    stop("x must inherit from class 'genotype'.")
  
#  bootfun <- function(x, ids) {
#    tmp <- diseq.old(x[ids])$D.hat
#    tmp[nrow(tmp),4]
#  }
  
#  bb <- boot( x, bootfun, R=R )
#  bb.ci <- boot.ci(bb, type=type, conf=conf, ...)
#  bb.ci
#}


diseq <- function(x, ...)
{
	UseMethod("diseq")
}

# Pairwise Disequilibrium Measures. For each pair of markers we
# calculated D, D' and r, the most commonly used measures of LD
# (other measures are reviewed in refs 14,15 ). For a pair of markers
# i and j, we defined Dij = p(11) - p(1)p(1), where p(ab) is the
# estimated frequency of the haplotypes with alleles a at marker i and
# b at marker j and  denotes any allele. Then, D'ij = |D11/Dmax|,
# where Dmax = max(p(1)p(1),p(2)p(2)) if Dij > 0 and Dmax =
# max(p(1)p(2),p(2)p(1)) otherwise, and rij = Dij/(p(1)p(1)p(2)p(2)).

diseq.genotype <- function(x, ...)
  { 
    observed.no <- table( factor(allele(x,1), levels=allele.names(x)),
                          factor(allele(x,2), levels=allele.names(x)) )
    observed <- prop.table(observed.no)
    observed <- 1/2 * (observed + t(observed) )

    retval <- diseq.table(observed)
    retval$observed.no <- observed.no
    retval$call <- match.call()
    retval
  }
  
diseq.table <- function(x, ...)
{
  observed <- x
  allele.freq <- apply(observed,1,sum)
  # equal to: allele.freq <- apply(observed,2,sum)

  expected <- outer(allele.freq, allele.freq, "*")

  diseq <- expected - observed
  diag(diseq) <- NA

  dmax.positive <- expected
  # equals: max( p(i)p(j), p(j)p(i) )
  
  dmax.negative <- outer(allele.freq, allele.freq, pmin ) - expected
  # equals: min( p(i) * (1 - p(j)), p(j)( 1 - (1-p(i) ) ) )
  
  dprime <- diseq / ifelse( diseq > 0, dmax.positive, dmax.negative )

  # r gives the pairwise correlation coefficient for pairs containing at lease
  # one allele from the specified pair.
  # For two alleles:
  #    corr coefficient = diseq / sqrt( p(a) * (1-p(a) ) * p(b) * (1-p(b)) )
  p.1.minus.p <- allele.freq * (1-allele.freq)
  r <-  -diseq  / sqrt( outer( p.1.minus.p, p.1.minus.p, "*") )

  # above formula works unchanged for 2 alleles, but requires adjustment
  # for multiple alleles.  
  r <- r * (length(allele.freq) - 1)

  offdiag.expected <- expected
  diag(offdiag.expected) <- NA
  sum.expected <- sum(offdiag.expected, na.rm=TRUE)

  if(all(dim(x)==2)) # 2 allele case
    {
      diseq.overall <- diseq[1,2]
      dprime.overall <- dprime[1,2]
      r.overall <- r[1,2]
    }
  else
    {
      diseq.overall <- sum( abs(diseq) * expected , na.rm=TRUE ) / sum.expected
      dprime.overall <- sum( abs(dprime) * expected , na.rm=TRUE ) / sum.expected
      r.overall <- sum( abs(r) * expected , na.rm=TRUE ) / sum.expected
    }
  
  diag(r) <- 1.0

  retval <- list(
                 call = match.call(),
                 observed=observed,
                 expected=expected,
                 allele.freq=allele.freq,
                 D=diseq,
                 Dprime=dprime,
                 r=r,
                 D.overall=diseq.overall,
                 Dprime.overall=dprime.overall,
                 r.overall = r.overall
                 )

  class(retval) <- "diseq"
  retval
}


print.diseq  <-  function(x, show=c("D","D'","r"), ...)
  {

    cat("\n")
    if(!is.null(x$locus))
      {
        cat("\n")
        print( x$locus )
      }
    cat("\n")
    cat("Call: \n")
    print(x$call)
    cat("\n")
    if("D" %in% show)
      {
        cat("Disequlibrium for each allele pair (D)\n")
        cat("\n") 
        print(x$D)
        cat("\n")
      }
    if("D'" %in% show)
      {
        cat("Disequlibrium for each allele pair (D')\n")
        cat("\n") 
        print(x$Dprime)
        cat("\n")
      }
    if("r" %in% show)
      {
        cat("Correlation coefficient for each allele pair (r)\n")
        cat("\n") 
        print(x$r)
        cat("\n")
      }
    
    if( ncol(x$r) <= 2 )
      cat("Overall Values\n")
    else
      cat("Overall Values (mean absolute-value weighted by expected allele frequency)\n")
    cat("\n")
    
    if("D" %in% show)
      cat("  D :  ", x$D.overall, "\n", sep="")
    if("D'" %in% show)
      cat("  D':  ", x$Dprime.overall, "\n", sep="")
    if("r" %in% show)
      cat("  r :  ", x$r.overall, "\n", sep="")
    cat("\n")
    cat("\n")
  }

diseq.ci <- function(x, R=1000, conf=0.95, correct=TRUE, na.rm=TRUE, ...)
{
  if (!("genotype") %in% class(x) )
    stop("x must inherit from class 'genotype'.")

  if( any(is.na(x) ) )
    {
      if( na.rm)
        x <- na.omit(x)
      else
        stop("Missing values and NaN's not allowed if `na.rm' is FALSE.")
    }

  if( !require(combinat) )
    stop("Depends on availability of 'combinat' library")
  
  # step 1 - generate summary table
  observed.no <- table( factor(allele(x,1), levels=allele.names(x)),
                        factor(allele(x,2), levels=allele.names(x)) )
  observed <- prop.table(observed.no)
  observed <- 1/2 * (observed + t(observed) )

  # step 2 - make table into a probability vector for calling rmultinom
  n <- sum(observed.no)
  prob.vector <- c(observed)

  # step 3 - sample R multinomials with the specified frequenceis
  # (include observed data to avoid bias)
  resample.data <- cbind(c(observed.no),
                         rmultz2( n, prob.vector, R ) )
  
  bootfun <- function(x) {
    observed[,] <- x/n
    observed <- 1/2 * (observed + t(observed) )
    d <-  diseq(observed)
    c( "Overall D "=d$D.overall,
       "Overall D'"=d$Dprime.overall,
       "Overall r "=d$r.overall)
  }

  results <- apply( resample.data, 2, bootfun )

  alpha.2 <- (1-conf)/2

#  ci <- t(apply(results, 1,
#              quantile, c( alpha.2 , 1-alpha.2), na.rm=TRUE ))

  if(length(allele.names(x))<=2)
    {
      ci <- t(apply(results, 1, function(x) quantile(x, c(0.025, 0.975),
                                                     na.rm=na.rm ) ) )
      warning.text <- NULL
    }
  else
    {
      warning.text <- paste("For more than two alleles, overall",
                            "disequlibrium statistics are bounded",
                            "between [0,1].  Because of this, confidence",
                            "intervals for values near 0 and 1 are",
                            "ill-behaved.", sep=" ")
      
      if(correct)
        {
          warning.text <- paste(warning.text, "A rough correction has been applied, but",
                                "the intervals still may not be correct for values near 0 or 1.",
                                sep=" ")

          ci <- t(apply(results, 1,
                        function(x)
                        ci.balance(x,x[1],confidence=conf,
                                   minval=0,maxval=1)$ci ))
        }
      else
        ci <- t(apply(results, 1, function(x) quantile(x, c(0.025, 0.975) ) ) )
      
      warning(paste(strwrap(c(warning.text,"\n"),prefix="  "),collapse="\n") )
    }

  na.count <-  function(x) sum(is.na(x))
  nas <- apply( results, 1, na.count)

  zero.in.range <- (ci[,1] <= 0) & (ci[,2] >= 0)
  
  ci <- cbind( "Observed"=results[,1], ci, "NAs"=nas,
               "Zero in Range"=zero.in.range )

  outside.ci <- (ci[,1] < ci[,2]) | (ci[,1] > ci[,3])
  
  if( any(outside.ci) )
    warning("One or more observed value outide of confidence interval. Check results.")

  if(any(nas>0))
    warning("NAs returned from diseq call")


  retval <- list(
         call=match.call(),
         R=R,
         conf=conf,
         ci=ci,
         warning.text=warning.text
         )
}

# $Id: genotype.R,v 1.29 2004/05/25 19:40:02 warnesgr Exp $

genotype  <- function(a1, a2=NULL, alleles=NULL, sep="/",
                      remove.spaces=TRUE,
                      reorder=c("yes", "no", "default", "ascii", "freq"),
                      allow.partial.missing=FALSE,
                      locus=NULL)                    
{
    if(missing(reorder))
      reorder  <- "freq"
    else
      reorder <- match.arg(reorder)
    
    if(is.genotype(a1)){
        a1  <-  as.character(a1)
        ## ignore a2
        a2 <- NULL
    }
    else
      {
        a1.d <- dim(a1)
        a1 <- as.character(a1)
        dim(a1) <- a1.d
        a1[is.na(a1)] <- ""   # necessary because of bug in grep & friends,
                              # will be fixed in 1.7.1
      }
    
    if(!is.null(a2))
      {
        a2.d <- dim(a2)
        a2 <- as.character(a2)
        dim(a2) <- a2.d
        a1[is.na(a1)] <- ""  # necessary because of bugs in grep & friends
                             # will be fixed in 1.7.1
      }
    
    if(remove.spaces)
    {
        a1dim <- dim(a1)
        a1  <-  gsub("[ \t]", "", a1)
        dim(a1) <- a1dim
        if(!is.null(a2))
            a2  <-  gsub("[ \t]", "", a2)
    }
    
    if(!is.null(dim(a1)) && ncol(a1) > 1)
        parts <- a1[,1:2]
    else if(!is.null(a2))
        parts  <- cbind(a1,a2)
    else
      {
        # if sep is empty, assume allele names are single characters
        # pasted together
        if(sep=="")
          sep  <- 1

        # Based on the value of sep, reformat into our standard
        # name-slash-name format
        if (is.character(sep) )
          {
            part.list   <- strsplit(a1,sep)
            part.list[ sapply(part.list, length)==0] <- NA

            ## Handle missing / empty values correctly. 
            ## Without this, empty elements are silently dropped
            ## and/or cause errors

            # only first field was given
            half.empties  <- lapply(part.list, length)==1
            part.list[half.empties]  <-  lapply(part.list[half.empties],c,NA)
            
            # neither field was given
            empties  <- is.na(a1) | lapply(part.list, length)==0
            part.list[empties]  <- list(c(NA,NA))

            parts <- matrix(unlist(part.list),ncol=2,byrow=TRUE)

          }
        else if (is.numeric(sep))
          parts  <- cbind( substring(a1,1,sep), substring(a1,sep+1,9999))
        else
          stop(paste("I don't know how to handle sep=",sep))
      }

    mode(parts) <- "character"  # needed for bare NA's o
    
    # convert entirely whitespace alleles to NAs
    temp  <- grep("^[ \t]*$", parts)
    parts[temp]  <-  NA

    #parts[parts=="NA"]  <-  NA

    if(!allow.partial.missing)
      parts[is.na(parts[,1]) | is.na(parts[,2]),]  <- c(NA,NA)
    
    if(missing(alleles) || is.null(alleles))
      alleles <- unique(c(na.omit(parts)))
    else
      {
        which.alleles  <- !(parts %in% alleles)
        if(any(which.alleles))
          {
            warning("Found data values not matching specified alleles. ",
                    "Converting to NA.")
            parts[which.alleles] <- NA
          }
      }

    if(reorder!="no")
    {
        if(reorder=="ascii")
        {
            alleles <-  sort(alleles)
        }
        else if(reorder=="freq")
        {
            ## get reordering of alleles by frequency
            tmp  <- names(rev(sort(table(parts))))
            alleles  <- unique(c(tmp,alleles))
        }

        reorder  <- function( x, alleles)
        {
            tmp <- match( x, alleles )
            x[order(tmp)]
        }
        
        parts  <- t(apply(parts,1, reorder, alleles))

      }

    tmp  <-  ifelse( is.na(parts[,1]) & is.na(parts[,2]),
                    NA,
                    apply(parts,1,paste,collapse="/") )
        
    object  <- factor( tmp )

    # force "NA" not to be a factor level
    ll  <- levels(object)  <-  na.omit(levels(object))
    
    class(object)  <-  c("genotype","factor")
    attr(object,"allele.names")  <- alleles
    attr(object,"allele.map")  <- do.call("rbind", strsplit(ll, "/"))
    if(is.null(locus) || is.locus(locus)  )
      attr(object,"locus")  <- locus
    else
      stop("parameter locus must be of class locus")
    return(object)
  }

is.genotype  <- function(x)
    inherits(x, "genotype")

is.haplotype  <- function(x)
    inherits(x, "haplotype")


###
### Haplotype -- differs only in that order of a1,a2 is considered siginificant
###
haplotype <- function (a1, a2 = NULL, alleles = NULL, sep = "/",
                       remove.spaces = TRUE, reorder = "no",
                       allow.partial.missing = FALSE, locus = NULL) 
{
    retval <- genotype(a1 = a1, a2 = a2, alleles = alleles, sep = sep, 
                       remove.spaces = remove.spaces, reorder = reorder,
                       allow.partial.missing = allow.partial.missing, 
                       locus = locus)
    class(retval) <- c("haplotype", "genotype", "factor")
    retval
}


as.haplotype  <- function(x,...)
{
    retval <- as.genotype(x,...,reorder="no")
    class(retval)  <- c("haplotype","genotype","factor")
    retval
}
 
###
### Display by giving values plus list of alleles
###

print.genotype  <-  function(x,...)
  {
    if(!is.null(attr(x,"locus")))
        print(attr(x,"locus"))
    print(as.character(x))
    cat("Alleles:", allele.names(x), "\n" )
    invisible(x)
  }

###
### Conversion Functions
###

as.genotype  <- function (x,...) 
  UseMethod("as.genotype")

# Do we want to do this?
as.genotype.default  <-  function(x,...)
  genotype(x,...)

#  stop("No method to convert this object to a genotype")

# for characters, and factors, just do the standard thing (factors get
# implicitly converted to characters so both have the same effect.
as.genotype.character  <-  function(x,...)
  genotype(x,...)

as.genotype.factor  <-  function(x,...)
  genotype(as.character(x),...)

as.genotype.genotype  <- function(x,...)
  return(x)

as.genotype.haplotype  <- function(x,...)
  return(x)


## genotype.allele.counts give the count of each allele type as a
## matrix.  Collapse back into the form we need

as.genotype.allele.count  <- function(x, alleles=c("A","B"), ...)
  {
    if(!is.matrix(x) & !is.data.frame(x) )
      {
        x  <- cbind(x, 2-x)
        colnames(x)  <- alleles
      }

    if(any(x > 2, na.rm=TRUE) || any( x < 0, na.rm=TRUE ) )
      stop("Allele counts must be in {0,1,2}")
    
    allele.names  <-  colnames(x)
    tmp  <-  apply(x, 1, function(y)
                    rep( colnames(x), ifelse(is.na(y), 0, y) ))

    if(!is.matrix(tmp))
      retval  <-  genotype(sapply(tmp,paste,collapse="/"), alleles=alleles, ...)
    else
      retval  <- genotype(a1=tmp[1,], a2=tmp[2,], ... )
    return(retval)
  }

allele.count.2.genotype  <-  function(...)
  as.genotype.allele.count(...)



as.genotype.table <- function(x, alleles, ...)
  {
    #if(missing(alleles)) alleles <- unique(unlist(dimnames(x)))
    tmp <- outer( rownames(x), colnames(x), paste, sep="/")
    retval <- genotype( rep(tmp,x), alleles=alleles )
    retval
  }


###
### Equality test for genotype, assumes allele order is _not_ significant
###
"==.genotype"  <-  function(x,y)
  {
    if(!is.genotype(y))
      y <- as.genotype(y)
    
    x.a1  <- allele(x,1)
    x.a2  <- allele(x,2)
    
    y.a1  <- allele(y,1)
    y.a2  <- allele(y,2)
    
    return( (x.a1==y.a1 & x.a2==y.a2) | (x.a1==y.a2 & x.a2==y.a1) )
  }

###
### Equality test for haplotype, assumes allele order _is_ significant
###
"==.haplotype"  <-  function(x,y)
  {
    if(!is.genotype(y))
      y <- as.haplotype(y)
    
    x.a1  <- allele(x,1)
    x.a2  <- allele(x,2)

    y.a1  <- allele(y,1)
    y.a2  <- allele(y,2)
    
    return( x.a1==y.a1 & x.a2==y.a2 )
  }
###
### Extract the first and/or second allele.
###
### By default, return a 2 column matrix containing both alleles
###

#allele  <- function (x,...) 
#  UseMethod("allele")


#allele.genotype  <-  function(x, which=c(1,2) )
allele  <-  function(x, which=c(1,2) )
  {
    alleles.x  <- attr(x,"allele.map")
    retval  <- alleles.x[as.integer(x),which]
    attr(retval,"locus")  <- attr(x,"locus")
    attr(retval,"which")  <- which
    attr(retval,"allele.names")  <- allele.names(x)    
    #class(retval)  <- c("allele.genotype", class(retval))
    return( retval)
  }

as.factor  <- function(x, ...)
  UseMethod("as.factor")

as.factor.default  <- get("as.factor",pos="package:base")
formals(as.factor.default) <- c(formals(as.factor.default),alist(...= ))

as.factor.genotype <- function(x, ...)
  {
    attr(x,"class") <- "factor"
    attr(x,"allele.names") <- NULL
    attr(x,"allele.map") <- NULL
    attr(x,"locus") <- NULL
    x
  }

as.factor.allele.genotype  <-  function(x,...)
  factor(x,levels=allele.names(x))
                    
print.allele.genotype  <- function(x,...)
  {
    if(!is.null(attr(x,"locus")))
      print(attr(x,"locus"))
    cat("Allele(s):", attr(x,"which"), "\n")
    attr(x, "which")  <-  attr(x, "class") <- attr(x,"locus") <- attr(x,"allele.names")  <- NULL
    NextMethod("print",x)
  }


###
### Obtain the count of the number of copies of alleles for each individual
###
### By default, return a matrix containing the counts for all possible allele values.
###

#allele.count  <- function (x,...) 
#  UseMethod("allele.count")

#allele.count.default <- function (x, ... )
#  {
#    x <- as.genotype(x)
#    allele.count(x, ...)
#  }

#allele.count.genotype  <- function(x, allele.name=allele.names(x),

allele.count  <- function(x, allele.name=allele.names(x),
                          any=!missing(allele.name), na.rm=FALSE)
{
  if(!missing(allele.name) && length(allele.name)==1)
    {
      a.1  <- allele(x,1)
      a.2  <- allele(x,2)

      retval  <- ifelse(is.na(a.1) | is.na(a.2),
                        ifelse(na.rm, 0, NA),
                        (a.1==allele.name) + (a.2==allele.name) )
#      class(retval)  <- "allele.count"
      attr(retval,"allele") <- allele.name
      attr(retval,"locus")  <- attr(x,"locus")
      return(retval)
    }
  else
    {
      retval  <- sapply( allele.name, function(y) allele.count(x,y))
      if(any==TRUE && is.matrix(retval)  )
      retval  <- apply(retval,1,sum,na.rm=na.rm)
      if(na.rm) retval[is.na(retval)]  <- 0
#      class(retval)  <- "allele.count"
      attr(retval,"locus")  <- attr(x,"locus")
      return(retval)
    }

}


#print.allele.count  <- function(x,...)
#  { 
#    if(!is.null(attr(x,"locus")))
#        print(attr(x,"locus"))
#    
#    if(is.null(attr(x,"allele")))
#      cat("Allele Counts:\n")
#    else
#      cat("Allele Count (", attr(x,"allele"), " allele):\n", sep="")
#    val  <- x
#    attr(val,"class")  <- NULL
#    attr(val,"allele")  <- NULL
#    print(val)
#    invisible(x)
#  }

###
### Check for the presence of alleles for each individual
###
### By default, return a matrix containing indicators for all possible
### allele values except the last.
###
#
#allele.ind  <-  function(x,allele)
#  {
##    if(missing(allele))
##      stop("Alleles to test must be specified")
##    if(length(allele)==1)
#      retval  <- allele.count(x,allele) > 0
##    else
##      retval  <- apply(allele.count(x,allele) ,1,sum) > 0
#
#      if(missing(allele))
#          allele  <-  colnames(retval)
#      attr(retval,"allele")  <- allele
#      attr(retval,"locus")  <- attr(x,"locus")
#      class(retval)  <-  "allele.ind"
#      return(retval)
#  }
    
#print.allele.ind  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Indicator(s) for allele(s):", attr(x,"allele"), "\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",x)
#  }

###
### Methods for creating subsets based on a genotype
###

homozygote  <- function (x,allele.name,...) 
  UseMethod("homozygote")

homozygote.genotype  <-  function(x,allele.name,...)
  {
    a1  <- allele(x,1)
    a2  <- allele(x,2)
    if(missing(allele.name))
      retval  <- ifelse( is.na(a1) | is.na(a2), NA, a1==a2 )
    else
      retval  <- ifelse( is.na(a1) | is.na(a2), NA,
                         a1==allele.name & a2==allele.name )
    attr(retval,"locus")  <-  attr(x,"locus")
#    class(retval)  <-  "homozygote"
    return(retval)
  }

#print.homozygote  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Homozygote Indicators:\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",x)
#  }
    

heterozygote  <- function (x,allele.name,...) 
  UseMethod("heterozygote")

heterozygote.genotype  <-  function(x,allele.name,...)
  {
  {
    a1  <- allele(x,1)
    a2  <- allele(x,2)
    if(missing(allele.name))
      retval  <- ifelse( is.na(a1) | is.na(a2), NA, !a1==a2 )
    else
      retval  <- ( (a1==allele.name) + (a2==allele.name) ) == 1
    attr(retval,"locus")  <-  attr(x,"locus")
#    class(retval)  <-  "homozygote"
    return(retval)
  }
  }

#print.heterozygote  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Heterozygote Indicators:\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",x)
#  }

carrier <- function (x,allele.name,...) 
  UseMethod("carrier")

carrier.genotype  <-  function(x, allele.name=allele.names(x),
                                   any=!missing(allele.name), na.rm=FALSE, ...)
{
  retval  <- allele.count(x,allele.name=allele.name,any=any,na.rm=na.rm) > 0
  
  attr(retval,"allele")  <- retval$allele
  attr(retval,"locus")  <-  attr(x,"locus")
#  class(retval)  <- "carrier"
  return(retval)
}


#print.carrier  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Carrier Indicator(s) for allele(s):", attr(x,"allele"), "\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",unclass(x))
#  }


###
###
###

allele.names<- function(x)
  {
    retval  <- attr(x,"allele.names")
    if(is.null(retval))
      retval  <- x$allele.names
    return(retval)
  }

###
### Subset method
###

"[.genotype"  <-  function(x, i, drop=FALSE)
  {
    retval  <- NextMethod("[")

    # force "NA" not to be a factor level
    ll  <- levels(retval)  <-  na.omit(levels(retval))
    
    class(retval)  <-  c("genotype","factor")

    if(drop)
      alleles <- unique( unlist(strsplit(ll, "/") ) )
    else
      alleles <- attr(x, "allele.names")
    
    attr(retval,"allele.names")  <- alleles
    attr(retval,"allele.map")  <- do.call("rbind", strsplit(ll, "/"))
    attr(retval,"locus")  <- attr(x,"locus")
    attr(retval,"label")  <-  attr(x,"label")
    return(retval)
  }

"[.haplotype"  <-  function(x, i, drop=FALSE)
  {
    retval  <- NextMethod("[")
    class(retval) <- c("haplotype","genotype","factor")
    retval
  }

###
### Subset Assigment method
###

"[<-.genotype"  <-  function(x, i, value)
  {
    if(!is.genotype(value))
      {
        value <- genotype(value)
      }
    
    lx <- levels(x)
    lv <- levels(value)
    ax <- allele.names(x)
    av <- allele.names(value)

    m  <- is.na(match(av,ax) )
    if( any( m  )  )
       warning(paste("Adding new allele name(s):", av[m] ))
       
    la <- unique(c(lx,lv))
    aa <- unique(c(ax,av))

    cx <- class(x)
    nas <- is.na(x)

    data  <-  match(levels(value)[value],la)
    
    class(x) <- NULL
    x[i] <- data
    attr(x, "levels") <- la
    map  <- attr(x, "allele.map")  <- do.call("rbind", strsplit(la, "/"))
    attr(x, "allele.names")  <- aa
    class(x) <- cx
    x
  }

"[<-.haplotype"  <-  function(x, i, value)
  {
    if(!is.haplotype(value))
      stop("Assigned value must be of class haplotype.")
    NextMethod("[<-")
  }

nallele <- function(x)
  length(allele.names(x))
# $Id: gregorius.R,v 1.3 2003/05/22 17:25:23 warnesgr Exp $
#
# Code contributed by David Duffy <davidD@qumr.edu.au>.
#
# Gregorius, H.-R. 1980. The probability of losing an allele when
# diploid genotypes are sampled.  Biometrics 36, 643-652.
#
# Formula from "Corollary 2" and "Corollary 3" of that paper
#
# N is the number of genotypes sampled,
# freq=frequency of least common allele to be detected by the study,
# missprob=the probability of missing at least one allele
#
# tol=smallest term in series to be accumulated
#
gregorius <- function(freq, N, missprob, tol=1.0e-10, maxN=1e4, maxiter=100,
                      showiter=FALSE)
{
   

  find.alpha <- function(N, freq, tol) #, showiter=FALSE)
    {
      n<- floor(1/freq)
      i<-1
      sgn<- -1
      term<-1.0
      res<-0.0
      while(abs(term)>tol && i<n) {
        sgn<- (-1) ^ (i+1)
        term<- exp( lchoose(n-1,i) +
                   log(exp(N*log(1-i*freq))+
                       exp(i+N*log(freq)+(N-1)*log(n-i))))
        res<-res+sgn*term
        i<-i+1

#        if(showiter)
#          {
#            cat("i=",i,"\n")
#            cat("sgn=",sgn,"\n")
#            cat("term=",term,"\n")
#            cat("res=",res,"\n")
#          }
      }

      max(min(res,1),0)
    }


  retval <- list()
  retval$call <- match.call()
    
  
  if(!missing(N) && missing(missprob) )
    {
      retval$method <- "Compute missprob given N and freq"
      retval$freq <- freq
      retval$N <- N
      retval$missprob <- find.alpha(N=N,freq=freq,tol=tol) 
    }
  else if(missing(N) && !missing(missprob) )
    {
      retval$method <- "Determine minimal N given missprob and freq"
      retval$freq <- freq
      val <- binsearch( function(N) find.alpha(N=N, freq=freq, tol=tol),
                       range=c(1, maxN), target=missprob, showiter=showiter,
                       maxiter=maxiter )
      if(length(val$where)==2)
        {
          retval$N <- val$where[2]
          retval$missprob <- val$value[2]
        }
      else
        {
          retval$N <- val$where[1]
          retval$missprob <- val$value[1] 
       }
    }
  else
    stop("Exactly two of N, freq, and missprob must be specified")

  return(retval)
}

# $Id: hapmcmc.R,v 1.1 2003/07/30 20:39:57 warnesgr Exp $
#
# Code contributed by David Duffy <davidD@qumr.edu.au>:
#
# "If you are interested, this is a toy/prototype for haplotyping via MCMC.  
#  It is much slower than Dan Schaid's haplo.em, but does give the same
#  answers ;)"

#
# Routines for handling genotypes
#
# Convert "1/2" to 1,2
#
geno.as.array <- function(genotypes,renumber=FALSE,miss=NULL,gtp.sep="/") {
  mknum<-function(genotypes, renumber=FALSE, gtp.sep="/") {
    alleles<- strsplit(genotypes, gtp.sep)
    gtp<-cbind(sapply(alleles, function(x) x[1], simplify=TRUE),
               sapply(alleles, function(x) x[2], simplify=TRUE))
    if (renumber) {
      alleles<-unique(unlist(alleles))
      gtp[,1]<-as.numeric(factor(gtp[,1],levels=alleles))
      gtp[,2]<-as.numeric(factor(gtp[,2],levels=alleles))
    }
    if (is.null(miss)) {
      gtp[is.na(genotypes),]<-NA
    }else{
      gtp[is.na(genotypes),]<-miss
    } 
    gtp
  } 
  if (is.null(ncol(genotypes)) || ncol(genotypes)==1) {
    res<-mknum(genotypes, renumber=renumber)
  }else{
    res<-data.frame(mknum(genotypes[,1], renumber=renumber))
    for(i in 2:ncol(genotypes)) {
      res<-cbind(res,mknum(genotypes[,i], renumber=renumber))
    } 
    colnames(res)<-c(t(outer(names(genotypes),1:2,paste,sep="."))) 
  } 
  apply(res,2,as.character)
} 
#
hap <- function(genotypes) {
  res<-geno.as.array(genotypes)
  nc<-ncol(res)
  hap1<-res[,seq(1,nc,2)]
  hap2<-res[,seq(2,nc,2)]
  loci<-colnames(genotypes)
  colnames(hap2)<-colnames(hap1)<-loci
  list(hap1=hap1, hap2=hap2, class="haplotype")
}
hapshuffle <- function(haplotypes, hfreq=NULL, ambiguous=NULL, verbose=FALSE) {
  if (is.null(hfreq)) hfreq<-hapfreq(haplotypes, set=set)  
  if (is.null(ambiguous)) ambiguous<-hapambig(haplotypes)
  nloci<-ncol(haplotypes$hap1)
  nobs<-nrow(haplotypes$hap1)
  for(ind in ambiguous) {
    prop<-curr<-list(hap1=haplotypes$hap1[ind,], hap2=haplotypes$hap2[ind,])
    swap<-sample(c(TRUE,FALSE),nloci,replace=TRUE)
    if (any(swap)) {
      tmp<-prop$hap1[swap]
      prop$hap1[swap]<-prop$hap2[swap]
      prop$hap2[swap]<-tmp
    }
    o1<-paste(curr$hap1,collapse=":")  
    o2<-paste(curr$hap2,collapse=":")  
    n1<-paste(prop$hap1,collapse=":")  
    n2<-paste(prop$hap2,collapse=":")  
    pos.o1<-match(o1,names(hfreq))
    pos.o2<-match(o2,names(hfreq))
    pos.n1<-match(n1,names(hfreq))
    pos.n2<-match(n2,names(hfreq))
    pn<-(hfreq[pos.n1]+0.5)*(hfreq[pos.n2]+0.5)
    po<-(hfreq[pos.o1]+0.5)*(hfreq[pos.o2]+0.5)
    qa<-pn/po
    if (verbose) cat("Person ",ind," ",qa," ",o1,"/",o2," -> ",n1,"/",n2,sep="")
    if (qa>runif(1)) {
      if (verbose) cat(" Accepted\n")
      haplotypes$hap1[ind,]<-prop$hap1
      haplotypes$hap2[ind,]<-prop$hap2
      hfreq[pos.n1]<-hfreq[pos.n1]+1
      hfreq[pos.n2]<-hfreq[pos.n2]+1
      hfreq[pos.o1]<-hfreq[pos.o1]-1
      hfreq[pos.o2]<-hfreq[pos.o2]-1
    }else if (verbose) {
      cat(" Unchanged\n")
    }
  }
  list(hfreq=hfreq, haplotypes=haplotypes, class="hapshuffle")
}

hapambig <- function(haplotypes) {
  which(apply(haplotypes$hap1!=haplotypes$hap2,1,sum)>1)
}

hapenum <- function(haplotypes) {
  dat<-rbind(haplotypes$hap1, haplotypes$hap2)
  dat<-dat[complete.cases(dat),]
  set<-unique(dat[,1])
  for(i in 2:ncol(dat)) set<-outer(set,unique(dat[,i]),paste,sep=":") 
  factor(set)
}

hapfreq <- function(haplotypes, set=NULL) {
  if (is.null(set)) set<-hapenum(haplotypes)
  hap1<-apply(haplotypes$hap1[complete.cases(haplotypes$hap1),],1,paste,collapse=":")
  hap2<-apply(haplotypes$hap2[complete.cases(haplotypes$hap2),],1,paste,collapse=":")
  dat<-c(hap1,hap2)
  table(factor(dat,levels=set))
}

hapmcmc <- function(gtp, B=1000) {
  tot<-2*nrow(gtp)
  hap.dat<-hap(gtp)
  hap.set<-hapenum(hap.dat)
  hap.amb<-hapambig(hap.dat)
  hap.new<-list(hfreq=hapfreq(hap.dat, set=hap.set), haplotypes=hap.dat)
  res<-matrix(nr=B, nc=length(hap.set))
  colnames(res)<-as.character(hap.set)
  rownames(res)<-1:B
  for(i in 1:B) {
    hap.new<-hapshuffle(hap.new$haplotypes,hfreq=hap.new$hfreq,ambiguous=hap.amb)
    res[i,]<-hap.new$hfreq
  }
  apply(res,2,mean)/tot
}

mourant <- function(n) {
  tab<-matrix(c(91,32,5,147,78,17,85,75,7), nr=3)
  rownames(tab)<-c("M/M","M/N","N/N")
  colnames(tab)<-c("S/S","S/s","s/s")
  dat<-as.data.frame.table(tab)
  p<-dat$Freq/sum(dat$Freq)
  dat[sample(1:nrow(dat),n,replace=TRUE,prob=p),1:2]
}
# $Id: library.R,v 1.3 2003/05/30 15:52:19 warnesgr Exp $

## FIXME:  Remove this file when R 1.7.1 is released

library.pos <-
function(package, help, pos = 2, lib.loc = NULL, character.only = FALSE,
         logical.return = FALSE, warn.conflicts = TRUE,
         keep.source = getOption("keep.source.pkgs"),
         verbose = getOption("verbose"), version)
{
    testRversion <- function(fields)
    {
        current <- paste(R.version[c("major", "minor")], collapse = ".")
        ## depends on R version?
        if(!package.dependencies(fields, check = TRUE)) {
            dep <- package.dependencies(fields)[[1]]
            o <- match("R", dep[, 1])
            stop(paste("This is R ", current, ", package ",
                       fields[1, "Package"],
                       " needs ", dep[o, 2], " ", dep[o, 3], sep=""),
                 call. = FALSE)
        }
        ## which version was this package built under?
        if(!is.na(built <- fields[1, "Built"])) {
            builtFields <- strsplit(built, ";")[[1]]
            builtunder <- substring(builtFields[1], 3)
            if(nchar(builtunder) &&
               compareVersion(current, builtunder) < 0) {
                warning(paste("package", fields[1, "Package"],
                              "was built under R version", builtunder),
                        call. = FALSE)
            }
            if(.Platform$OS.type == "unix") {
                platform <- builtFields[2]
                if(length(grep("\\w", platform))) {
                    ## allow for small mismatches, e.g. OS version number.
                    m <- agrep(platform, R.version$platform)
                    if(!length(m))
                        stop(paste("package", fields[1, "Package"],
                                   "was built for", platform),
                             call. = FALSE)
		}
            }
        }
        else
            stop(paste("This package has not been installed properly\n",
                       "See the Note in ?library"))
    }

    checkNoGenerics <- function(env)
    {
        if (exists(".noGenerics", envir = env, inherits = FALSE))
            TRUE
        else {
            ## A package will have created a generic
            ## only if it has created a formal method.
            length(objects(env, pattern="^\\.__M", all=TRUE)) == 0
        }
    }

    checkConflicts <- function(package, pkgname, pkgpath, nogenerics)
    {
        dont.mind <- c("last.dump", "last.warning", ".Last.value",
                       ".Random.seed", ".First.lib", ".Last.lib",
                       ".packageName", ".noGenerics")
        sp <- search()
        lib.pos <- match(pkgname, sp)
        ## ignore generics not defined for the package
        ob <- objects(lib.pos, all = TRUE)
        if(!nogenerics && "package:methods" %in% sp) {
            gen <- getGenerics(lib.pos)
            gen <- gen[gen@package != ".GlobalEnv"]
            ob <- ob[!(ob %in% gen)]
        }
        fst <- TRUE
        ipos <- seq(along = sp)[-c(lib.pos, match("Autoloads", sp))]
        for (i in ipos) {
            obj.same <- match(objects(i, all = TRUE), ob, nomatch = 0)
            if (any(obj.same > 0)) {
                same <- ob[obj.same]
                same <- same[!(same %in% dont.mind)]
                Classobjs <- grep("^\\.__", same)
                if(length(Classobjs)) same <- same[-Classobjs]
                if(length(same)) {
                    if (fst) {
                        fst <- FALSE
                        cat("\nAttaching package ", sQuote(package),
                            ":\n\n", sep = "")
                    }
                    cat("\n\tThe following object(s) are masked",
                        if (i < lib.pos) "_by_" else "from", sp[i],
                        ":\n\n\t", same, "\n\n")
                }
            }
        }
    }

    libraryPkgName <- function(pkgName, sep = "_")
	unlist(strsplit(pkgName, sep))[1]

    libraryPkgVersion <- function(pkgName, sep = "_")
    {
        splitName <- unlist(strsplit(pkgName, sep))
	if (length(splitName) > 1) splitName[2] else NULL
    }

    libraryMaxVersPos <- function(vers)
    {
	## Takes in a character vector of version numbers
        ## returns the position of the maximum version utilizing
        ## compareVersion.  Can't do as.numeric due to the "-" in versions.
	max <- vers[1]

        for (ver in vers) if (compareVersion(max, ver) < 0) max <- ver
	out <- match(max, vers)
	out
    }

    sQuote <- function(s) paste("'", s, "'", sep = "")

    if (is.null(lib.loc)) lib.loc <- .libPaths()

    if(!missing(package)) {
	if(!character.only)
	    package <- as.character(substitute(package))

	if (!missing(version)) {
	     package <- manglePackageName(package, version)
        }
	else {
	   ## Need to find the proper package to install
	   pkgDirs <- list.files(lib.loc,
                                 pattern = paste("^", package, sep=""))
           ## See if any directories in lib.loc match the pattern of
           ## 'package', if none do, just continue as it will get caught
           ## below.  Otherwise, if there is actually a 'package', use
           ## that, and if not, then use the highest versioned dir.
	   if (length(pkgDirs) > 0) {
	       if (!(package %in% pkgDirs)) {
		   ## Need to find the highest version available
		   vers <- unlist(lapply(pkgDirs, libraryPkgVersion))
		   pos <- libraryMaxVersPos(vers)
		   if (length(pos) > 0)
			   package <- pkgDirs[pos]
               }
           }
        }

        if(length(package) != 1)
            stop("argument `package' must be of length 1")
	pkgname <- paste("package", package, sep = ":")
	newpackage <- is.na(match(pkgname, search()))
	if(newpackage) {
            ## Check for the methods package before attaching this
            ## package.
            ## Only if it is _already_ here do we do cacheMetaData.
            ## The methods package caches all other libs when it is
            ## attached.
            ## Note for detail: this does _not_ test whether dispatch is
            ## currently on, but rather whether the package is attached
            ## (cf .isMethodsDispatchOn).
            hadMethods <- "package:methods" %in% search()

            pkgpath <- .find.package(package, lib.loc, quiet = TRUE,
                                     verbose = verbose)
            if(length(pkgpath) == 0) {
               txt <- paste("There is no package called",
			     sQuote(libraryPkgName(package)))
		vers <- libraryPkgVersion(package)
		if (!is.null(vers))
		   txt <- paste(txt, ", version ", vers, sep="")
                if(logical.return) {
                    warning(txt)
		    return(FALSE)
		} else stop(txt)
            }
            which.lib.loc <- dirname(pkgpath)
            descfile <- system.file("DESCRIPTION", package = package,
                                    lib.loc = which.lib.loc)
            if(!nchar(descfile))
            	stop("This is not a valid package -- no DESCRIPTION exists")

            descfields <- read.dcf(descfile, fields =
                           c("Package", "Depends", "Built"))
            testRversion(descfields)

            ## Check for inconsistent naming
            if(descfields[1, "Package"] != libraryPkgName(package)) {
            	warning(paste("Package", sQuote(package), "not found.\n",
			"Using case-insensitive match",
            		sQuote(descfields[1, "Package"]), ".\n",
			"Future versions of R will require exact matches."),
			call.=FALSE)
            	package <- descfields[1, "Package"]
            	pkgname <- paste("package", package, sep = ":")
            	newpackage <- is.na(match(pkgname, search()))
	    }
            if(newpackage) {
		## If the name space mechanism is available and the package
		## has a name space, then the name space loading mechanism
		## takes over.
		if (packageHasNamespace(package, which.lib.loc)) {
		    tt <- try({
			ns <- loadNamespace(package, c(which.lib.loc, lib.loc))
			env <- attachNamespace(ns, pos = pos)
		    })
		    if (inherits(tt, "try-error"))
			if (logical.return)
			    return(FALSE)
			else stop("package/namespace load failed")
		    else {
			on.exit(do.call("detach", list(name = pkgname)))
			nogenerics <- checkNoGenerics(env)
			if(warn.conflicts &&
			   !exists(".conflicts.OK", envir = env, inherits = FALSE))
			   checkConflicts(package, pkgname, pkgpath, nogenerics)
			on.exit()
			if (logical.return)
			    return(TRUE)
			else
			    return(invisible(.packages()))
		    }
		}
		codeFile <- file.path(which.lib.loc, package, "R",
				      package)
		## create environment (not attached yet)
		loadenv <- new.env(hash = TRUE, parent = .GlobalEnv)
		## source file into loadenv
		if(file.exists(codeFile))
		    sys.source(codeFile, loadenv, keep.source = keep.source)
		else if(verbose)
		    warning(paste("Package ", sQuote(package),
				  "contains no R code"))
		## now transfer contents of loadenv to an attached frame
		env <- attach(NULL, pos = pos, name = pkgname)
		## detach does not allow character vector args
		on.exit(do.call("detach", list(name = pkgname)))
		attr(env, "path") <- file.path(which.lib.loc, package)
		## the actual copy has to be done by C code to avoid forcing
		## promises that might have been created using delay().
		.Internal(lib.fixup(loadenv, env))
		## save the package name in the environment
		assign(".packageName", package, envir = env)

		## run .First.lib
		if(exists(".First.lib", envir = env, inherits = FALSE)) {
		    firstlib <- get(".First.lib", envir = env, inherits = FALSE)
		    tt<- try(firstlib(which.lib.loc, package))
		    if(inherits(tt, "try-error"))
			if (logical.return) return(FALSE)
			else stop(".First.lib failed")
		}
		if(!is.null(firstlib <- getOption(".First.lib")[[package]])){
		    tt<- try(firstlib(which.lib.loc, package))
		    if(inherits(tt, "try-error"))
			if (logical.return) return(FALSE)
			else stop(".First.lib failed")
		}
		nogenerics <- checkNoGenerics(env)
		if(warn.conflicts &&
		   !exists(".conflicts.OK", envir = env, inherits = FALSE))
		    checkConflicts(package, pkgname, pkgpath, nogenerics)

		if(!nogenerics && hadMethods &&
		   !identical(pkgname, "package:methods")) cacheMetaData(env, TRUE)
		on.exit()
	    }
	}
	if (verbose && !newpackage)
            warning(paste("Package", sQuote(package),
                          "already present in search()"))
    }
    else if(!missing(help)) {
	if(!character.only)
	    help <- as.character(substitute(help))
        pkgName <- help[1]              # only give help on one package
        pkgPath <- .find.package(pkgName, lib.loc, verbose = verbose)
        docFiles <- file.path(pkgPath, c("DESCRIPTION", "INDEX"))
        ## This is a bit ugly, but in the future we might also have
        ## DESCRIPTION or INDEX files as serialized R objects ...
        if(file.exists(vignetteIndexRDS <-
                       file.path(pkgPath, "Meta", "vignette.rds")))
            docFiles <- c(docFiles, vignetteIndexRDS)
        else
            docFiles <- c(docFiles,
                          file.path(pkgPath, "doc", "00Index.dcf"))
        pkgInfo <- vector(length = 4, mode = "list")
        pkgInfo[[1]] <- paste("\n\t\tInformation on Package",
                              sQuote(pkgName))
        readDocFile <- function(f) {
            if(basename(f) %in% c("DESCRIPTION", "00Index.dcf")) {
                ## This should be in valid DCF format ...
                txt <- try(read.dcf(f))
                if(inherits(txt, "try-error")) {
                    warning(paste("file",
                                  sQuote(f),
                                  "is not in valid DCF format"))
                    return(NULL)
                }
                ## Return a list so that the print method knows to
                ## format as a description list (if non-empty).
                txt <- if(all(dim(txt) >= 1))
                    list(colnames(txt), as.character(txt[1, ]))
                else
                    NULL
            }
            else if(basename(f) %in% c("vignette.rds")) {
                txt <- .readRDS(f)
                ## New-style vignette indexes are data frames with more
                ## info than just the base name of the PDF file and the
                ## title.  For such an index, we give the names of the
                ## vignettes, their titles, and indicate whether PDFs
                ## are available.
                ## The index might have zero rows.
                txt <- if(is.data.frame(txt) && nrow(txt))
                    cbind(basename(gsub("\\.[[:alpha:]]+$", "",
                                        txt$File)),
                          paste(txt$Title,
                                paste(rep.int("(source", NROW(txt)),
                                      ifelse(txt$PDF != "",
                                             ", pdf",
                                             ""),
                                      ")", sep = "")))
                else NULL
            }
            else
                txt <- readLines(f)
            txt
        }
        for(i in which(file.exists(docFiles)))
            pkgInfo[[i+1]] <- readDocFile(docFiles[i])
        y <- list(name = pkgName, path = pkgPath, info = pkgInfo)
        class(y) <- "packageInfo"
        return(y)
    }
    else {
	## library():
        if(is.null(lib.loc))
            lib.loc <- .libPaths()
        db <- matrix(character(0), nr = 0, nc = 3)
        nopkgs <- character(0)

        for(lib in lib.loc) {
            a <- .packages(all.available = TRUE, lib.loc = lib)
            for(i in sort(a)) {
                title <- package.description(i, lib.loc = lib, field="Title")
                if(is.na(title)) title <- ""
                db <- rbind(db, cbind(i, lib, title))
            }
            if(length(a) == 0)
                nopkgs <- c(nopkgs, lib)
        }
        colnames(db) <- c("Package", "LibPath", "Title")
        if((length(nopkgs) > 0) && !missing(lib.loc)) {
            if(length(nopkgs) > 1)
                warning(paste("libraries",
                              paste(sQuote(nopkgs), collapse = ", "),
                              "contain no packages"))
            else
                warning(paste("library",
                              paste(sQuote(nopkgs)),
                              "contains no package"))
        }

        y <- list(header = NULL, results = db, footer = NULL)
        class(y) <- "libraryIQR"
        return(y)
    }

    if (logical.return)
	TRUE
    else invisible(.packages())
  }

# $Id: locus.R,v 1.7 2003/05/29 00:36:10 warnesgr Exp $

getlocus  <- function(x,...)
{
  if(is.locus(x))
    return(x)
  else if(!is.null(x$locus))
        return(x$locus)
  else if(!is.null(attr(x,"locus")))
       return(attr(x,"locus"))
  else
    NULL
}

getmarker <- getgene <- getlocus

locus  <- function(name, chromosome, arm=c("p","q","long","short",NA),
                   index.start=NULL, index.end=NULL)
  {
    
    object  <-  list()

    if(!missing(name))
      object$name  <- name
    
    if(!missing(chromosome))
      object$chromosome <- chromosome
    
    if(!missing(arm))
      {
        arm  <- match.arg( arm )
        object$arm  <- switch( arm, p="p", q="q", long="p", short="q")
      }
    if(!missing(index.start))
      object$index.start  <- index.start
    if(!missing(index.end))
      object$index.end  <- index.end
    
    class(object)  <- "locus"
    return(object)
  }


gene  <-  function(name, chromosome, arm=c("p","q","long","short"),
                   index.start, index.end=NULL)
{
  object  <- locus(name, chromosome, arm, index.start, index.end)
  class(object)  <- c("gene","locus")
  object
}


marker <- function(name, type,
                   locus.name, bp.start, bp.end=NULL, relative.to=NULL,
                   ...
                   )
{
  if(is.locus(locus.name))
      object <- locus.name
  else
    object  <-  locus(locus.name, ...)

  if(!missing(name))
    object$marker.name  <- name

  if(!missing(type))
    object$type  <- type

  if(!missing(bp.start))
    object$bp.start  <- bp.start

  if(!missing(bp.end))
    object$bp.end  <- bp.end

  if(!missing(relative.to))
    object$relative.to  <- relative.to
  
  class(object)  <- c("marker","locus")
  object
}

is.locus  <- function(x)
    inherits(x, "locus")

is.gene  <- function(x)
    inherits(x, "gene")

is.marker  <- function(x)
    inherits(x, "marker")



as.character.locus  <- function(x,...)
  {
    loc <- paste( x$chromosome, x$arm, x$index.start, sep="" )
    if( !is.null(x$index.end ) && x$index.start != x$index.end )
      loc  <- paste(loc, "-", x$index.end, sep="")
    loc
  }

as.character.gene  <- function(x,...)
  as.character.locus(x,...)

as.character.marker  <- function(x,...)
  {
    loc  <- as.character.locus(x)
    loc  <- paste(loc, ":", x$bp.start, sep="")
    if(!is.null(x$bp.end)) loc  <-  paste(loc, "-", x$bp.end, sep="")
    loc
  }

print.locus  <-  function(x,...)
  {
    cat("Locus: ", x$name, " (", as.character.locus(x), ")\n", sep="" )
  }

print.gene  <-  function(x,...)
  {
    cat("Gene: ", x$name, " (", as.character.locus(x), ")\n", sep="" )
  }

print.marker  <- function(x,...)
  {
    cat("Marker: ", paste(x$name,":",x$marker.name,sep=""),
        " (", as.character.marker(x), ")\tType: ",x$type,"\n", sep="" )
  }


"locus<-" <- function(x,value)
  {
    attr(x,"locus") <- value
    x
  }


"marker<-" <- "gene<-" <-  get("locus<-")
# $Id: makeGenotypes.R,v 1.5 2003/05/22 17:25:23 warnesgr Exp $

#
# convert all genotype-compatible variables in a dataframe to genotypes
#
makeGenotypes <- function( data, convert, sep="/", tol=0.5, ...,
                           method=as.genotype )
                          
  {
    data <- as.data.frame(data)
    
    if(missing(convert))
      {
        fun <- function(x) length(unlist(grep("/", as.character(x) )))
        convert <- sapply( data,  fun )/nrow(data) > tol
      }

    #cat("Convert:");print(convert);

    if(is.list(convert))
      {
        if( !all(sapply(convert,length)==2) )
          stop("When convert is a list, each element must be a 2-vector.")

        namelist <- names(data)
        
        for(pair in convert)
           {
             if(!is.character(pair))
               pair <- namelist[pair]
             # replace first column in pair with new data,
             index <- which(colnames(data)==pair[1])
             data[[ index ]] <- method(data[[ pair[1] ]],
                                       data[[ pair[2] ]], sep=sep,
                                                ... )
             colnames(data)[index] <- paste(pair,collapse=sep)
             data[[ pair[1] ]] <- data[[ pair[2] ]] <- NULL
           }
      }
    else
      {
        if(is.character(convert))
          namelist <- convert
        else
          namelist <- colnames(data)[convert]

        for(col in namelist)
          data[[col]] <- method(data[[col]], sep=sep, ... )
      }

    data
}

makeHaplotypes <- function( data, convert, sep="/", tol=0.9, ... )
  {
    makeGenotypes( data=data, convert=convert, sep=sep, tol=tol,
                  method=as.haplotype, ... )    
  }

# $Id: plot.LD.R,v 1.5 2003/06/04 21:22:57 warnesgr Exp $

plot.LD.data.frame <- function(x,
                               digits=3,

                               colorcut=c(0,0.01, 0.025, 0.5, 0.1, 1),
                               colors=heat.colors(length(colorcut)),
                               textcol="black",

                               marker,
                               which="D'",
                               distance,
                               ...)
  {
    oldpar <- par("mfrow")
    
    par(mfrow=c(1,2))

    LDtable(x, digits=digits, colorcut=colorcut, colors=colors,
            textcol=textcol, ...)
    LDplot(x, marker=marker, which=which, distance=distance, ...)
    
    par(mfrow=oldpar)
    invisible()
  }


LDtable <- function(x, 
                    colorcut=c(0,0.01, 0.025, 0.5, 0.1, 1),
                    colors=heat.colors(length(colorcut)),
                    textcol="black",
                    digits=3,
                    show.all=FALSE,
                    which=c("D", "D'", "r", "X^2", "P-value", "n"),
                    colorize="P-value",
                    cex,
                    ...)
  {
    if(! colorize %in% names(x))
      stop(colorize, " not an element of ", deparse(substitute(x)) )

    datatab <- summary(x)
    
    missmatch <- which[!(which %in% names(x))]
    if(length(missmatch)>0)
      stop(missmatch, " not an element of ", deparse(substitute(x)) )

    matform <- function( value, template )
      {
        dim(value) <- dim(template)
        dimnames(value) <- dimnames(template)
        value
      }
    
    tmp <- cut(x[[colorize]], colorcut, include.lowest=TRUE)
    colormat <- matform(as.numeric(tmp), x[[colorize]] )
    n <- matform( paste("(",x$n,")",sep="" ), x$n)

    if(!show.all)
      { # remove blank row/column
        colormat <- colormat[-nrow(colormat),-1, drop=FALSE]
        n <- n[-nrow(n),-1, drop=FALSE]
      }

    #
    # color coded frame boxes
    #
    image(x=1:ncol(colormat), y=1:ncol(colormat),
          z=t(colormat[nrow(colormat):1,]),
          col=colors, xlab="Marker 2\n\n", ylab="Marker 1",
          xaxt="n", yaxt="n",...)
    
    abline(v=-0.5 + 1:(ncol(colormat)+1))
    abline(h=-0.5 + 1:(nrow(colormat)+1))
    
    axis(3, 1:ncol(colormat), colnames(colormat) )
    axis(2, 1:nrow(colormat), rev(rownames(colormat)) )
    
    #
    # text in boxes
    #
    cex.old <- par("cex")

    if(missing(cex))
      cex <-min( c(1/10, 1/(length(which)+1 ) ) /
                 c(strwidth("W"), strheight("W")*1.5))
    
    par(cex=cex)

    lineheight <- strheight("W")*1.5
    center <- lineheight * length(which)/2
    
    for(i in 1:length(which))
      {
        displaymat <- x[[which[i]]]

        if(!show.all)
          displaymat <- displaymat[-nrow(displaymat),-1, drop=FALSE]
        
        if( which[i]=="P-value" )
          displaymat <- format.pval(displaymat, digits=digits)
        else if (which[i]!="n")
          displaymat <- format(displaymat, digits=digits)

        displaymat[] <- gsub("NA.*", "", as.character(displaymat))
        
        text(x=col(colormat),
             y=nrow(colormat) - row(colormat)+ 1 + center - lineheight*(i-1),
             displaymat,
             col=textcol,
             adj=c(0.5, 1)
             )
      }

    text(x=1, y=1, paste(which, collapse="\n"), adj=c(0.5,0.5) )

    par(cex=cex.old)
    
    #
    # title
    #
    title(main="Linkage Disequilibrium\n")

    invisible(colormat)
  }



LDplot <- function(x, 
                   digits=3,
                   marker,
                   distance,
                   which=c("D", "D'", "r", "X^2", "P-value", "n", " "),
                   ...)
{
  which = match.arg(which)
  
  if(missing(marker))
    marker <- colnames(x[[which]])
  else if (is.numeric(marker))
    marker <- colnames(x[[which]])[marker]
  
  datamat <- ifelse( is.na(x[[which]]), t(x[[which]]), x[[which]])

  if(which %in% c("D'","r") )
    diag(datamat) <- 1.0
  else if (which=="P-value")
    diag(datamat) <- 0.0
  
  dimnames(datamat) <- dimnames(x[[which]])
  
  if(missing(distance)) distance <- 1:ncol(datamat)
  distance <- matrix(distance, ncol=ncol(datamat), nrow=nrow(datamat),
                     byrow=TRUE)
  dimnames(distance) <- dimnames(datamat)
  
  matplot(x=t(distance[marker,,drop=FALSE]),
          t(datamat[marker,,drop=FALSE]),
          type="b", 
          xlab="Marker",
          ylab=paste("Linkage Disequilibrium: ", which, sep=""),
          xaxt="n",
          ... )
  
  axis(1, distance[1,], paste(1:ncol(datamat), colnames(datamat), sep=": " ))
  
  title("Pairwise Disequilibrium Plot")

  invisible()
}
# $Id: print.LD.R,v 1.8 2004/05/25 19:40:02 warnesgr Exp $

print.LD <- function(x, digits=getOption("digits"), ...)
  {
    saveopt <- options("digits")
    options(digits=digits)
    cat("\n")
    cat("Pairwise LD\n")
    cat("-----------\n")

    est <- t(as.matrix( c(D=x$"D","D'"=x$"D'","Corr"=x$"r")))
    rownames(est) <- "Estimates:"
    print(est)
    cat("\n")

    test <- t(as.matrix( c("X^2"=x$"X^2", "P-value"=x$"P-value",
                           "N"=x$"n") ) )
    rownames(test) <- "LD Test:"
    print(test)
    cat("\n")

    options(saveopt)
    invisible(x)
  }


summary.LD.data.frame <- function(object, digits=getOption("digits"),
                                which=c("D", "D'", "r", "X^2",
                                        "P-value", "n", " "),
                                rowsep, show.all=FALSE,
                                ...)
  {

    if(missing(rowsep))
      if(length(which)==1)
        rowsep <- NULL
      else
        rowsep <- " "

    if(is.null(rowsep))
      blank <- NULL
    else
      blank <- matrix(rowsep, ncol=ncol(object$"D"), nrow=nrow(object$"D"))
    


    saveopt <- options("digits")
    options(digits=digits)

    
    pdat <- list()
    for(name in which)
         pdat[[name]] <- object[[name]]
    
    tab <- interleave(
                      "D" = if('D' %in% names(pdat)) pdat$D else NULL,
                      "D'" = pdat$"D'",
                      "Corr." = pdat$"r",
                      "X^2"= pdat$"X^2",
                      "P-value" = pdat$"P-value",
                      "n" = pdat$"n",
                      " "=blank,
                      sep=" "
                      )

    statlist <- which[ ! (which %in% c("P-value", "n", " ") ) ]
    statlist[statlist=="X^2"] <- "X\\^2"

    formatlist <- sapply( statlist, function(object) grep(object, rownames(tab) ) )
    formatlist <- unique(sort(unlist(formatlist)))
    
    pvallist   <- grep( "P-value", rownames(tab) )
    
    tab[formatlist,] <- formatC(as.numeric(tab[formatlist,]), digits=digits,
                                format="f")
    tab[pvallist,] <- apply(object$"P-value", c(1,2),
                            function(object)trim(format.pval(object, digits=digits)))
    
    tab[trim(tab)=="NA"] <- NA

    if(!show.all)
      {
         # drop blank row/column
        entrylen <- nrow(tab)/nrow(object$n)
        tab <- tab[1:(nrow(tab) - entrylen),-1]
      }

    
    options(saveopt)
    class(tab) <- "summary.LD.data.frame"
    tab
  }

print.summary.LD.data.frame <- function(x, digits=getOption("digits"), ...)
{
  cat("\n")
  cat("Pairwise LD\n")
  cat("-----------\n")

  print.matrix(unclass(x), digits=digits, quote=FALSE,
               na.print="    ", right=TRUE) 
        
  cat("\n")

  invisible(x)

  
}


print.LD.data.frame <- function(x, ...)
  print(summary(x))
# $Id: summary.genotype.R,v 1.10 2003/08/04 13:48:40 warnesgr Exp $

###
### Provide the frequency and proportions of alleles and genotypes
###

# used when summary.genotype is called from summary.data.frame:
shortsummary.genotype <- function(object, ..., maxsum)
  {
    tmp <- summary.factor(object, maxsum=maxsum)
    retval <- paste(format(tmp), " (", format(round(prop.table(tmp)*100)), "%)", sep='' )
    names(retval) <- names(tmp)
    #retval <- retval[order(tmp, decreasing=TRUE)]
    retval
  }

# general function
summary.genotype  <-  function(object,...,maxsum)
  {
    # if we are called from within summary.data.frame, fall back to
    # summary.factor so that we don't mess up the display
    if(!missing(maxsum))
      return(shortsummary.genotype(object,...,maxsum=maxsum))
    
    retval  <-  list()
#    retval$genotype  <- object
    retval$allele.names  <- allele.names(object)
    
    retval$locus  <- attr(object,"locus")
    class(retval)  <- "summary.genotype"
    af  <- table(allele(object))
    paf <- prop.table(af)
    retval$allele.freq    <- cbind("Count"=af,"Proportion"=paf)

    gf  <- table( object )
    pgf <- prop.table(gf)
    retval$genotype.freq  <- cbind("Count"=gf,"Proportion"=pgf)


    ### from code submitted by David Duffy <davidD@qimr.edu.au>
    #
    n.typed<-sum(gf)
    correction<-n.typed/max(1,n.typed-1)
    ehet<-(1-sum(paf*paf))
    matings<- (paf %*% t(paf))^2
    uninf.mating.freq <- sum(matings)-sum(diag(matings))
    pic<- ehet - uninf.mating.freq

    retval$Hu <- correction * ehet
    retval$pic <- pic
    retval$n.typed <- n.typed
    retval$n.total <- length(object)
    retval$nallele <- nallele(object)
    #
    ###

    
    if(any(is.na(object))){
        retval$allele.freq    <- rbind(retval$allele.freq,
                                       "NA"=c(sum(is.na(allele(object))),NA))
        retval$genotype.freq  <- rbind(retval$genotype.freq,
                                       "NA"=c(sum(is.na(object)),NA))
    }



    
    return(retval)
  }

print.summary.genotype  <-  function(x,...,round=2)
  { 
    if(!is.null(x$locus))
      {
        cat("\n")
        print( x$locus )
      }

    cat("\n")
    cat("Number persons typed: ", x$n.typed,
      " (", round(100*x$n.typed/x$n.total,1), "%)\n", sep="")

    cat("\n")
    cat("Allele Frequency: (", x$nallele, " alleles)\n", sep="")
    print(round(x$allele.freq,digits=round),...)
    cat("\n")


    cat("\n")
    cat("Genotype Frequency:\n")
    print(round(x$genotype.freq,digits=round),...)
    cat("\n")
    
    cat("Heterozygosity (Hu)  = ",  x$Hu, "\n", sep="")
    cat("Poly. Inf. Content   = ",  x$pic, "\n", sep="")
    cat("\n")
    
    invisible(x)
  }

# $Id: write.pedigree.file.R,v 1.2 2003/05/28 15:01:46 warnesgr Exp $

write.pedigree.file <- function(data,
                                family, pid, father, mother, sex,
                                file="pedigree.txt"
                                )
  {
    # pedigree file format
    # --------------------
    #
    # <family> <pid> <father> <mother> <sex> <genotype_1> ... <genotype_n>
    #
    # <family> is a unique identifier for each family, and within each family
    # <pid> is a unique identifier for an individual.
    # <father> and <mother> identify the individuals father and mother
    #    (if this line refers to a founder, these should be set to
    #     zero).
    #  <sex> denotes the individuals sex, using the convention
    #     1=male, 2=female.
    #
    # Each <genotype> is encoded as two integer allele numbers.

    if(missing(family)) 
      family <- 1:nrow(data)
    if(missing(pid))
      pid <- 1:nrow(data)
    if(missing(father))
      father <- rep(0,nrow(data))
    if(missing(mother))
      mother <- rep(0,nrow(data))
    if(missing(sex))
      sex <- rep(0,nrow(data))

    pedigree <- list()
    pedigree$family <- format(family)
    pedigree$pid    <- format(pid)
    pedigree$father <- format(father)
    pedigree$mother <- format(mother)
    pedigree$sex    <- format(sex)

    which <- sapply(data, is.genotype)
    if(!all(which)) warning("Data contianed non-genotype variables.",
                            " These have been ignored: ",
                            paste(colnames(data)[!which]) )

    data <- data[,which]

    for( col in names(data) )
      {
        name.1 <- paste(col,".1")
        name.2 <- paste(col,".2")


        allele.number <- function(g, ind)
          as.numeric(as.factor(allele(g,ind), levels=allele.names(g, ind) ))
        
        pedigree[[name.1]] <- allele.number( data[[col]], 1)
        pedigree[[name.2]] <- allele.number( data[[col]], 2)
      }
    pedigree <- as.data.frame(pedigree)
    
    # NA's are represented as 0
    pedigree[is.na(pedigree)] <- 0

    write.table(pedigree,   file=file, sep=" ", row.names=FALSE,
                col.names=F, quote=F)
  }

write.marker.file<-function(data, location, file="marker.txt")
{
    # marker.map file format
    # --------------------
    #
    # MARKERID   NAME      LOCATION
    # <col#>     <name>    <location>

    which <- sapply(data, is.genotype)
    if(!all(which)) warning("Data contianed non-genotype variables.",
                            " These have been ignored: ",
                            paste(colnames(data)[!which]) )

    data <- data[,which]
    if(missing(location)) location <- 1:ncol(data)

    ## Create marker.map file data frame
    marker.map <- cbind(
                             formatC(1:ncol(data), width=8),
                             formatC(colnames(data), width=8, flag="-"),
                             formatC(location, width=8)
                             )

    marker.map <- rbind(c("MARKERID", "NAME    ","LOCATION"), marker.map)
    
    write.table(marker.map, file=file, sep="   ", row.names=FALSE,
                col.names=F, quote=F)    
}
write.pop.file <- function(data,
                           file="",
                           digits=2,
                           description="Data from R"
                           )
  {
    which <- sapply(data, is.genotype)
    if(!all(which)) warning("Data contianed non-genotype variables.",
                            " These have been ignored: ",
                            paste(colnames(data)[!which]) )

    data <- data[,which]

    # convert allele names into two or three digit numbers
    for( col in names(data) )
      {
        # first convert to numbers
        a1 <- as.numeric(factor(allele(data[[col]],1)))
        a2 <- as.numeric(factor(allele(data[[col]],2)))

        # convert NA to 0
        a1[is.na(a1)] <- 0
        a2[is.na(a2)] <- 0
        
        # now format to have correct # of digits
        a1 <- formatC( a1, width=digits, flag="0")
        a2 <- formatC( a2, width=digits, flag="0")
        
        # now paste back together
        data[[col]] <- paste(a1,a2,sep="")
      }

    if(file=="")
      f <- stdout()
    else
      f <- file(file,"w")

    # header line
    cat(description, file=f)
    cat("\n", file=f)

    # marker names
    cat(colnames(data),sep=" ", file=f)
    cat("\n", file=f)

    # group token
    cat("POP", file=f)
    cat("\n", file=f)

    # write allele data.  First token is row id, followed by a comma
    # markers are separated by space
    rownames(data) <- paste(rownames(data),",", sep="")
    write.table( data, file=f, sep=" ", quote=FALSE, col.names=F)

    if(file!="")
      close(f)
    
  }

# $Id: zzz.R,v 1.8 2003/05/30 15:52:19 warnesgr Exp $

.First.lib <- function(libname, pkgname)
{
  if( is.null(formals(library)$pos) )
    library <- library.pos
  
  library(combinat,pos=3)
  library(MASS, pos=3)
  library(gregmisc,pos=3)
}
