.packageName <- "gap"
# worked 28/6/03
tbyt<-function(h,n) {
   D<-VarD<-Dmax<-VarDmax<-Dprime<-VarDprime<-x2<-0
   z<-.C("tbyt",h=as.vector(h), haplotypes=as.double(n),
          D=as.double(D), VarD=as.double(VarD),
          Dmax=as.double(Dmax), VarDmax=as.double(VarDmax),
          Dprime=as.double(Dprime), VarDprime=as.double(VarDprime),
          x2=as.double(x2),PACKAGE="gap")

    list(h=h,n=n,D=z$D,VarD=z$VarD,
         Dmax=z$Dmax,VarDmax=z$VarDmax,Dprime=z$Dprime,
         VarDprime=z$VarDprime,x2=z$x2)
}

kbyl<-function(n1,n2,h,n,optrho=2)
{
   x2<-seX2<-rho<-seR<-klinfo<-0
   VarDp<-0
   Dijtable<-Dmaxtable<-Dijptable<-VarDijtable<-VarDijptable<-matrix(rep(0,n1*n2),nrow=n1)
   z<-.C("kbyl",nalleles1=as.integer(n1), nalleles2=as.integer(n2),
          h=as.double(h), haplotypes=as.double(n),
          VarDp=as.double(VarDp),Dijtable=matrix(Dijtable,nrow=n1),
          Dmaxtable=matrix(Dmaxtable,nrow=n1),
          Dijptable=matrix(Dijptable,nrow=n1),
          VarDijtable=matrix(VarDijtable,nrow=n1),
          VarDijptable=matrix(VarDijptable,nrow=n1),
          x2=as.double(x2), seX2=as.double(seX2),
          rho=as.double(rho), seR=as.double(seR), optrho=as.integer(optrho),
          klinfo=as.double(klinfo),PACKAGE="gap")

   list(n1=z$nalleles1, n2=z$nalleles2, h=z$h, n=z$haplotypes,
   VarDp=z$VarDp,Dijtable=z$Dijtable, Dmaxtable=z$Dmaxtable,
   Dijptable=z$Dijptable, VarDijtable=z$VarDijtable, VarDijptable=z$VarDijptable,
   x2=z$x2, seX2=z$seX2,
   rho=z$rho, seR=z$seR, optrho=z$optrho, klinfo=z$klinfo)
}
# This function fits a Bradley-Terry Model to a squared contingency table
# JH Zhao 5/1/2004

bt <- function(x)
{
  n <- dim(x)[1]
  a <- - diag(n)
  a[,1] <- 1
  allele <- a
  for (i in 2:(n-1))
  {
    a <- - diag(n)
    a[,i] <- 1
    allele <- rbind(allele,a)
  }
  count <- rep(0,n*(n-1))
  y <- count + 1
  k <- 1
  for (i in 1:n)
  {
    for (j in 1:n) if(i!=j)
    {
      count[k] <- x[i,j]
      k <- k + 1
    }
  }
  # This generates format as required by ETDT
  # adapted from bt.sas (16/05/1999)
  toETDT <- function(a)
  {
     n <- dim(a)[1]
     nn1 <- n*(n-1)/2;
     C<-tr<-i1<-i2<-rep(0,nn1)
     ij <- matrix(rep(0,n*nn1),nrow=nn1)
     vi <- rep(0,n)
     l <- 1
     for(i in 1:(n-1))
     {
        for(j in (i+1):n)
        {
          C[l] <- a[i,j]+a[j,i]
          tr[l] <- a[i,j]
          i1[l] <- i
          i2[l] <- j
          ij[l,i] <- 1
          ij[l,j] <- -1
          l <- l + 1
        }
        vi[i] <- i
     }
     cbind(i1,i2,C,tr,ij)
  }

  bt.glm<-glm(y~-1+allele,weights=count,family="binomial")
  list(y=y,count=count,allele=allele,bt.glm=bt.glm,etdt.dat=toETDT(x))
}
chow.test <- function(y1,x1,y2,x2,x=NULL)
{
	mlr <- function(xy)
	{
		N <- nrow(xy)
		P <- ncol(xy)
		R <- cor(xy)
		b <- solve(R[-P,-P], R[,P][-P])
		variance <- var(xy)
		vr <- diag(variance)
		vr <- (vr/vr[P])[-P]
		b <- b/sqrt(vr)
		sse <- (variance[P, P]-(var(xy)[,P][-P])%*%b)*(N-1)
		sse
	}
        xy1<-cbind(x1,y1)
        xy2<-cbind(x2,y2)
	sse12 <- mlr(xy1)+mlr(xy2)
# in case of pooled x is known
        if(!is.null(x))
        {
          xy <- cbind(x,c(y1,y2))
          sse <- mlr(xy)
        }
        else 
	sse <- mlr(rbind(xy1, xy2))
        df1 <- ncol(xy1)
	df2 <- nrow(xy1)+nrow(xy2)-2*(df1)
	f <- (sse-sse12)*df2/(df1*sse12)
	p <- pf(f, df1, df2, lower=F)
	z <- c(f, df1, df2, p)
	names(z) <- c("F value", "d.f.1", "d.f.2", "P value")
	z
}
# program to obtain power for family-based and population-based association study
# Jing Hua Zhao 30-12-98
# Risch & Merikangas 1996
# Science 273: 1516-17 13SEP1996
# Science 275: 1327-30 28FEB1997

fbsize <- function (gamma,p,debug=0,error=0)
# Family-based sample sizes
{
  strlen <- function(x) length(unlist(strsplit(as.character(x),split="")))
  sn <- function (all,op)
  # m=0,v=1 under the null hypotheses
  # to be used by fbsize()
  {
    m <- all[1]
    v <- all[2]
    z1beta <- -0.84              # 1-beta=0.8,-.84162123
    if (op==1) zalpha <- 3.72    # alpha=1E-4,3.7190165
    else zalpha <- 5.33          # alpha=5E-8,5.3267239
    s <- ((zalpha-sqrt(v)*z1beta)/m)^2/2 # shared/transmitted for each parent
    if(op==3) s <- s/2                  # the sample size is halved
    s
  }

  q <- 1-p
  k <- (p*gamma+q)^2
  va <- 2*p*q*((gamma-1)*(p*gamma+q))^2
  vd <- (p*q*(gamma-1)^2)^2

  if (debug==1) cat("K=",k, "VA=",va, "VD=",vd,"\n")

  w <- p*q*(gamma-1)^2/(p*gamma+q)^2
  y <- (1+w)/(2+w)
  lambdas <- (1+0.5*w)^2
  lambdao <- 1+w
  h <- h1 <- p*q*(gamma+1)/(p*gamma+q)
  pA <- gamma/(gamma+1)

# ASP
  nl.m <- 0
  nl.v <- 1
  aa.m <- 2*y-1
  if (error==1) aa.v <- 0
  else aa.v <- 4*y*(1-y)
  aa <- c(aa.m,aa.v)

  n1 <- sn(aa,1)

# TDT
  aa.m <- sqrt(h)*(gamma-1)/(gamma+1)
  aa.v <- 1-h*((gamma-1)/(gamma+1))^2
  aa <- c(aa.m,aa.v)

  n2 <- sn(aa,2)

# ASP-TDT
  h <- h2 <- p*q*(gamma+1)^2/(2*(p*gamma+q)^2+p*q*(gamma-1)^2)
  aa.m <- sqrt(h)*(gamma-1)/(gamma+1)
  aa.v <- 1-h*((gamma-1)/(gamma+1))^2
  aa <- c(aa.m,aa.v)

  n3 <- sn(aa,3)

  cat(format(gamma,width=4,nsmall=2),
      format(p,width=5,nsmall=2),
      format(round(y,digits=3),nsmall=3),
      rep("",10-strlen(ceiling(n1))),ceiling(n1),
      format(round(pA,digits=3),nsmall=3),
      format(round(h1,digits=3),nsmall=3),
      rep("",8-strlen(ceiling(n2))),ceiling(n2),
      format(round(h2,digits=3),nsmall=3),
      rep("",8-strlen(ceiling(n3))),ceiling(n3),
      format(round(lambdao,digits=2),nsmall=2),
      format(round(lambdas,digits=2),nsmall=2),"\n")
  list(gamma=gamma,p=p,y=y,n1=n1,pA=pA,h1=h1,n2=n2,h2=h2,n3=n3,
      lambdao=lambdao,lambdas=lambdas)

}

#MSmodel <- function (lambdas, dom=0)
# Ebers G et al (1996) on multiple sclerosis Nat Genet 13:472
# for modest lambdas, two models are the same
# lambdas=sibling risk ratio associated with the locus
#{
#  if (dom!=0) {
#     # model with moderate amount of dominance variance
#     y <- 1-0.5/sqrt(lambdas)
#     z2 <- y*y
#     z1 <- 2*y*(1-y)
#     z0 <- (1-y)^2
#  }
#  else {
#    # additive model, dominance variance eq 0#
#    z2 <- 0.5-0.25/lambdas
#    z1 <- 0.5
#    z0 <- 0.25/lambdas
#  }
#  z <- c(z2,z1,z0)
#  z
#}
genecounting <- function(data,weight=NULL,convll=1,handle.miss=0,eps=0.00001,maxit=50,pl=0.001)
{
  if(is.null(weight)) weight<-rep(1,dim(data)[1])
# precis<-1
# to call dpmach
# tol<-1.2
#  while(tol>1.0)
#  {
#     precis<-precis/2.0;
#     tol<-1.0+precis;
#  }
  precis<-.Machine$double.eps
  gid<-1:(dim(data)[1])
  nloci=dim(data)[2]/2
  loci<-rep(0,nloci)
  for (i in 1:nloci)
  {
      loci[i]=max(data[,c(2*i-1,2*i)])
  }
  data<-as.matrix(data)
  data<-t(data)
  hapall<-1
  for(i in 1:nloci)
  {
    hapall<-hapall*loci[i];
  }
  h0<-h1<-hapid<-rep(0,hapall)
  obscom<-length(weight)
  prob<-rep(0,obscom)
  lnl0<-lnl1<-0
  npusr<-npdat<-rep(0,2)
# 13/11/2003
# change to reduce memory request
# htrtable<-matrix(rep(0,obscom*hapall),nrow=obscom)
  htrtable<-0 
  iter<-0
  converge<-0
  z <- .C("gc",
           Rhandlemissing=as.integer(handle.miss),
           convll=as.integer(convll),
           eps=as.double(eps),
           maxit=as.integer(maxit),
           Rpl=as.double(pl),
           precis=as.double(precis),
           gid=as.integer(gid),
           Rnloci=as.integer(nloci),
           Rloci=as.integer(loci),
           Robscom=as.integer(obscom),
           Rhapall=as.integer(hapall),
           genotype=as.integer(data),
           count=as.integer(weight),
           hapid=as.integer(hapid),
           prob=as.double(prob),
           Rh0=as.double(h0),
           Rh1=as.double(h1),
           lnl0=as.double(lnl0),
           lnl1=as.double(lnl1),
           npusr=as.integer(npusr),
           npdat=as.integer(npdat),
           htrtable=as.double(htrtable),
           iter=as.integer(iter),
           converge=as.integer(converge),PACKAGE="gap"
           )
  x<-0
  hapid<-0
# Dprime<-sum(z$Rh0*abs(z$Rh1-z$Rh0))
# x<-t(matrix(z$htrtable/2,nrow=hapall))
# hapid<-apply(x,2,sum)>0
# x<-x[,hapid]
# hapid<-(1:hapall)[hapid]
  di0<-1-sum((z$Rh0)^2)
  di1<-1-sum((z$Rh1)^2)
  list(h=z$Rh1, h0=z$Rh0, prob=z$prob, l0=z$lnl0, l1=z$lnl1,
       hapid=hapid, npusr=z$npusr, npdat=z$npdat, htrtable=x,
       iter=z$iter,converge=z$converge,di0=di0,di1=di1)
}

gc.em<-function(data, locus.label=NA, converge.eps=0.000001, maxiter=500, handle.miss=0)
{
# to run genecounting
  weight<-rep(1,dim(data)[1])
  nloci<-dim(data)[2]/2
  loci<-rep(0,nloci)
  for (i in 1:nloci)
  {
      loci[i]<-max(data[,c(2*i-1,2*i)])
  }
  if(all(is.na(locus.label))) {
     locus.label<- paste("loc-",1:nloci,sep="")
  }
  data.gc<-genecounting(data,weight,eps=converge.eps,pl=0.001,maxit=maxiter,handle.miss)
  hap.prob<-data.gc$h
  hap.prob.noLD<-data.gc$h0
  lnlike<-data.gc$l1
  lr<-2*(data.gc$l1-data.gc$l0)
  df<-data.gc$npdat-sum(loci)-length(loci)
  niter<-data.gc$iter
  converge<-data.gc$converge
# to further extract information and obtain unique haplotypes
  hapas<-read.table("assign.dat")
  unlink("assign.dat")
  newnames<-c("subj","chr",locus.label,"post","hapid")
  names(hapas)<-newnames
  ncol<-nloci+4
  nrow<-dim(hapas)[1]/2
  indx1<-2*1:nrow-1
  indx2<-2*1:nrow
  indx.subj<-hapas$subj[indx1]
  hapdat<-hapas[,-c(1,2,ncol-1)]
  post<-hapas$post[indx1]
  hapid<-hapas$hapid
  hapdat<-data.frame(hapdat,one=rep(1,nrow*2))
  attach(hapdat)
  tmp<-by(hapdat,one,unique)
  haplotype<-as.matrix(tmp[[1]])
  tmp<-order(haplotype[,nloci+1])
  haplotype<-haplotype[tmp,1:(dim(haplotype)[2]-2)]
  dimnames(haplotype)<-list(1:length(haplotype[,1]),locus.label)
  hap1<-hapid[indx1]
  hap2<-hapid[indx2]
  uhap <- sort(unique(hapid))
  hap.prob<-hap.prob[uhap]
  nreps<-tapply(indx.subj,indx.subj,length)
# 13/11/2003
# haplotype trend regression
# assign.dat already has sequential number to avoid duplicate IDs
  idx.subj<-sort(unique(indx.subj))
  N<-length(idx.subj)
  P<-length(uhap)
  idx.subj<-cbind(1:N,idx.subj)
  idx.uhap<-cbind(1:P,uhap)
  htrtable<-matrix(rep(0,N*P),nrow=N)
  for(l in 1:nrow)
  {
    i<-idx.subj[,1][idx.subj[,2]==indx.subj[l]]
    j1<-idx.uhap[,1][idx.uhap[,2]==hap1[l]]
    htrtable[i,j1]<-htrtable[i,j1]+post[l]
    j2<-idx.uhap[,1][idx.uhap[,2]==hap2[l]]
    htrtable[i,j2]<-htrtable[i,j2]+post[l]
  }
  htrtable<-htrtable/2
  dimnames(htrtable)<-list(NULL,as.character(uhap))

  list(lnlike=lnlike,lr=lr,
       hap.prob=hap.prob,hap.prob.noLD=hap.prob.noLD,indx.subj=indx.subj,
       post=post,hap1code=hap1,hap2code=hap2,haplotype=haplotype,
       nreps=nreps,converge=converge,niter=niter,uhap=uhap,htrtable=htrtable)
}
gcontrol<-function(data,zeta=1000,kappa=4,tau2=1,epsilon=0.01,ngib=500,burn=50,idum=2348)
{
  nkdata<-length(data)
  deltot<-rep(0,nkdata)
  kdata<-t((data))
  x<-a<-rep(0,nkdata)
  z<-.C("gcontrol",kdata=as.double(kdata),nkdata=as.integer(nkdata),
        zeta=as.double(zeta),kappa=as.double(kappa),tau2=as.double(tau2),
        epsilon=as.double(epsilon),ngib=as.integer(ngib),burn=as.integer(burn),
        idumR=as.integer(idum),deltot=as.double(deltot),x=as.double(array(x)),A=as.double(a),PACKAGE="gap")

  nkdata6<-nkdata/6
  list(deltot=z$deltot[1:nkdata6],x2=z$x[1:nkdata6],A=z$A[1:nkdata6])
}
gif <- function(data,gifset)
{
  famsize<-dim(data)[1]
  giflen<-length(gifset)
  gifval<-0
  z<-.C("gif",data=as.integer(t(data)),famsize=as.integer(famsize),
        gifset=as.integer(array(gifset)),giflen=as.integer(giflen),
        gifval=as.double(gifval),PACKAGE="gap")

  list(gifval=z$gifval)
}
hap<-function(id,data,nloci,loci=rep(2,nloci),names=paste("loci",1:nloci,sep=""),
              mb=0,pr=0,po=0.001,to=0.001,th=1,maxit=100,n=0,
              ss=0,rs=0,rp=0,ro=0,rv=0,sd=0,mm=0,mi=0,mc=50,ds=0.1,de=0,q=0)
{
  if (rv & ro) stop("rv and ro flags cannot both be set\n");
# if (mi==0 & (mc | ds | de)) stop("mc, ds, de parameters are only legal if mi is set\n");
  if (rp & mm==0) stop("rp option only relevant with mm # option\n");
  nobs<-dim(data)[1]
  data<-as.matrix(data)
  if(length(id)!=dim(data)[1]) stop("id and data should have the same length")
  l1<-niter<-converge<-0

  z<-.C("hap",nobs=as.integer(nobs),idstr=as.character(id),data=as.character(t(data)),
        nloci=as.integer(nloci),loci=as.integer(loci),names=as.character(names),mb=as.double(mb),
        pr=as.double(pr),po=as.double(po),to=as.double(to),th=as.double(th),
        maxitt=as.double(maxit),n=as.integer(n),sst=as.integer(ss),rst=as.integer(rs),
        rp=as.integer(rp),ro=as.integer(ro),rv=as.integer(rv),sd=as.double(sd),
        mm=as.integer(mm),mi=as.integer(mi),mc=as.integer(mc),ds=as.double(ds),
        de=as.double(de),q=as.integer(q),l1=as.double(l1),niter=as.integer(niter),
        converged=as.integer(converge),PACKAGE="gap")

  list(l1=z$l1,converge=z$converged,niter=z$niter)
}
# 15-10-03 WAH
# 16-10-03 UCL office to add sort=F in merge
hap.em<-function(id,data,locus.label=NA,converge.eps=0.000001,maxiter=500)
{
  data<-as.matrix(data)
  nloci<-dim(data)[2]/2
  loci<-rep(0,nloci)
  for (i in 1:nloci)
  {
     loci[i]<-max(data[,c(2*i-1,2*i)])
  }
  if(all(is.na(locus.label))) {
     locus.label<- paste("loc-",1:nloci,sep="")
  }
##
  z<- hap(id=id,data=data,nloci,loci=loci,ss=1)
#
  tmp1<-read.table("hap.out",header=T)
# unlink("hap.out")
  haplotype<-as.matrix(tmp1[,1:nloci])
  dimnames(haplotype)<-list(1:length(haplotype[,1]),locus.label)
  uhap<-hapid<-1:(dim(tmp1)[1])
  tmp1<-data.frame(tmp1,hapid)
  hap.prob<-tmp1[,nloci+1]
#
  tmp2<-read.table("assign.out",header=T)
# unlink("assign.out")
  nrow<-dim(tmp2)[1]/2
  indx1<-2*1:nrow-1
  indx2<-2*1:nrow
  indx.subj<-tmp2[indx1,1]
  post<-tmp2[indx1,nloci+3]

  tmp<-merge(tmp1[,-(nloci+1)],tmp2[,-c(1,2)],sort=F)
  hap1<-tmp[indx1,(nloci+1)]
  hap2<-tmp[indx2,(nloci+1)]
  nreps<-tapply(indx.subj,indx.subj,length)

  list (lnlike=z$l1,hap.prob=hap.prob,indx.subj=indx.subj,post=post,
        hap1code=hap1,hap2code=hap2,haplotype=haplotype,nreps=nreps,
        converge=z$converge,niter=z$niter,uhap=uhap)
}
hap.score<-function(y, geno, trait.type="gaussian",
                    offset = NA, x.adj = NA, skip.haplo=.005,
                    locus.label=NA, miss.val=0, n.sim=0, method="gc", id=NA, handle.miss=0, n.miss.loci=NA, sexid=NA)
{
  require(haplo.score)
  trait.int <- charmatch(trait.type, c("gaussian", "binomial", "poisson", "ordinal"))
  if(is.na(trait.int)) stop("Invalid trait type")
  if(trait.int == 0)   stop("Ambiguous trait type")
  if(length(y)!=nrow(geno)) stop("Dims of y and geno are not compatible")
  n.loci <- ncol(geno)/2
  if(n.loci != (floor(ncol(geno)/2)) )stop("Odd number of cols of geno")
  if(handle.miss==0)
  {
    miss <- apply(is.na(geno),1,any)
    if(!all(is.na(miss.val))) {
       for(mval in miss.val){
          miss <- miss | apply(geno==mval, 1, any)
       }
    }
  }
  else
  {
    if(is.na(n.miss.loci)) stop("Maximum number of missing loci (n.miss.loci) not specified")
    nmiss <- apply(is.na(geno),1,sum)
    if(!all(is.na(miss.val))) {
       for(mval in miss.val) {
          nmiss <- nmiss + apply(geno==mval, 1, sum)
       }
    }
    if(n.miss.loci<0 | n.miss.loci >= n.loci) stop("Invalid control for number of missing loci")
    miss <- rep(F, length(y))
    for(i in 1:length(y)) if(nmiss[i] > n.miss.loci*2) miss[i] <- T
  }
  adjusted <- T
  if( all(is.na(x.adj)) ) adjusted <- F
  if(adjusted){
    x.adj <- as.matrix(x.adj)
    if(nrow(x.adj)!=length(y)) stop("Dims of y and x.adj are not compatible")
  }
  miss <- miss | is.na(y) 
  if(adjusted) miss <- miss| apply(is.na(x.adj),1,any)
  if(trait.int==3) {
     if(all(is.na(offset))) stop("Missing offset")
     miss <- miss | is.na(offset)
     offset <- offset[!miss]
  }
  y <- as.numeric(y[!miss])
  geno <- geno[!miss,]
  if(adjusted) x.adj <- x.adj[!miss,,drop=F]
  if(trait.int==2) {
    if(!all(y==1|y==0)) stop("Invalid y values")
    if(all(y==1) | all(y==0)) stop("No variation in y values")
  }
  if(trait.int==4){
     y <- factor(y)
     y.lev <- levels(y)
     y <- as.numeric(y)
     if(max(y) < 3) stop("Less than 3 levels for y values")
  }
  n.subj <- length(y)
  if(all(is.na(id))) id <- 1:n.subj
  method.id<-charmatch(method, c("gc", "hap"))
  if(is.na(method.id)) stop("Invalid selection of method")
  if(method.id == 0)   stop("Ambiguous method")
  else if(method.id==1) haplo <- gc.em(data=geno, locus.label, converge.eps=0.00001, maxiter=5000, handle.miss=handle.miss)
  else haplo <- hap.em(id, data=geno, locus.label, converge.eps=0.00001, maxiter=5000) 
  if(!haplo$converge) stop("EM for haplo failed to converge")
  hap1 <- haplo$hap1code
  hap2 <- haplo$hap2code
  indx <- haplo$indx.subj
  post <- haplo$post
  nreps <- as.vector(haplo$nreps)
  uhap<-haplo$uhap
  which.haplo<-haplo$hap.prob>=skip.haplo
  uhap<-uhap[which.haplo]
  x <- outer(hap1,uhap,"==") + outer(hap2,uhap,"==")
  n.x <- ncol(x)
  x.post<-matrix(rep(NA, n.subj * n.x), ncol=n.x)
  for(j in 1:n.x){
     x.post[,j] <- tapply(x[,j]*post, indx, sum)
  }
  if(trait.int <= 3){ 
    if(!adjusted){
       mu <- switch(trait.int, mean(y), mean(y), sum(y)/sum(offset) )
       a  <- switch(trait.int, var(y), 1, 1)
       x.adj <- matrix(rep(1,n.subj),ncol=1)
     }
     if(adjusted){
        reg.out <- glm(y ~ x.adj, family=trait.type)
        x.adj <- cbind(rep(1,n.subj),x.adj)
        mu <- reg.out$fitted.values
        a  <- switch(trait.int,sum(reg.out$residuals^2)/reg.out$df.residual,1, 1)
      }
     v <- switch(trait.int, 1/a, mu*(1-mu), mu )
     tmp <- haplo.score.glm(y, mu, a, v, x.adj, nreps, x.post, post, x)
     u.score <- tmp$u.score
     v.score <- tmp$v.score
   }
   if(trait.int ==4) {
      if(adjusted){
         library("Design")
         library("Hmisc")
         reg.out <- lrm(y ~ x.adj)
         K <- max(y)
         n.xadj <- ncol(x.adj)
         alpha <- reg.out$coef[1:(K-1)]
         beta <- reg.out$coeff[K:(K-1 + n.xadj)]

         tmp <- haplo.score.podds(y, alpha, beta, x.adj, nreps, x.post,
                              post, x)
       }
      if(!adjusted){
         tbl <- table(y)
         s <- 1- (cumsum(tbl)-tbl)/n.subj
         alpha <-  - log((1-s[-1])/s[-1])
         tmp <- haplo.score.podds(y, alpha, beta=NA, x.adj=NA, nreps, x.post, post, x)
       }
      u.score <- tmp$u.score
      v.score <- tmp$v.score
    }
   tmp<-Ginv(v.score)
   df <- tmp$rank
   g.inv <- tmp$Ginv
   score.global <- u.score%*% g.inv %*%u.score
   score.haplo <- u.score / sqrt(diag(v.score))
   score.max <-  max(score.haplo^2)
   if(n.sim==0){
      score.global.p.sim <- NA
      score.haplo.p.sim <- rep(NA,length(score.haplo))
      score.max.p.sim <- NA
      n.val.global <- NA
      n.val.haplo <- NA
   }
   if(n.sim > 0){
      score.global.rej <- 0
      score.haplo.rej  <- rep(0,length(score.haplo))
      score.max.rej    <- 0
      n.val.global <- 0
      n.val.haplo <- 0
      if(trait.int<=3){
         mu.rand <- mu
         v.rand <- v  
       }
      for(i in 1:n.sim){
         rand.ord <- order(runif(n.subj))
         if(trait.int <=3){ 
           if(adjusted){
              mu.rand <- mu[rand.ord]
              v.rand <- switch(trait.int, v, v[rand.ord], v[rand.ord])
            }
           tmp <- haplo.score.glm(y[rand.ord], mu.rand, a, v.rand, 
                                x.adj[rand.ord,], nreps, x.post, post, x)
         }
         if(trait.int ==4){
            if(adjusted){             
               tmp <- haplo.score.podds(y[rand.ord], alpha, beta, 
                               x.adj[rand.ord,,drop=F],nreps, x.post, post, x)
            }
            if(!adjusted) {
               tmp <- haplo.score.podds(y[rand.ord], alpha, beta=NA, 
                               x.adj=NA,nreps, x.post, post, x)
             }

          }
         u.score <- tmp$u.score
         v.score <- tmp$v.score
         tmp <- Ginv(v.score)
         g.inv <- tmp$Ginv  
         score.global.sim <- u.score %*% g.inv %*% u.score
         score.haplo.sim  <- (u.score / sqrt(diag(v.score)))^2
         score.max.sim <- max(score.haplo.sim)
         if(!is.na(score.global.sim)) {
            n.val.global <- n.val.global +1
            if(score.global.sim >= score.global) score.global.rej <- score.global.rej +1
          }
         if(!any(is.na(score.haplo.sim))){
            n.val.haplo <- n.val.haplo + 1
            score.haplo.rej <- score.haplo.rej +
                               ifelse(score.haplo.sim >= score.haplo^2, 1, 0)
            if(score.max.sim >= score.max) score.max.rej <- score.max.rej +1
          }
      }
      score.global.p.sim <- score.global.rej /  n.val.global
      score.haplo.p.sim <- score.haplo.rej / n.val.haplo
      score.max.p.sim <- score.max.rej / n.val.haplo
    }
   score.global.p <- 1 - pchisq(score.global,df)
   score.haplo.p <- 1-pchisq(score.haplo^2,1)
   if(all(is.na(locus.label))) {
      locus.label<- paste("loc-",1:n.loci,sep="")
    }
   obj <- (list(score.global=score.global, df=df,score.global.p=score.global.p,
       score.global.p.sim=score.global.p.sim,
       score.haplo=score.haplo,score.haplo.p=score.haplo.p,
       score.haplo.p.sim=score.haplo.p.sim,
       score.max.p.sim=score.max.p.sim,
       haplotype=haplo$haplotype[which.haplo,],
       hap.prob=haplo$hap.prob[which.haplo],
       locus.label=locus.label,
       n.sim=n.sim, n.val.global=n.val.global, n.val.haplo=n.val.haplo))
   class(obj) <- "haplo.score"
   return(obj)
}
 
# 13-9-2003 start to implement
# 14-9-2003 in shape
# 21-9-2003 start extensive checking
# 23-9-2003 rewrite interface to genecounting
# 26-9-2003 done with successful use of byand order
# 17-10-2003 start to implement missing genotype code
  

htr <- function(y, x, n.sim=0)
{
   require(MASS)
   mlr <- function(y,x)
   {
      N <- length(y)
      L <- dim(x)[2]
      l <- L - 1
      x1 <- cbind(rep(1,N),x[,1:l])
      b1 <- ginv(t(x1) %*% x1) %*% t(x1) %*% y
      y2 <- sum(y)^2
      redss <- y2 / N
      dtssc <- t(y) %*% y - redss
      dssm <- t(b1) %*% t(x1) %*% y
      reg.ss <- dssm - redss
      reg.df <- L - 1
      err.df <- (N - 1) - reg.df
      err.ss <- dtssc - reg.ss
      reg.ms <- reg.ss / reg.df
      err.ms <- err.ss / err.df
      fstat <- reg.ms / err.ms
      pv.fstat <- 1- pf(fstat, reg.df, err.df)
      fv <- rep(1,L)
      pi <- rep(1,L)
      for (i in 1:L)
      {
          x2 <- cbind(rep(1,N),x[,i])
          b2 <- ginv(t(x2) %*% x2) %*% t(x2) %*% y
          dssm <- t(b2) %*% t(x2) %*% y
          reg.ss <- dssm - redss
          reg.df <- dim(x2)[2] - 1
          err.df <- (N - 1) - reg.df
          err.ss <- dtssc - reg.ss
          reg.ms <- reg.ss / reg.df
          err.ms <- err.ss / err.df
          b.fstat <- reg.ms / err.ms
          b.pv <- 1 - pf(b.fstat, reg.df, err.df)
          fv[i] <- b.fstat
          pi[i] <- b.pv
      }
      list(f=fstat,p=pv.fstat,fv=fv,pi=pi)
   }

   N <- length(y)
   L <- dim(x)[2]
   z0 <- mlr(y,x)
   if (n.sim==0) list(f=z0$f,p=z0$p,fv=z0$fv,pi=z0$pi)
   else
   {
      p <- 0
      pi <- rep(0,L)
      for (i in 1:n.sim)
      {
          rand.ord <- order(runif(N))
          y <- y[rand.ord]
          z <- htr(y,x)
          if (z$f >= z0$f)  p <- p + 1
          for (j in 1:L) if (z$fv[j] >= z0$fv[j]) pi[j] <- pi[j] + 1
      }
      p <- p / n.sim
      for (j in 1:L) pi[j] <- pi[j] / n.sim
      list(f=z0$f,p=p,fv=z0$fv,pi=pi)
   }
}
# 08-2-2004 Working but miss.value needs to be added later
# 09-2-2004 Add using genotype counts
# 11-2-2004 Works ok in all three modes
# 17-2-2004 Add is.miss function

hwe <- function(data, is.count=FALSE, is.genotype=FALSE, yates.correct=FALSE, miss.val=0)
{
  g2a.c <- function (s)
  {
      d <- 1 + 8 * (s - 1)
      u <- 1 + ((1 + (sqrt(d) - 1) - 1) / 2)
      u <- ceiling(u)
      l = s - u * (u - 1) / 2
      list (l=l,u=u)
  }
  g2a <- function (x)
  {
      i <- 1 + floor((sqrt(8 * x + 1) - 1)/2)
      j <- x - i * (i - 1)/2
      i <- ifelse(j == 0, i - 1, i)
      j <- ifelse(j == 0, i, j)
      list(l=j,u=i)
  }
  is.miss <- function(data,is.genotype,miss.val=0)
  {
     if (is.genotype)
     {
        id <- array(FALSE, length(data))
        for (i in 1:length(miss.val))
            id <- id | data==miss.val[i]
     }
     else
     {
        id <- array(FALSE, length(data[,1]))  
        for (i in 1:length(miss.val))
            id <- id | apply(data==miss.val[i],1,any)
     }
     return (id)  
  }
  if (!is.count)
  {
     if (!is.genotype)
     {
        data <- data[!is.miss(data,is.genotype),]
        n.obs <- length(data[,1])
        genotype <- array(0,n.obs)
        n.allele <- max(data)
        a1 <- data[,1]
        a2 <- data[,2]
        for (i in 1:n.obs)
        {
            l <- min(a1[i],a2[i])
            u <- max(a1[i],a2[i])
            genotype[i] <- l + u * (u - 1) / 2
            geno.table <- table(genotype)
        }
     }
     else
     {
        data <- data[!is.miss(data,is.genotype)]
        geno.table <- table(data)
        n.obs <- length(data)
        a1 <- a2 <- array(0,n.obs)
        genotype <- data
        s <- max(data)
        z <- g2a(s)
        n.allele <- z$u
        for (i in 1:n.obs)
        {
            z <- g2a(data[i])
            a1[i] <- z$l
            a2[i] <- z$u
        }
     }
     n.obs2 <- 2 * n.obs
     allele.freq <- table(c(a1,a2)) / n.obs2
     n.geno <- length(geno.table)
     geno.name <- as.numeric(names(geno.table))

     x2 <- lrt <- 0
     n.genotype <- n.allele * (n.allele + 1) / 2
     all.genotype <- array(0,n.genotype)

     for (i in 1:n.geno)
     {
         j <- geno.name[i]
         all.genotype[j] <- geno.table[i]
     }
     to.warn <- FALSE
     for (i in 1:n.genotype)
     {
         o <- all.genotype[i]
         z <- g2a(i)
         a1 <- z$l
         a2 <- z$u
         e <- ifelse(a1==a2,1,2) * allele.freq[a1] * allele.freq[a2] * n.obs
         if (e < 0.5) to.warn <- TRUE
         if (yates.correct)
            x2 <- x2 + (abs (o - e) - 0.5)^2 / e
         else
            x2 <- x2 + (o - e)^2 / e
         if (o>0) lrt <- lrt + o * log(o / e)
     }
  }
  else
  {
      n.genotype <- length(data)
      n.obs <- sum(data)
      e <- array(0,n.genotype)
      n.allele <- (sqrt(1 + 8 * n.genotype) - 1) / 2
      allele.freq <- array(0,n.allele)
      to.warn <- FALSE
      k <- 0
      for (i in 1:n.allele)
      {
          for (j in 1:i)
          {
              k <- k + 1
              allele.freq[i] <- allele.freq[i] + data[k]
              allele.freq[j] <- allele.freq[j] + data[k]
              if(e[k] < 0.5) to.warn <- TRUE
          }
      }
      for (i in 1:n.allele) allele.freq[i] <- allele.freq[i] / n.obs / 2
      k <- 0
      for (i in 1:n.allele)
          for (j in 1:i)
          {
              k <- k + 1
              e[k] <- ifelse(i==j,1,2) * allele.freq[i] * allele.freq[j] * n.obs
          }
      x2 <- lrt <- 0
      for (i in 1:n.genotype)
      {
          o <- data[i]
          x2 <- x2 + (o - e[i])^2 / e[i]
          if (yates.correct) x2 <- x2 + (abs(o - e[i]) - 0.5)^2 / e[i]
          if (o > 0) lrt <- lrt + o * log(o / e[i])
      }
  }
  if (to.warn) cat("there is at least one cell with expected value < 0.5\n")
  df <- n.genotype - n.allele
  rho <- x2 / n.obs
  cat("Pearson x2=",x2,"df=",df,"p=",1-pchisq(x2,df),sep="\t")
  cat("\n")
  list (allele.freq=allele.freq,x2=x2, p.x2=1-pchisq(x2,df),
        lrt=lrt, p.lrt=1-pchisq(x2,df), df=df, rho=rho)
}
hwe.hardy<-function(a,alleles=3,seed=3000,sample=c(1000,1000,5000))
{
  if (alleles<3) stop("number of alleles should be at least 3")
  p<-1.0
  se<-0.0
  swp<-rep(0,3)
  z<-.C("hwe_hardy",a=as.integer(a), alleles=as.integer(alleles), 
        seed=as.integer(seed), gss=as.integer(sample),
        p=as.double(p), se=as.double(se), swp=as.double(swp),PACKAGE="gap")

  list(p=z$p, se=z$se, swp=z$swp)
}
kin.morgan<-function(ped)
{
   pedsize<-dim(ped)[1]
   kin<-rep(0,pedsize*(pedsize+1)/2)
   z<-.C("kin_morgan",data=as.integer(t(ped)),
          pedsize=as.integer(pedsize),kin=as.double(array(kin)),PACKAGE="gap")

    list(kin=z$kin)
}
# 18-10-03 start to implement
# 19-10-03 worked but could not figure out loops
# 20-10-03 polished and tested with Abbas Parsian's homozygosity mapping pedigrees, add documentation
#
makeped<-function(pifile="pedfile.pre",pofile="pedfile.ped",auto.select=1,
                  with.loop=0,loop.file=NA,auto.proband=1,proband.file=NA)
{
  z<-.C("makeped",pifile=as.character(pifile),pofile=as.character(pofile),autoselect=as.integer(auto.select),
        withloop=as.integer(with.loop),loopfile=as.character(loop.file),
        autoproband=as.integer(auto.proband),probandfile=as.character(proband.file),PACKAGE="gap")
}
mia<-function(hapfile="hap.out",assfile="assign.out",miafile="mia.out",so=0,ns=0,mi=0,allsnps=0,sas=0)
{
  z<-.C("mia",hapfile=as.character(hapfile),assfile=as.character(assfile),
        miafile=as.character(miafile),so=as.integer(so),ns=as.integer(ns),mi=as.integer(mi),
        allsnps=as.integer(allsnps),sas=as.integer(sas),PACKAGE="gap")
}
mtdt <- function(x,n.sim=0)
{
#
# lower triangular matrix
#
  tril <- function(t)
  {
    m <- dim(t)[1]
    n <- dim(t)[2]
    if(m!=n) stop("in fact, rows and columns have to be equal")
    a <- matrix(rep(0,m*n),nrow=m)
    for(i in 1:m)
    {
      for(j in 1:i)
      {
        a[i,j] <- t[i,j]
      }
    }
    a
  }
#
# upper triangular matrix
#
  triu <- function(t)
  {
    m <- dim(t)[1]
    n <- dim(t)[2]
    if(m!=n) stop("in fact, rows and columns have to be equal")
    a <- matrix(rep(0,m*n),nrow=m)
    for(i in 1:m)
    {
      for(j in i:n)
      {
        a[i,j] <- t[i,j]
      }
    }
    a
  }
#
# Spielman & Ewens' statistic (diagnoal elements kept)
# SAS 20/07/1999
#
  T <- x
  T <- T-diag(diag(T))
  m <- dim(T)[1]
#
# Find non-zero off-diagonal elements of table.
#
  b <- matrix(rep(0,m*m),nrow=m)
  i <- j <- matrix(rep(0,m*(m-1)/2),ncol=1)
  for (ii in 1:m)
  {
     for (jj in 1:(ii-1)) b[ii,jj]=T[ii,jj]+T[jj,ii]
  }
  l <- 0
  for (ii in 1:(m-1))
  {
     for (jj in (ii + 1):m)
        if (b[jj,ii]>0)
        {
           l <- l + 1
           i[l] <- jj
           j[l] <- ii
        }
  }
#
# Count number of different heterozygotes.
#
  NH <- l
#
# Count number of each heterozygote.
#
  t0 <- T
  tc <- apply(t0,2,sum)
  tr <- apply(t0,1,sum)
  c0 <- (m-1)/m
  se0 <- c0*sum((tc-tr)^2/(tc+tr))
  if (n.sim>0)
  {
    C <- rep(0,NH)
    for (k in 1:NH) C[k] <- T[i[k],j[k]] + T[j[k],i[k]]
    X <- rep(0,n.sim)
    for (k in 1:n.sim)
    {
       for (L in 1:NH)
       {
          T[i[L],j[L]] <- rbinom(1,C[L],0.5) # sum(runif(C(L))<0.5)
          T[j[L],i[L]] <- C[L] - T[i[L],j[L]]
       }
       tc <- apply(T,2,sum)
       tr <- apply(T,1,sum)
       X[k] <- c0*sum((tc-tr)^2/(tc+tr))
    }
    MCp <- 0
    for (k in 1:n.sim) if (X[k]>=se0) MCp <- MCp + 1
    pSE <- MCp/n.sim
    sSE <- sqrt(pSE*(1-pSE)/n.sim)
    cat('Spielman-Ewens Chi-square and empirical p (se): ', se0, pSE, sSE, "\n")
  } else cat('Spielman-Ewens Chi-square: ', se0, "\n")
#
# Simulate tables and compute TDT chi-square statistics.
#
# Should diag(T) is kept, the statistic will be similar to Spielman-Ewens'
# se.check

  T <- x

# This is according to Mike Miller's Matlab program
# Produce inverse (IV) of variance-covariance matrix (V).  This is
# constant across repeated samples in the Monte Carlo simulation.

  V <- diag(apply(T,1,sum)+apply(T,2,sum))-tril(T)-t(triu(T))-t(tril(T))-triu(T)
  IV <- solve(V[1:(m-1),1:(m-1)])

  T0 <- T
  d0=apply(T0[1:(m-1),1:(m-1)],1,sum)-apply(T0[1:(m-1),1:(m-1)],2,sum)
  x0 <- t(d0)%*%IV%*%d0
  se.check <- x0

  T <- T - diag(diag(T))
  V <- diag(apply(T,1,sum)+apply(T,2,sum))-tril(T)-t(triu(T))-t(tril(T))-triu(T)
  IV <- solve(V[1:(m-1),1:(m-1)])

  d0=apply(T0[1:(m-1),1:(m-1)],1,sum)-apply(T0[1:(m-1),1:(m-1)],2,sum)
  st0 <- t(d0)%*%IV%*%d0
  if (n.sim>0)
  {
    C <- rep(0,NH)
    for (k in 1:NH) C[k] <- T[i[k],j[k]] + T[j[k],i[k]]
    X <- rep(0,n.sim)
    for (k in 1:n.sim)
    {
       for (L in 1:NH)
       {
          T[i[L],j[L]] <- rbinom(1,C[L],0.5) # sum(runif(C(L))<0.5)
          T[j[L],i[L]] <- C[L]-T[i[L],j[L]]
       }
       d <- apply(T[1:(m-1),1:(m-1)],1,sum)-apply(T[1:(m-1),1:(m-1)],2,sum)
       X[k] <- t(d)%*%IV%*%d
    }
    MCp <- 0
    for (k in 1:n.sim) if (X[k]>st0) MCp <- MCp + 1
    pST <- MCp/n.sim
    sST <- sqrt(pST*(1-pST)/n.sim)
    cat('Stuart Chi-square and p (se): ', st0, pST, sST,"\n")
  } else {
    cat('Stuart Chi-square ',st0, "\n")
    cat('Value of Chi-square if diagonal elements are kept: ',se.check,"\n")
  }
  if (n.sim>0) list(SE=se0,pSE=pSE,sSE=sSE,ST=st0,pST=pST,sST=sST)
  else list(SE=se0,ST=st0)
}
muvar <- function(n.loci=1,y1=c(0,1,1),y12=c(1,1,1,1,1,0,0,0,0),p1=0.99,p2=0.9)
{
  if(n.loci==1)
    .C("onelocus",y1=as.single(y1),p1=as.single(p1),PACKAGE="gap")
  else
    .C("twolocus",y12=as.single(y12),p1=as.single(p1),p2=as.single(p2),PACKAGE="gap")
}

pbsize <- function (gamma=4.5, p=0.15, kp, x2alpha=29.72, zalpha=5.45, z1beta=-0.84)
# population-based sample size
# alpha=5e-8,beta=0.8, alpha would give 5% genome-wide significance level
# x2alpha = 29.72 (Q=29.7168)
#
# lambda is the NCP from the marginal table
# pi is the pr(Affected|aa)
{
  q <- 1-p
  pi <- kp/(gamma*p+q)^2
  lambda <- pi*p*q*(gamma-1)^2/(1-pi*(gamma*p+q)^2)
  n <- (z1beta-zalpha)^2/lambda
  n
}
pfc <- function(famdata,enum=0)
{
  famsize<-dim(famdata)[1]
  p<-tailp<-sump<-1.0
  stat<-rep(0,20)
  nenum<-0
  z<-.Fortran("family",famdata=as.integer(matrix(famdata,ncol=3)),
               famsize=as.integer(famsize),p=as.double(p),,stat=as.double(stat),toenum=as.integer(enum),
               tailp=as.double(tailp),sump=as.double(sump),nenum=as.double(nenum),PACKAGE="gap")

  if(enum==0) list(p=z$p,stat=z$stat[1:5])
  else list(p=z$p,stat=z$stat[1:5],tailp=z$tailp,sump=z$sump,nenum=z$nenum)
}
# R port of GENECOUNTING/PREPARE
# 29-1-2004 start implementing
# 30-1-2004 in shape
# 31-1-2004 working

pgc <- function (data,handle.miss=1,is.genotype=0,with.id=0)
{
    nobs <- dim(data)[1]
    nloci2 <- dim(data)[2]
    if (is.genotype)
    {
       nloci <- nloci2
       data<-cbind(data,data)
       a1 <- a2 <- gid <- 0
       for (i in 1:nobs)
       {
           row.i <- data[i,]
           for (j in 1:nloci)
           {
               .C("g2a_",s=as.integer(row.i[j]),a1=as.integer(a1),a2=as.integer(a2),gid=as.integer(gid),PACKAGE="gap")
               data[i,2*j-1] <- a1
               data[i,2*j] <- a2
           }
       }
    }
    else nloci <- nloci2/2
    data <- as.matrix(data)
    stack <- rbind(data[,(2*1:nloci)-1],data[,(2*1:nloci)])
    alleles <- apply(stack,2,max)
    idsave <- wt <- array(0,nobs)
    obscom <- nobs
    data <- t(data)
    gret <- matrix(array(0,nobs*nloci2),nrow=nobs)
    z <- .C("pgc",gdata=as.integer(data),handlemiss=as.integer(handle.miss),nobs=as.integer(nobs),nloci=as.integer(nloci),
            alleles=as.integer(alleles), wt=as.integer(wt),gret=as.integer(gret),
            withid=as.integer(with.id),idsave=as.double(idsave),obscom=as.integer(obscom),PACKAGE="gap")
    subset <- 1:(z$obscom)
    gret <- matrix(z$gret,nrow=nloci2)[,subset]
    if (with.id) list(cdata=t(gret),obscom=z$obscom,idsave=z$idsave[subset],wt=z$wt[subset])
    else list(cdata=t(gret),obscom=z$obscom,wt=z$wt[subset])
}
s2k <- function(y1,y2)
{
  if (length(y1)!=length(y2)) stop ("wrong number of elements")
  tablen<-length(y1)
  data<-c(y1,y2)
  x2a<-x2b<-0
  col1<-col2<-1
  p<-0
  z<-.C("x22k",data=as.integer(array(data)), tablen=as.integer(tablen),
        x2a=as.double(x2a), x2b=as.double(x2b), col1=as.integer(col1),
        col2=as.integer(col2), p=as.double(p),PACKAGE="gap")
  c1<-z$col1
  c2<-z$col2
  cat("\nthe maximum accumulated table below and above",c1,"\n")
  a<-sum(y1[1:c1])
  b<-sum(y1[-(1:c1)])
  c<-sum(y2[1:c1])
  d<-sum(y2[-(1:c1)])
  cat(a,b,a+b,"\n",c,d,c+d,"\n",a+c,b+d,a+b+c+d,"\n")
  cat("\nthe 1-to-other table with and without column",c2,"\n")
  a<-y1[c2]
  b<-sum(y1[-c2])
  c<-y2[c2]
  d<-sum(y2[-c2])
  cat(a,b,a+b,"\n",c,d,c+d,"\n",a+c,b+d,a+b+c+d,"\n")
  cat("\n")  

  list(x2a=z$x2a,x2b=z$x2b,col1=z$col1,col2=z$col2,p=z$p)
}
whscore <- function(allele,type)
{
  n<-dim(allele)[1]
  s<-0
  if(type==1)
    z<-.C("score_pairs",data=as.integer(t(allele)),n=as.integer(n),arscore=as.double(s),PACKAGE="gap")
  else
    z<-.C("score_all",data=as.integer(t(allele)),n=as.integer(n),arscore=as.double(s),PACKAGE="gap")

  z$arscore
}
".First.lib" <- function(lib, pkg) library.dynam("gap", pkg, lib)
