# Package-name marker for the installed "emplik" package (written by R's
# installation machinery in older package layouts; kept for compatibility).
.packageName <- "emplik"
BJnoint <- function(x, y, delta, beta0 = NA, maxiter = 30, error = 0.00001)
{
# This is an R function to compute the Buckley-James estimator for
# censored regression WITHOUT intercept term. (but you may still force x 
# to have a column of 1's). It calls another function iter(). This function
# used to call a C function.  Written by mai zhou, mai@ms.uky.edu
# First (C) version Jan. 1994. Last C revision: May 10, 1995
# R version: 2004 Jan 20. R speed actually is OK, no need of C function.
#
# Input:
# x is a matrix of N rows (covariates).
# y is the observed (censored) responses --- a vector of length N.
# delta is a vector of length N. delta =1 means (y) is not censored.
#           delta = 0 means y is right censored, i.e. the true
#        response is larger than y.
#
# Optional input: beta0 = starting value, a vector of length ncol(x);
#                         if missing/NA a censoring-blind lm() fit is used.
#                 maxiter = number of maximum iteration, default is 30.
#                           minimum iteration is internally set to 3.
#                 error = when the consecutive iteration changes less than
#                         error, the iteration will stop. default is .00001
# Output:
# the estimate, beta, and an extra integer at the end: number of iterations.
#
# Bug: More careful for ties. Do something better when last obs. is censored.

     x <- as.matrix(x)            # to insure x is a matrix, not a vector
     newtemp <- matrix(NA, ncol = ncol(x), nrow = 3)
     newtemp[1,] <- beta0
     # FIX: beta0 is a vector when ncol(x) > 1; `if(is.na(beta0))` on a
     # length > 1 condition is an error in R >= 4.2.  Fall back to the
     # lm() starting value when any component is missing.
     if(any(is.na(beta0))) newtemp[1, ] <- lm(y ~ 0 + x)$coef  # get initial est.
     for(i in 2:3) {
           newtemp[i, ] <- iter(x, y, delta, newtemp[i - 1, ])
     }
     num <- 2                        # do at least 2 iterations
     while(num <= maxiter && error < sum(abs(newtemp[2, ] - newtemp[3, ])))
          {
          newtemp[2, ] <- newtemp[3, ] 
          newtemp[3, ] <- iter(x, y, delta, newtemp[2, ])
          num <- num + 1
          }
    if(num > maxiter)   # not converged: return average of the last two iterates
    {newtemp[3, ] <- (newtemp[2, ] + newtemp[3, ])/2} 
    list(beta=newtemp[3, ], iteration=num)
}
DnR <- function(x, d, w, y=rep(-Inf, length(x)) )
{
# Number at risk and number of events at each uncensored time point.
# Inputs are assumed to be pre-sorted and collapsed, as produced by
# Wdataclean2().  y holds the left-truncation times; it need not have
# the same length as x but should satisfy length(y) = sum(w).  The
# default (all -Inf) means no truncation.

riskall <- rev(cumsum(rev(w)))
uncen <- d == 1
times <- x[uncen]
events <- w[uncen]
atrisk <- riskall[uncen]

if (any(y > -Inf)) {
  # For each uncensored time, count how many truncation times are >= it;
  # those subjects have not entered yet and must leave the risk set.
  late <- outer(y, times, FUN = ">=")
  atrisk <- atrisk - colSums(late)
}

list(times = times, n.risk = atrisk, n.event = events)
}

LTRC <- function(x,d,w=rep(1, length(d)),y=rep(-Inf, length(x)) ) {
#### Product-limit estimator for Left Truncated and Right Censored data
#### (the Lynden-Bell / WJT estimator).  With no truncation (all y = -Inf)
#### it reduces to the Kaplan-Meier / Nelson-Aalen estimator for
#### right-censored data.

  cleaned <- Wdataclean2(x, d, w)
  status <- cleaned$dd
  wgt <- cleaned$weight
  status[length(status)] <- 1   # force last obs uncensored: proper distribution
  val <- cleaned$value

  counts <- DnR(val, status, wgt, y = y)

  hazard <- counts$n.event / counts$n.risk
  surv <- cumprod(1 - hazard)
  cumhaz <- cumsum(hazard)
  jump <- -diff(c(1, surv))

  list(times = val[status == 1], survjump = jump, surv = surv, CumHaz = cumhaz)
}
WKM <- function(x,d,zc=rep(1,length(d)),w=rep(1,length(d))) {
# Weighted Kaplan-Meier estimator and its empirical log likelihood.
# x = times, d = censoring status (1 = event, 0 = right-censored),
# zc = collapse control passed to Wdataclean3(), w = weights.  The last
# observation is forced to be an event so the estimator is a proper
# distribution.

  cleaned <- Wdataclean3(x, d, zc, w)
  status <- cleaned$dd
  wgt <- cleaned$weight
  status[length(status)] <- 1

  nrisk <- rev(cumsum(rev(wgt)))
  surv <- cumprod(1 - (status * wgt) / nrisk)
  jump <- -diff(c(1, surv))

  # log EL: events contribute log(jump), censored obs contribute log(survival)
  logel <- sum(wgt[status == 1] * log(jump[status == 1])) +
           sum(wgt[status == 0] * log(surv[status == 0]))

  list(times = cleaned$value, jump = jump, surv = surv, logel = logel)
}
################################################################
## The function WKM() above computes a weighted Kaplan-Meier
## estimator:  x = times, d = censoring status, w = weights.
################################################################
Wdataclean2 <- function (z, d, wt = rep(1,length(z)) ) 
{
# Sort observations by (z, -d) and merge consecutive duplicates of the
# pair (z, d), summing their weights.  Returns the collapsed values,
# censoring indicators, and aggregated weights.
   ord <- order(z, -d)
   zz <- z[ord]
   di <- d[ord]
   ww <- wt[ord]

   n <- length(di)
   # TRUE at position i when obs i differs from obs i+1 in z or in d
   changed <- (zz[-1] != zz[-n]) | (di[-1] != di[-n])

   # last index of every run of identical (z, d); NA comparisons kept apart
   last <- c(which(changed | is.na(changed)), n)

   cw <- cumsum(ww)
   list(value = zz[last], dd = di[last],
        weight = diff(c(0, cw[last])))
}
##############################################################
# This function sorts the data and collapses observations that
# are true ties; the number of tied copies is accumulated in
# the weight.
##############################################################
Wdataclean3 <- function(z, d, zc=rep(1,length(z)), wt = rep(1,length(z)) ) 
{
# Sort by (z, -d) and merge consecutive observations that agree on all
# of (z, d, zc), summing their weights.  zc is a collapse control:
# equal (z, d) pairs with different zc are kept separate.
   ord <- order(z, -d)
   zz <- z[ord]
   di <- d[ord]
   ww <- wt[ord]
   cc <- zc[ord]

   n <- length(di)
   # TRUE at position i when obs i differs from obs i+1 in z, d or zc
   changed <- (zz[-1] != zz[-n]) | (di[-1] != di[-n]) | (cc[-1] != cc[-n])

   # keep the last index of each run of identical (z, d, zc)
   last <- c(which(changed | is.na(changed)), n)

   cw <- cumsum(ww)
   list(value = zz[last], dd = di[last],
        weight = diff(c(0, cw[last])))
}
##############################################################
# This function sorts the data and collapses observations that
# are true ties; the number of tied copies is accumulated in
# the weight.  zc acts as a collapse control: even if
# (z[i], d[i]) = (z[j], d[j]), obs. i and j will not be
# collapsed into one when zc[i] != zc[j].
##############################################################
Wdataclean5 <- function(z,d,zc=rep(1,length(z)),wt=rep(1,length(z)),xmat) 
{  
# Sort by (z, -d), then collapse consecutive observations that agree on
# (z, d, zc), accumulating their weights; the rows of xmat are carried
# along in the same order.  zc acts as a collapse control: ties in
# (z, d) with different zc stay separate.
   xmat <- as.matrix(xmat) 
   if(nrow(xmat) != length(z)) stop("nrow(xmat) must = length(z)")

   niceorder <- order(z,-d)
   sortedz <- z[niceorder]
   sortedd <- d[niceorder]
   sortedw <- wt[niceorder]
   sortedzc <- zc[niceorder]
   # FIX: drop = FALSE keeps xmat a matrix when one row is selected;
   # as.matrix() on the dropped vector transposed it into a column.
   sortedxmat <- xmat[niceorder, , drop = FALSE]

   n <- length(sortedd)
   y1 <- sortedz[-1] != sortedz[-n]
   y2 <- sortedd[-1] != sortedd[-n]
   y3 <- sortedzc[-1] != sortedzc[-n]
   y <- y1 | y2 | y3

   # last index of each run of identical (z, d, zc)
   ind <- c(which(y | is.na(y)), n)

   csumw <- cumsum(sortedw)

   list( value = sortedz[ind], dd = sortedd[ind],
         weight = diff(c(0, csumw[ind])),
         xxmat = sortedxmat[ind, , drop = FALSE] )
}
##############################################################
# The function Wdataclean5() above sorts the data and collapses
# observations that are true ties; the number of tied copies is
# accumulated in the weight.  zc acts as a collapse control:
# even if (z[i], d[i]) = (z[j], d[j]), obs. i and j will not be
# collapsed into one when zc[i] != zc[j].  The matrix xmat is
# also sorted according to (z, -d).
##############################################################
bjtest <- function(y, d, x, beta) {  ## depends on WKM( ), redistF( )
## Empirical likelihood test of the Buckley-James regression estimating
## equation at the candidate coefficient vector `beta`, for right-censored
## responses y (d = 1 uncensored, d = 0 censored) and covariate matrix x.
## Returns the constrained probabilities and -2 log likelihood ratio.
n <- length(y)
x <- as.matrix(x)
xdim <- dim(x)
if ( xdim[1] != n ) stop("check dim of x")
if ( length(beta) != xdim[2] ) stop("check dim of x and beta")

## residuals at the candidate beta, sorted with ties broken so that
## uncensored observations come first
e <- y - as.vector( x %*% beta )
ordere <- order(e, -d)
esort <- e[ordere]
dsort <- d[ordere]
xsort <- as.matrix(x[ordere,])
dsort[length(dsort)] <- 1  #last one as uncensored always

## use KM as F (need to be an n vector prob)
temp0 <- WKM(esort,dsort)
pKM <- temp0$jump
temp <- redistF( y=esort, d=dsort, Fdist=pKM ) 
weight <- temp$weight/n   #the prob weight matrix

## A[i, ]: weighted average of covariate rows with residuals up to
## esort[i], scaled by the KM jump pKM[i] -- presumably the B-J
## estimating-function contribution; verify against redistF()'s weights.
A <- matrix(0, ncol=xdim[2], nrow=n)
for (i in 1:n) if (dsort[i] == 1) { 
  A[i, ] <- t(as.matrix(weight[1:i,i])) %*% xsort[1:i, ]
  A[i, ] <- A[i,]/pKM[i] 
}

myfun <- function(t, q) { t*q }

## censored-data EL with the estimating-equation mean constrained to zero
temp2 <- el.cen.EM2(x=esort, d=dsort, fun=myfun, mu=rep(0, xdim[2]), q=A)
pnew <- temp2$prob
logel1 <- temp0$logel
logel2 <- temp2$loglik 

list(prob=pnew, logel=logel1, logel2=logel2, "-2LLR"=2*(logel1-logel2))
}
bjtest1d <- function(y, d, x, beta) {  ## depends on WKM( ), redistF( )
## One-dimensional version of bjtest(): empirical likelihood test of the
## Buckley-James estimating equation at the scalar candidate slope `beta`.
n <- length(y)                         ## dimension of x must be n x 1.
if ( length(x) != n ) stop("check dim of x")
if ( length(beta) != 1 ) stop("check dim of beta")

## residuals at the candidate slope, sorted; ties put uncensored first
e <- y - beta * x 
ordere <- order(e, -d)
esort <- e[ordere]
dsort <- d[ordere]
xsort <- x[ordere]
dsort[length(dsort)] <- 1  #last one as uncensored always

## use KM as F (need to be an n vector prob)
temp0 <- WKM(esort,dsort)
pold <- temp0$jump
temp <- redistF( y=esort, d=dsort, Fdist=pold ) 
weight <- temp$weight/n   #the prob weight matrix
####pold <- colSums(weight)  ##not needed, just let pold= WKM()$jump

## A[i]: weighted average of covariates with residuals up to esort[i],
## scaled by the KM jump -- presumably the B-J estimating-function
## contribution; verify against redistF()'s weight matrix.
A <- rep(0, n)
for (i in 1:n) if (dsort[i] == 1) { 
  A[i] <- sum( weight[1:i,i] * xsort[1:i] )/pold[i] 
}

AA <- A[ dsort == 1 ]
myfun <- function(t, q) { t*q }

## censored-data EL with the estimating-equation mean constrained to zero
temp2 <- el.cen.EM(x=esort, d=dsort, fun=myfun, mu=0, q=AA)
pnew <- temp2$prob
logel1 <- temp0$logel
logel2 <- temp2$loglik 

list(prob=pnew, logel=logel1, logel2=logel2, "-2LLR"=2*(logel1-logel2))
}
el.cen.EM <- function(x,d, fun=function(t){t},mu,maxit=25,error=1e-9, ...) {
## EM computation of the empirical likelihood ratio for the mean-type
## constraint  sum fun(t_i) p_i = mu  with doubly censored data:
## d = 0 right-censored, 1 uncensored, 2 left-censored.
## NOTE(review): `error` is currently unused -- the EM always runs
## maxit-1 iterations regardless of convergence.
   xvec <- as.vector(x)
   nn <- length(xvec)
   if(nn <= 1) stop ("Need more observations")
   if(length(d)!=nn) stop("length of x and d must agree")
   if(any((d!=0)&(d!=1)&(d!=2)))
     stop("d must be 0(right-censored) or 1(uncensored) or 2(left-censored)")
   if(!is.numeric(xvec)) stop("x must be numeric")
   if(length(mu)!=1) stop("check the dim of constraint mu")

   temp <- Wdataclean2(xvec,d)
   x <- temp$value
   d <- temp$dd
   w <- temp$weight

   ###### change the last obs. among d=1,0, so that d=1
   ###### change the first obs. among d=1,2 so that d=1
   ###### this ensures we got a proper dist. for NPMLE. (no mass escape)
   INDEX10 <- which(d != 2)
   d[ INDEX10[length(INDEX10)] ] <- 1
   INDEX12 <- which(d != 0)
   d[ INDEX12[1] ] <- 1

   xd1 <- x[d==1]
    if(length(xd1) <= 1) stop("need more distinct uncensored obs.")
   funxd1 <- fun(xd1, ...) 
   xd0 <- x[d==0]
   xd2 <- x[d==2]
   wd1 <- w[d==1]
   wd0 <- w[d==0]
   wd2 <- w[d==2]
   m <- length(xd0)
   mleft <- length(xd2)

##############################################
#### do the computation in 4 different cases.#
##############################################
   if( (m>0) && (mleft>0) ) {     ## both right- and left-censored obs. present
   pnew <- el.test.wt(funxd1, wt=wd1, mu)$prob
   n <- length(pnew)
   k <- rep(NA, m)
   for(i in 1:m) { k[i] <- 1+n - sum( xd1 > xd0[i] ) }
   kk <- rep(NA, mleft)
   for(j in 1:mleft) { kk[j] <- sum( xd1 < xd2[j] ) }
   num <- 1
   while(num < maxit) {
     wd1new <- wd1
     sur <- rev(cumsum(rev(pnew)))
     cdf <- 1 - c(sur[-1],0)
     for(i in 1:m)
        {wd1new[k[i]:n] <- wd1new[k[i]:n] + wd0[i]*pnew[k[i]:n]/sur[k[i]]}
     for(j in 1:mleft)
        {wd1new[1:kk[j]] <- wd1new[1:kk[j]] + wd2[j]*pnew[1:kk[j]]/cdf[kk[j]]}
     pnew <- el.test.wt(funxd1, wt=wd1new, mu)$prob
     num <- num +1
     }
   ## FIX: recompute sur/cdf from the final pnew before evaluating the log
   ## likelihood; previously they lagged one EM step behind pnew, unlike
   ## the right-censored-only case below which recomputes sur.
   sur <- rev(cumsum(rev(pnew)))
   cdf <- 1 - c(sur[-1],0)
   logel <- sum(wd1*log(pnew)) + sum(wd0*log(sur[k])) + sum(wd2*log(cdf[kk]))
   logel00 <- NA
   funNPMLE <- NA
  }
  if( (m>0) && (mleft==0) ) {     ## right-censored obs. only
   ## FIX: pass w by name -- the positional call WKM(x,d,w) dropped the
   ## weights into the zc (collapse control) slot and silently used unit
   ## weights (compare the correctly named call in el.cen.EM2).
   temp3 <- WKM(x=x, d=d, w=w)
   logel00 <- temp3$logel
   funNPMLE <- sum( funxd1 * temp3$jump )
#  now the iteration 
   pnew <- el.test.wt(x=funxd1, wt=wd1, mu=mu)$prob
   n <- length(pnew)
   k <- rep(NA,m)
   for(i in 1:m) { k[i] <- 1 + n - sum( xd1 > xd0[i] ) }
   num <- 1
   while(num < maxit) {
     wd1new <- wd1
     sur <- rev(cumsum(rev(pnew)))
     for(i in 1:m)
        {wd1new[k[i]:n] <- wd1new[k[i]:n] + wd0[i]*pnew[k[i]:n]/sur[k[i]]}
     pnew <- el.test.wt(funxd1, wt=wd1new, mu)$prob
     num <- num +1
     }
   sur <- rev(cumsum(rev(pnew)))
   logel <- sum( wd1*log(pnew)) + sum( wd0*log(sur[k]) )
  }
  if( (m==0) && (mleft>0) ) {     ## left-censored obs. only
   kk <- rep(NA, mleft)
   for(j in 1:mleft) { kk[j] <- sum( xd1 < xd2[j] ) }
   pnew <- el.test.wt(funxd1, wt=wd1, mu)$prob
   n <- length(pnew)
   num <- 1
   while(num < maxit) {
     wd1new <- wd1
     cdf <- cumsum(pnew) 
     for(j in 1:mleft)
        {wd1new[1:kk[j]] <- wd1new[1:kk[j]] + wd2[j]*pnew[1:kk[j]]/cdf[kk[j]]}
     pnew <- el.test.wt(funxd1, wt=wd1new, mu)$prob
     num <- num +1
     }
   ## FIX: recompute cdf from the final pnew (same one-step lag as above).
   cdf <- cumsum(pnew)
   logel <- sum( wd1*log(pnew)) + sum( wd2*log( cdf[kk] ) )
   logel00 <- NA   #### do I need a left WKM() ???
   funNPMLE <- NA 
  }
  if( (m==0) && (mleft==0) ) {    ## no censoring at all
    funNPMLE <- sum( funxd1 * wd1/sum(wd1) )
    logel00 <- sum( wd1*log( wd1/sum(wd1) ) )
    pnew <- el.test.wt(funxd1, wt=wd1, mu)$prob
    logel <- sum( wd1*log(pnew) ) 
  }
# get ready for exit
  tval <- 2*(logel00 - logel)
  list(loglik=logel, times=xd1, prob=pnew, funMLE=funNPMLE, 
             "-2LLR"=tval, Pval= 1-pchisq(tval, df=1) )
}
el.cen.EM2 <- function(x,d,xc=1:length(x),fun,mu,maxit=25,error=1e-9,...){
####
#### Vector-constraint version of el.cen.EM(): tests sum fun(t_i) p_i = mu
#### where fun() may return a matrix (one column per constraint), for
#### doubly censored data (d = 0 right-, 1 un-, 2 left-censored).
#### NOTE(review): `error` is currently unused -- the EM always runs
#### maxit-1 iterations regardless of convergence.
####
#### xc is collaps control: if for index i and j, (x[] d[]) are identical
####     should they be merged into one obs. with weight 2?
####     if xc[i] != xc[j] then they do not merge. 
####     For regression applications, you should not merge -- use
####     xc = 1:length(x) .
####     The order of censoring is based on x, not fun(x).
####
   xvec <- as.vector(x) 
   d <- as.vector(d) 
   mu <- as.vector(mu)
   xc <- as.vector(xc)
   n <- length(d) 

   if (length(xvec)!=n) stop ("length of d and x must agree")
   if (length(xc)!=n) stop ("length of xc and d must agree")
   if(n <= 1) stop ("Need more observations")
   if(any((d!=0)&(d!=1)&(d!=2)))
     stop("d must be 0(right-censored) or 1(uncensored) or 2(left-censored)")
   if(!is.numeric(xvec)) stop("x must be numeric")
   if(!is.numeric(mu)) stop("mu must be numeric")

   funx <- as.matrix(fun(xvec, ...))   ##get the matrix before sorting xvec
   pp <- ncol(funx)
   if(length(mu)!=pp) stop("length of mu and ncol of fun(x) must agree")

   temp <- Wdataclean5(z=xvec,d,zc=xc, xmat=funx)  ## collaps control zc
   x <- temp$value
   d <- temp$dd
   w <- temp$weight
   funx <- temp$xxmat     ## make sure funx and x are in same order

   ###### change the last obs. among d=1,0, so that d=1
   ###### change the first obs. among d=1,2 so that d=1
   ###### this ensures we got a proper CDF for NPMLE. (no mass escapes)
   INDEX10 <- which(d != 2)
   d[ INDEX10[length(INDEX10)] ] <- 1
   INDEX12 <- which(d != 0)
   d[ INDEX12[1] ] <- 1

   xd1 <- x[d==1]
    if(length(xd1) <= 1) stop("need more distinct uncensored obs.")

   funxd1 <- funx[d==1,] 
   xd0 <- x[d==0]
   xd2 <- x[d==2]
   wd1 <- w[d==1]
   wd0 <- w[d==0]
   wd2 <- w[d==2]
   m <- length(xd0)
   mleft <- length(xd2)

##############################################
#### do the computation in 4 different cases.#
##############################################
 if( (m>0) && (mleft>0) ) { 
   ## both right- and left-censored obs. present
   pnew <- el.test.wt2(x=funxd1, wt=wd1, mu=mu)$prob
   n <- length(pnew)
   ## k[i]/kk[j]: positions delimiting the uncensored points that a
   ## right-/left-censored observation redistributes its mass over
   k <- rep(NA, m)
   for(i in 1:m) { k[i] <- 1+n - sum( xd1 > xd0[i] ) }
   kk <- rep(NA, mleft)
   for(j in 1:mleft) { kk[j] <- sum( xd1 < xd2[j] ) }
   num <- 1
   while(num < maxit) {
     wd1new <- wd1
     sur <- rev(cumsum(rev(pnew)))
     cdf <- 1 - c(sur[-1],0)
     for(i in 1:m)
        {wd1new[k[i]:n] <- wd1new[k[i]:n] + wd0[i]*pnew[k[i]:n]/sur[k[i]]}
     for(j in 1:mleft) {
     wd1new[1:kk[j]] <- wd1new[1:kk[j]] + wd2[j]*pnew[1:kk[j]]/cdf[kk[j]]}
     pnew <- el.test.wt2(x=funxd1, wt=wd1new, mu=mu)$prob
     num <- num +1
     }
   ## NOTE(review): sur/cdf here come from the next-to-last pnew (they
   ## are updated at the top of the loop); the m>0,mleft==0 case below
   ## recomputes sur from the final pnew -- confirm intended.
   logel <- sum(wd1*log(pnew)) + sum(wd0*log(sur[k])) + sum(wd2*log(cdf[kk]))
   logel00 <- NA
   }
  if( (m>0) && (mleft==0) ) {
   ## right-censored obs. only
   pnew <- el.test.wt2(x=funxd1, wt=wd1, mu=mu)$prob
   n <- length(pnew)
   k <- rep(NA,m)
   for(i in 1:m) { k[i] <- 1 + n - sum( xd1 > xd0[i] ) }
   num <- 1
   while(num < maxit) {
     wd1new <- wd1
     sur <- rev(cumsum(rev(pnew)))
     for(i in 1:m)
        {wd1new[k[i]:n] <- wd1new[k[i]:n] + wd0[i]*pnew[k[i]:n]/sur[k[i]]}
     pnew <- el.test.wt2(x=funxd1, wt=wd1new, mu=mu)$prob
     num <- num+1
     }
   sur <- rev(cumsum(rev(pnew)))
   logel <- sum( wd1*log(pnew)) + sum( wd0*log(sur[k]) )
   logel00 <- WKM(x,d, zc=xc, w)$logel
   }
  if( (m==0) && (mleft>0) ) {
   ## left-censored obs. only
   kk <- rep(NA, mleft)
   for(j in 1:mleft) { kk[j] <- sum( xd1 < xd2[j] ) }
   pnew <- el.test.wt2(x=funxd1, wt=wd1, mu=mu)$prob
   n <- length(pnew)
   num <- 1
   while(num < maxit) {
     wd1new <- wd1
     cdf <- cumsum(pnew) 
     for(j in 1:mleft)
       {wd1new[1:kk[j]] <- wd1new[1:kk[j]] + wd2[j]*pnew[1:kk[j]]/cdf[kk[j]]}
     pnew <- el.test.wt2(x=funxd1, wt=wd1new, mu=mu)$prob
     num <- num+1
     }
   ## NOTE(review): cdf here is one EM step behind the final pnew -- same
   ## lag as in the doubly-censored case above.
   logel <- sum( wd1*log(pnew)) + sum( wd2*log( cdf[kk] ) )
   logel00 <- NA   ### ???  do I need a left WKM ??
   }
  if( (m==0) && (mleft==0) ) { 
    ## no censoring at all: single constrained weighted EL computation
    num <- 0
    pnew <- el.test.wt2(x=funxd1, wt=wd1, mu)$prob
    logel <- sum( wd1*log(pnew) ) 
    logel00 <- sum( wd1*log( wd1/sum(wd1) ) )
  }
  tval <- 2*(logel00 - logel)
  list(loglik=logel, times=xd1, prob=pnew, iters=num, 
             "-2LLR"=tval, Pval= 1-pchisq(tval, df=length(mu)) )
}
el.cen.test <- function(x,d,fun=function(x){x},mu,error=1e-8,maxit=15)
{
## Empirical likelihood ratio test of  sum fun(t_i) p_i = mu  for
## right-censored data, computed by sequential quadratic programming
## (solve3.QP) instead of the EM used by el.cen.EM().
## d: 1 = uncensored, 0 = right-censored.
   xvec <- as.vector(x)
   n <- length(xvec)
   if(n <= 2) stop ("Need more observation")
   if(length(d)!=n) stop("length of x and d must agree")
   if(any((d!=0)&(d!=1)))
      stop ("d must be 0(right-censored) or 1(uncensored)")
   if(!is.numeric(xvec)) stop("x must be numeric")
   if(length(mu)!=1) stop("check the dim of constraint mu")

   temp <- Wdataclean2(xvec,d)
   dd <- temp$dd
   dd[length(dd)] <- 1

   if(all(dd==1)) stop("there is no censoring, please use el.test()")

   xx <- temp$value
   n <- length(xx) 
   ww <- temp$weight
   ## FIX: pass the weights by name -- the positional call WKM(xx, dd, ww)
   ## dropped ww into the zc (collapse control) slot, so the NPMLE was
   ## silently computed with unit weights whenever the data had ties.
   w0 <- WKM(x=xx, d=dd, w=ww)$jump
   uncenw0 <- w0[dd==1]
   funxx <- fun(xx)

   if((mu>max(funxx))|(mu<min(funxx))) stop("check the value of mu/fun")

   xbar <- sum(funxx[dd==1] * uncenw0)

    #********* begin initial calculation******************
    # get vector dvec which is the first derivative vector

     dvec01 <- uncenw0
     rk <- 1:n            ######## rank(sortx) = 1:n  yes!
     cenrk <- rk[dd==0]
     mm <- length(cenrk)
     dvec02 <- rep(0,mm)
     for(j in 1:mm)  dvec02[j] <- sum(w0[cenrk[j]:n])
     dvec00 <- rep(0,n)
     dvec00[dd==1] <- dvec01
     dvec00[dd==0] <- dvec02
     dvec0 <- ww/dvec00

     # get matix Dmat which is Decompition of 2nd derivative matrix.
     # Dmat0 <- diag(1/dvec0)
     Dmat0 <- dvec00/sqrt(ww)

     # get constraint matrix Amat
     mat <- matrix(rep(dd,mm),ncol=mm, nrow=n)
     for(i in 1:mm)
     {
        mat[1:cenrk[i],i] <- 0
        mat[cenrk[i],i] <- -1
     }

    Amat <- as.matrix(cbind(dd, funxx * dd, mat))

     # get constraint vector bvec
     bvec0 <- c(0,as.vector(mu-xbar),rep(0,mm))

     # Use solve3.QP to maximize the loglikelihood function
     value0<-solve3.QP(Dmat0,dvec0,Amat,bvec0,meq=mm+2,factorized=TRUE)

     w <- dvec00 + value0$solution

     if(any(w<=0))
     stop("There is no probability satisfying the constraints")

     #**********end initial calculation **********************
     #**********begin iteration ******************************
     # update vector Dmat, dvec and bvec after initial calculation
     bvec <- rep(0,mm+2)
     diff <- 10
     m <- 0
     while( (diff>error) & (m<maxit) )
      {
         dvec <- ww/w
         # get matix Dmat
         #  Dmat <- diag(w)
         Dmat <- w/sqrt(ww)
         value0 <- solve3.QP(Dmat,dvec,Amat,bvec,meq=mm+2,factorized=TRUE)
         w <- w + value0$solution
         #diff <- sum(value0$solution^2)
         diff <- sum(abs( value0$solution) )
         m <- m+1
       }
     #**********end iteration ******************************

     lik00 <- sum(ww*log(dvec00))

     tval <- 2*(lik00 - sum(ww*log(w)))
   list("-2LLR"=tval, Pval=1-pchisq(tval, df=1),
            weights=w[dd==1], xtime=xx[dd==1], iteration=m, error=diff)
}

el.ltrc.EM <- function(y,x,d,fun=function(t){t},mu,maxit=30,error=1e-9) {
## EM computation of the empirical likelihood ratio for the constraint
## sum fun(t_i) p_i = mu with left-truncated (truncation times y, -Inf
## meaning "not truncated") and right-censored (d = 0) data.  With no
## truncation it delegates to el.cen.EM(); with no censoring it
## delegates to el.trun.test().
## NOTE(review): `error` is currently unused; the EM runs maxit iterations.
   yvec <- as.vector(y)
   xvec <- as.vector(x)
   nn <- length(xvec)
   if(nn <= 1) stop ("Need more observations")
   if(length(d)!=nn) stop("length of x and d must agree")
   if(any((d!=0)&(d!=1)))
     stop("d must be 0(right-censored) or 1(uncensored)")
   if(!is.numeric(xvec)) stop("x must be numeric")
   if(length(mu)!=1) stop("check the dim of constraint mu")

   yvec <- yvec[yvec > -Inf] 
   N <- length(yvec)
   if ( N == 0 ) {     ## no truncation at all: use the censored-data EM
    temp1 <- el.cen.EM(xvec,d,fun=fun,mu=mu)
    WILKS <- temp1$"-2LLR"
    pnew <- temp1$prob
   }

   temp <- Wdataclean2(xvec,d)
   x <- temp$value
   d <- temp$dd
   w <- temp$weight

   ###### change the last obs.'s censoring indicator, so that d=1
   ###### this ensures we got a proper df for NPMLE. (no mass escape)
   d[length(d)] <- 1

   xd1 <- x[d==1]
   funxd1 <- fun(xd1) 
   n <- length(xd1) 
    if(n <= 1) stop("need more distinct uncensored obs.")
   xd0 <- x[d==0]
   wd1 <- w[d==1]
   wd0 <- w[d==0]
   mright <- length(xd0)

   ## FIX: both branches below must be skipped when N == 0.  The original
   ## code fell through after the el.cen.EM() shortcut and then either
   ## called el.trun.test() with an empty truncation vector or broke the
   ## EM updates (zero-column outer products), clobbering the computed
   ## WILKS/pnew.
   if ( N > 0 && mright == 0 ) {   ## truncation but no censoring
      temp2 <- el.trun.test(yvec,xd1,fun=fun,mu=mu)
      WILKS <- temp2$"-2LLR"
      pnew <- temp2$NPMLEmu
   } 
  if( N > 0 && mright > 0 ) {      ## truncation and censoring: full EM
   p0 <- LTRC(x,d,w,yvec)$survjump
   pnew <- p0
   k <- rep(NA,mright)
   for(i in 1:mright) { k[i] <- 1 + n - sum( xd1 > xd0[i] ) }
   indi <- function(u,v){ as.numeric(u > v) }
   uij <- outer(xd1,yvec,FUN="indi")
   num <- 1
   while(num <= maxit) {
     wd1new <- wd1
##########right censor 
     sur <- rev(cumsum(rev(pnew)))
     for(i in 1:mright)
        {wd1new[k[i]:n] <- wd1new[k[i]:n] + wd0[i]*pnew[k[i]:n]/sur[k[i]]}
#########left truncated
     pvec <- as.vector( pnew %*% uij )
     wd1new <- wd1new + as.vector(rowsum(t(pnew*(1-uij))/pvec, group=rep(1,N)))
######### weighted computation
     pnew <- el.test.wt(funxd1, wt=wd1new, mu=mu)$prob
     num <- num +1
     }
   sur <- rev(cumsum(rev(pnew)))
   pvec <- as.vector( pnew %*% uij )
   logel <- sum(wd1*log(pnew))+sum(wd0*log(sur[k]))-sum(log(pvec))
   sur0 <- rev(cumsum(rev(p0)))
   pvec0 <- as.vector( p0 %*% uij )
   logel0 <- sum(wd1*log(p0))+sum(wd0*log(sur0[k]))-sum(log(pvec0))
   WILKS <- 2*(logel0 - logel)
  }
  list(times=xd1, prob=pnew, "-2LLR"= WILKS, Pval=1-pchisq(WILKS,df=1))
}
##########################################################
####### el.test(), from Owen, Modified by Mai Zhou #######
##########################################################

el.test <- function( x, mu, lam, maxit=25, gradtol=1e-7, 
                        svdtol = 1e-9, itertrace=FALSE ){
#
#    Owen's empirical likelihood ratio test of H0: E X = mu, where x is
# an n x p matrix of observations.  Returns -2 log ELR (asymptotically
# chi-square with df = p under H0), the Lagrange multiplier lambda on
# the original scale, the gradient, Hessian, llogp weights, and the
# number of outer Newton iterations.  `lam` is an optional starting
# value for the multiplier.
#
x <- as.matrix(x)
n <- nrow(x)
p <- ncol(x)
mu <- as.vector(mu)
if( length(mu) !=p )
  stop("mu must have same dimension as observation vectors.")
if( n <= p )
  stop("Need more observations than length(mu) in el.test().")

z <- t( t(x) -mu )

#
#    Scale the problem, by a measure of the size of a 
# typical observation.  Add a tiny quantity to protect
# against dividing by zero in scaling.  Since z*lam is
# dimensionless, lam must be scaled inversely to z.
#
TINY <- sqrt( .Machine$double.xmin )
scale <- mean( abs(z) ) + TINY
z <- z/scale
if( !missing(lam) ){
  lam <- as.vector(lam)
  lam <- lam*scale
  # discard a user-supplied lam that is worse than lam = 0
  if( logelr(z,rep(0,p),lam)>0 )lam <- rep(0,p)
}
if(  missing(lam)  )
  lam <- rep(0,p)
#
#     Take some precaution against users specifying
# tolerances too small.
#

if(  svdtol < TINY )svdtol <- TINY
if(  gradtol < TINY)gradtol <- TINY

#
#    Preset the weights for combining Newton and gradient
# steps at each of 16 inner iterations, starting with
# the Newton step and progressing towards shorter vectors
# in the gradient direction.  Most commonly only the Newton
# step is actually taken, though occasional step reductions
# do occur.
#

nwts <- c( 3^-c(0:3), rep(0,12) )
gwts <- 2^( -c(0:(length(nwts)-1)))
gwts <- (gwts^2 - nwts^2)^.5
gwts[12:16] <- gwts[12:16] * 10^-c(1:5)

#
#    Iterate, finding the Newton and gradient steps, and
# choosing a step that reduces the objective if possible.
#

nits <- 0
gsize <- gradtol + 1
while(  nits<maxit && gsize > gradtol  ){
  arg  <- 1 + z %*% lam
  wts1 <- as.vector( llogp(arg, 1/n) )
  wts2 <- as.vector( -llogpp(arg, 1/n) )^.5
  grad <- as.matrix( -z*wts1 )
  #############grad <- as.vector( apply( grad, 2, sum ) )
  grad <- as.vector(rowsum(grad, rep(1, nrow(grad)) ) )
  gsize <- mean( abs(grad) )
  hess <- z*wts2
#                                   -1
#    The Newton step is -(hess'hess)    grad,
#  where the matrix hess is a sqrt of the Hessian.
#  Use svd on hess to get a stable solution.
#

## may try La.svd() in R (v. > 1.0) for better LAPACK.
## or use QR decomposition on hess to solve it.

  svdh <- svd( hess )
##  svdh <- La.svd( hess )
  if( min(svdh$d) < max(svdh$d)*svdtol )
    svdh$d <- svdh$d + max(svdh$d)*svdtol
  nstep <- svdh$v %*% (t(svdh$u)/svdh$d)
## nstep <- t(svdh$vt) %*% (t(svdh$u)/svdh$d)
  nstep <- as.vector( nstep %*% matrix(wts1/wts2,n,1) )

  gstep <- -grad
  if(  sum(nstep^2) < sum(gstep^2) )
    gstep <- gstep*(sum(nstep^2)^.5/sum(gstep^2)^.5)
  ologelr <- -sum( llog(arg,1/n) )
  ninner <- 0
  # inner line search: blend Newton and gradient steps until the
  # objective decreases
  for(  i in 1:length(nwts) ){
    nlogelr <- logelr( z,rep(0,p),lam+nwts[i]*nstep+gwts[i]*gstep )
    if( nlogelr < ologelr ){
      lam <- lam+nwts[i]*nstep+gwts[i]*gstep
      ninner <- i
      break
    }
  }
  nits <- nits+1
  # no acceptable step found: force the outer loop to stop
  if(  ninner==0  )nits <- maxit
  if( itertrace )
    print( c(lam, nlogelr, gsize, ninner) )
}

list( "-2LLR" = -2*nlogelr, Pval = 1-pchisq(-2*nlogelr, df=p),
     lambda = lam/scale, grad=grad*scale,
 hess=t(hess)%*%hess*scale^2, wts=wts1, nits=nits )
}

logelr <- function( x, mu, lam ){ 
# Empirical log likelihood ratio at mean mu and Lagrange multiplier lam,
# using the pseudo-logarithm llog() so the objective stays finite for
# non-positive arguments.
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  if (n <= p)
    stop("Need more observations than variables in logelr.")
  mu <- as.vector(mu)
  if (length(mu) != p)
    stop("Length of mean doesn't match number of variables in logelr.")

  centered <- sweep(x, 2, mu)   # same as t(t(x) - mu)
  denom <- 1 + centered %*% lam
  -sum(llog(denom, 1/n))
}

#
#    The function llog() is equal to the natural
#  logarithm on the interval from eps >0 to infinity.
#  Between -infinity and eps, llog() is a quadratic.
#  llogp() and llogpp() are the first two derivatives
#  of llog().  All three functions are continuous
#  across the "knot" at eps.
#
#    A variation with a second knot at a large value
#  M did not appear to work as well.
#
#    The cutoff point, eps, is usually 1/n, where n
#  is the number of observations.  Unless n is extraordinarily
#  large, dividing by eps is not expected to cause numerical
#  difficulty.
#

llog <- function( z, eps ){
# Pseudo-logarithm: equals log(z) for z >= eps and continues below eps
# as the quadratic that matches log, log', log'' at eps, so the value is
# finite on the whole real line.
  out <- z
  small <- z < eps
  r <- z[small] / eps
  out[small] <- log(eps) - 1.5 + 2 * r - 0.5 * r^2
  out[!small] <- log(z[!small])
  out
}

llogp <- function( z, eps ){
# First derivative of llog(): 1/z above eps, linear continuation below.
  out <- z
  small <- z < eps
  out[small] <- 2.0/eps - z[small]/eps^2
  out[!small] <- 1/z[!small]
  out
}

llogpp <- function( z, eps ){
# Second derivative of llog(): -1/z^2 above eps, constant -1/eps^2 below.
  out <- z
  small <- z < eps
  out[small] <- -1.0/eps^2
  out[!small] <- -1.0/z[!small]^2
  out
}

el.test.wt <- function(x, wt, mu) {
# Weighted one-dimensional empirical likelihood probabilities: finds the
# Lagrange multiplier lam0 solving sum wt*(x-mu)/(sum(wt)+lam0*(x-mu)) = 0
# by bracketing and uniroot(), and returns p_i = wt_i/(sum(wt)+lam0*(x_i-mu)).
if( length(mu) != 1 ) stop("mu must be a scalar")

cen <- x - mu
total <- sum(wt)
step <- 0.02 * total / max(abs(cen))

lamfun <- function(lam,xmu,wt,allw) { sum(wt*xmu/(allw+lam*xmu)) }

s0 <- lamfun(0, cen, wt, total)
if (s0 == 0) {
  lam0 <- 0
} else {
  if (s0 > 0) {
    # score positive at 0: root lies to the right, expand the bracket up
    lo <- 0
    up <- step
    while (lamfun(up, cen, wt, total) > 0) up <- up + step
  } else {
    # score negative at 0: root lies to the left, expand the bracket down
    up <- 0
    lo <- -step
    while (lamfun(lo, cen, wt, total) < 0) lo <- lo - step
  }
  lam0 <- uniroot(lamfun, lower = lo, upper = up, tol = 1e-9,
                  xmu = cen, wt = wt, allw = total)$root
}
list(x = x, wt = wt, prob = wt/(total + lam0*cen))
}
el.test.wt2 <- function(x, wt, mu, maxit=25, gradtol=1e-7, Hessian = FALSE, 
                        svdtol = 1e-9, itertrace=FALSE ){
#
#    Weighted version of el.test(): computes the probabilities p_i
# maximizing sum wt_i log p_i subject to sum p_i (x_i - mu) = 0, by a
# damped Newton iteration on the Lagrange multiplier.  Unlike el.test(),
# the inner line search here monitors the gradient size (via gradf)
# instead of the log likelihood (logwelr is commented out below).
#
x <- as.matrix(x)
n <- nrow(x)
p <- ncol(x)
mu <- as.vector(mu)
if( length(mu) !=p )
  stop("mu must have same dimension as observation vectors.")
if( n <= p )
  stop("Need more observations than length(mu) in el.test.wt2().")

z <- t( t(x) -mu )

wt <- as.vector(wt) 
if( length(wt) != n ) stop("length of wt must be n=nrow(x)")
if( any(wt < 0) ) stop("wt must be >= 0")

allw <- sum(wt)

#
#    Scale the problem, by a measure of the size of a 
# typical observation.  Add a tiny quantity to protect
# against dividing by zero in scaling.  Since z*lam is
# dimensionless, lam must be scaled inversely to z.
#
TINY <- sqrt( .Machine$double.xmin )
scale <- mean( abs(z) ) + TINY
z <- z/scale
##### if( !missing(lam) ){
##### lam <- as.vector(lam)
##### lam <- lam*scale
##### if( logelr(z,rep(0,p),lam)>0 )lam <- rep(0,p)
#####  }
##### if(  missing(lam)  )
  lam <- rep(0,p)
#
#     Take some precaution against users specifying
# tolerances too small.
#

if(svdtol < TINY ) svdtol <- TINY
if(gradtol < TINY) gradtol <- TINY

#
#    Preset the weights for combining Newton and gradient
# steps at each of 16 inner iterations, starting with
# the Newton step and progressing towards shorter vectors
# in the gradient direction.  Most commonly only the Newton
# step is actually taken, though occasional step reductions
# do occur.
#

nwts <- c( 3^-c(0:3), rep(0,12) )
gwts <- 2^( -c(0:(length(nwts)-1)))
gwts <- (gwts^2 - nwts^2)^.5
gwts[12:16] <- gwts[12:16] * 10^-c(1:5)

#
#    Iterate, finding the Newton and gradient steps, and
# choosing a step that reduces the objective if possible.
#

nits <- 0
gsize <- gradtol + 1
while(  nits<maxit && gsize > gradtol  ){

  arg  <- allw + z %*% lam
###  wts1 <- as.vector( llogp(arg, 1/n) )
  wts2 <- as.vector( -llogpp(arg, 1/n) )^.5
###  wtwts1 <- wt*wts1 
###  grad <- as.matrix( z*wtwts1 )
  #############grad <- as.vector( apply( grad, 2, sum ) )
###  grad <- as.vector(rowsum(grad, rep(1, nrow(grad)) ) )
  grad <- gradf(z, wt, lam) 
  gsize <- mean( abs(grad) )
  wtwts2 <- sqrt(wt)*wts2
  hess <- z*wtwts2
#                                   -1
#    The Newton step is -(hess'hess)    grad,
#  where the matrix hess is a sqrt of the Hessian.
#  Use svd on hess to get a stable solution.
#

## may try La.svd() in R (v. > 1.0) for better LAPACK
## or better, use QR decomposition on hess to solve it.
## QR is faster than svd! 

##  svdh <- svd( hess )
####  svdh <- La.svd( hess )
##  if( min(svdh$d) < max(svdh$d)*svdtol )
##    svdh$d <- svdh$d + max(svdh$d)*svdtol
##  nstep <- svdh$v %*% (t(svdh$u)/svdh$d)
#### nstep <- t(svdh$vt) %*% (t(svdh$u)/svdh$d)
##  nstep <- as.vector( nstep %*% matrix(wts1/wts2,n,1) )

svdh <- La.svd( hess )
nstep <- t(svdh$vt) %*% (svdh$vt/(svdh$d)^2)
nstep <- as.vector( nstep %*% grad )

  gstep <- grad
  if( sum(nstep^2) < sum(gstep^2) )
    gstep <- gstep*(sum(nstep^2)^.5/sum(gstep^2)^.5)

###  ologelr <- logwelr( z, rep(0,p), wt, lam ) 

  ninner <- 0
  for(  i in 1:length(nwts) ){

###   nlogelr <- logwelr(z,rep(0,p),wt, lam+nwts[i]*nstep+gwts[i]*gstep )
    ngrad <- gradf(z,wt, lam+nwts[i]*nstep+gwts[i]*gstep )
    ngsize <- mean( abs(ngrad) ) 

    if( ngsize  < gsize  ){
      lam <- lam+nwts[i]*nstep+gwts[i]*gstep
      ninner <- i
      break
    }
  }
  nits <- nits+1
  if( ninner==0 )nits <- maxit
  if( itertrace )
    ## FIX: nlogelr is never computed in this function (logwelr() calls
    ## are commented out), so itertrace=TRUE used to stop with an
    ## "object 'nlogelr' not found" error.  Trace the new gradient size
    ## instead.
    print( c(lam, ngsize, gsize, ninner) )
}

Hess <- NA 
if( Hessian ) Hess <- t(hess)%*%hess*scale^2

list( prob= as.vector(wt/as.vector(allw + z %*% lam)), 
       lambda = lam/scale, grad=grad*scale, hess=Hess, nits=nits )
}

#
#logwelr <- function(x, mu, wt, lam){ 
#x <- as.matrix(x)
#n <- nrow(x)
#p <- ncol(x)
#if(  n <= p  )
#  stop("Need more observations than variables in logelr.")
#mu <- as.vector(mu)
#if(  length(mu) != p  )
#  stop("Length of mean doesn't match number of variables in logelr.")
#
#allw <- sum(wt) 
#z <- t( t(x) -mu )
#arg <- allw/(allw + z %*% lam)
#return( sum( wt* llog(arg,1/n) ) ) 
#}
#

gradf <- function(z,wt,lam) {
# Gradient with respect to lam of the weighted (modified) empirical log
# likelihood: column sums of z scaled by wt * llogp(allw + z %*% lam, 1/n).
  total <- sum(wt)
  denom <- total + z %*% lam
  nobs <- length(wt)
  dlog <- as.vector(llogp(denom, 1/nobs))
  contrib <- as.matrix(z * (wt * dlog))
  as.vector(colSums(contrib))
}

##########################################################
#    The function llog() is equal to the natural
#  logarithm on the interval from eps >0 to infinity.
#  Between -infinity and eps, llog() is a quadratic.
#  llogp() and llogpp() are the first two derivatives
#  of llog().  All three functions are continuous
#  across the "knot" at eps.
#
#    A variation with a second knot at a large value
#  M did not appear to work as well.
#
#    The cutoff point, eps, is usually 1/n, where n
#  is the number of observations.  Unless n is extraordinarily
#  large, dividing by eps is not expected to cause numerical
#  difficulty.
#
#### These functions have been defined inside el.test(). No need to repeat.
#
#llog <- function( z, eps ){
#
#ans <- z
#lo <- (z<eps)
#ans[ lo  ] <- log(eps) - 1.5 + 2*z[lo]/eps - 0.5*(z[lo]/eps)^2
#ans[ !lo ] <- log( z[!lo] )
#ans
#}
#
#llogp <- function( z, eps ){
#
#ans <- z
#lo <- (z<eps)
#ans[ lo  ] <- 2.0/eps - z[lo]/eps^2
#ans[ !lo ] <- 1/z[!lo]
#ans
#}
#
#llogpp <- function( z, eps ){
#
#ans <- z
#lo <- (z<eps)
#ans[ lo  ] <- -1.0/eps^2
#ans[ !lo ] <- -1.0/z[!lo]^2
#ans
#}
##########################################################
el.test.wt3 <- function(x, wt, mu, maxit=25, gradtol=1e-7, Hessian = FALSE, 
                        svdtol = 1e-9, itertrace=FALSE ){
#
# Weighted empirical likelihood test that the weighted mean of the rows
# of x equals mu; observation i carries weight wt[i] >= 0.  Damped
# Newton iteration on the Lagrange multiplier lam, in the style of
# Owen's el.test().  Relies on llogpp() (second derivative of the
# safeguarded logarithm, defined inside el.test() in this package) and
# on the helpers logwelr() and gradf() defined elsewhere in this file.
#
# Input:
#   x         n x p matrix (or vector) of observations
#   wt        vector of n nonnegative weights
#   mu        hypothesized mean vector, length p
#   maxit     maximum number of (outer) Newton iterations
#   gradtol   convergence tolerance on the mean absolute gradient
#   Hessian   if TRUE, also return the (unscaled-back) Hessian matrix
#   svdtol    kept for interface compatibility; clipped to TINY below
#   itertrace if TRUE, print c(lam, logelr, gsize, ninner) per iteration
# Output: list(prob, lambda, grad, hess, nits)
#
x <- as.matrix(x)
n <- nrow(x)
p <- ncol(x)
mu <- as.vector(mu)
if( length(mu) !=p )
  stop("mu must have same dimension as observation vectors.")
if( n <= p )
  stop("Need more observations than length(mu) in el.test.wt3().")

z <- t( t(x) -mu )

wt <- as.vector(wt) 
if( length(wt) != n ) stop("length of wt must equal to n=nrow(x)")
if( any(wt < 0) ) stop("wt must be >= 0")

allw <- sum(wt)

#
#    Scale the problem by a measure of the size of a typical
# observation.  Add a tiny quantity to protect against dividing by
# zero in scaling.  Since z*lam is dimensionless, lam must be scaled
# inversely to z; the scale is undone in the returned list.
#
TINY <- sqrt( .Machine$double.xmin )
scale <- mean( abs(z) ) + TINY
z <- z/scale
lam <- rep(0,p)

#
#     Take some precaution against users specifying
# tolerances too small.
#
if(svdtol < TINY ) svdtol <- TINY
if(gradtol < TINY) gradtol <- TINY

#
#    Preset the weights for combining Newton and gradient steps at
# each of 16 inner iterations, starting with the full Newton step and
# progressing towards shorter vectors in the gradient direction.
# Most commonly only the Newton step is taken, though occasional
# step reductions do occur.
#
nwts <- c( 3^-c(0:3), rep(0,12) )
gwts <- 2^( -c(0:(length(nwts)-1)))
gwts <- (gwts^2 - nwts^2)^.5
gwts[12:16] <- gwts[12:16] * 10^-c(1:5)

#
#    Iterate, finding the Newton and gradient steps, and choosing a
# step that reduces the objective if possible.
#
nits <- 0
gsize <- gradtol + 1
while(  nits<maxit && gsize > gradtol  ){

  arg  <- allw + z %*% lam
  wts2 <- as.vector( -llogpp(arg, 1/n) )^.5
  grad <- gradf(z, wt, lam) 
  gsize <- mean( abs(grad) )
  wtwts2 <- sqrt(wt)*wts2
  hess <- z*wtwts2        # a square root of the Hessian matrix
#                                   -1
#    The Newton step is -(hess'hess)    grad; use the SVD of hess
#  for a numerically stable solve.
#
  svdh <- La.svd( hess )
  nstep <- t(svdh$vt) %*% (svdh$vt/(svdh$d)^2)
  nstep <- as.vector( nstep %*% grad )

  gstep <- grad
  if( sum(nstep^2) < sum(gstep^2) )
    gstep <- gstep*(sum(nstep^2)^.5/sum(gstep^2)^.5)

  ologelr <- logwelr( z, rep(0,p), wt, lam ) 

  ninner <- 0
  for(  i in 1:length(nwts) ){
    nlogelr <- logwelr(z,rep(0,p),wt, lam+nwts[i]*nstep+gwts[i]*gstep )
    if( nlogelr  < ologelr  ){
      lam <- lam+nwts[i]*nstep+gwts[i]*gstep
      ninner <- i
      break
    }
  }
  nits <- nits+1
  if( ninner==0 )nits <- maxit   # no trial step improved: give up
  if( itertrace )
    print( c(lam, nlogelr, gsize, ninner) )
}

Hess <- NA 
if( Hessian ) Hess <- t(hess)%*%hess*scale^2

list( prob= as.vector(wt/as.vector(allw + z %*% lam)), 
       lambda = lam/scale, grad=grad*scale, hess=Hess, nits=nits )
}


logwelr <- function(x, mu, wt, lam){ 
# Weighted empirical log likelihood criterion at multiplier lam:
# sum_i wt_i * llog( W / (W + (x_i - mu)'lam), 1/n ),  W = sum(wt).
# Helper for el.test.wt3(); llog() is the safeguarded logarithm
# defined inside el.test() elsewhere in this package.
  x <- as.matrix(x)
  nobs <- nrow(x)
  nvar <- ncol(x)
  if (nobs <= nvar)
    stop("Need more observations than variables in logelr.")
  mu <- as.vector(mu)
  if (length(mu) != nvar)
    stop("Length of mean doesn't match number of variables in logelr.")

  total_wt <- sum(wt)
  centered <- sweep(x, 2, mu)                # same as t(t(x) - mu)
  ratio <- total_wt/(total_wt + centered %*% lam)
  sum(wt * llog(ratio, 1/nobs))
}


gradf <- function(z,wt,lam) {
# Gradient with respect to lam of the weighted empirical likelihood
# criterion (helper for el.test.wt3()).  Depends on llogp(), the first
# derivative of the safeguarded logarithm, defined inside el.test()
# elsewhere in this package.
  total_wt <- sum(wt)
  nobs <- length(wt)
  denom <- total_wt + z %*% lam
  # per-observation weight: wt_i * llog'(denom_i, 1/n)
  per_obs <- wt * as.vector(llogp(denom, 1/nobs))
  contrib <- as.matrix(z * per_obs)
  # column sums give the length-p gradient vector
  as.vector(colSums(contrib))
}

##########################################################
#    The function llog() is equal to the natural
#  logarithm on the interval from eps >0 to infinity.
#  Between -infinity and eps, llog() is a quadratic.
#  llogp() and llogpp() are the first two derivatives
#  of llog().  All three functions are continuous
#  across the "knot" at eps.
#
#    A variation with a second knot at a large value
#  M did not appear to work as well.
#
#    The cutoff point, eps, is usually 1/n, where n
#  is the number of observations.  Unless n is extraordinarily
#  large, dividing by eps is not expected to cause numerical
#  difficulty.
#
#### These functions have been defined inside el.test(). 
#### No need to repeat.
#
#llog <- function( z, eps ){
#
#ans <- z
#lo <- (z<eps)
#ans[ lo  ] <- log(eps) - 1.5 + 2*z[lo]/eps - 0.5*(z[lo]/eps)^2
#ans[ !lo ] <- log( z[!lo] )
#ans
#}
#
#llogp <- function( z, eps ){
#
#ans <- z
#lo <- (z<eps)
#ans[ lo  ] <- 2.0/eps - z[lo]/eps^2
#ans[ !lo ] <- 1/z[!lo]
#ans
#}
#
#llogpp <- function( z, eps ){
#
#ans <- z
#lo <- (z<eps)
#ans[ lo  ] <- -1.0/eps^2
#ans[ !lo ] <- -1.0/z[!lo]^2
#ans
#}
##########################################################
el.trun.test <- function(y,x,fun=function(t){t},mu,maxit=20,error=1e-9) {
# Empirical likelihood ratio test with left truncated data.
# y are the truncation times, x the observed times (only pairs with
# x > y are observable).  Tests H0: integral of fun(t) dF(t) = mu.
#
# Input:
#   y, x    truncation times and observed times (equal length m)
#   fun     user function defining the mean functional (default identity)
#   mu      hypothesized value of the functional
#   maxit   maximum number of self-consistency / EM-type iterations
#   error   convergence tolerance on the L1 change of the probability
#           vector.  BUG FIX: this argument was previously accepted but
#           never used; both loops always ran exactly maxit iterations.
# Output: list(NPMLE, NPMLEmu, "-2LLR")
# Uses Wdataclean2() and el.test.wt() defined elsewhere in this file.
x <- as.vector(x)
y <- as.vector(y)
temp <- Wdataclean2(x,d=rep(1, length(x))) 
x <- temp$value
wt0 <- temp$weight

indi <- function(u,v){ as.numeric(u > v) }
uij <- outer(x,y,FUN="indi")   # uij[i,j] = 1 iff x_i observable given y_j
m <- length(y)
w0 <- rep(1/length(x), length(x))
xmu <- fun(x) - mu

# unconstrained NPMLE by self-consistency iteration
for(i in 1:maxit) {
     pvec0 <- as.vector( w0 %*% uij )
     nvec <- wt0 + as.vector(rowsum( t(w0*(1-uij))/pvec0, group=rep(1, m)))
     wnew <- nvec/sum(nvec)
     converged <- sum(abs(wnew - w0)) < error
     w0 <- wnew
     if (converged) break
}
w <- w0
# NPMLE under the mean constraint, re-weighting via el.test.wt()
for(i in 1:maxit) {
       pvec <- as.vector( w %*% uij )
       nvec <- wt0 + as.vector(rowsum( t(w*(1-uij))/pvec, group=rep(1, m)))
       wnew <- el.test.wt(x=xmu, wt=nvec, mu=0)$prob
       converged <- sum(abs(wnew - w)) < error
       w <- wnew
       if (converged) break
}
pvec <- as.vector( w %*% uij )
pvec0 <- as.vector( w0 %*% uij )
ELR <- sum(wt0*log(w0)) - sum(log(pvec0)) - sum(wt0*log(w)) + sum(log(pvec))
return(list(NPMLE=w0, NPMLEmu=w, "-2LLR"=2*ELR) )
}
###################################
#######  emplikH1.test() ##########
###################################

emplikH1.test <- function(x, d, y= -Inf, theta, fun, 
	              tola = .Machine$double.eps^.25)
{
# Empirical likelihood ratio test, formulated through the cumulative
# hazard, of the hypothesis  int fun(t) dH(t) = theta  for right
# censored (optionally left truncated, via y) data.
#
# Input:
#   x      observed (possibly censored) times
#   d      0/1 indicators; 1 = event observed, 0 = right censored
#   y      left truncation times; default -Inf means no truncation
#   theta  hypothesized value of int fun(t) dH(t)
#   fun    user supplied function, vectorized over t
#   tola   tolerance handed to uniroot()
# Output: list("-2LLR", lambda, times, wts, nits, message)
# Uses Wdataclean2() and DnR() defined elsewhere in this file.
n <- length(x)
if( n <= 2 ) stop("Need more observations")
if( length(d) != n ) stop("length of x and d must agree")
if(any((d!=0)&(d!=1))) stop("d must be 0/1's for censor/not-censor")
if(!is.numeric(x)) stop("x must be numeric values --- observed times")

newdata <- Wdataclean2(x,d)
temp <- DnR(newdata$value, newdata$dd, newdata$weight, y=y)

time <- temp$times          # distinct uncensored times
risk <- temp$n.risk
jump <- (temp$n.event)/risk # Nelson-Aalen hazard jumps dH(t_i)

funtime <- fun(time)
funh <- (n/risk) * funtime  # that is Zi 
funtimeTjump <- funtime * jump 

if(jump[length(jump)] >= 1) funh[length(jump)] <- 0  #for inthaz and weights

# constraint equation in the Lagrange multiplier (called x here)
inthaz <- function(x, ftj, fh, thet){ sum(ftj/(1 + x * fh)) - thet } 

diff <- inthaz(0, funtimeTjump, funh, theta)

if( diff == 0 ) { 
    lam <- 0 
    # BUG FIX: temp2 was previously left undefined on this branch, so
    # the output list below raised "object 'temp2' not found".
    temp2 <- list(nf = 0, message = "lambda = 0, no root search needed")
} else {
    step <- 0.2/sqrt(n) 
    if(abs(diff) > 6*log(n)*step )
    stop("given theta value is too far away from theta0")

    # bracket the root of inthaz() before calling uniroot()
    mini<-0
    maxi<-0
    if(diff > 0) {
    maxi <- step
    while(inthaz(maxi, funtimeTjump, funh, theta) > 0 && maxi < 50*log(n)*step) 
    maxi <- maxi+step 
    } 
    else { 
    mini <- -step 
    while(inthaz(mini, funtimeTjump, funh, theta) < 0 && mini > - 50*log(n)*step) 
    mini <- mini - step 
    } 

    if(inthaz(mini, funtimeTjump, funh, theta)*inthaz(maxi, funtimeTjump, funh, theta) > 0 )
    stop("given theta is too far away from theta0")

    temp2 <- uniroot(inthaz,c(mini,maxi), tol = tola, 
                  ftj=funtimeTjump, fh=funh, thet=theta)  
    lam <- temp2$root
}

onepluslamh<- 1 + lam * funh   ### this is 1 + lam Zi in Ref. 

weights <- jump/onepluslamh  # constrained hazard jumps; the last jump
                             # need NOT be forced to 1, see funh above

# -2 log (Poisson-type) likelihood ratio; see (3.2) in the Ref.
loglik <- 2*(sum(log(onepluslamh)) - sum((onepluslamh-1)/onepluslamh) ) 

# NOTE(review): uniroot() returns components $iter / $estim.prec, not
# $nf or $message, so nits/message below are NULL after a root search;
# kept unchanged for interface compatibility.
list( "-2LLR"=loglik, 
      lambda=lam, times=time, wts=weights, 
      nits=temp2$nf, message=temp2$message )
}

###############################################
############# emplikH2() ######################
###############################################

emplikH2.test <- function(x, d, y= -Inf, K, fun, 
	                tola = .Machine$double.eps^.25,...)
{
# Empirical likelihood ratio test, formulated through the cumulative
# hazard, of the hypothesis  int fun(t, ...) dH(t) = K  for right
# censored (optionally left truncated, via y) data.
#
# Input:
#   x     observed (possibly censored) times
#   d     0/1 indicators; 1 = event observed, 0 = right censored
#   y     left truncation times; default -Inf means no truncation
#   K     hypothesized value of int fun(t, ...) dH(t)
#   fun   user supplied function, vectorized over t
#   tola  tolerance handed to uniroot()
#   ...   extra arguments passed through to fun
# Output: list("-2LLR", lambda, times, wts, nits, message)
# Uses Wdataclean2() and DnR() defined elsewhere in this file.
if(!is.numeric(x)) stop("x must be numeric values -- observed times")
n <- length(x) 
if( n <= 2 ) stop("Need more observations than two")
if( length(d) != n ) stop("length of x and d must agree") 
if(any((d!=0)&(d!=1))) stop("d must be 0/1's for censor/not-censor")

newdata <- Wdataclean2(x,d)
temp <- DnR(newdata$value, newdata$dd, newdata$weight, y=y)

Dtime <- temp$times         # distinct uncensored times
risk <- temp$n.risk 
jump <- (temp$n.event)/risk # Nelson-Aalen hazard jumps dH(t_i)

funtime <- fun(Dtime,...)
funh <- (n/risk) * funtime                      # that is Zi  
funtimeTjump <- funtime * jump 

if(jump[length(jump)] >= 1) funh[length(jump)] <- 0  #for inthaz and weights

# constraint equation in the Lagrange multiplier (called x here)
inthaz <- function(x, ftj, fh, Konst){ sum(ftj/(1 + x * fh)) - Konst } 

diff <- inthaz(0, funtimeTjump, funh, K)

if( diff == 0 ) { 
    lam <- 0 
    # BUG FIX: temp2 was previously left undefined on this branch, so
    # the output list below raised "object 'temp2' not found".
    temp2 <- list(nf = 0, message = "lambda = 0, no root search needed")
} else {
    step <- 0.2/sqrt(n) 
    if(abs(diff) > 99*log(n)*step )        ##why 99*log(n)? no reason, you 
    stop("given theta value is too far away from theta0") # need something. 

    # bracket the root of inthaz() before calling uniroot()
    mini<-0
    maxi<-0 
    if(diff > 0) { 
    maxi <- step 
    while(inthaz(maxi, funtimeTjump, funh, K) > 0  && maxi < 50*log(n)*step) 
    maxi <- maxi+step 
    } 
    else { 
    mini <- -step 
    while(inthaz(mini, funtimeTjump, funh, K) < 0 && mini > - 50*log(n)*step) 
    mini <- mini - step 
    } 

    if(inthaz(mini,funtimeTjump,funh,K)*inthaz(maxi,funtimeTjump,funh,K)>0)
    stop("given theta is too far away from theta0")

    temp2 <- uniroot(inthaz,c(mini,maxi), tol = tola, 
                  ftj=funtimeTjump, fh=funh, Konst=K)  
    lam <- temp2$root 
} 

onepluslamh<- 1 + lam * funh   # this is 1 + lam Zi in Ref. 

weights <- jump/onepluslamh  # constrained hazard jumps; the last jump
                             # need NOT be forced to 1, see funh above

# -2 log (Poisson-type) likelihood ratio; see (3.2) in the Ref.
loglik <- 2*(sum(log(onepluslamh)) - sum((onepluslamh-1)/onepluslamh) )

# NOTE(review): uniroot() returns components $iter / $estim.prec, not
# $nf or $message, so nits/message below are NULL after a root search;
# kept unchanged for interface compatibility.
list( "-2LLR"=loglik, 
      lambda=lam, times=Dtime, wts=weights, 
      nits=temp2$nf, message=temp2$message )
}
# what should be the fun() and K if I want to perform a (1-sample) 
# log-rank test?
# fun3 <- function(t1, z1) { psum( t( outer(z1, t1, FUN=">=") ) ) } 
# this is similar to the function in LogRank2() function. Need psum/2/3.
# And K = int R(t) dH(t)  = sum( H(z1) ) For example if H() is 
# exponential(0.3) then H(t) = 0.3*t, i.e. K <- sum(0.3* z1) 
# so finally a call may look like
#
# Assume z1 and d1 are the inputs:
# emlik2(z1, d1, sum(0.3* z1), 
#   fun3 <- function(t1,z){psum(t(outer(z,t1,FUN=">=") ) ) }, z=z1)
#
# Now use z1<-c(1,2,3,4,5) and d1<-c(1,1,0,1,1) we get
# emlik2(z1, d1, sum(0.25* z1),
#   fun3 <- function(t1,z){psum(t(outer(z,t1,FUN=">=") ) ) }, z=z1)
#
# with outputs that include this (and more)
# $ "-2logemLR":
# [1] 0.02204689
#This tests if the (censored) obs. z1 is from exp(0.25)

########################################################
############ emplikdisc.test() #########################
########################################################

emplikdisc.test <- function(x, d, y= -Inf, K, fun, 
	                     tola=.Machine$double.eps^.25, theta)
{
# Empirical likelihood ratio test formulated through the DISCRETE
# hazard: tests H0:  sum_i g(t_i, theta) * log(1 - dH(t_i)) = K
# for right censored (optionally left truncated, via y) data.
#
# Input:
#   x, d   observed times and 0/1 indicators (1 = event, 0 = censored)
#   y      left truncation times; default -Inf means no truncation
#   K      hypothesized value of the constraint sum above
#   fun    the function g(t, theta); must be vectorized in t
#   tola   tolerance used by uniroot() and for the lam = 0 shortcut
#   theta  parameter passed through to fun
# Output: list("discrete.-2LLR", lambda, times, jumps); times and
#   jumps exclude the last event time when its hazard jump equals one.
# Uses Wdataclean2() and DnR() defined elsewhere in this file.
n <- length(x) 
if(n <= 2) stop("Need more observations")
if(length(d) != n ) stop("length of x and d must agree") 
if(any((d!=0)&(d!=1))) stop("d must be 0/1's for censor/not-censor")
if(!is.numeric(x)) stop("x must be numeric values --- observed times")

#temp<-summary(survfit(Surv(x,d),se.fit=F,type="fleming",conf.type="none"))
#
# Collapse ties and get, at each distinct event time, the risk-set
# size (n.risk) and the number of events (n.event).
newdata <- Wdataclean2(x,d)
temp <- DnR(newdata$value, newdata$dd, newdata$weight,y=y)

otime <- temp$times         # only uncensored time?  Yes. 
orisk <- temp$n.risk
odti <- temp$n.event

###if the last jump is of size 1, we need to drop last jump from computation
### (log(1 - dH) would be -Inf there)
last <- length(orisk) 
if (orisk[last] == odti[last]) {
                     otime <- otime[-last] 
                     orisk <- orisk[-last]
                     odti  <- odti[-last]
                     }
######## compute the function g(ti, theta) 
gti <- fun(otime,theta) 

# value of the constraint at the unconstrained (NPMLE) hazard; printed
# below as a hint when the supplied K is unreachable
Kcenter <- sum(gti * log(1- odti/orisk))

### the constrain function. To be solved in equation later.
### x plays the role of the Lagrange multiplier lam; OneminusdH are the
### constrained 1 - dH(t_i), which must stay positive to be a hazard.
constr <- function(x, Konst, gti, rti, dti, n) { 
               rtiLgti <- rti + x*n*gti
               OneminusdH <- (rtiLgti - dti)/rtiLgti
               if( any(OneminusdH <= 0) ) stop("estimator not well defined")
               sum(gti*log(OneminusdH)) -  Konst } 

##############################################################

differ <- constr(0, Konst=K, gti=gti, rti=orisk, dti=odti, n=n)

if( abs(differ) < tola ) { lam <- 0 } else {
    step <- 0.2/sqrt(n) 
    if(abs(differ) > 200*log(n)*step )   #Why 200 ? an ad hoc safety cap
    { print( Kcenter )
      stop("the given K value is too far away from K_0. \n Move K closer to the above value")
    }

    mini<-0
    maxi<-0   
######### assume the constrain function is increasing in lam (=x),
######### so search downward when differ > 0 and upward otherwise
    if(differ > 0) { 
    mini <- -step 
    while(constr(mini, Konst=K, gti=gti, rti=orisk, dti=odti, n=n) > 0
 	          && mini > -200*log(n)*step )
    mini <- mini - step 
    } 
    else { 
    maxi <- step 
    while(constr(maxi, Konst=K, gti=gti, rti=orisk, dti=odti, n=n) < 0
                  &&  maxi < 200*log(n)*step )
    maxi <- maxi+step 
    }

    # the root must be bracketed (sign change) before uniroot() is called
    if(constr(mini, Konst=K, gti=gti, rti=orisk, dti=odti, n=n)*constr(maxi, 
                Konst=K, gti=gti, rti=orisk, dti=odti, n=n) > 0 )
    stop("given theta/K is/are too far away from theta0/K0")

# Now we solve the equation to get lambda, to satisfy the constraint of Ho

    temp2 <- uniroot(constr,c(mini,maxi), tol = tola, 
                  Konst=K, gti=gti, rti=orisk, dti=odti, n=n) 
    lam <- temp2$root 
}
####################################################################
# risk sets tilted by the multiplier: r_i + n*lam*g_i
rPlgti <- orisk + n*lam*gti

# -2 log likelihood ratio comparing constrained and unconstrained
# discrete-hazard (binomial) likelihoods
loglik <- 2*sum(odti*log(rPlgti/orisk) +
           (orisk-odti)*log(((orisk-odti)*rPlgti)/(orisk*(rPlgti-odti)) ) )

#?is that right? YES the -2log lik ratio. 
# Notice the output time and jumps has less the last point.
list("discrete.-2LLR"=loglik, lambda=lam, times=otime,
                jumps=odti/rPlgti)
}

iter <- function(x, y, delta, beta)
{
# One EM step for the Buckley-James censored regression estimator.
# Order the observations by their residuals (uncensored first at ties),
# estimate the residual distribution by the weighted Kaplan-Meier
# (WKM, defined elsewhere in this file), impute each censored response
# by its conditional expectation, then refit a no-intercept least
# squares regression.
#
# Input:  x      N x p covariate matrix
#         y      length-N (possibly censored) responses
#         delta  length-N 0/1 indicators (1 = uncensored)
#         beta   current coefficient vector, length = ncol(x)
# Output: the updated coefficient vector from lm().
        N <- length(delta)
        fitted <- x %*% beta
        resid <- y - fitted
        ord <- order(resid, -delta)   # at ties, delta = 1 comes first
        resorder <- resid[ord]
        dorder <- delta[ord]
        uorder <- fitted[ord]
        ystar <- y[ord]
        xorder <- as.matrix(x[ord, ])

        km <- WKM(x = resorder, d = dorder)
        # tail integral int_{e_(i)}^Inf e dF(e), one value per order stat
        jifen <- rev(cumsum(rev(resorder * km$jump)))
        survKM <- km$surv

        # impute each censored response by its conditional expectation
        censored <- which(dorder == 0)
        ystar[censored] <- uorder[censored] + jifen[censored]/survKM[censored]

        return( lm( ystar ~ 0 + xorder )$coef )
}
redistF <- function(y, d, Fdist) {
# Redistribute-to-the-right weight matrix for censored data.
# Observations are sorted by y (uncensored first at ties); the mass of
# each censored observation is redistributed to the observations to
# its right, proportionally to the supplied probability distribution
# Fdist.  Fdist is indexed on the SORTED scale (its i-th entry belongs
# to the i-th order statistic) -- presumably callers supply it that
# way; TODO confirm against callers.
#
# Input:  y      length-n observation times
#         d      length-n 0/1 censoring indicators (1 = uncensored)
#         Fdist  length-n probability masses on the sorted times
# Output: list(y = sorted times, d = sorted indicators,
#              weight = n x n redistribution matrix, ordY = sort order)
n <- length(d)
if ( length(y) != n ) stop("length of y and d must agree")
if ( any((d != 0) & (d !=1)) ) stop("d must be either 0 or 1")
if ( length(Fdist) != n ) stop("Fdist must have length n")

yorder <- order(y, -d)
ysort <- y[yorder]
dsort <- d[yorder]

WeightMat <- diag( rep(1, n) )

# BUG FIX: the original loop used 1:(n-1), which evaluates to c(1, 0)
# when n == 1 and crashed on out-of-range indexing; seq_len(0) is empty.
if (n > 1) {
     for (i in seq_len(n - 1))
          if ( dsort[i] == 0 ) {
                # NOTE(review): if the tail mass sum(Fdist[(i+1):n]) is
                # zero this divides by zero -- confirm callers avoid it
                WeightMat[i, (i+1):n] <- Fdist[(i+1):n]/sum(Fdist[(i+1):n])
                WeightMat[i,i] <- 0
          }
}

list(y=ysort, d=dsort, weight=WeightMat, ordY=yorder)
}
##########################
####### solve3.QP ########
##########################

solve3.QP <- function(D, d, A, b, meq, factorized=FALSE) {
#### Solve the QP problem: min 1/2 x'Dx - d'x subject to t(A)x = b,
#### for DIAGONAL D and equality constraints only.
#### Inputs:
####   D   vector of length n, the diagonal of the matrix diag(D); but
####       if factorized=TRUE, D is actually diag(D)^(-1/2).
####   d   vector of length n
####   A   n x p constraint matrix
####   b   vector of length p
####   meq integer p; present for compatibility with solve.QP in the
####       quadprog package (here ALL constraints are equalities).
#### Output: list(solution = the minimizer x, as an n x 1 matrix).
#### Method: with u = D^{1/2} x, the KKT system reduces to triangular
#### solves against the QR factors of D^{-1/2} A; eta below holds the
#### Lagrange multipliers.  Written by Mai Zhou (mai@ms.uky.edu)
#### Jan.30, 2001.  (Error-message spelling fixed in review.)
D <- as.vector(D)
if(length(b)!=meq) stop("length of constraints not matched")
if(length(D)!=length(d)) stop("dimension of D and d not match")
if(dim(A)[1]!=length(D)) stop("dimension of D and A not match")
if(dim(A)[2]!=meq) stop("dimension of A not match with meq")

         if(!factorized) { D <- 1/sqrt(D) }  # D now holds diag(D)^(-1/2)
         QRout <- qr(D*A)
         temp <- rep(0,meq)
         # forward solve only needed for non-homogeneous constraints
         if(any(b!=0)) {temp<-forwardsolve(t(qr.R(QRout)),b)}
         temp2 <- temp - t(qr.Q(QRout)) %*% (D * d)
         eta <- backsolve(qr.R(QRout), temp2)   # Lagrange multipliers
         sol <- D^2 * (d + A %*% eta )
list( solution=sol )
}

