.packageName <- "vabayelMix"
"CostKL" <-
function(Ncat,data,m0,am0,aiv0,biv0,api0,m,am,aiv,biv,api,Catwm){
 Nsamples <- dim(data)[1];
 Ndim <- dim(data)[2];

 sum1 <<- 0.5*sum( am0*(1/am + m*m) );
 sum2 <<- 0.5*sum( log(am) ) ;
 v <- as.vector( Catwm[Catwm > 0] );
 sum3 <<- sum( v*log(v) );
 sum4 <<- sum( lgamma(biv0)-lgamma(biv) + biv*log(aiv) - biv0*log(aiv0)) ;
 sum5 <<- sum( lgamma(api0)-lgamma(api)  );
 sum6 <<- lgamma(sum(api))-lgamma(sum(api0)) ;
 sum7 <<- -(0.5 + log(mean(am0)))*Ncat*Ndim ;
 sum8 <<- 0.5*Nsamples*log(2*pi);
 
 Ckl <- sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 +sum8 ;

 return(list(ckl=Ckl));


}

"MembProbFn2" <-
function(data,NewVals,Nsamples){
print("Entering MembProbFn2: Computing probabilities");

# constants
Ncat <- dim(NewVals$mean)[1];
Ndim <- dim(NewVals$mean)[2];
print(paste("Ncat=",Ncat, " Ndim=",Ndim," Nsamples=",Nsamples,sep=""));

# input
m   <- NewVals$mean;
am  <- NewVals$ivarm;
aiv <- NewVals$ivara;
biv <- NewVals$ivarb;
api <- NewVals$dapi;
# variables to be computed
pcat <- matrix(rep(0,times=Ncat*Nsamples),nrow=Ncat,ncol=Nsamples);
wCl <- vector(length=Nsamples);

# Weight of each cluster
prob.cat <- api/sum(api) ;

# Find which cluster each data point belongs to
pcat <- matrix(0,nrow=Ncat,ncol=Nsamples);
wCl <- vector(length=Nsamples);
for ( sn in 1:Nsamples){
  # p(c|d_s) = p(c) p(d_s|c);
  for ( c in 1:Ncat){

  pcat[c,sn] <- prob.cat[c]*exp(-0.5*sum( (biv[c,]/aiv[c,])*(data[sn,]-m[c,])*(data[sn,]-m[c,])))*sqrt(prod(biv[c,]/aiv[c,]));

  }

  spcat <- sum( pcat[,sn] );
  pcat[,sn] <- pcat[,sn]/spcat ;

  wCl[sn] <- which.max(pcat[,sn]);
}


 print("Finished membprobFn2");
 return(list( wcl=wCl, probs=pcat));


} ### END OF FUNCTION

"UpdateCatw" <-
function(Ncat,data,m,am,aiv,biv,api){
  Nsamples <- dim(data)[1];
  Ndim <- dim(data)[2];
  # Values needed
  s.api <- sum(api);
  t1 <- digamma(s.api);

  # temporary variables needed
  alpha <- matrix(nrow=Ncat, ncol=Nsamples);
  v <- rep(0,Ndim);
  lambda <- matrix(nrow=Ncat, ncol=Nsamples);
  s.lambda <- rep(0,Ncat);
  
  for ( c in 1:Ncat){
      
    value1 <- digamma(api[c]) - t1 + 0.5*( sum(-log(aiv[c,])+ digamma(biv[c,])) );
    vec1 <- m[c,]*m[c,] ;
    vec2 <- 1/am[c,];
    vec3 <- biv[c,]/aiv[c,] ;

   for ( n in 1:Nsamples){
#      print(c("Doing ",c,n));
      value2 <- sum( (data[n,]*data[n,]-2*data[n,]*m[c,]+ vec1 + vec2 )*vec3 );

      alpha[c,n] <- value1 - 0.5* value2;
   }
     
  }
     
  for ( c in 1:Ncat){
     s.lambda[c] <- 0;
   for( n in 1:Nsamples){

     lambda[c,n] <- 1/sum( exp( alpha[,n]-alpha[c,n] ) );
     s.lambda[c] <- s.lambda[c]+lambda[c,n];
   }
  }


   return(list(cwm=lambda, scw=s.lambda));


} # endoffunction

"UpdateMix" <-
function(Ncat,data,m0,am0,aiv0,biv0,api0,m,am,aiv,biv,lambda,s.lambda){

  # Ncat : Number of components to look for
  # Ndim : Number of dimensions of observation vector
  # m0   : initial hyperparameter for means, same dimension as m
  # m    : means matrix, of dimension Ncat x Ndim
  # am   : inverse variances of means, Ncat x Ndim. (am0)
  # aiv  : par for inverse variance posterior, Ncat x Ndim
  # biv  : par for inverse variance posterior, Ncat x Ndim
  Ndim <- dim(data)[2];
  #tmp var needed
  v <- rep(0,Ndim);
     
  # store old values
  m.o <- m ; am.o <- am ; biv.o <- biv ; aiv.o <- aiv ;

  for ( c in 1:Ncat){
   # Update of other posterior parameters
   # inverse variance of mean
   am[c,] <- am0[c,] + s.lambda[c]*biv.o[c,]/aiv.o[c,];
   # Update of mean
   for ( i in 1:Ndim){
     v[i] <- sum(lambda[c,]*data[,i]);
   }
   m[c,] <- ( m0[c,]+ v*biv.o[c,]/aiv.o[c,] )/am[c,] ;
     
   # par1 of inv. variance
   biv[c,] <- biv0[c,] + 0.5*s.lambda[c];


   for ( i in 1:Ndim){                                   # par2 of inv. variance
   aiv[c,i] <- aiv0[c,i] + 0.5*sum( lambda[c,]*( data[,i]*data[,i]-2*data[,i]*m[c,i]+ m[c,i]*m[c,i] + 1/am[c,i] ) ) ;

   }
  }

  # learn dirichlet posterior parameters
  api <- api0 + s.lambda ;

  return(list(mean = m, ivarm = am, ivarb = biv, ivara= aiv, dapi=api));

} # END of function

"UseBasicPrior" <-
function(data,weights.v){

Ndim <- dim(data)[2];
Ncat <- length(weights.v);

# for means assume prior gaussian of means
m0 <- matrix(rep(0,Ncat*Ndim), nrow=Ncat, ncol=Ndim);
for( c in 1:Ncat){
 for( d in 1:Ndim){
  m0[c,d] <- runif(1,min=min(data[,d]),max=max(data[,d]));
 }
}
# and inv.variances 
am0 <- matrix(rep(0.001,Ncat*Ndim),nrow=Ncat, ncol=Ndim);

for( d in 1:Ndim){
 1/var(data[,d]) -> est.ivar;
 am0[,d] <- rep(est.ivar,Ncat);
}
 
# for inv. variances assume gamma distribution with parameters
aiv0 <- matrix(rep(0.001,Ncat*Ndim),nrow=Ncat, ncol=Ndim);
biv0 <- matrix(rep(0.001,Ncat*Ndim),nrow=Ncat, ncol=Ndim);

for( d in 1:Ndim){
 1/var(data[,d]) -> est.ivar;
 biv0[,d] <- est.ivar*aiv0[,d];
}

# parameter of dirichlet posterior
api0 <- weights.v ;
#################################################################
useprior.l <- list(mean=m0, ivarm=am0, ivara=aiv0, ivarb=biv0, dapi=api0);

return(useprior.l);

} # END OF FUNCTION

"pack" <-
function(data.m,kurt.range=c(-2,0),cluster=T,method=c("bic","vb")){

# compute kurtosis  
k.v <- vector();
for(r in 1:nrow(data.m)){
 k.v[r] <- unbiasedKurt(data.m[r,]);
}
feat.idx <- intersect(which(k.v >= kurt.range[1]),which(k.v <= kurt.range[2]));
print(paste("There are ",length(feat.idx)," genes in the specified kurtosis range",sep=""));

class.lv <- list();
sizeSubG.v <- vector();
if(cluster){
  # check for missing entries
  if( length(which(is.na(as.vector(data.m)))) > 0){
   print("Matrix has missing values. Please impute them and rerun.");
   break;
  }
  if(method=="bic"){
   library(mclust);
   sel.idx <- vector();
   i <- 1;
   for(r in feat.idx ){
    m <- matrix(data.m[r,],ncol=1,nrow=ncol(data.m));
    em.o <- Mclust(m,minG=1,maxG=2);
    ncl <- length(levels(factor(em.o$classification)));
    sizeCluster.v <- summary(factor(em.o$classification));
    if (ncl > 1){
     sel.idx <- c(sel.idx,r);
     sizeSubG.v[r] <- min(sizeCluster.v);
     class.lv[[r]] <- em.o$classification;
    }
    print(paste("Done ",i," out of ",length(feat.idx)," genes",sep=""));
    i <- i+1;
   }
  }
  else if (method=="vb"){
   sel.idx <- vector(); i <-1;
   for(r in feat.idx){
    m <- matrix(data.m[r,],ncol=1,nrow=ncol(data.m));
    prior <- UseBasicPrior(m,weights.v=c(1,1));
    vb.o <- vabayelMix(m,prior=prior,Ncat=2,nruns=5,npick=1,verbatim=F,MaxIt=100);
    ncl <- length(levels(factor(vb.o$wcl)));
    sizeCluster.v <- summary(factor(vb.o$wcl));
    if (ncl > 1){
     sel.idx <- c(sel.idx,r);
     sizeSubG.v[r] <- min(sizeCluster.v);
     class.lv[[r]] <- vb.o$wcl;
    }
    print(paste("Done ",i," out of ",length(feat.idx)," genes",sep=""));
    i <-i+1;
   }
 }
 out.m <- matrix(nrow=length(sel.idx),ncol=3);
 colnames(out.m) <- c("Kurtosis","Subgroup size","Index");
 k.s <- sort(k.v[sel.idx],decreasing=F,index.return=T);
 out.m[,1] <- k.s$x;
 out.m[,2] <- sizeSubG.v[sel.idx[k.s$ix]];
 out.m[,3] <- sel.idx[k.s$ix];
 rownames(out.m) <- rownames(data.m)[sel.idx[k.s$ix]];
}
else { # don't cluster
 sel.idx <- feat.idx;
 out.m <- matrix(nrow=length(sel.idx),ncol=2);
 colnames(out.m) <- c("Kurtosis","Index");
 k.s <- sort(k.v[sel.idx],decreasing=F,index.return=T);
 out.m[,1] <- k.s$x;
 out.m[,2] <- sel.idx[k.s$ix];
 rownames(out.m) <- rownames(data.m)[sel.idx[k.s$ix]];
  class.lv <- NULL;
}

 return(list(out=out.m,class=class.lv));
 
} # end of function

"unbiasedKurt" <-
function(v){
  z <- (v-mean(v,na.rm=T))/sqrt(var(v,na.rm=T));
  n <- length(which(is.na(v)==F));
  k <- n*(n+1)*sum(z^4)/((n-1)*(n-2)*(n-3)) - (3*(n-1)^2)/((n-2)*(n-3));
  return(k);
}

"vabayelMix" <-
function(data, prior=NA, Ncat, nruns=100, npick=1, MaxIt=500, conv.tol=0.001, nCVconv=10, verbatim=FALSE){

if( is.na(Ncat) ){
  stop("Must specify =maximum number of categories/clusters to look for.");
}

# Basic constants
Nsamples <- dim(data)[1];
Ndim <- dim(data)[2];

# Prior hyperparameters (labeled by 0) 

if( is.na(prior) ){ # then use broad priors
  print("Using broad hyperparameters in priors");
# for means assume prior gaussian of means
m0 <- matrix(rep(0,Ncat*Ndim), nrow=Ncat, ncol=Ndim);
# and inv.variances 
am0 <- matrix(rep(0.001,Ncat*Ndim),nrow=Ncat, ncol=Ndim);
# for inv. variances assume gamma distribution with parameters
aiv0 <- matrix(rep(0.001,Ncat*Ndim),nrow=Ncat, ncol=Ndim);
biv0 <- matrix(rep(0.001,Ncat*Ndim),nrow=Ncat, ncol=Ndim);
# parameter of dirichlet posterior
api0 <- rep(1,times=Ncat);
}
else { # have specified priors so use it instead
  
  if(length(prior) != 5 ){
   stop("prior must be a list of length 5");
  }
  if( (ncol(prior$mean)==Ndim) && (nrow(prior$mean)==Ncat) ){
    m0 <- prior$mean ;
  }
  else {
    stop("Wrong dimensions for the mean of prior mean");
  }
  if( (ncol(prior$ivarm)==Ndim) && (nrow(prior$ivarm)==Ncat) ){
    am0 <- prior$ivarm ;
  }
  else {
    stop("Wrong dimensions for inv variance of prior mean");
  }
  if( (ncol(prior$ivara)==Ndim) && (nrow(prior$ivara)==Ncat) ){
    aiv0 <- prior$ivara ;
  }
  else {
    stop("Wrong dimensions for parameter a in prior for inv. variance");
  }
  if( (ncol(prior$ivarb)==Ndim) && (nrow(prior$ivarb)==Ncat) ){
    biv0 <- prior$ivarb ;
  }
  else {
    stop("Wrong dimensions for parameter b in prior for inv. variance");
  }
  if( length(prior$dapi)==Ncat ){
    api0 <- prior$dapi ;
  }
  else {
    stop("Wrong dimensions for dirichlet prior ");
  }

}

  
##############################################################################

### VARIABLES NEEDED #################################################
wCl <- matrix(nrow=npick,ncol=Nsamples);
v.Costs <- vector() ; v.NC <- vector();
l.NewVals <- list(NULL);
#########################################################################

### START RUNS ##########################################################
for ( run in 1:nruns){

# ENSEMBLE INITIALISATION (only randomise means)#########################
m <- matrix(rep(0,Ncat*Ndim),nrow=Ncat,ncol=Ndim);
for ( i in 1:Ndim){
 m[,i] <- runif(Ncat,min=min(data[,i]),max=max(data[,i]));
}
am <- am0;
aiv <- aiv0;
biv <- biv0;
api <- api0;
############################################################################
############# STARTIN ITERATIONS ###########################################
print("About to enter iterations");
t <- 0; inc <-1 ;
Cost.old <- rep(10^6,times=nCVconv);
while( (t < MaxIt) && (inc==1)){
  if(verbatim){
  print(c("Iteration",t)); 
 # Update Categorical weights for updating posterior parameters
  print("Calling UpdateCatw");
  }
  Catw <- UpdateCatw(Ncat,data,m,am,aiv,biv,api);
 # Update posterior parameters 
  if(verbatim){
  print("Calling UpdateMix");
  }
  NewVals <- UpdateMix(Ncat,data,m0,am0,aiv0,biv0,api0,m,am,aiv,biv,Catw$cwm,Catw$scw);
  if( verbatim==TRUE){
  print("Means");
  print(NewVals$mean);
  print("Weights");
  print(NewVals$dapi);
  }
  m <- NewVals$mean;
  am <- NewVals$ivarm;
  aiv <- NewVals$ivara;
  biv <- NewVals$ivarb;
  api <-NewVals$dapi;

  Cost <- CostKL(Ncat,data,m0,am0,aiv0,biv0,api0,m,am,aiv,biv,api,Catw$cwm);
  Cost.new <- Cost$ckl ;

  if( verbatim==TRUE){
  print("Cost");
  print(Cost$ckl);
  }
  
  if ( mean(abs(Cost.new-Cost.old)) < conv.tol ){
    inc <- 0;
  }
  Cost.old <- sort( Cost.old );
  Cost.old[nCVconv] <- Cost.new ; 
  t <- t + 1;
} # iteration loop
######### END OF ITERATIONS ######################################
  v.Costs <- c(v.Costs, Cost$ckl);
  v.NC <- c(v.NC, inc);
  l.NewVals[[run]] <- NewVals;
} # matches runs loop  
######### END OF RUNS #############################################

# Finished runs, pick best runs ( minimise the Cost function ) and compute cluster membership probabilities and assign samples to clusters.
sv.Costs <- sort( v.Costs, decreasing=FALSE, index.return=TRUE);
EstVals <- list(NULL);
MembPr2 <- list(NULL);
for ( i in 1:npick){
  EstVals[[i]] <- l.NewVals[[sv.Costs$ix[i]]];
  # Compute membership probs. for these  
  MembPr2[[i]] <- MembProbFn2(data,EstVals[[i]],Nsamples);
  wCl[i,] <- MembPr2[[i]]$wcl ;
}

 return(list(estvals=EstVals,wcl=wCl,probs=MembPr2, costs=v.Costs[sv.Costs$ix], conv=v.NC[sv.Costs$ix]));


} ### END OF FUNCTION

