.packageName <- "lga"
"gap" <- function(x, K, B, criteria=c("tibshirani", "DandF","none"), nnode=NULL, scale=TRUE){
    ## Setup the criteria
    criteria <- match.arg(criteria)
    doall <- ifelse(criteria=="tibshirani", FALSE, TRUE)

    ## Scale the dataset (if required), otherwise parse to a matrix
    x <- as.matrix(x)
    if (any(is.na(x))) stop("Missing data in x")
    if(scale)
        x <- scale(x, center=FALSE, scale=TRUE)
    if (!is.numeric(x)) stop("Data does not appear to be numeric.\n")
    n <- nrow(x); d <- ncol(x)

    GAP <- logWks <- rep(NA, K)
    ElogWks <- matrix(NA, nrow=K, ncol=2,
                      dimnames=list(NULL, c("ElogWks", "Sks")))
    finished <- FALSE

    ## Check that K is appropriate
    if (K > floor(n/d)) {
        cat("Not enough observations to consider",K, "clusters.Using K =",
            floor(n/d),"instead.\n" )
        K <- floor(n/d)
    }
    k <- 0

    if(!is.null(nnode)) {
        ## set up the nodes
        cat("Setting up nodes \n")
        if (!(require(snow))) stop("Can't find required packages: snow")
        cl <- makeCluster(nnode)
        clusterEvalQ(cl, library(lga))
        ## Randomize the seed (dirty, yes, but doesn't require SPRNG)
        clusterApply(cl, runif(length(cl), max=10e6),set.seed)
    }

    ## start looping through k
    while (!finished & k < K){
        k <- k+1

        ## Calculate the GAP for actual data
        cat("\nCalculating GAP at k =", k,"\nCalculating log(W_k)")
        logWks[k] <- gap.logW(x, k)

        ## Now bootstrap on reference distribution
        cat("\nCalculating E log(W_k) (Bootstrap)")

        if (is.null(nnode))
            BootOutput <- gap.boot(x, B, n, k)
        else {
            B <- ceiling(B/nnode)*nnode
            BootOutput <- unlist(clusterCall(cl, gap.boot, x, B/nnode, n, k))
        }

        ## Calculate GAP statistic, and return results
        ElogWks[k,] <- c(mean(BootOutput), sqrt(var(BootOutput)*(1+1/B)))
        GAP[k] <- ElogWks[k,1] - logWks[k]
        if (k > 1)
            if(GAP[k-1] >= GAP[k]-ElogWks[k,2] & !doall)
                finished <- TRUE
    }

    if (!is.null(nnode)){
        cat("Closing nodes\n")
        stopCluster(cl)
    }

    outdata <- cbind(Gap=GAP[1:k], logWks=logWks[1:k], ElogWks[1:k,])
    rownames(outdata) <- paste("k=", 1:dim(outdata)[1], sep="")
    output <- list(finished=finished, nclust=k-1, data=outdata, criteria=criteria)
    class(output) <- "gap"
    output$nclust <- criteria(output)


    plot(output)
    print(output)
    invisible(output)
}


plot.gap <- function(x, ...){
    GapData <- x$data
    logWk <- GapData[,2]
    ElogWk <- GapData[,3:4]
    op <- par(mfrow=c(1,2)); on.exit(par(op))

    ## First plot  - number of clusters vs Obs and Exp log(Wk)
    ysize <- range(logWk, ElogWk[,1])
    ysize <- c(floor(ysize[1]), ceiling(ysize[2]))
    yseq <- seq(from=ysize[1], to=ysize[2], length=5)
    plot(logWk, type="b", pch="O", ylim=ysize, axes=F,
         xlab="Number of clusters k", ylab="Obs and Exp log(Wk)")
    points(ElogWk[,1], type="b", pch="E")
    box()
    axis(1, 1:length(logWk), at=1:length(logWk))
    axis(2, yseq, at=yseq)

    ## Second plot - number of clusters vs Gap statistic
    plot(GapData[,1], type="b", axes=F, xlab="Number of clusters k", ylab="Gap")
    axis(1, 1:length(logWk), at=1:length(logWk))
    ysize <- range(GapData[,1], GapData[,1]-ElogWk[,2])
    ysize <- c(floor(ysize[1]), ceiling(ysize[2]))
    yseq <- seq(from=ysize[1], to=ysize[2], length=5)
    axis(2, yseq, at=yseq)
    arrows(1:dim(GapData)[1], GapData[,1], 1:dim(GapData)[1], GapData[,1] - ElogWk[,2],
           angle=90, code=2 , length=0.1)
    if(!is.na(x$nclust))
        points(x$nclust, GapData[x$nclust,1], pch=19, col='red')
}

print.gap <- function(x, ...){
    cat("\n\nCriteria used:", x$criteria,"\n")
    if (!x$criteria == "none"){
        if (is.na(x$nclust))
            cat("Nothing conclusive found for K =", nrow(x$data), "- consider raising K (if possible).\n")
        else
            cat("Gap suggests there are", x$nclust,"clusters\n\n")
    }
    print(x$data)
}

"gap.boot" <- function(xsc, B, n, k){
    ## this function generates B samples of size n using the function gap.genBox
    ## these are then given to gap.logW.
    ## Uses the boot function from the library of the same name.
    ## This returns a matrix (t) with the statistic for each replicate
    return(boot(data=xsc, statistic= gap.logW, R=B, sim="parametric",
                ran.gen=gap.genBox, mle=n, k=k)$t)
}

"gap.genBox" <- function(x, n){
    ## generates a uniform box over the range of x, transformed to the
    ## eigenvectors of x.
    y <- scale(x, scale=FALSE)
    svdOut <- svd(y)
    Ranges <- apply(y%*%svdOut$v,2,range)
    z <- apply(Ranges, 2, function(x, nn)
               runif(nn, min=x[1], max=x[2]), nn=n)
    zPrime <- z %*% t(svdOut$v)
    return(sweep(zPrime, 2, attr(y, 'scaled:center'), FUN="+"))
}


"gap.logW" <- function(x, k){
    cat(".")
    n <- dim(x)[1]
    d <- dim(x)[2]

    if (k==1)
        groups <- rep(1, n)
    else
        groups <- lga(x, k, niter=20, scale=FALSE, silent=TRUE)$cluster

    hpcoef <- matrix(NA, nrow=k, ncol=d+1)
    for (i in 1:k)
        hpcoef[i,] <- lga.orthreg(x[groups==i,])
    return(log(lga.calculateROSS(hpcoef, x, n, d, groups)))
}

"criteria" <- function(x){
    switch(x$criteria,
           none = criteria.none(x),
           DandF = criteria.DandF(x),
           tibshirani = criteria.tibshirani(x))
}

"criteria.none" <- function(x) {
    ## just return the GAP values
    return(NA)
}

"criteria.DandF" <- function(x){
    ## the method given in Dudoit and Fridlyand (2002)
    y <- x$data
    crit <- diff(y[which.max(y[,"Gap"]), c("Sks", "Gap")])
    nclust <- min(which(y[,"Gap"] > crit))
    return(ifelse(nclust == nrow(y), NA, nclust))
}

"criteria.tibshirani" <- function(x) {
    ## the method given in Tibshirani, Walter & Hastie (2001)
    return(ifelse(!x$finished, NA, x$nclust))
}
"lga" <- function(x, k, biter=NULL, niter=10, showall=FALSE, scale=TRUE,
                  nnode=NULL, silent=FALSE){
    ## Scale the dataset (if required), otherwise parse to a matrix
    x <- as.matrix(x)
    if (any(is.na(x))) stop("Missing data in x")

    if(scale)
        x <- scale(x, center=FALSE, scale=TRUE)
    if (!is.numeric(x)) stop("Data does not appear to be numeric.\n")
    n <- nrow(x); d <- ncol(x)

    ## Set the number of random starts (if not provided)
    if (is.null(biter))
        if(is.na(biter <- lga.NumberStarts(n, d, k, p=0.95)))
            stop("NumberStarts ill-specified. Rerun, setting biter \n")

    if (!silent)
        cat("LGA Algorithm \nk =", k, "\tBiter =", biter, "\tNiter =", niter, "\n")

    ## Choose the starting hyperplane coefficients for each biter
    hpcoef <- list()
    for (j in 1:biter){
        ## Choose starting clusters
        clindex <- matrix(sample(1:n, size=k*d, replace=FALSE), nrow=k)
        ## form initial hyperplanes - each row is a hyperplane
        hpcoef[[j]] <- matrix(NA, nrow=k, ncol=(d+1))
        for (i in 1:k)
            hpcoef[[j]][i,] <- lga.orthreg(x[clindex[i,],])
    }

    if(is.null(nnode)){
        ## not parallel
        outputsl <- lapply(hpcoef, lga.iterate, x, k, d, n, niter)
    }
    else {
        ## parallel
        if (!silent) cat("Setting up nodes \n")
        if (!(require(snow))) stop("Can't find required packages: snow")
        cl <- makeCluster(nnode)
        clusterEvalQ(cl, library(lga))
        outputsl <- clusterApplyLB(cl, hpcoef, lga.iterate, x, k, d, n, niter)
        stopCluster(cl)
        if (!silent) cat("Nodes closed \n")
    }

    outputs <- matrix(unlist(outputsl), ncol = biter, nrow = n + 2)

    ## Find the number of converged results
    nconverg <- sum(outputs[n+1,])
    if (nconverg == 0)
        warning("LGA failed to converge for any iteration\n")
    if (!showall){
        ## remove any columns with NAs
        outputs <- outputs[,complete.cases(t(outputs)), drop=FALSE]
        if (nconverg != 0)
            outputs <- outputs[,outputs[n+1,]==1, drop=FALSE]
        outputs <- outputs[, which.min(outputs[n+2,]), drop=FALSE]
        if(ncol(outputs) > 1)
            outputs <- lga.CheckUnique(outputs)
    }
    if (!silent) {
        cat("\nFinished.\n")
        if(showall) cat("\nReturning all outputs \n", "")
    }
    fout <- list(cluster=outputs[1:n,], ROSS=outputs[n+2,], converged=outputs[n+1,], biter=biter, niter=niter, nconverg=nconverg, scaled=scale, k=k, x=x)
    class(fout) <- "lga"
    return(fout)
}


"print.lga" <- function(x, ...) { ## S3method for printing LGA class
    cat("Output from LGA:\n\nDataset scaled =", x$scale, "\tk =",x$k,"\tniter =",x$niter,"\tbiter =",
        x$biter,"\nNumber of converged =",x$nconverg,"\nBest clustering has ROSS =",x$ROSS,"\n")
}

"plot.lga" <- function(x, ...) { ## S3method for plotting LGA class
    nclust <- x$k;   d <- ncol(x$x)
    hp <- matrix(NA, ncol=nclust, nrow=(d+1))

    ## Fit the hyperplanes
    for (i in 1:nclust)
        hp[,i] <- lga.orthreg(x$x[x$cluster == i,])

    ## 2-d case
    if (d == 2){
        plot(x$x, col=x$cluster, type="p", xlab="",ylab="", ...)
        for (i in 1:nclust)
            abline(a= hp[3,i]/hp[2,i], b= -hp[1,i]/hp[2,i], lty=2, col=i)
    }
    else{
        ## We do a rather complicated trick here using frames, as we need to know what pair of axes we are plotting
        ## in order to do the correct hyperplane intersections.
	lgaPlotCounter <- 0  ## this increments for each plot
	sysFrame <- sys.frame(sys.nframe()) ## this is the pointer to the current frame

	refMatrix<- matrix(NA, ncol=2, nrow=0) ## this matrix tells us which pair of axes
	for (j in 2:d) for (i in 1:(j-1)) refMatrix <- rbind(refMatrix, c(i,j))

        ## data.frame makes splom compatible with earlier versions of R
        splom(data.frame(x$x), lower.panel=function(x,y,...){}, upper.panel=function(x, y, hp, clusters, sysframe, refMatrix, ...)
          {
              int <- nrow(hp); nclust <- ncol(hp)
              assign("lgaPlotCounter", get("lgaPlotCounter", envir=sysframe) + 1, envir=sysframe)
              refV <- refMatrix[get("lgaPlotCounter", envir=sysframe),]
              panel.xyplot(x, y, col=clusters, ...)
              for (i in 1:nclust)
                  panel.abline(a= hp[int,i]/hp[ refV[2], i], b= -hp[ refV[1], i]/hp[ refV[2], i], lty=2, col=i)
          }, hp=hp, clusters=x$cluster, sysframe=sysFrame, refMatrix=refMatrix)
    }
}

"lga.calculateROSS" <- function(hpcoef, xsc, n, d, groups){
    ## This function calculates the total Residual Orthogonal Sum of Squares for a given grouping
    dist <- (sweep(xsc %*% t(hpcoef[,1:d, drop=FALSE]), 2, hpcoef[,d+1, drop=FALSE], FUN="-"))^2
    return(sum(dist[cbind(1:n, groups)]))
}

"lga.CheckUnique" <- function(x){
    "CheckUnique.Rand" <- function(z) {
        sum(z^2)-0.5*(sum(apply(z,1,sum)^2)+ sum(apply(z,2,sum)^2))
    }
    d <- dim(x)[2]
    index <- rep(TRUE,d)
    for (i in 1:(d-1)){
        for (j in (i+1):d){
            y <- table(x[,i], x[,j])
            z <- CheckUnique.Rand(y)
            if (z==0) index[j] <- FALSE
        }
    }
    if (sum(index) > 1) { ## In the incredibly unlikely situation....
        warning("Two unique solutions with identical ROSS.")
    }
    return(x[,index, drop=FALSE])
}

"lga.dodist" <- function(y, coeff, k, d, n){
    ## This function calculates the (orthogonal) Residuals for different hyerplanes,
    ## and returns the closest for each observation
    dist <- (y %*% t(coeff[,1:d])- matrix(coeff[,d+1], ncol=k, nrow=n, byrow=TRUE))^2
    return(max.col(-dist, ties.method="first"))
}

"lga.iterate" <-  function(hpcoef, xsc, k, d, n, niter){
    ## give the function the inital set of hyperplanes (in hpcoef)
    groups <- lga.dodist(xsc, hpcoef, k, d, n)
    iter <- 0
    if (any(table(groups) < d) | (length(unique(groups)) < k))
        iter <- (niter+10)
    oldgroups <- 1
    converged <- FALSE
    while (!converged & iter < niter) {
        iter <- iter+1
        oldgroups <- groups
        for (i in 1:k){
            hpcoef[i,] <- lga.orthreg(xsc[groups==i,])
        }
        groups <- lga.dodist(xsc, hpcoef, k, d, n)
        if (any(table(groups) < d) | (length(unique(groups)) < k))
            iter <- (niter+10) # if there aren't enough obs in a group
        if (all(oldgroups==groups)) converged <- TRUE
    }
    return(c(groups, converged, lga.calculateROSS(hpcoef, xsc, n, d, groups)))
}

"lga.NumberStarts" <- function(n, d, k, p=0.95){
    ## Calculate the number of starting positions (from article)
    n1 <- ceiling(n/k)
    return(ceiling(log(1-p)/log(1-choose(n1, d)^k/choose(n1*k, k*d))))
}

"lga.orthreg" <- function(x){
    ## Perform orthogonal regression.
    ## We use svd rather than eigen for numerical stability (esp when eigenvalues are small)
    y <- scale(x, scale=FALSE)
    emat <- svd(y)$v[,dim(y)[2]]
    return(c(emat, emat %*% attr(y, 'scaled:center')))
}

