.packageName <- "VLMC"
###-- Allow two ways to store the alphabet!  --- just the beginning

## alphabet() : return  *vector* of  `alpha.len'  alphabet "characters"

alphabet <- function(x, ...) UseMethod("alphabet")
alphabet.vlmc <- function(x, ...) {
    if(!is.vlmc(x)) stop("`x' is not a valid `vlmc' object")
    if(length(list(...)))
	stop("Extra arguments ", deparse(substitute(...)), " are discarded.")
    k <- x$alpha.len
    na <- length(a <- x$alpha)
    if(na == 1)  {
        if(nchar(a) != k)
            stop("invalid `alpha' string, #{chars} != alpha.len")
        strsplit(a, NULL)[[1]]
    } else {
        if(na != k)
            stop("invalid `alpha' vector, length != alpha.len")
        a
    }
}

### How can I make functions from a package share a common utility ?
###  ==> use a NAMESPACE !
##   ss <- function(x) strsplit(x,NULL)[[1]]

int2alpha <- function(i, alpha)
{
    ## {0,1,..} |--> "alphabet" representation of discrete t.s.
    (strsplit(alpha,NULL)[[1]])[1:1 + i]
}
alpha2int <- function(x, alpha)
{
    ## (single) character |--> {0,1,..}  representation of discrete t.s.
    match(x, strsplit(alpha,NULL)[[1]]) - 1:1
}

int2char <- function(i, alpha)
{
    ## {0,1,..} |--> "alphabet" representation -- as 1 string --
    paste(int2alpha(i,alpha), collapse="")
}
char2int <- function(x, alpha)
{
    ## 1-string |--> {0,1,..}  representation of discrete t.s.
    ss <- function(x) strsplit(x,NULL)[[1]]
    match(ss(x), ss(alpha)) - 1:1
}

id2ctxt <- function(id, m = nchar(alpha), alpha = NULL) {
    ## Purpose: Compute context from "ID" as returned by predict.vlmc

    if((m <- as.integer(m)) < 2)
        stop("alphabet length `m' must be >= 2")
    ## Improve (but then, use C anyway!):
    r <- vector("list", n <- length(id <- as.integer(id)))
    i.ok <- !is.na(id)
    r[!i.ok] <- NA
    lev <- floor(1e-7 + log(id, m))

    for(i in 1:n) if(i.ok[i]) {
        ii <- id[i]
        ## convert ID `ii' to {0:(m-1)} coded vector `rr':
        rr <- integer(lev[i])
        for(ll in lev[i]:1) {
            rr[ll] <- ii %% m
            ii <- ii %/% m
        }
        r[[i]] <- rr
    }
    if(is.null(alpha) || (is.logical(alpha) && !alpha))
        r # list of integer vectors
    else if(is.logical(alpha) && alpha)
        ## return string, using "01.." alphabet
        sapply(r, function(i)paste(i, collapse=""))
    else if(is.character(alpha)) { ## using  `alpha' alphabet
        if(length(alpha) > 1) ## return vector of characters
            sapply(r, function(i) alpha[1:1 + i])
        else ## return string
            sapply(r, function(i) int2char(i,alpha))
    }
    else {
        warning("invalid `alpha'; using alpha = NULL")
        r
    }
}
## })# local
####- vlmctree() & as.dendrogram() --- R-level recursive tree construction

vlmctree <- function(x)
{
  ## Purpose: Compute the Tree representation of a "vlmc" object (as R list).
  ## -------------------------------------------------------------------------
  ## Arguments: x: a "vlmc" object {usually a fit from vlmc(..)}.
  ## -------------------------------------------------------------------------
  ## Author: Martin Maechler, Date:  1 Apr 2000, 18:02
  if(!is.vlmc(x)) stop("first argument must be a \"vlmc\" object; see ?vlmc")
  vvec <- (x $ vlmc.vec)#.Alias
  k <- (x $ alpha.len)#.Alias
  if(vvec[1] != k) stop("invalid vlmc structure {alpha.len}")

  vtree <- .vvec2tree(vvec[-1], k = vvec[1], chk.lev = 0)
}

.vvec2tree <- function(vv, k, chk.lev)
{
  ## Purpose: RECURSIVELY construct tree from a vvec of a "vlmc" object
  ##	      *not* using alphabet, (just k = |alphabet|).
  ## Do as load_tree(.)	 {in ../src/io.c }
  ## -------------------------------------------------------------------------
  ## Author: Martin Maechler, Date:  1 Apr 2000, 18:11

  if((lev <- vv[1]) >= 0) { ## non-end
      if(lev != chk.lev)
	  stop(paste("malformed vlmc tree at level",chk.lev))

      ii <- 1:k
      node <- list(level = lev, count = vv[1 + ii], child = vector("list", k))
      node $ total <- sum(node $ count)

      vv <- vv[ - c(ii, k+1)]# the first 1..(k+1) ones
      for(i in ii) {
	  r <- .vvec2tree(vv, k=k, chk.lev = chk.lev+1)
          vv <-
              if(!is.null(r)) {
                  node$child[[i]] <- r[[1]]
                  r[[2]]
              } else vv[-1] ## child[i] remains NULL
      }
      node$level[2] <- ## parent level :
          if(all(sapply(node$child, is.null))) { ## this is a leaf
              node$child <- NULL
              0 # parent level
          } else 1:1 + max(sapply(node$child, function(n)
                                  if(is.null(n)) 0 else n$level[2]))
      node$level <- as.integer(node$level)
      if(lev > 0)
	  list(node, vv)
      else { ## toplevel
	  class(node) <- "vtree"
	  node
      }
  }
  ## else return NULL
}

str.vtree <- function(object, ...)
{
    ## Purpose: str method for "vtree" lists  [[recursive]]
    if(!is.null(lev <- object$level)) {
	nch <- length(object$child)
	cat(if(lev[1])
	    paste(rep(" ", lev[1]+1), collapse="..") else "`vtree':\n",
            paste("{", lev[2],"}", sep=""),
	    format(object$count),"|", object$total, "; ",
	    if(nch) paste(nch,"children") else "_leaf_",
	    "\n")
	for(ch in object$child)
	    str.vtree(ch, ...)
    }
}


###- as.dendrogram() method - in order to plot the context - tree

## Generic and hclust method are in
##  ~/R/D/r-devel/R/src/library/stats/R/dendrogram.R

### FIXME:
### =====

## (*) Add "midpoint" such that I can plot with center = FALSE
##     (I *can* plot, but it's not okay;   center = TRUE is okay)

## CONSIDER:  Make this a new class *inheriting* from dendrogram
##            The new class would have its own print and plot method...

as.dendrogram.vlmc <- function(object, ...)
{
    if(!is.vlmc(object))
        stop("first argument must be a \"vlmc\" object; see ?vlmc")
    vvec <- (object $ vlmc.vec)#.Alias
    k <- object $ alpha.len
    if(vvec[1] != k) stop("invalid vlmc structure {alpha.len}")
    p <- unname(object $ size["ord.MC"]) # maximal MC order
    abc <- alphabet(object)

    vv2dendro <- function(vv, cl.lev)
    {
        ## construct the nested list, level `cl.lev' from `vvec' -- recursively!
        if((lev <- vv[1]) >= 0) { ## non-end
            if(lev != cl.lev)
                stop(paste("malformed vlmc tree at level",cl.lev))

            ii <- 1:k
            node <- vector("list", k)   # k children
            names(node) <- 0:(k-1)
            count <- vv[1 + ii] 	# and their counts
            vv <- vv[ - c(ii, k+1)]     # drop the first 1..(k+1) ones
            for(i in ii) { ## extract child[i] (and *its* children)
                r <- vv2dendro(vv, cl.lev = cl.lev+1)
                ## downdating `vv',  updating node[[i]]:
                vv <-
                    if(!is.null(r)) {
                        node[[i]] <- r[[1]]
                        attr(node[[i]], "edgetext") <- abc[i]

                        r[[2]]
                    }
                    else ## empty child[i]; drop NULL node[[i]] below
                        vv[-1]
            }
            ##- cat("lev=",lev,";", "count=",count,"  vv : \n"); str(vv)
            if(all(kids0 <- sapply(node, is.null))) { ## this is a leaf
                node <- sum(count)
                attr(node,"members") <- 1:1
                attr(node,"leaf") <- TRUE
            }
            else { ## has at least one child;
                node[kids0] <- NULL ## drop the NULL ones (but remember which!)
                attr(node,"0-kids") <- (0:(k-1))[kids0]
                ## attr(node,"height") <- ## parent level :
                ##    1:1 + max(sapply(node, function(n) attr(n,"height")))
                attr(node,"members") <- ## parent level :
                    sum(sapply(node, function(n) attr(n,"members")))
            }
            attr(node,"height") <- p - lev
            ## keep the full count[] :
            attr(node,"count") <- count

            list(node, vv)
        }
        ## else vv[1] = -1 :  return NULL
    }

    r <- vv2dendro(vvec[-1], 0)[[1]]
    class(r) <- "dendrogram"
    r
}

draw <- function(x, ...) UseMethod("draw")

draw.vlmc <-
function(x, kind = 3,
         flag = TRUE,
         show.hidden = 0,
         cumulative = TRUE,
         delta = cumulative,
         debug = FALSE, ...)
{
  ## Purpose: Draw a fitted "vlmc" object, see ?vlmc
  ## Author: Martin Maechler, Date: 21 Mar 2000, 12:10

  if(!is.vlmc(x))
      stop("first argument must be a \"vlmc\" object; see ?vlmc")
  ivlmc <- x $ vlmc
  invisible(
  .C("draw_p",
     vlmc.vec     = as.integer(ivlmc),
     size         = length(ivlmc),
     alpha.len    = as.integer  (x$ alpha.len),
     alpha        = as.character(x$ alpha),

     flag         = as.logical(flag),
     debug        = as.logical(debug),
     kind         = as.integer(kind),
     show.hidden  = as.integer(show.hidden),
     cumulative   = as.logical(cumulative),
     delta        = as.logical(delta),
     ## Not allowed because of character variable (alpha):
     ## DUP = FALSE,
     PACKAGE = "VLMC")
            )
}
## Purpose: Entropy of a fitted "vlmc" object, see ?vlmc
## ------------------------------------------------------------------------
## $Id: entropy.R,v 1.9 2002/02/05 09:14:34 maechler Exp $
## Author: Martin Maechler, Date:  5 Apr 2000, 18:31

## Entropy  ===  Log[Likelihood] !
entropy <- function(object)
{
    if(!is.vlmc(object))
        stop("first argument must be a \"vlmc\" object; see ?vlmc")
    ivlmc <- object $ vlmc
    .C("entropy_sub",
       vlmc.vec     = as.integer(ivlmc),
       size         = length(ivlmc),
       alpha.len    = as.integer(object$ alpha.len),
       r = double(1),
       DUP = FALSE,
       PACKAGE = "VLMC")$r
}

logLik.vlmc <- function(object, ...)
{
    r <- entropy(object)
    attr(r, "df") <- (object$alpha.len - 1) * unname(object$size["context"])
    class(r) <- "logLik"
    r
}


### Maybe -- rather call this on 2 `vlmc' objects
entropy2 <- function(ivlmc1, ivlmc2, alpha.len = ivlmc1[1])
{
    ## Purpose: Entropy between two vlmc (sub) trees, see ?vlmc
    ## ------------------------------------------------------------------------
    ## Author: Martin Maechler, Date:  10 Apr 2000

###-- untested -- maybe non-sense
    if(0 >= (alpha.len <- as.integer(alpha.len)))
        stop("alphabet length must be >= 1")
    if(ivlmc2[1] != alpha.len)
        stop("alphabet length differs for 2nd arg")

    ##-- no checks, we really use the integer vectors themselves ..
    .C("entropy2_sub",
       vlmc.vec     = as.integer(ivlmc1), size = length(ivlmc1),
       vlmc2.vec    = as.integer(ivlmc2), size = length(ivlmc2),
       alpha.len    = alpha.len,
       r = double(1),
       DUP = FALSE,
       PACKAGE = "VLMC")$r
}

## Purpose: Akaike Information Criterion for VLMC objects
## -------------------------------------------------------------------------
## Arguments: VLMC object
## -------------------------------------------------------------------------
## Author: Martin Maechler, Date: 21 Dec 2000

##Now in R:  AIC <- function (object, ...) UseMethod()

## for R versions < 1.4:
if(paste(R.version$major, R.version$minor, sep=".") < 1.4)
    AIC.vlmc <- (AIC.lm)#.Alias
#### $Id: predict.R,v 1.11 2003/09/08 18:35:57 maechler Exp $
predict.vlmc <-
function(object, newdata,
         type = c("probs", "class","response", "id.node", "depth", "ALL"),
         se.fit = FALSE,
         ## dispersion = NULL,
         ## terms=NULL,
         allow.subset = TRUE, check.alphabet = TRUE,
         ...)
{
    ## Predict probabilities for new series from a fitted "vlmc" object
    ## -----------------------------------------------------------------------
    ## Author: Martin Maechler, Date: 10 Apr 2000, 16:36

    ## o type = c("link", "response", "terms"),
    ##		 is just `aped' from predict.glm ...
    ##   predict.multinom  seems better :
    ##	 	(object, newdata, type = c("class", "probs"), ...)
    ## 	probs := Pr[ X_i | context ]

    if(!is.vlmc(object))
	stop("first argument must be a \"vlmc\" object; see ?vlmc")

    type <- match.arg(type)

    ## FIXME: se.fit & dispersion are NOT YET supported
    if (!missing(se.fit))	.NotYetUsed("se.fit")
    ##if (!missing(dispersion))	.NotYetUsed("dispersion")

    alphabet <- strsplit(object$alpha,NULL)[[1]]

    ivlmc <- object $ vlmc

    ## newdata : must be discrete time series -- just as "dts" in  vlmc()
    if(missing(newdata)) {
        newdata <- object $ y
        if(is.null(newdata))
            stop("no `newdata' specified, and object$y is NULL")
    }
    else if(is.character(newdata)) {
	if(!all(1 == nchar(newdata)))
	    stop("character argument must have *all* 1-character strings")
    } else if(!(is.factor(newdata) || is.numeric(newdata)))
        stop("`newdata' must be discrete t.s.: character, factor, or numeric (in 0:m1)")

    if(!is.factor(newdata)) # must make sure the integer conversion is ok
        newdata <- factor(newdata, levels = alphabet)
    ## newdata is now a factor
    n <- length(newdata)
    int.data <- as.integer(newdata) - as.integer(1)

    if(check.alphabet) {
	nABC <- levels(newdata) # possibly unsorted
	alpha.len <- length(nABC)
	if(alpha.len > object$alpha.len)
	    stop("alphabet of `newdata' is larger than the vlmc fit `object' one")
	is.smaller <- alpha.len < object$alpha.len
	if(is.smaller && !allow.subset)
	    stop("alphabet of `newdata' is smaller than the vlmc fit `object' one")
	if(any(nchar(nABC) > 1)) {
	    nABC <- abbreviate(nABC, min=1)
	    if(any(nchar(nABC) > 1))
		nABC <-
		    if(alpha.len <= 10) as.character(0:(alpha.len - 1))
		    else letters[1:alpha.len]
	}
	Alpha <- paste(nABC, collapse = "")
        if(Alpha != if(is.smaller) substr(object$alpha, 1,alpha.len)
        else object$alpha)
	    warning(paste("alphabet of newdata `", Alpha,
			  "' differs from that of the vlmc fit `",
                          object$alpha,"'", sep=""))
    }
    m <- as.integer(object $ alpha.len)

    ##-- consider allowing MULTIPLE types simultaneously --
    ## "kind" coding for call to .C():
    kind <-
        as.integer(if(type == "ALL") 1 + 4 # Probs + ID => get everything
                   else switch(type,
                               probs = 1,
                               class =, response = 2,
                               id.node = 4,
                               depth = 8)
                   )
    res <- flags <- integer(n); res[] <- NA
    do.probs <- type %in% c("probs","ALL")
    probs <-
        if(do.probs)
            matrix(as.double(NA), n, m,
                   dimnames = list(as.character(newdata), alphabet))
###%-- TODO : first row of prob[], instead of NA, use marginals Pr[ Y[i] = k ]!!
        else double(0)

    ## This gives the prediction Probabilities / Class / Context.Nr / Depth
    r <- .C("predict_vlmc_p",
            vlmc.vec	= as.integer(ivlmc),
            size	= length(ivlmc),
            alpha.len	= m,
            Data	= int.data,
            data.len	= n,
            kind	= kind,

            ## Output (one of these, depending on `kind'):
            res		= res,
            flags	= flags,
            probs	= probs,

            NAOK	= TRUE,
            DUP		= FALSE,
            PACKAGE	= "VLMC")[c("res","probs","flags")]
    if(type == "probs")
        r[["probs"]] ## was  structure(r[["probs"]], flags = r[["flags"]])
    else if(type == "ALL") {
        names(r)[1] <- "ID"
        structure(c(r,
                    list(ctxt = id2ctxt(r $ID, m = m, alpha = object$alpha),
                         fitted = alphabet[max.col(r $probs)],
                         alpha = object$alpha, alpha.len = m)),
                  class = "predict.vlmc")
    }
    else structure(if (type == "class")
                        factor(alphabet[1+r[["res"]]], levels=alphabet)
                   else r[["res"]])# , flags = r[["flags"]]
}

fitted.vlmc <- function(object, ...) predict(object, type = "class", ...)

print.predict.vlmc  <- function(x, digits = getOption("digits"), ...)
{
    if(!inherits(x,"predict.vlmc") ||
       is.null(x$probs) || is.null(x$ID) || is.null(x$ctxt))
        stop("not a valid `predict.vlmc' object")
    Fprob <-
        ## MASS should be available, required in ./zzz.R
        if(exists("fractions",mode="function") # am still defensive ..
           ) function(x) as.character(fractions(x))
        else
        function(x) format(x,digits=digits)
    colnames(x$probs) <- paste("Pr[X=",colnames(x$probs),"]")
    print(noquote(cbind(fit = x$fitted,
                        Fprob(x$probs),
                        id  = x $ ID,
                        flags= x $ flags,
                        ctxt = x $ ctxt)))
    invisible(x)
}


if(FALSE) {
    ## NOTE:  base R
    ##  family.R has  the following deviance for the binomial family :
    dev.resids <- function(y, mu, wt)
	2 * wt * (y * log(ifelse(y == 0, 1, y/mu)) +
		  (1 - y) * log(ifelse(y == 1, 1, (1 - y)/(1 - mu))))
}

## OLD (and identical, but much slower):
deviance.vlmc <- function(object, ...)
{
    ## Purpose: Deviance =  - 2 * log likelihood(.)
    ## Author: Martin Maechler, Date: 26 Apr 2000, 15:30
    dn <- dimnames(pr <- predict(object))
    n <- nrow(pr)
    -2 * sum(log(pr[cbind(1:n, match(dn[[1]],dn[[2]]))[-1,]]))
}
## Fast
deviance.vlmc <- function(object, ...) -2 * entropy(object)

## Author: Martin Maechler, Date: 26 Apr 2000, 15:30

## Using "classwise" (a la multinom()) + the same "type"s as residuals.glm :
residuals.vlmc <-
    function(object, type = c("classwise", "deviance",
                     "pearson", "working", "response", "partial"),
             y = object$y, ...)
{
    type <- match.arg(type) # i.e. default is "classwise"
    if(!is.character(y))
        stop("y argument must be a proper discrete time series (as char.)")
    if(!is.numeric(m <- object$alpha.len))
        stop("no valid $alpha.len in first argument")

    switch(type,
           classwise =,
           deviance = {
               n <- nrow(pr <- predict(object, new = y))# n x m  matrix
               ij <- cbind(1:n, match(y, colnames(pr)))
           },
           pearson =,
           working =,
           response =
           n <- length(mu <- predict(object, new = y, type = "response")),
                                        # in {0:(m-1)}
           ## mu differs from glm(., binomial) for m = 2 {have no link!}

           partial = stop("partial residuals not yet available"))

    if(type == "classwise") {
        Y <- matrix(0, n, m)
        Y[ij] <- 1
        return(Y - pr)
    }
    else { ## type != "classwise"
        ##if(m > 2 && !quiet) ## dubious for m > 2!
        ##warning("vlmc( m > 2 ): mostly only `classwise' residuals make sense")
        yi <- alpha2int(y, object$alpha)
        switch(type,
               deviance = {
                   ## The absolute deviance residuals for general |alphabet| :
                   dr <- sqrt(-2*log(pr[ij]))
                   mu <- yi[max.col(pr)]# << some randomness here (sign only)!
                   ifelse(yi > mu, dr, -dr)
               },
               pearson =,
               working =,
               response = yi - mu)
    }
}

## This is how  residuals.glm(.)  works  {for dev.resids(), see above }
if(FALSE){
    mu	<- (object$fitted.values)#.Alias
    wts <- (object$prior.weights)#.Alias
    switch(type,
	   deviance = if(object$df.res > 0) {
	       d.res <- sqrt(pmax((object$family$dev.resids)(y, mu, wts), 0))
	       ifelse(y > mu, d.res, -d.res)
	   } else rep(0, length(mu)),
	   pearson = object$residuals * sqrt(object$weights),
	   working = object$residuals,
	   response = y - mu,
	   partial = object$residuals + predict(object,type="terms")
	   )
}

RCplot <-
    function(x, r2 = residuals(x, "deviance")^2,
             alphabet = x$alpha, lab.horiz = k <= 20, do.call = TRUE,
             cex.axis = if(k <= 20) 1 else if(k <= 40) 0.8 else 0.6,
             y.fact = if(.Device == "postscript") 1.2 else 0.75,
             col = "gray70", xlab = "Context", main = NULL,
             med.pars = list(col = "red", pch = 12, cex= 1.25 * cex.axis),
             ylim = range(0, r2, finite=TRUE), ...)
{
    ## Author: Martin Maechler, Date:  1 Mar 2002, 17:39
    namx <- deparse(substitute(x))
    if(!is.vlmc(x)) stop("`x' must be a fitted VLMC object")
    fID <- id2ctxt(predict(x, type="id"), alpha = alphabet)
    ok <- fID != "NA"
    ## drop those with "NA" context (at least the first one!)
    ## FIXME: should we tell about this ?
    fID <- as.factor(fID[ok])
    r2 <- r2[ok]
    tfID <- table(fID)
    k <- length(tfID)
    if(is.null(main))
        main <- paste(if(!do.call)"VLMC", "Residuals vs. Context")
    labs <- c("#{obs}:", tfID)
    if(!lab.horiz && missing(ylim)) { ## use space *below* 0 line
        ## find out about string width: -> use x-direction
        op <- par(xaxs = "i"); plot.new(); plot.window(xlim=ylim, ylim=0:1)
        y0 <- y.fact * max(strwidth(labs, cex=cex.axis))
        ylim[1] <- min(ylim[1], - 1.4 * y0)
        par(op)
    }
    op <- par(cex.axis = cex.axis)
    on.exit(par(op))
    ## plot.factor calling (and returning) boxplot():
    rp <- plot(fID, r2, varwidth = TRUE, xlab = xlab, main = main,
               ylab = paste("residuals(",namx,", \"deviance\") ^ 2", sep=""),
               ylim = ylim, col = col, las = if(lab.horiz) 0 else 2, ...)
    abline(h = 0, lty = 3)
    if(any(i0 <- abs(meds <- rp$stats[3,]) < 1e-3))
        do.call("points", c(list(x=which(i0), y= meds[i0]), med.pars))
    if(lab.horiz)
        text(c(.2, 1:k), -.1, labs, xpd=FALSE, cex=cex.axis)
    else {
        text(c((par("usr")[1]+1)/2, 1:k), -0.3*y0, labs,
             xpd=NA, srt = 90, adj = c(1, 0.5), cex=cex.axis)
    }
    if(do.call) mtext(deparse(x$call))
    invisible(list(k = k, fID = fID, rp = rp))
}
simulate.vlmc <-
    function(x, n, n.start = 64 * x$size["context"], integer.return = FALSE)
{
    ## Author: Martin Maechler, Date: 10 Apr 2000
    if(!is.vlmc(x))
	stop("first argument must be a \"vlmc\" object; see ?vlmc")
    ivlmc <- x $ vlmc

    if(0 > (n <- as.integer(n)))
        stop("require output-length n must be >= 0")
    n <- as.integer(n + n.start)
    m <- as.integer(x $ alpha.len)

    iy <- .C("sim_vlmc",
             vlmc.vec	= as.integer(ivlmc),
             size	= length(ivlmc),
             m          = m,
             data.len	= n,
             y 		= integer(n),
             DUP	= FALSE,
             PACKAGE	= "VLMC")$y[-(1:n.start)]

    if(integer.return) iy else strsplit(x$alpha, NULL)[[1]][1 + iy]
}





#### $Id: vlmc.R,v 1.31 2004/05/05 16:41:47 maechler Exp $
vlmc.version <- "VLMC 1.3-7;  after $Date: 2004/05/05 16:41:47 $ UTC"
##		      ----- same as the one in ../DESCRIPTION !

vlmc <-
function(dts,
	 cutoff.prune =
		qchisq(alpha.c, df= max(0.1,alpha.len-1), lower.tail=FALSE)/2,
	 alpha.c = 0.05,
	 threshold.gen = 2,
         code1char = TRUE,
	 y = TRUE, debug = FALSE, quiet = FALSE,
	 dump = 0, ctl.dump = c(width.ct = 1+log10(n), nmax.set = -1)
	 )
{
  ## Purpose: Fit a VLMC to a discrete time-serie
  ## ----------------------------------------------------------------------
  ## Arguments: dts : numeric / character / factor
  ## ----------------------------------------------------------------------
  ## Author: Martin Mchler, Date: 17 Mar 2000

  cl <- match.call()
  if(!is.atomic(dts))
      stop("vlmc() only works on vectors (integer, character, factor)")
  if(is.character(dts)) {
    if(!all(i1 <- (1 == (nc <- nchar(dts)))))
        ## FIXME, change this to a `note()'!
      warning("character argument has elements of more than 1 character")
  }
  ## common format: factor w/ levels =^= alphabet
  n <- length(f.dts <- as.factor(dts))
  Data <- as.integer(f.dts) - 1:1 #-> is integer in {0,1,...}
  alphabet <- levels(f.dts)# possibly unsorted!
  alpha.len <- length(alphabet)
  ## FIXME
  if(alpha.len > length(LETTERS))
    stop("alphabet too large; currently limited to maximally 26 letters")
  ialph <- 0:(alpha.len - 1)
  if(code1char && any(nchar(alphabet) > 1)) {
    if(!quiet)
	warning("alphabet with >1-letter strings; trying to abbreviate")
    alphabet <- abbreviate(alphabet, min=1)
    if(any(nchar(alphabet) > 1))
      alphabet <-
	if(alpha.len <= 10) as.character(ialph)
	else letters[1:alpha.len]
  }
  Alpha <- paste(alphabet, collapse = "")
  if(debug)
    cat("vlmc: Alpha = '",Alpha,"' ; |X| = ",alpha.len,"\n", sep="")
  ## Check consistency of Data & alphabet :
  idat <- sort(as.integer(names(table(Data))))
  if(!all(ialph == idat)) {
    if(!is.null(xtraD <- setdiff(idat, ialph)))
      stop(paste("Data has 'letters' not in alphabet:",
		 paste(xtraD,collapse=", ")))
    else if(!quiet)
        warning("alphabet is larger than set of values in Data")
  }

  dump <- as.integer(dump[1])
  if(dump < 0) stop("`dump' must be non-negative integer")
  if(dump > 0) {
    ctl.dump <- as.integer(ctl.dump)
    if(length(ctl.dump) != 2) stop("`ctl.dump' must be integer(2).")
    if(ctl.dump[2] < 1) # default -- FIXME : should depend also on cutoff..
	ctl.dump[2] <- as.integer(max(6, 15 - log10(n)))
    if(ctl.dump[1] < 0) stop("`ctl.dump[1]' must be non-negative.")
    ## Fixme : need even more consistency checks ..
  }
  if(debug) cat("vlmc: ctl.dump = ",ctl.dump,"\n")

  r <- .C("vlmc_p",
	  data	 = Data,
	  n	 = n,
	  threshold.gen= as.integer(threshold.gen),
	  cutoff.prune = as.double(cutoff.prune),
	  alpha.len    = as.integer(alpha.len),
	  alpha	       = as.character(Alpha),
	  debug	     = as.integer(as.logical(debug)),
	  dump.flags = as.integer(c(dump, ctl.dump)),

	  size = integer(4),
	  ## Not allowed because of character variable (alpha):
	  ## DUP = FALSE,
	  PACKAGE = "VLMC")
  ## Now that we know the size of the result, we can "give" the space,
  ## and put the result tree (as integer vector) into it:
  names(r$size) <- c("total","nr.leaves","context","ord.MC")
  r$size <- rev(r$size)
  rvec <- .C("getvlmc",
	     size = r$size["total"],
	     vlmc.vec = integer(r$size["total"]),
	     ##DUP = FALSE,
	     PACKAGE = "VLMC")$vlmc

  ## Consistency checks (catch some programming errors):
  if(alpha.len != rvec[1])
      warning(paste(".C(\"vlmc\"..) : |alphabet| inconsistency:",
		    alpha.len, "!=", rvec[1]))
  r$vlmc.vec <- rvec
  if(y) r$y <- alphabet[1:1 + Data]
  r$data <- r$debug <- r$dump.flags <- NULL
  r$call <- cl
  class(r) <- "vlmc"
  r
}

is.vlmc <- function(x)
    inherits(x, "vlmc") && is.integer(x$vlmc.vec) && is.character(x$alpha)

print.vlmc <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
  ## Purpose: "print" Method for "vlmc" objects
  ## ----------------------------------------------------------------------
  ## Author: Martin Mchler, Date: 18 Mar 00, 11:26
  if(!is.vlmc(x)) stop("first argument must be a \"vlmc\" object; see ?vlmc")
  ox <- x
  vvec <- (x $ vlmc.vec)#.Alias
  used.args <- names(x$call)
  cat("\n`vlmc', a Variable Length Markov Chain;\n\t alphabet '",x$alpha,
      "', |alphabet| = ",x$alpha.len,
      ", n = ",x$n,".\nCall: ",deparse(x$call),
      if(!any(used.args %in% c("cutoff.prune","alpha.c")))
      paste(";	default cutoff =", format(x$cutoff,digits=digits)),
      ##";  |result| = ", length(vvec), ", MC order = ", x$size[4],
      "\n -> extensions (= $size ) :\n",sep="")
  print(x $ size)
  cat("AIC = ", format(AIC(x), digits = digits), "\n")
  invisible(ox)
}

### Accessors; well, this may be too extreme OO:
if(FALSE) {
 size <- function(x,...) UseMethod("size")
 size.vlmc <-
  function(x, type = c("ord.MC", "context", "nr.leaves", "total")) x$size[type]
}

summary.vlmc <- function(object, ...)
{
  ## Purpose: "summary" Method for "vlmc" objects
  ## -----------------------------------------------------------
  ## Author: Martin Mchler, Date: 1 Apr 00, 11:26

    p <- predict(object, type = "class")
    conf.tab <- table(data = object$y, predicted = p)
    structure(c(object,
		list(confusion.table = conf.tab,
		     depth.stats = summary(predict(object, type = "depth")[-1]),
		     R2 = sum(diag(conf.tab))/object$n)),
	      class = c("summary.vlmc", class(object)))
}

print.summary.vlmc <-
    function(x, digits = getOption("digits"), vvec.printing = FALSE, ...)
{
  ## Purpose: "print" Method for "vlmc.summary" objects
  ## -----------------------------------------------------------
  ## Author: Martin Mchler, Date: 1 Apr 00, 11:30

  print.vlmc(x, digits = digits, ...)
  cat("R^2 = %{correctly predicted} = ",
      format(100 * x$R2, digits= max(2, digits - 3)), "%\n", sep="")
  cat("Confusion matrix:\n")
  print(x$confusion.table, digits = digits, ...)
  cat("Markov chain depths along the data:\n")
  print(noquote(formatC(x$depth.stats, digits = digits)))

  if(FALSE) { ## doesn't make sense anymore
      x$call <- x$vlmv.vec <- NULL
      str(unclass(x), vec.len = 7, digits.d = digits,...)
  }

  if(vvec.printing) {
      vvec <- (x $ vlmc.vec)#.Alias
      if(vvec[1] != x$alpha.len)
	  stop("invalid vlmc structure {alpha.len}")
      if((lV <- length(vvec)) > 10000)
	  warning("|vvec| > 10000; not printing. Use `prt.vvec()' if you want")
      else {
	  cat("\ncontext tree encoding `vvec'tor:\n")
	  if(2*lV > getOption("expressions")) {
	      oop <- options(expressions = 2*lV)
	      on.exit(options(oop))
	  }
	  prt.vvec(vvec[-1], nalph = vvec[1])
      }
  }

  invisible(x)
} ## print.summary.vlmc

prt.vvec <- function(v, nalph, pad = " ")
{
  ## Purpose: RECURSIVEly print result vector of a vlmc -- not knowing alphabet
  ## ----------------------------------------------------------------------
  ## Author: Martin Mchler, Date: 18 Mar 00, 16:53
  lv <- length(v)
  if(!lv) {
    cat("\n"); return()
  }
  else if(v[1] == -1) {
    cat(" -")
    i <- NULL
  }
  else if (lv <= nalph)
      stop("v[] is not long enough")
  else {
    i <- 1 + 1:nalph
    cat(if(v[1] != 0) "\n",
	sapply(3*v[1], function(n)paste(character(n),collapse= pad)),
	"{",v[1],"} [", paste(formatC(v[i],w=1),collapse=", "), "]", sep="")
  }
  prt.vvec(v[-c(1,i)], nalph, pad = pad)
}
.First.lib <- function(lib, pkg) {
    library.dynam("VLMC",pkg,lib)
    require(MASS)# only for fractions() and rational()
    if(paste(R.version$major, R.version$minor, sep=".") < 1.9)
        require(mva) else require(stats)# for dendrogram class & methods
}
