### R code from vignette source 'evtree.Rnw'

###################################################
### code chunk number 1: setup
###################################################
options(prompt = "R> ", continue = "+  ", width = 70, useFancyQuotes = FALSE)
library("rpart")
library("evtree")
data("BBBClub", package = "evtree")
cache <- FALSE


###################################################
### code chunk number 2: chess22
###################################################
X1 <- rep(seq(0.25, 1.75, 0.5), each = 4)
X2 <- rep(seq(0.25, 1.75, 0.5), 4)
Y <- rep(1, 16)
Y[(X1 < 1 & X2 < 1) | (X1 > 1 & X2 > 1)] <- 2
Y <- factor(Y, labels = c("O", "X"))
chess22 <- data.frame(Y, X1, X2)
print(evtree(Y ~ ., data = chess22, minbucket = 1, minsplit = 2))


###################################################
### code chunk number 3: chess22-plot
###################################################
plot(X2 ~ X1, data = chess22, xlim = c(0, 2), ylim = c(0, 2), pch = c(1, 4)[Y], col = c("black", "slategray")[Y])


###################################################
### code chunk number 4: BBBClub-rpart-ctree (eval = FALSE)
###################################################
## data("BBBClub", package = "evtree")
## library("rpart")
## rp  <- as.party(rpart(choice ~ ., data = BBBClub, minbucket = 10))
## rp2 <- as.party(rpart(choice ~ ., data = BBBClub, minbucket = 10,
##   maxdepth = 2))
## ct  <- ctree(choice ~ ., data = BBBClub, minbucket = 10, mincrit = 0.99)
## ct2 <- ctree(choice ~ ., data = BBBClub, minbucket = 10, mincrit = 0.99,
##   maxdepth = 2)
## plot(rp)
## plot(ct)


###################################################
### code chunk number 5: BBBClub-evtree (eval = FALSE)
###################################################
## set.seed(1090)
## ev <- evtree(choice ~ ., data = BBBClub, minbucket = 10, maxdepth = 2)


###################################################
### code chunk number 6: BBBClub-cache
###################################################
if(cache & file.exists("BBBClub-trees.rda")) {
load("BBBClub-trees.rda")
} else {
data("BBBClub", package = "evtree")
library("rpart")
rp  <- as.party(rpart(choice ~ ., data = BBBClub, minbucket = 10))
rp2 <- as.party(rpart(choice ~ ., data = BBBClub, minbucket = 10,
  maxdepth = 2))
ct  <- ctree(choice ~ ., data = BBBClub, minbucket = 10, mincrit = 0.99)
ct2 <- ctree(choice ~ ., data = BBBClub, minbucket = 10, mincrit = 0.99,
  maxdepth = 2)
plot(rp)
plot(ct)
set.seed(1090)
ev <- evtree(choice ~ ., data = BBBClub, minbucket = 10, maxdepth = 2)
if(cache) {
  save(rp, rp2, ct, ct2, ev, file = "BBBClub-trees.rda")
} else {
  if(file.exists("BBBClub-trees.rda")) file.remove("BBBClub-trees.rda")
}
}


###################################################
### code chunk number 7: BBBClub-rpart-plot
###################################################
plot(rp)


###################################################
### code chunk number 8: BBBClub-ctree-plot
###################################################
plot(ct)


###################################################
### code chunk number 9: BBBClub-evtree-display
###################################################
plot(ev)
ev


###################################################
### code chunk number 10: BBBClub-evtree-plot
###################################################
plot(ev)


###################################################
### code chunk number 11: evtree-performance
###################################################
mc <- function(obj) 1 - mean(predict(obj) == BBBClub$choice)
evalfun <- function(obj) 2 * nrow(BBBClub) * mc(obj) +
  width(obj) * log(nrow(BBBClub))
trees <- list("evtree" = ev, "rpart" = rp, "ctree" = ct, "rpart2" = rp2,
  "ctree2" = ct2)
round(sapply(trees, function(obj) c("misclassification" = mc(obj),
  "evaluation function" = evalfun(obj))), digits = 3)


###################################################
### code chunk number 12: evtree-structure
###################################################
ftable(tab <- table(evtree = predict(ev), rpart  = predict(rp),
  ctree  = predict(ct), observed = BBBClub$choice))
sapply(c("evtree", "rpart", "ctree"), function(nam) {
  mt <- margin.table(tab, c(match(nam, names(dimnames(tab))), 4))
  c(abs = as.vector(rowSums(mt))[2],
    rel = round(100 * prop.table(mt, 1)[2, 2], digits = 3))
})


###################################################
### code chunk number 13: benchmark-results
###################################################
## load results
for(i in Sys.glob("results/*.RData")) load(i)

## preprocess for reference evtree
preprocess <- function(d, dname = "datasetname", isclassification = TRUE){
    if(isclassification) d[, 1:3] <- 1 - d[ ,1:3]
    d <- as.data.frame(d)
    colnames(d) <- c("evtree", "rpart", "ctree","evtree", "rpart", "ctree")
    for(i in 3:1) d[, i] <- d[, i] / d[, 1] * 100
    for(i in 6:4) d[, i] <- d[, i] / d[, 4] * 100
    x <- d[, 1:3]
    y <- d[, 4:6]
    rval <- reshape(x, idvar="samp", times=names(x), timevar = "alg",varying= list(names(x)), direction="long")
    names(rval)[2] <- "accuracy"
    rval$complexity <- reshape(y, idvar="samp", times=names(y), timevar = "alg",varying= list(names(y)), direction="long")[,2]
    rval$alg <- factor(rval$alg, levels = c("evtree", "ctree", "rpart"))
    rval$ds <- dname
    rval
}

## collect results for all datasets
r <- rbind(
  preprocess(d = rglass, dname = "Glass identification", isclassification = TRUE),
  preprocess(d = rheart, dname = "Statlog heart", isclassification = TRUE),
  preprocess(d = rionosphere, dname = "Ionosphere", isclassification = TRUE),
  preprocess(d = rmusk, dname = "Musk", isclassification = TRUE),
  preprocess(d = rbreastcancer, dname = "Breast cancer database", isclassification = TRUE),
  preprocess(d = rpima, dname = "Pima Indians diabetes", isclassification = TRUE),
  preprocess(d = rvowel, dname = "Vowel", isclassification = TRUE),
  preprocess(d = rcredit, dname = "Statlog German credit", isclassification = TRUE),
  preprocess(d = rcontraceptive, dname = "Contraceptive method", isclassification = TRUE),
  preprocess(d = rdna, dname = "DNA", isclassification = TRUE),
  preprocess(d = rspam, dname = "Spam", isclassification = TRUE),
  preprocess(d = rmagicgamma, dname = "Magic gamma telescope", isclassification = TRUE),
  preprocess(d = rservo, dname = "Servo", isclassification = FALSE),
  preprocess(d = rbostonhousing, dname = "Boston housing", isclassification = FALSE),
  preprocess(d = rmel0101, dname = "MEL0101", isclassification = FALSE),
  preprocess(d = rhdg0202, dname = "HDG0202", isclassification = FALSE),
  preprocess(d = rhdg0502, dname = "HDG0502", isclassification = FALSE)
)
r$ds <- factor(r$ds)
r$samp <- factor(r$samp)
r$dssamp <- r$ds:r$samp

## compute multiple comparisons
library("multcomp")
cstats <- function(alg = "rpart", value = "accuracy", data = r) {
  dlab <- rev(unique(data$ds))
  k <- length(dlab)
  mean  <- numeric(k)
  lower <- numeric(k)
  upper <- numeric(k)
  names(data)[names(data) == value] <- "value"
  for(i in 1:k) {
    mod1 <- lm(value ~ alg, data = subset(data, ds == dlab[i]))
    pt <- glht(mod1, linfct = mcp(alg = "Dunnett"))
    w <- confint(pt)$confint
    d <- which(levels(r$alg) == alg) - 2
    mean[i]  <-  w[1+d]
    lower[i] <-  w[3+d]
    upper[i] <-  w[5+d]
  }
  rval <- data.frame(mean, lower, upper)
  rownames(rval) <- dlab
  return(rval)
}

acc_rpart <- cstats("rpart", "accuracy")
com_rpart <- cstats("rpart", "complexity")
acc_ctree <- cstats("ctree", "accuracy")
com_ctree <- cstats("ctree", "complexity")

## function for visualization
ciplot <- function(x, xlim = NULL, main = "", xlab = "", ylab = TRUE) {
  nam <- rownames(x)
  k <- length(nam)
  plot(x$mean, 1:k, xlim = xlim, axes = FALSE, xlab = "", ylab = "", pch = 19)
  arrows(x$lower, 1:k, x$upper, 1:k, angle = 90, length = 0.05, code = 3)
  if(xlab == "") axis(1, labels = FALSE) else axis(1)
  if(ylab) ylab <- nam
  axis(2, at = 1:k, labels = ylab, las = 1, cex = 0.8)  
  axis(2, at = k + 1.5, labels = main, tick = FALSE, las = 1, outer = TRUE, cex.axis = 1.5, xpd = TRUE)
  mtext(xlab, side = 1, line = 3, xpd = TRUE)
  abline(h = 5.5)
  abline(v = 0, lty = 2)  
  box()
}


###################################################
### code chunk number 14: benchmark-plot
###################################################
par(mfrow = c(2, 2), oma = c(5, 10, 2, 0), mar = c(1, 1, 2, 1))

xlim1 <- range(cbind(acc_rpart, acc_ctree))
xlim2 <- range(cbind(com_rpart, com_ctree))

ciplot(acc_rpart, xlim = xlim1, main = "rpart", ylab = TRUE, xlab = "")
ciplot(com_rpart, xlim = xlim2, main = "",      ylab = FALSE, xlab = "")
ciplot(acc_ctree, xlim = xlim1, main = "ctree", ylab = TRUE,
  xlab = "relative difference in predictive accuracy (%)")
ciplot(com_ctree, xlim = xlim2, main = "",      ylab = FALSE,
  xlab = "relative difference in complexity (%)")


###################################################
### code chunk number 15: chessboard
###################################################
chessboard44 <- function(n = 4000, noisevariables = 6, noise = 0) {
  chess44 <- array(0,c(n,noisevariables+3))
  for(i in 1:(noisevariables+2))
      chess44[,i] <- as.numeric(runif(dim(chess44)[1]))*4

   x <- chess44[,1]
   y <- chess44[,2]
   chess44[, ncol(chess44)] <- 0
   for(k in 1:4)  
      chess44[(x <= k & x > k-1 & y <= k & y > k-1), ncol(chess44)] <- 1
   for(k in 1:2)  
      chess44[(x <= k & x > k-1 & y <= k+2 & y > k+1), ncol(chess44)] <- 1
   for(k in 1:2)  
      chess44[(y <= k & y > k-1 & x <= k+2 & x > k+1), ncol(chess44)] <- 1

   if(noise > 0) {
      flipclasslist <- sample(n, n * (noise / 100), replace = FALSE)

      for(i in 1:length(flipclasslist)){
	  if(chess44[flipclasslist[i], ncol(chess44)] == 1)
	      chess44[flipclasslist[i], ncol(chess44)] = 0
	  else if(chess44[flipclasslist[i], ncol(chess44)] == 0)
	      chess44[flipclasslist[i], ncol(chess44)] = 1
      }
  }

  chess44 <- as.data.frame(chess44)
  chess44[,ncol(chess44)] <- as.factor(chess44[,ncol(chess44)])
  names(chess44) <- c(paste("X", 1:8, sep = ""), "Y")
  chess44
}


###################################################
### code chunk number 16: chessboard-plot
###################################################
chess44 <- chessboard44(2000)
plot(X2 ~ X1, data = chess44, xlim = c(0, 4), ylim = c(0, 4), pch = c(1, 4)[Y], col = c("black", "slategray")[Y])


###################################################
### code chunk number 17: chessboard-table
###################################################
library("xtable")
load("./results/chessboard44_0.RData")
load("./results/chessboard44_5.RData")
load("./results/chessboard44_10.RData")

chesstable_means  <- as.data.frame( rbind(apply(rchessboard44_0,2,mean), apply(rchessboard44_5,2,mean) , apply(rchessboard44_10,2,mean) )) 
names(chesstable_means) <-  c("\\code{evtree}", "\\code{rpart}", "\\code{ctree}", "\\code{evtree}", "\\code{rpart}", "\\code{ctree}")
chesstable_means[,1:3] <-  format(chesstable_means[,1:3]*100, digits=1, nsmall=1)
chesstable_means[,4:6] <-  format(chesstable_means[,4:6], digits=1, nsmall=1)

chesstable_sd  <- as.data.frame( rbind(apply(rchessboard44_0,2,sd), apply(rchessboard44_5,2,sd) , apply(rchessboard44_10,2,sd) )) 
names(chesstable_sd) <-  c("\\code{evtree}", "\\code{rpart}", "\\code{ctree}", "\\code{evtree}", "\\code{rpart}", "\\code{ctree}")
chesstable_sd[,1:3] <-  format(chesstable_sd[,1:3]*100, digits=1, nsmall=1)
chesstable_sd[,4:6] <-  format(chesstable_sd[,4:6], digits=1, nsmall=1)

chesstable <- chesstable_means
for(j in 1:ncol(chesstable_means)){
	for(i in 1:nrow(chesstable_means)){
		chesstable[i,j] <- paste(chesstable_means[i,j] ,  "(", chesstable_sd[i,j], ")",  sep="")	
	}
}

chesstable <- cbind(as.integer(rbind(0,5,10)), chesstable)
colnames(chesstable)[1] = ""

print(xtable(chesstable,
caption = "Mean (and standard deviation) of accuracy and number of terminal nodes for simulated $4 \\times 4$ chessboard examples.",
caption.placement= "bottom",
label= "tab:resultsChessboard"), 
include.rownames = FALSE, allign= "rllllll", hline.after=NULL,
sanitize.text.function = identity,
add.to.row=list(pos=list(-1,-1, 0, 3), command=c(
"\\toprule", 
c("\\multicolumn{1}{l}{Noise (\\%)} & \\multicolumn{3}{l}{Accuracy}  & \\multicolumn{3}{l}{Terminal nodes}\\\\",
"\\midrule",
"\\bottomrule"
)))
)


