R : Copyright 2005, The R Foundation for Statistical Computing Version 2.1.1 (2005-06-20), ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for a HTML browser interface to help. Type 'q()' to quit R. > ### *

> ### > attach(NULL, name = "CheckExEnv") > assign(".CheckExEnv", as.environment(2), pos = length(search())) # base > ## add some hooks to label plot pages for base and grid graphics > setHook("plot.new", ".newplot.hook") > setHook("persp", ".newplot.hook") > setHook("grid.newpage", ".gridplot.hook") > > assign("cleanEx", + function(env = .GlobalEnv) { + rm(list = ls(envir = env, all.names = TRUE), envir = env) + RNGkind("default", "default") + set.seed(1) + options(warn = 1) + delayedAssign("T", stop("T used instead of TRUE"), + assign.env = .CheckExEnv) + delayedAssign("F", stop("F used instead of FALSE"), + assign.env = .CheckExEnv) + sch <- search() + newitems <- sch[! sch %in% .oldSearch] + for(item in rev(newitems)) + eval(substitute(detach(item), list(item=item))) + missitems <- .oldSearch[! .oldSearch %in% sch] + if(length(missitems)) + warning("items ", paste(missitems, collapse=", "), + " have been removed from the search path") + }, + env = .CheckExEnv) > assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now > assign("ptime", proc.time(), env = .CheckExEnv) > grDevices::postscript("randomForest-Examples.ps") > assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv) > options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly")) > options(warn = 1) > library('randomForest') randomForest 4.5-12 Type rfNews() to see new features/changes/bug fixes. > > assign(".oldSearch", search(), env = .CheckExEnv) > assign(".oldNS", loadedNamespaces(), env = .CheckExEnv) > cleanEx(); ..nameEx <- "MDSplot" > > ### * MDSplot > > flush(stderr()); flush(stdout()) > > ### Name: MDSplot > ### Title: Multi-dimensional Scaling Plot of Proximity matrix from > ### randomForest > ### Aliases: MDSplot > ### Keywords: classif tree > > ### ** Examples > > set.seed(1) > data(iris) > iris.rf <- randomForest(Species ~ ., iris, proximity=TRUE, + keep.forest=FALSE) > MDSplot(iris.rf, iris$Species) Loading required package: RColorBrewer > ## Using different symbols for the classes: > MDSplot(iris.rf, iris$Species, palette=rep(1, 3), pch=as.numeric(iris$Species)) > > > > cleanEx(); ..nameEx <- "classCenter" > > ### * classCenter > > flush(stderr()); flush(stdout()) > > ### Name: classCenter > ### Title: Prototypes of groups. > ### Aliases: classCenter > ### Keywords: classif > > ### ** Examples > > data(iris) > iris.rf <- randomForest(iris[,-5], iris[,5], prox=TRUE) > iris.p <- classCenter(iris[,-5], iris[,5], iris.rf$prox) > plot(iris[,3], iris[,4], pch=21, xlab=names(iris)[3], ylab=names(iris)[4], + bg=c("red", "blue", "green")[as.numeric(factor(iris$Species))], + main="Iris Data with Prototypes") > points(iris.p[,3], iris.p[,4], pch=21, cex=2, bg=c("red", "blue", "green")) > > > > cleanEx(); ..nameEx <- "combine" > > ### * combine > > flush(stderr()); flush(stdout()) > > ### Name: combine > ### Title: Combine Ensembles of Trees > ### Aliases: combine > ### Keywords: regression classif > > ### ** Examples > > data(iris) > rf1 <- randomForest(Species ~ ., iris, ntree=50, norm.votes=FALSE) > rf2 <- randomForest(Species ~ ., iris, ntree=50, norm.votes=FALSE) > rf3 <- randomForest(Species ~ ., iris, ntree=50, norm.votes=FALSE) > rf.all <- combine(rf1, rf2, rf3) > print(rf.all) Call: randomForest(formula = Species ~ ., data = iris, ntree = 50, norm.votes = FALSE) Type of random forest: classification Number of trees: 150 No. of variables tried at each split: 2 > > > > cleanEx(); ..nameEx <- "getTree" > > ### * getTree > > flush(stderr()); flush(stdout()) > > ### Name: getTree > ### Title: Extract a single tree from a forest. > ### Aliases: getTree > ### Keywords: tree > > ### ** Examples > > data(iris) > ## Look at the third trees in the forest. > getTree(randomForest(iris[,-5], iris[,5], ntree=10), 3, labelVar=TRUE) left daughter right daughter split var split point status prediction 1 2 3 Petal.Width 0.80 1 2 0 0 0.00 -1 setosa 3 4 5 Petal.Width 1.55 1 4 0 0 0.00 -1 versicolor 5 6 7 Petal.Length 5.05 1 6 8 9 Sepal.Length 6.50 1 7 10 11 Petal.Width 1.70 1 8 12 13 Sepal.Length 5.95 1 9 0 0 0.00 -1 versicolor 10 14 15 Sepal.Width 2.85 1 11 0 0 0.00 -1 virginica 12 16 17 Petal.Length 4.65 1 13 0 0 0.00 -1 virginica 14 0 0 0.00 -1 versicolor 15 0 0 0.00 -1 virginica 16 0 0 0.00 -1 virginica 17 0 0 0.00 -1 versicolor > > > > cleanEx(); ..nameEx <- "grow" > > ### * grow > > flush(stderr()); flush(stdout()) > > ### Name: grow > ### Title: Add trees to an ensemble > ### Aliases: grow grow.default grow.randomForest > ### Keywords: regression classif > > ### ** Examples > > data(iris) > iris.rf <- randomForest(Species ~ ., iris, ntree=50, norm.votes=FALSE) > iris.rf <- grow(iris.rf, 50) > print(iris.rf) Call: randomForest(formula = Species ~ ., data = iris, ntree = 50, norm.votes = FALSE) Type of random forest: classification Number of trees: 100 No. of variables tried at each split: 2 > > > > cleanEx(); ..nameEx <- "imports85" > > ### * imports85 > > flush(stderr()); flush(stdout()) > > ### Name: imports85 > ### Title: The Automobile Data > ### Aliases: imports85 > ### Keywords: datasets > > ### ** Examples > > data(imports85) > imp85 <- imports85[,-2] # Too many NAs in normalizedLosses. > imp85 <- imp85[complete.cases(imp85), ] > ## Drop empty levels for factors. > imp85[] <- lapply(imp85, function(x) if (is.factor(x)) x[, drop=TRUE] else x) > > stopifnot(require(randomForest)) > price.rf <- randomForest(price ~ ., imp85, do.trace=10, ntree=100) | Out-of-bag | Tree | MSE %Var(y) | 10 | 8.872e+06 13.63 | 20 | 4.533e+06 6.96 | 30 | 4.314e+06 6.63 | 40 | 3.939e+06 6.05 | 50 | 3.845e+06 5.91 | 60 | 3.914e+06 6.01 | 70 | 3.759e+06 5.77 | 80 | 3.68e+06 5.65 | 90 | 3.751e+06 5.76 | 100 | 3.8e+06 5.84 | > print(price.rf) Call: randomForest(formula = price ~ ., data = imp85, do.trace = 10, ntree = 100) Type of random forest: regression Number of trees: 100 No. of variables tried at each split: 8 Mean of squared residuals: 3800154 % Var explained: 94.16 > numDoors.rf <- randomForest(numOfDoors ~ ., imp85, do.trace=10, ntree=100) ntree OOB 1 2 10: 17.19% 16.22% 18.52% 20: 11.40% 8.04% 16.05% 30: 13.47% 9.82% 18.52% 40: 12.44% 10.71% 14.81% 50: 11.40% 9.82% 13.58% 60: 12.44% 10.71% 14.81% 70: 11.92% 9.82% 14.81% 80: 10.88% 8.93% 13.58% 90: 11.40% 8.93% 14.81% 100: 11.40% 8.04% 16.05% > print(numDoors.rf) Call: randomForest(formula = numOfDoors ~ ., data = imp85, do.trace = 10, ntree = 100) Type of random forest: classification Number of trees: 100 No. of variables tried at each split: 4 OOB estimate of error rate: 11.4% Confusion matrix: four two class.error four 103 9 0.08035714 two 13 68 0.16049383 > > > > cleanEx(); ..nameEx <- "margin" > > ### * margin > > flush(stderr()); flush(stdout()) > > ### Name: margin > ### Title: Margins of randomForest Classifier > ### Aliases: margin plot.margin > ### Keywords: classif > > ### ** Examples > > set.seed(1) > data(iris) > iris.rf <- randomForest(Species ~ ., iris, keep.forest=FALSE) > plot(margin(iris.rf, iris$Species)) Loading required package: RColorBrewer > > > > cleanEx(); ..nameEx <- "na.roughfix" > > ### * na.roughfix > > flush(stderr()); flush(stdout()) > > ### Name: na.roughfix > ### Title: Rough Imputation of Missing Values > ### Aliases: na.roughfix na.roughfix.default na.roughfix.data.frame > ### Keywords: NA > > ### ** Examples > > data(iris) > iris.na <- iris > set.seed(111) > ## artificially drop some data values. > for (i in 1:4) iris.na[sample(150, sample(20)), i] <- NA > iris.roughfix <- na.roughfix(iris.na) > iris.narf <- randomForest(Species ~ ., iris.na, na.action=na.roughfix) > print(iris.narf) Call: randomForest(formula = Species ~ ., data = iris.na, na.action = na.roughfix) Type of random forest: classification Number of trees: 500 No. of variables tried at each split: 2 OOB estimate of error rate: 4.67% Confusion matrix: setosa versicolor virginica class.error setosa 50 0 0 0.00 versicolor 0 46 4 0.08 virginica 0 3 47 0.06 > > > > cleanEx(); ..nameEx <- "outlier" > > ### * outlier > > flush(stderr()); flush(stdout()) > > ### Name: outlier > ### Title: Compute outlying measures > ### Aliases: outlier outlier.randomForest outlier.default > ### Keywords: classif > > ### ** Examples > > set.seed(1) > iris.rf <- randomForest(iris[,-5], iris[,5], proximity=TRUE) > plot(outlier(iris.rf), type="h", + col=c("red", "green", "blue")[as.numeric(iris$Species)]) > > > > cleanEx(); ..nameEx <- "partialPlot" > > ### * partialPlot > > flush(stderr()); flush(stdout()) > > ### Name: partialPlot > ### Title: Partial dependence plot > ### Aliases: partialPlot partialPlot.default partialPlot.randomForest > ### Keywords: classif regression tree > > ### ** Examples > > data(airquality) > airquality <- na.omit(airquality) > set.seed(131) > ozone.rf <- randomForest(Ozone ~ ., airquality) > partialPlot(ozone.rf, airquality, Temp) > > data(iris) > set.seed(543) > iris.rf <- randomForest(Species~., iris) > partialPlot(iris.rf, iris, Petal.Width, "versicolor") > > > > cleanEx(); ..nameEx <- "plot.randomForest" > > ### * plot.randomForest > > flush(stderr()); flush(stdout()) > > ### Name: plot.randomForest > ### Title: Plot method for randomForest objects > ### Aliases: plot.randomForest > ### Keywords: classif regression tree > > ### ** Examples > > data(mtcars) > plot(randomForest(mpg ~ ., mtcars, keep.forest=FALSE, ntree=100), log="y") > > > > cleanEx(); ..nameEx <- "predict.randomForest" > > ### * predict.randomForest > > flush(stderr()); flush(stdout()) > > ### Name: predict.randomForest > ### Title: predict method for random forest objects > ### Aliases: predict.randomForest > ### Keywords: classif regression > > ### ** Examples > > data(iris) > set.seed(111) > ind <- sample(2, nrow(iris), replace = TRUE, prob=c(0.8, 0.2)) > iris.rf <- randomForest(Species ~ ., data=iris[ind == 1,]) > iris.pred <- predict(iris.rf, iris[ind == 2,]) > table(observed = iris[ind==2, "Species"], predicted = iris.pred) predicted observed setosa versicolor virginica setosa 5 0 0 versicolor 0 8 2 virginica 0 1 14 > > > > cleanEx(); ..nameEx <- "randomForest" > > ### * randomForest > > flush(stderr()); flush(stdout()) > > ### Name: randomForest > ### Title: Classification and Regression with Random Forest > ### Aliases: randomForest randomForest.formula randomForest.default > ### print.randomForest > ### Keywords: classif regression tree > > ### ** Examples > > ## Classification: > ##data(iris) > set.seed(71) > iris.rf <- randomForest(Species ~ ., data=iris, importance=TRUE, + proximity=TRUE) > print(iris.rf) Call: randomForest(formula = Species ~ ., data = iris, importance = TRUE, proximity = TRUE) Type of random forest: classification Number of trees: 500 No. of variables tried at each split: 2 OOB estimate of error rate: 4% Confusion matrix: setosa versicolor virginica class.error setosa 50 0 0 0.00 versicolor 0 47 3 0.06 virginica 0 3 47 0.06 > ## Look at variable importance: > round(importance(iris.rf), 2) setosa versicolor virginica MeanDecreaseAccuracy MeanDecreaseGini Sepal.Length 1.45 1.89 1.91 1.35 9.88 Sepal.Width 1.12 0.74 1.07 0.82 2.52 Petal.Length 3.67 4.39 4.19 2.51 41.27 Petal.Width 3.83 4.49 4.30 2.52 45.58 > ## Do MDS on 1 - proximity: > iris.mds <- cmdscale(1 - iris.rf$proximity, eig=TRUE) > op <- par(pty="s") > pairs(cbind(iris[,1:4], iris.mds$points), cex=0.6, gap=0, + col=c("red", "green", "blue")[as.numeric(iris$Species)], + main="Iris Data: Predictors and MDS of Proximity Based on RandomForest") > par(op) > print(iris.mds$GOF) [1] 0.4810833 0.5046561 > > ## The `unsupervised' case: > set.seed(17) > iris.urf <- randomForest(iris[, -5]) > MDSplot(iris.urf, iris$Species) Loading required package: RColorBrewer > > ## Regression: > ## data(airquality) > set.seed(131) > ozone.rf <- randomForest(Ozone ~ ., data=airquality, mtry=3, + importance=TRUE, na.action=na.omit) > print(ozone.rf) Call: randomForest(formula = Ozone ~ ., data = airquality, mtry = 3, importance = TRUE, na.action = na.omit) Type of random forest: regression Number of trees: 500 No. of variables tried at each split: 3 Mean of squared residuals: 293.4674 % Var explained: 73.26 > ## Show "importance" of variables: higher value mean more important: > round(importance(ozone.rf), 2) %IncMSE IncNodePurity Solar.R 11.95 10603.58 Wind 23.84 45485.92 Temp 45.27 54250.22 Month 5.26 1934.22 Day 1.23 6756.16 > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "rfImpute" > > ### * rfImpute > > flush(stderr()); flush(stdout()) > > ### Name: rfImpute > ### Title: Missing Value Imputations by randomForest > ### Aliases: rfImpute rfImpute.formula rfImpute.default > ### Keywords: regression classif tree > > ### ** Examples > > data(iris) > iris.na <- iris > set.seed(111) > ## artificially drop some data values. > for (i in 1:4) iris.na[sample(150, sample(20)), i] <- NA > set.seed(222) > iris.imputed <- rfImpute(Species ~ ., iris.na) ntree OOB 1 2 3 300: 4.67% 0.00% 8.00% 6.00% ntree OOB 1 2 3 300: 5.33% 0.00% 8.00% 8.00% ntree OOB 1 2 3 300: 5.33% 0.00% 8.00% 8.00% ntree OOB 1 2 3 300: 6.00% 0.00% 8.00% 10.00% ntree OOB 1 2 3 300: 6.00% 0.00% 8.00% 10.00% > set.seed(333) > iris.rf <- randomForest(Species ~ ., iris.imputed) > print(iris.rf) Call: randomForest(formula = Species ~ ., data = iris.imputed) Type of random forest: classification Number of trees: 500 No. of variables tried at each split: 2 OOB estimate of error rate: 5.33% Confusion matrix: setosa versicolor virginica class.error setosa 50 0 0 0.00 versicolor 0 46 4 0.08 virginica 0 4 46 0.08 > > > > cleanEx(); ..nameEx <- "treesize" > > ### * treesize > > flush(stderr()); flush(stdout()) > > ### Name: treesize > ### Title: Size of trees in an ensemble > ### Aliases: treesize > ### Keywords: regression classif > > ### ** Examples > > data(iris) > iris.rf <- randomForest(Species ~ ., iris) > hist(treesize(iris.rf)) > > > > cleanEx(); ..nameEx <- "tuneRF" > > ### * tuneRF > > flush(stderr()); flush(stdout()) > > ### Name: tuneRF > ### Title: Tune randomForest for the optimal mtry parameter > ### Aliases: tuneRF > ### Keywords: classif tree > > ### ** Examples > > data(fgl, package="MASS") > fgl.res <- tuneRF(fgl[,-10], fgl[,10], stepFactor=1.5) mtry = 3 OOB error = 25.7% Searching left ... mtry = 2 OOB error = 25.7% 0 0.05 Searching right ... mtry = 4 OOB error = 24.3% 0.05454545 0.05 mtry = 6 OOB error = 24.3% 0 0.05 > > > > cleanEx(); ..nameEx <- "varImpPlot" > > ### * varImpPlot > > flush(stderr()); flush(stdout()) > > ### Name: varImpPlot > ### Title: Variable Importance Plot > ### Aliases: varImpPlot > ### Keywords: regression classif tree > > ### ** Examples > > set.seed(4543) > data(mtcars) > mtcars.rf <- randomForest(mpg ~ ., data=mtcars, ntree=1000, keep.forest=FALSE, + importance=TRUE) > varImpPlot(mtcars.rf) > > > > cleanEx(); ..nameEx <- "varUsed" > > ### * varUsed > > flush(stderr()); flush(stdout()) > > ### Name: varUsed > ### Title: Variables used in a random forest > ### Aliases: varUsed > ### Keywords: tree > > ### ** Examples > > data(iris) > set.seed(17) > varUsed(randomForest(Species~., iris, ntree=100)) [1] 171 113 265 254 > > > > ### *