.packageName <- "clusterfly"
# Clustefly
# This method creates a convenient data structure for dealing with a dataset and a number of alternative clusterings.
# 
# Once you have created a clusterfly object, you can add 
# clusterings to it with \code{\link{cfly_cluster}}, and
# visualise then in GGobi with \code{\link{cfly_show}} and
# \code{\link{cfly_animate}}.	 Static graphics are also
# available: \code{\link{cfly_pcp}} will produce a parallel
# coordinates plot, \code{\link{cfly_dist}} will show
# the distribution of each variable in each cluster, and
# \code{\link{cfly_fluct}} compares two clusterings with a
# fluctuation diagram.
# 
# If you want to standardise the cluster labelling to one
# group, look at \code{\link{clarify}} and \code{\link{cfly_clarify}}
# 
# @arguments data frame to be clustered
# @arguments rescale, if true each variable will be scaled to have mean 0 and variance 1.
# @seealso vignette("introduction")
# @alias package-clusterfly
# @keyword dynamic 
clusterfly <- function(df, rescale=TRUE) {
	if (rescale) df <- rescaler(df)

	g <- NULL
	getg <- function(cf, reset=FALSE) {
		if (reset && valid_ggobi(g)) close(g)
		if (!valid_ggobi(g)) g <<- ggobi(df)
		invisible(g)
	}

	structure(list(
		df = df, 
		clusters = list(), 
		ggobi = getg
	), class="clusterfly")
}


# Dynamic plot: show in ggobi
# Opens an instance ggobi for this dataset (if not already open), and colours the points according the cluster assignment.
# 
# @arguments clusterfly object
# @arguments clustering to display
# @arguments add convex hull? see \code{\link{addhull}} for details
# @keyword dynamic 
# cfly_show(o, 1)
# cfly_show(o, "kmeans")
cfly_show <- function(cf, idx = "true", hulls = FALSE) {
	g <- cf$ggobi()[1]
	cl <- cf$clusters[[idx]]
	glyph_colour(g) <- cl
	if (hulls) {
		addhull(g[1], g, cl)
		glyph_colour(g['hulls']) <- g['hulls']$id
	}
}

# Add clustering
# 
# @keyword internal 
"[[<-.clusterfly" <- function(x, i, value) {
	x$clusters[[i]] <- value
	x
}

# Convert clusterfly object to data.frame
# Concatenates data and cluster assignments into one data.frame.	Cluster assignments are prefixed with \code{cl_}
# 
# @arguments clusterfly object
# @arguments 
# @keyword manip 
as.data.frame.clusterfly <- function(x, ...) {
	cl <- as.data.frame(x$clusters)
	names(cl) <- paste("cl_", names(cl), sep="")
	cbind(x$df, cl)
}

# Clarify clusters
# Match all cluster indices to common reference
# 
# It's a good idea to run this before running any
# animation sequences so that unnecessary colour 
# changes are minimised.
# 
# @arguments clusterfly object
# @arguments method to use, see \code{\link{clarify}}
# @arguments index to reference clustering
# @keyword manip
#X o <- cfly_clarify(o, "Region")
cfly_clarify <- function(cf, reference=1, method="rowmax") {
	ref <- cf$clusters[[reference]]
	cf$clusters <- sapply(cf$cluster, function(x) clarify(x, ref, method=method), simplify=FALSE)
	cf
}

# Add clustering
# Add clustering 
# 
# Clustering method needs to respond to \code{\link{clusters}},
# if the default does not work, you will need to write
# your own to extract clusters.
# 
# @arguments clusterfly object
# @arguments clusterfing method (function)
# @arguments arguments passed to clustering method
# @arguments name of clustering
# @keyword manip 
#X cfly_cluster(o, kmeans, 4)
#X cfly_cluster(o, kmeans, 4, name="blah")
cfly_cluster <- function(cf, method, ..., name = deparse(substitute(method))) {
	cf[[name]] <- clusters(method(cf$df, ...))
	cf
}

# Print clusterfly object
# @keyword internal 
print.clusterfly <- function(x, ...) {
	cat("Data:		 ", paste(names(x$df), collapse=", "), "	[", nrow(x$df), "x", ncol(x$df), "]\n", sep="")
	cat("Clusters: ", paste(names(x$clusters), collapse=", "), "\n", sep="")	
}


# Dynamic plot: Animate glyph colours
# Animate glyph colours according to provided cluster ids.
# 
# This function will animiate endless until you manually break the loop
# using Ctrl-Break or Ctrl-C.	 
# 
# @arguments list of cluster ids that you want to animate across
# @arguments number of seconds to pause between each change
# @keyword dynamic 
#X # Press Ctrl-Break or Ctrl-C to exit
#X \dontrun{cfly_animate(cfly_clarify(o))}
cfly_animate <- function(cf, i = 1:length(cf$clusters), pause = 1) {
	g <- cf$ggobi()
	gd <- g[1]
	colours <- cf$clusters[i]
	while(TRUE) {
		for(col in colours) {
			if (!valid_ggobi(g)) return()
			glyph_colour(gd) <- col
			Sys.sleep(pause)
		}
	}
}

# Create multivariate ellipse
# Randomly sample points from a probability contour of a multivariate normal
# 
# There are two ways to use this function.	You can either supply
# a data set for which a multivariate normal ellipse will be drawn
# or you can supply the mean vector, covariance matrix and number
# of dimensions yourself. 
#
# @arguments data frame or matrix
# @arguments number of points to sample
# @arguments proportion of density contained within ellipse
# @arguments mean vector
# @arguments variance-covariance matrix
# @arguments degrees of freedom used for calculating F statistic
# @keyword internal
ellipse <- function(data, npoints=1000, cl=0.95, mean=colMeans(data), cov=var(data), df=nrow(data)) 
{
	norm.vec <- function(x) x / sqrt(sum(x^2))

	p <- length(mean)
	ev <- eigen(cov)

	sphere <- matrix(rnorm(npoints*p), ncol=p)
	cntr <- t(apply(sphere, 1, norm.vec))

	cntr <- cntr %*% diag(sqrt(ev$values)) %*% t(ev$vectors)
	cntr <- cntr * sqrt(p * (df-1) * qf(cl, p, df-p) / (df * (df-p)))
	if (!missing(data)) colnames(cntr) <- colnames(data)

	cntr + rep(mean, each=npoints) 
}
# Get clusters
# Extract clusters from clustering object
# 
# @arguments object
# @keyword internal
# @alias clusters.kmeans
# @alias clusters.default 
clusters <- function(x) UseMethod("clusters", x)
clusters.kmeans <- function(x) as.vector(x$cluster)
clusters.default <- function(x) as.vector(x)
# Static plot: Parallel coordinates
# Draw a parallel coordinates plot, facetted across clustering.
# 
# This really only a proof of concept, a truly useful PCP
# needs interaction, especially to move the variables around.
# 
# @arguments clusterfly object
# @arguments clustering to use
# @arguments other arguments passed to \code{\link[ggplot2]{geom_line}}
# @keyword hplot
# cfly_pcp(o, "kmeans") 
cfly_pcp <- function(cfly, index, ...) {
	df <- cbind(cfly$df, .cluster=cfly$clusters[[index]])
	ggpcp(df, vars=setdiff(names(df), ".cluster")) + geom_line(...) + facet_grid(. ~ .cluster)
}

# Static plot: Variable distribution
# Draw a density plot for each continuous variable, facetted across clustering.
# 
# This allows you to quickly visualise how the cluster
# vary in a univariate manner.  Currently, it is a bit
# of a hack, because \code{\link[ggplot]{ggplot}} does
# not support plots with different scales, so the variables
# are manually rescaled prior to plotting.
# 
# This plot is inspired by Gaguin \url{http://www.rosuda.org/gaguin}
# 
# @arguments clusterfly object
# @arguments clustering to use
# @arguments scaling to use
# @keyword hplot
#X cfly_dist(o, "kmeans")
#X cfly_dist(o, "kmeans") + scale_y_continuous(limit=c(0, 2))
cfly_dist <- function(cfly, index, scale="range") {
	df <- cbind(cfly$df, .cluster=factor(cfly$clusters[[index]]))
	dfm <- melt(rescaler(df, scale), id=".cluster")
	
	ggplot(dfm, aes(x=value)) + geom_density() + facet_grid(.cluster ~ variable)
}

# Static plot: Fluctuation diagram
# Draw a fluctuation diagram comparing two clusterings
# 
# @arguments clusterfly object
# @arguments first clustering, will be reordered to match \code{b} if clarify=TRUE
# @arguments second clustering
# @arguments use \code{\link{clarify}} to rearranged cluster indices?
# @keyword hplot
#X cfly_fluct(o, "kmeans","hierarchical", clarify=TRUE) 
cfly_fluct <- function(cfly, a, b, clarify=TRUE, ...) {
	ca <- cfly$clusters[[a]]
	cb <- cfly$clusters[[b]]
	
	if (clarify) ca <- clarify(ca, cb)
	
	p <- ggfluctuation(table(ca,cb), ...)
	p$aspect.ratio <- 1
	p + scale_y_discrete(a) + scale_x_discrete(b)
}
# Need a new type of linking to make this work
# Brushing a node should highlight all nodes and leaves below it
# (investigate nested set representation for efficient storage)
# 
# Can this be done using rggobi and the old linking code?
# Should it be added to ggobi as a new type of linking?
#		* If so, as the general case - defined by edges?
#		* Or for the particular nested set representation?


# Hierfly, a method for visualisation hierarchical clustering.
# This method supplements a data set with information needed to draw a dendrogram
# 
# Intermediate cluster nodes are added as needed, and positioned at the 
# centroid of the combined clusters.
# 
# @arguments data set
# @arguments distance metric to use, see \code{\link{dist}} for list of possibilities
# @arguments cluster distance measure to use, see \code{\link{hclust}} for details
# @returns object of type, hierfly
# @seealso \code{\link{cut.hierfly}}, \code{\link{ggobi.hierfly}}
# @keyword cluster
#X h <- hierfly(iris)
#X ggobi(h)
#X h <- hierfly(iris, method="single")
hierfly <- function(data, metric="euclidean", method="average") {
	cat <- sapply(data, is.factor)
	h <- hclust(dist(data[,!cat], metric), method)

	data$ORDER <- order(h$order)
	data$HEIGHT <- 0
	data$LEVEL <- 0
	data$POINTS <- 1

	for (i in 1:nrow(h$merge)) {
		newr <- combinerows(data[as.character(-h$merge[i,]),], cat)
		newr$HEIGHT <- h$height[i]
		newr$LEVEL <- i
		rownames(newr) <- as.character(-i)

		data <- rbind(data, newr)
	}

	data$node <- (as.numeric(rownames(data)) < 0) + 0
	
	structure(list(data=data, hclust=h), class="hierfly")
}

# Combine multiple rows in cluster hierarchy to make intermediate node
# @keyword internal
combinerows <- function(df, cat) {
	same <- function(x) if (length(unique(x)) == 1) x[1] else NA
	points <- df$POINTS
	
	cont <- as.data.frame(lapply(df[, !cat, drop=FALSE] * points, sum)) / sum(points)
	cat <- as.data.frame(lapply(df[, cat, drop=FALSE], same))
	
	df <- if (nrow(cont) > 0 && nrow(cat) > 0) {
		cbind(cont, cat)
	} else if (nrow(cont) > 0) {
		cont
	} else {
		cat
	}
	df$POINTS <- sum(points)
	df
}


# Method to print hierfly objects
# @keyword internal 
print.hierfly <- function(x, ...) {
	print(str(x))
}

# Visualise hierarchical clustering with GGobi
# Displays both data, and dendrogram in original high-d space
# 
# This adds four new variables to the original data set:
# 
# \itemize{
#		\item ORDER, the order in which the clusters are joined
#		\item HEIGHT, the height of the branch, ie. the dissimilarity between the branches
#		\item LEVEL, the level of the branch
#		\item POINTS, the number of points in the branch
# }
#
# Make sure to select "attach edge set (edges)" in the in the edges menu on the 
# plot window, when you create a new plot.
# 
# A tour over the original variables will show how the clusters agglomerate
# in space.	 Plotting order vs height, level or points will give various
# types of dendograms.	A correlation tour with height/level/points on the y 
# axis and the original variables on the x axis will show a mobile blowing 
# in the wind.
# 
# @arguments hierfly object to visualise in GGobi
# @seealso \code{\link{cut.hierfly}}
# @keyword cluster
# @keyword dynamic
#X h <- hierfly(iris)
#X ggobi(h)
#X h <- hierfly(iris, method="single")
ggobi.hierfly <- function(data, ...) {
	h <- data$hclust
	data <- data$data
	
	g <- ggobi(data)
	d <- g[1]
	glyph_type(d) <- ifelse(data$node != 0, 1, 6)

	e <- data.frame(level=1:length(h$height), height=h$height)[rep(1:length(h$height), 2), ]
	rownames(e) <- paste("e", 1:nrow(e), sep="")

	g$edges <- e
	edges(g$edges) <- cbind(as.character(-h$merge), -rep(1:nrow(h$merge), 2))

	d <- displays(g)[[1]]
	edges(d) <- g[2]
	
	invisible(g)
}

# Colour hierfly object
# Colour hierfly object into k clusters
# 
# @arguments hierfly object to colour
# @arguments number of clusters
# @arguments GGobi instance displaying x, will create new if not specified
# @keyword cluster
#X h <- hierfly(iris)
#X hfly <- ggobi(h)
#X cut(h, 2, hfly)
#X h <- hierfly(iris, method="ward")
#X g <- ggobi(h)
#X cut(h, 2, g)
cut.hierfly <- function(x, k=2, g=ggobi(x), ...) {
	d <- g[1]
	glyph_colour(d) <- c(cutree(x$hclust, k=k) + 1, rep(1, length(x$hclust$height)))
}
# Add convex hulls
# Add conver hulls using the tool qconvex
# 
# To use this command you must have qconvex installed and available
# on your path.	 I'm not sure if this will work on windows (probably not)
# but it's not a big loss, because the technique isn't very useful
# anyway.
# 
# @arguments ggobi dataset
# @arguments ggobi reference
# @arguments grouping variable
# @keyword hplot 
addhull <- function(gd, g, by) {
	mat <- as.data.frame(gd)
	rownames(mat) <- rownames(gd)
	by <- rep(by, length=nrow(gd))
	
	edges <- tapply(1:length(by), by, function(i) {
		qh <- qhull(as.data.frame(mat)[i,])
		if(is.null(qh)) return()
		cbind(qh, by[i[1]])
	})
	edges <- do.call(rbind, compact(edges))
		
	g['hulls'] <- data.frame(id=as.numeric(edges[,3]))
	setEdges(g['hulls'], edges[,1], edges[,2])
}

# qhull
# Generate convex hulls for a matrix
# 
# @keyword internal 
qhull <- function(mat) {
	if (nrow(mat) < 5) return()
	#if (is.null(rownames(mat))) rownames(mat) <- 1:nrow(mat)
	
	output <- system(paste("echo '", qhullout(mat), "' | qconvex QbB i"), TRUE)[-1]
	facets <- do.call(rbind, lapply(strsplit(output, " "), function(x) as.numeric(sort(x)))) + 1
	
	combs <- subset(expand.grid(i = 1:ncol(facets), j = 1:ncol(facets)), i < j)
	edges <- unique(do.call(rbind, 
		lapply(1:nrow(combs), function(x) facets[, unlist(combs[x,])])
	))

	t(apply(edges, 1, function(x) rownames(mat)[x]))
	#edges
}

# qhullout
# Generate output data to calculate convex hulls
# 
# @keyword internal 
qhullout <- function(mat) {
	paste(
		ncol(mat), "\n",
		nrow(mat), "\n",
		paste(apply(mat, 1, paste, collapse=" "), collapse="\n"),
		sep=""
	)
	
}

# Display model based clustering with mvn ellipses
# Displays the results of model based clustering with an ellipse drawn from the multivariate normal model for each group.
# 
# @arguments output from me function
# @arguments input data frame to me
# @keyword cluster
# @keyword dynamic
#X if(require("mclust")) {
#X eei <- me(modelName = "EEI", data = iris[,-5], z = unmap(iris[,5]))
#X vvv <- me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5]))
#X vvi <- me(modelName = "VVI", data = iris[,-5], z = unmap(iris[,5]))
#X mefly(eei, iris[,-5])
#X mefly(vvi, iris[,-5])
#X mefly(vvv, iris[,-5])
#X }
mefly <- function(model, data) {
	mean <- model$parameters$mean
	var <- model$parameters$variance$sigma

	ellipses <- do.call("rbind", lapply(1:ncol(mean), function(i) { 
		data.frame(ellipse(mean = mean[,i], cov = var[,, i], df=10), cluster=i)
	}))
	colnames(ellipses) <- c(colnames(data), "cluster")
	ellipses$TYPE <- factor("ellipse")
	data$TYPE <- factor("data")

	all <- rbind.fill(ellipses, cbind(data, cluster=max.col(model$z)))

	g <- ggobi(all)
	glyph_type(g[1]) <- c(1,6)[all$TYPE]
	glyph_colour(g[1]) <- all$cluster
	invisible(g)
}
# Visualise Kohonen self organising maps with GGobi
# Displays both data, and map in original high-d space.
# 
# Map variables added as map1 and map2.  Plot these to 
# get traditional SOM plot.  Tour over all other variables to
# see how well the map fits the original data.
# 
# @arguments SOM object
# @keyword cluster
# @keyword dynamic
#X \dontrun{
#X d.music <- read.csv("http://www.ggobi.org/book/data/music-all.csv")
#X 
#X music <- rescaler(d.music)[complete.cases(d.music), 1:10]
#X music.som <- som(music[,-(1:3)], 6, 6, neigh="bubble", rlen=1000)
#X ggobi(music.som)
#X }
ggobi.som <- function(data, ...) {
	som <- data
	original <- data.frame(
		som$data, 
		map1 = jitter(som$visual$x) + 1, 
		map2 = jitter(som$visual$y) + 1, 
		net=factor(FALSE)
	)

	xs <- som$xdim
	ys <- som$ydim

	net <- som$code
	colnames(net) <- colnames(som$data)
	net <- cbind(net, expand.grid(map1=1:xs, map2=1:ys), net=factor(TRUE))
	rownames(net) <- paste("net", 1:nrow(net), sep="")

	df <- rbind(original, net)

	g <- ggobi(df)
	glyph_colour(g[1]) <- c(1,3)[df$net]
	d <- displays(g)[[1]]
	variables(d) <- list(X = "map1", Y = "map2")

	# Add net edges
	netlines <- make_net(xs, ys)
	edges(g) <- netlines
	edges(d) <- g[2]
	
	invisible(g)
}

# Make edge structure for SOM net
# @keyword internal
make_net <- function(xs, ys) {
	netlines <- with(expand.grid(y=1:(xs-1), x=1:(ys)), rbind(
		cbind((x - 1) * xs + y, (x - 1)		 * xs + y + 1),
		cbind((x - 1) * xs + y,	 x				 * xs + y)
	))
	netlines <- rbind(netlines, cbind(1:(ys-1) * xs, 2:ys * xs))
	netlines <- apply(netlines, 2, function(x) paste("net", x, sep=""))
	netlines
}

# Animate by setting rlen = something small
# and calling som.update repeatedly
# ---- doesn't work, som.update doesn't seem to do anything
# ggobi_som_update <- function(som, g) {
# 
#		# Update clustering
#		old <- g[1][!as.log
# ical(g[1]$net), c("map1", "map2")]
#	 jitter <- old - round(old)
#		
#		g[1][!as.logical(g[1]$net), c("map1", "map2")] <- cbind(som$visual$x, som$visual$y) + 1	 + jitter
#		
#		g[1][as.logical(g[1]$net), setdiff(names(g[1]), c("map1", "map2", "net"))] <- som$code
#		# Update cluster centres
#		
# }
# 
# 
# music.som <- som(music[,-(1:3)], 5, 7, neigh="bubble", rlen=100)
# g <- ggobi_som(music.som)
# 
# music.som <- som.update(music.som, rlen=50)
# ggobi_som_update(music.som, g)
# Hierachical clustering
# Convenient methods for hierachical clustering
# 
# @arguments data frame
# @arguments method to use, see \code{\link{hclust}}
# @arguments distance metric to use, see \code{\link{dist}}
# @arguments number of clusters to retrieve, see \code{\link{cut}}
# @keyword cluster
hierarchical <- function(df, method="complete", metric="euclidean", n=5) {
	if (metric == 'correlation') {
		df <- scale(as.matrix(df))
		metric <- "euclidean"
	}
	as.vector(cutree(hclust(dist(df, metric), method=method), n))
}

# Clarify matrix
# Clarify matrix ordering to minimize off diagonals
# 
# @arguments cluster assignments to reassign 
# @arguments matrix b
# @value vector of reassigned cluster a
# @keyword manip 
# @seealso \code{\link[e1071]{matchClasses}}
clarify <- function(a, b, method="greedy") {
	m <- matchClasses(table(a,b), method=method, verbose=FALSE)
	as.vector(m[a])
}
# Xtable.table
# Fix lacking xtable table output
# 
# @keyword internal 
xtable.table <- function(x, ...) {
	class(x) <- "matrix"
	xtable.matrix(x, ...)
}
