.packageName <- "dglm"
dglm <- function(formula = formula(data),
		dformula = ~1,
		family = gaussian,
		dlink = "log",
		data = sys.parent(),
		subset = NULL,
		weights = NULL,
		contrasts = NULL,
		method = "ml",
		mustart = NULL,
		betastart = NULL,
		phistart = NULL,
		control = dglm.control(...),
		ykeep = TRUE,
		xkeep = FALSE,
		zkeep = FALSE,
		...)
#
#	Double generalized linear models
#	Gordon Smyth, Walter and Eliza Hall Institute of Medical Research
#	S-Plus version created 8 Dec 1997, last revised 22 Oct 1999.
#
#	Ported to R by Peter Dunn, 22 August 2005
#
{
#  Set up mean submodel: 
#               y   response
#               X   design matrix
#               w   prior weights
#          offset   offset in linear predictor
#          family   response family
#         mustart   starting values for mu (optional)
#       betastart   starting values for coefficients (optional)
#
#	Save call for future reference
	call <- match.call()

#	Get family for mean model
	if (is.character(family))
		family <- get(family, mode = "function", envir = parent.frame())
	if (is.function(family))
        family <- family()
    if (is.null(family$family)) {
        print(family)
        stop("'family' not recognized")
    }
#
#	Evaluate the model frame
	mnames <- c("", "formula", "data", "weights", "subset")
	cnames <- names(call)

	cnames <- cnames[match(mnames,cnames,0)]
	mcall <- call[cnames]
	mcall[[1]] <- as.name("model.frame")
	mframe <- eval(mcall, sys.parent())
	mf <- match.call(expand.dots=FALSE)

#	Now extract the glm components
	y <- model.response(mframe, "numeric")
	
	if(is.null(dim(y))) {
		N <- length(y) 
	} else {
		N <- dim(y)[1]
	}
	nobs <- N # Needed for some of the  eval  calls
	mterms <- attr(mframe, "terms")
	X <- model.matrix(mterms, mframe, contrasts)
	
	weights <- model.weights(mframe)
	if(is.null(weights)) weights <- rep(1,N)
	if ( is.null(weights) ) weights <- rep(1, N )
   if (!is.null(weights) && any(weights < 0)) {
        stop("negative weights not allowed")
	}
	
	offset <- model.offset(mframe)
	if ( is.null(offset) ) offset <- rep(0, N )
	if (!is.null(offset) && length(offset) != NROW(y)) {
        stop(gettextf("number of offsets is %d should equal %d (number of observations)", 
            length(offset), NROW(y)), domain = NA)
	}

#
#  Set up dispersion submodel:
#               Z   design matrix 
#         doffset   offset in linear predictor
#         dfamily   family for unit deviances
#        phistart   starting values for phi (optional)
#
#	Setup dformula with y on left hand side
#	(to ensure that mean and dispersion submodels have same length)
	mcall$formula <- formula
	mcall$formula[3] <- switch(match(length(dformula),c(0,2,3)),
		1,dformula[2],dformula[3])
#
#	Evaluate the model frame and extract components
	mframe <- eval(mcall, sys.parent())
	dterms <- attr(mframe, "terms")
	Z <- model.matrix(dterms, mframe, contrasts)
	doffset <- model.extract(mframe, offset)
	if ( is.null(doffset) ) doffset <- rep(0, N )
#
#	Parse the dispersion link and evaluate as list
	name.dlink <- substitute(dlink)
	if(is.name(name.dlink)) {
		if(is.character(dlink)) { # link is character variable
			name.dlink <- dlink
		} else {                  # link is name without quotes
			dlink <- name.dlink <- as.character(name.dlink)
		}
	} else {
		if(is.call(name.dlink))  # power link
			name.dlink <- deparse(name.dlink)
	}
	if(!is.null(dlink$name))   name.dlink <- dlink$name
#
#	Construct the dispersion variance function
	if ( family$family=="Tweedie") {
		tweedie.p <- call$family$var.power
	}

	Digamma <- family$family=="Gamma" || (family$family=="Tweedie" && tweedie.p==2)

	if (Digamma) {

		linkinv <- make.link(name.dlink)$linkinv
		linkfun <- make.link(name.dlink)$linkfun
		mu.eta <- make.link(name.dlink)$mu.eta
		valid.eta <- make.link(name.dlink)$valid.eta
		init <- expression({
			if (any(y <= 0)){
				print(y)
				print( any(y<=0) )
	        stop("non-positive values not allowed for the DM gamma family")
			  }
				n <- rep.int(1, nobs)
				mustart <- y
				})

		
		dfamily <- structure(list(family="Digamma", variance=varfun.digamma, 
				dev.resids=function(y,mu,wt){wt * unitdeviance.digamma(y,mu)}, # gaussian()$dev.resids are resids^2
				aic=function(y, n, mu, wt, dev) NA,  
				link=name.dlink, linkfun=linkfun, linkinv=linkinv, 
				mu.eta=mu.eta, initialize=init, 
				validmu=function(mu) { all(mu>0) }, valideta=valid.eta) )

	} else { # Not digamma family
		eval(substitute(dfamily <- Gamma(link=lk), list(lk=name.dlink ) ))
	}
#
#	Remember if log-link for use with initial values
#
	dlink <- as.character(dfamily$link)
	logdlink <- dlink=="log"
#
#  Match method (ml or reml)
#
	if(!is.null(call$method)) {
		name.method <- substitute(method)
		if(!is.character(name.method))
			name.method <- deparse(name.method)
		list.methods <- c("ml","reml","ML","REML","Ml","Reml")
		i.method <- pmatch(method,list.methods,nomatch=0)
		if(!i.method) stop("Method must be ml or reml")
		method <- switch(i.method,"ml","reml","ml","reml","ml","reml")
	}
	reml <- method=="reml"
#
#  Starting values.  If explicit starting values are not supplied,
#  regress link(y) on X and dlink(d) on Z by ordinary least squares.
# 

	if ( is.null(mustart) ) {
		eval(family$initialize)  
		mu <- mustart
		mustart <- NULL
	}

	if(!is.null(betastart)) {
		eta <- X %*% betastart
		mu <- family$linkinv(eta+offset)
	} else {
		if(!is.null(mustart)) {
			mu <- mustart
			eta <- family$linkfun(mu)-offset
		} else {

			eta <- lm.fit(X,family$linkfun(mu)-offset,singular.ok=TRUE)$fitted.values
			# Recall:  fitted values are on the linear predictor scale
			mu <- family$linkinv(eta+offset)
		}
	}

#	Careful:   Called dev.resid, but appear to be the deviance residuals squared
	d <- ( family$dev.resids(y, mu, weights) )

 	if(!is.null(phistart)) {
 		phi <- phistart
 		deta <- dfamily$linkfun(phi)-doffset
	} else {
 		deta <- lm.fit(Z,dfamily$linkfun(d+(d==0)/6)-doffset,singular.ok=TRUE)$fitted.values
 		if(logdlink) deta <- deta + 1.27036
 		phi <- dfamily$linkinv(deta+offset)
 	}

	zm <- eta + (y - mu) / family$mu.eta(eta)
	wm <- eval(family$variance(mu))*weights/phi

	mfit <- lm.wfit(X,zm,wm,method="qr",singular.ok=TRUE) # ,qr=reml)
	eta <- mfit$fitted.values
	mu <- family$linkinv(eta+offset)
	d <- family$dev.resids(y, mu, weights)

#  Initial (minus twice log) likelihood or adjusted profile likelihood
	const <- dglm.constant(y,family,weights)
	if(Digamma) {
		h <- 2*(lgamma(weights/phi)+(1+log(phi/weights))*weights/phi)
	} else {
		h <- log(phi/weights)
	}
	m2loglik <- const + sum(h + d/phi)

	if(reml)
		m2loglik <- m2loglik + 2*log(abs(prod(diag(mfit$R))))
		
	m2loglikold <- m2loglik+1

#  Estimate model by alternate iterations

	epsilon <- control$epsilon
	maxit <- control$maxit
	trace <- control$trace
	iter <- 0

	while ( abs(m2loglikold-m2loglik)/(abs(m2loglikold)+1) > epsilon && iter < maxit )
	{
################################
#		dispersion submodel
		hdot <- 1/dfamily$mu.eta( deta )
		
		if(Digamma) {
			delta <- 2*weights*( log(weights/phi) - digamma(weights/phi) )
			u <- 2*weights^2*( trigamma(weights/phi) - phi/weights)
			fdot <- phi^2 / u * hdot  # p 50
		} else { # In normal and iG cases, eg, the dispersion sub-model is gamma
 			delta <- phi
			u <- phi^2 # variance function for disp. model; u(delta)=delta^2 is gamma
			fdot <- hdot
		}
		wd <- 1 / (fdot^2 * u)	# ie Smyth, p 50.  We don't include the factor of 2,
										# as that is the disp. parameter, which enters via
										# the  dispersion=2  argument for the summary.

		if(reml) {
			h <- hat(mfit$qr)
			delta <- delta - phi*h
			wd <- wd - 2*(h/hdot^2/phi^2) + h^2
		}
		if(any(wd<0)) {
			wd[wd<0] <- 0
		}
		zd <- deta + (d - delta) * fdot

		dfit <- lm.wfit(Z, zd, wd, method="qr", singular.ok=TRUE)
		deta <- dfit$fitted.values
	
		phi <- dfamily$linkinv(deta+doffset)

################################
#		mean submodel
		zm <- eta + (y - mu) / family$mu.eta(eta)
		fam.wt <- expression( weights * family$variance(mu) ) 
		wm <- eval( fam.wt )/phi
		mfit <- lm.wfit(X,zm,wm,method="qr",singular.ok=TRUE)
		eta <- mfit$fitted.values
		mu <- family$linkinv(eta+offset)
		d <- family$dev.resids(y, mu, weights)

#		overall likelihood
		m2loglikold <- m2loglik
		if(Digamma) {
			h <- 2*(lgamma(weights/phi)+(1+log(phi/weights))*weights/phi)
		} else {
			h <- log(phi/weights)
		}
		m2loglik <- const + sum(h + d/phi)
		if(reml) {
			m2loglik <- m2loglik + 2*log(abs(prod(diag(mfit$R))))
		}
		iter <- iter+1
		if(trace)
			cat("DGLM iteration ", iter, ": -2*log-likelihood = ",
			format(round(m2loglik, 4)), " \n", sep = "")
	}

#
#  Output for mean model:
#  Exactly as for glm.object.  As for lm.object except that
#  linear.predictors and prior.weights are new components, and fitted.values
#  has a new definition.
#
	mfit$formula <- call$formula
	mfit$call <- call
	mfit$family <- family

	mfit$linear.predictors <- mfit$fitted.values+offset
	mfit$fitted.values <- mu
	mfit$prior.weights <- weights
	mfit$terms <- mterms
	mfit$contrasts <- attr(X, "contrasts")
	intercept <- attr(mterms, "intercept")
	mfit$df.null <- N - sum(weights == 0) - as.integer(intercept)
	mfit$call <- call
	mfit$deviance <- sum(d/phi)
	mfit$aic <- NA
	mfit$null.deviance <- glm.fit(x=X, y=y, weights=weights/phi, offset=offset, family=family)
	if(length(mfit$null.deviance)>1) mfit$null.deviance <- mfit$null.deviance$null.deviance
	if(ykeep) mfit$y <- y
	if(xkeep) mfit$x <- X
	class(mfit) <- c("glm","lm")

#
#  Output for dispersion model:
#  As for glm.object except that prior.weights are not relevant.  Is
#  nested in one output component.
#
	dfit$family <- dfamily
	dfit$prior.weights <- rep(1, N)
	dfit$linear.predictors <- dfit$fitted.values+doffset
	dfit$fitted.values <- phi
	dfit$terms <- dterms
	dfit$aic <- NA
	call$formula <- call$dformula
	call$dformula <- NULL
	call$family <- call(dfamily$family,link=name.dlink)
	dfit$call <- call
	dfit$residuals <- dfamily$dev.resid(d, phi, wt=rep(1/2,N) )
	dfit$deviance <- sum( dfit$residuals  )
	dfit$null.deviance <- glm.fit(x=Z, y=d, weights=rep(1/2,N), offset=doffset, family=dfamily)
	if(length(dfit$null.deviance)>1) dfit$null.deviance <- dfit$null.deviance$null.deviance
	if(ykeep) dfit$y <- d
	if(zkeep) dfit$z <- Z
	dfit$formula <- as.vector(attr(dterms, "formula"))
	dfit$iter <- iter
	class(dfit) <- c("glm","lm")
	out <- c(mfit, list(dispersion.fit = dfit, iter=iter, method=method, m2loglik=m2loglik))
	class(out) <- c("dglm","glm","lm")
	out
}














######################################################################################
dglm.control <- function(epsilon = 1e-007, maxit = 50, trace = FALSE, ...)
{
#  Control iteration for dglm
#  As for glm.control but with different defaults
#  GKS  9 Jan 98
#
	if(epsilon <= 0) {
		warning("the value of epsilon supplied is zero or negative;
the default value of 1e-7 was used instead")
		epsilon <- 1e-007}
	if(maxit < 1) {
		warning("the value of maxit supplied is zero or negative;
the default value of 50 was used instead")
		maxit <- 50}
	list(epsilon = epsilon, maxit = maxit, trace = as.logical(trace)[1])
}






dglm.constant <- function(y,family,weights=1)
{
#  Constant term appearing in glm log-likelihood
#  Used by dglm
#  GKS  6 Jan 98, 4 Jul 98, 23 Sep 99.
#
#  Exact cases (in binomial case, exact for phi near 1)
#

	if ( family$family=="Tweedie"){ 
		tweedie.p <- get("tweedie.p", envir = parent.frame() )
	}
	
	const <- switch(family$family[1],
		"Gaussian" = length(y)*log(2*pi),
		"Poisson" = 2*sum(y-y*ifelse(y>0,log(y),0)+lgamma(y+1)),
		"Gamma" = 2*sum(log(y)),
		"Inverse Gaussian" = sum(log(2*pi*y^3)),
		"Tweedie" = switch(match( tweedie.p,c(0,1,2,3),nomatch=0),
			length(y)*log(2*pi),
			2*sum(y-y*ifelse(y>0,log(y),0)+lgamma(y+1)),
			2*sum(log(y)),
			sum(log(2*pi*y^3)) ),
		"Binomial" = -2*sum(lgamma(weights+1)-lgamma(weights*y+1)-
		lgamma(weights*(1-y)+1)+weights*(y*ifelse(y>0,log(y),0)+
		(1-y)*ifelse(1-y>0,log(1-y),0)))+sum(log(weights))
	)
#
#  Saddle-point approximation
#
	if(is.null(const)) {
		V <- family$variance(y)
		if(any(V==0)) V[V==0] <- family$variance(y[V==0]+1/6)
		const <- sum(log(2*pi*V))
		if(length(V)==1 && length(y)>1) const <- length(y)*const
	}
	const
}





summary.dglm <- function(object, dispersion=NULL, correlation = FALSE, ...)
{
#  Summarize a double glm
#  GKS  7 Jan 98
	sm <- summary.glm(object,dispersion=dispersion,correlation=correlation, ...)
	sd <- summary.glm(object$dispersion.fit,dispersion=2,correlation=correlation, ...)
	ans <- structure( c(sm, list(dispersion.summary=sd, outer.iter=object$iter,
		m2loglik=object$m2loglik)), class=c("summary.dglm","summary.glm"))
		
	class(ans) <- "summary.dglm"
   return(ans)
}






print.summary.dglm <- function(x, ..., digits = NULL, quote = TRUE, prefix = "", residuals = FALSE)
{
#  Print summary of double glm
#  GKS  7 Jan 98
#
	xd <- x$dispersion.summary
	x$dispersion.summary <- NULL
	if(is.null(digits))
		digits <- options()$digits
	else {
		old.digits <- options(digits = digits)
		on.exit(options(old.digits))
	}
	cat("\nCall: ")
	print(x$call)
#
#  Mean submodel
#
#	cat("\nMEAN MODEL")
	nas <- x$nas
	coef <- x$coef
	correl <- x$correl
	if(any(nas)) {
		nc <- length(nas)
		cnames <- names(nas)
		coef1 <- array(NA, c(nc, 3), list(cnames, dimnames(coef)[[2]]))
		coef1[!nas,  ] <- coef
		coef <- coef1
		if(!is.null(correl)) {
			correl1 <- matrix(NA, nc, nc, dimnames = list(cnames, cnames)
				)
			correl1[!nas, !nas] <- correl
			correl <- correl1
		}
	}
	dresid <- x$deviance.resid
	n <- length(dresid)
	rdf <- x$df[2]
	if(residuals) {
		if(rdf > 5) {
			cat("Deviance Residuals:\n")
			rq <- quantile(as.vector(dresid))
			names(rq) <- c("Min", "1Q", "Median", "3Q", "Max")
			print(rq, digits = digits)
		}
		else if(rdf > 0) {
			cat("Deviance Residuals:\n")
			print(dresid, digits = digits)
		}
	}
	if(any(nas))
		cat("\nMean Coefficients: (", sum(nas), 
			" not defined because of singularities)\n", sep = "")
	else cat("\nMean Coefficients:\n")
	print(coef, digits = digits)

	cat(paste("(Dispersion Parameters for", x$family$family, 
		"family estimated as below", ")\n"))
	int <- attr(x$terms, "intercept")
	if(is.null(int))
		int <- 1
	cat("\n    Scaled Null Deviance:", format(round(x$null.deviance, digits)), "on", n - 
		int, "degrees of freedom\n")
	cat("Scaled Residual Deviance:", format(round(x$deviance, digits)), "on", round(
		rdf, digits), "degrees of freedom\n")
#       cat("\nNumber of Fisher Scoring Iterations:", format(trunc(x$iter)), "\n")
	if(!is.null(correl)) {
		p <- dim(correl)[2]
		if(p > 1) {
			cat("\nCorrelation of Coefficients:\n")
			ll <- lower.tri(correl)
			correl[ll] <- format(round(correl[ll], digits))
			correl[!ll] <- ""
			print(correl[-1,  - p, drop = F], quote = F, digits = digits)
		}
	}
#
#  Dispersion submodel
#
	nas <- xd$nas
	coef <- xd$coef
	correl <- xd$correl
	if(any(nas)) {
		nc <- length(nas)
		cnames <- names(nas)
		coef1 <- array(NA, c(nc, 3), list(cnames, dimnames(coef)[[2]]))
		coef1[!nas,  ] <- coef
		coef <- coef1
		if(!is.null(correl)) {
			correl1 <- matrix(NA, nc, nc, dimnames = list(cnames, cnames)
				)
			correl1[!nas, !nas] <- correl
			correl <- correl1
		}
	}
	dresid <- xd$deviance.resid
	n <- length(dresid)
	rdf <- xd$df[2]
	if(residuals) {
		if(rdf > 5) {
			cat("Deviance Residuals:\n")
			rq <- quantile(as.vector(dresid))
			names(rq) <- c("Min", "1Q", "Median", "3Q", "Max")
			print(rq, digits = digits)
		}
		else if(rdf > 0) {
			cat("Deviance Residuals:\n")
			print(dresid, digits = digits)
		}
	}
	if(any(nas))
		cat("\nDispersion Coefficients: (", sum(nas), 
			" not defined because of singularities)\n", sep = "")
	else cat("\nDispersion Coefficients:\n")
	print(coef, digits = digits)
	cat(paste("(Dispersion parameter for", xd$family$family, 
		"family taken to be", format(round(xd$dispersion, digits)), ")\n"))
	int <- attr(xd$terms, "intercept")
	if(is.null(int))
		int <- 1
	cat("\n    Scaled Null Deviance:", format(round(xd$null.deviance, digits)), "on", n - 
		int, "degrees of freedom\n")
	cat("Scaled Residual Deviance:", format(round(xd$deviance, digits)), "on", round(
		rdf, digits), "degrees of freedom\n")
#	cat("\nNumber of Fisher Scoring Iterations:", format(trunc(xd$iter)), "\n")
	if(!is.null(correl)) {
		p <- dim(correl)[2]
		if(p > 1) {
			cat("\nCorrelation of Coefficients:\n")
			ll <- lower.tri(correl)
			correl[ll] <- format(round(correl[ll], digits))
			correl[!ll] <- ""
			print(correl[-1,  - p, drop = F], quote = F, digits = digits)
		}
	}
#
#  Overall iteration
#
	cat("\nMinus Twice the Log-Likelihood:", format(round(x$m2loglik, digits)), "\n")
	cat("Number of Alternating Iterations:", format(trunc(x$outer.iter)), "\n")
	invisible(NULL)
}








anova.dglm <- function(object, ..., dispersion = NULL, test = NULL)
{
#  ANOVA for double glm (likelihood ratio tests for mean and dispersion models)
#  GKS  16 Jan 98
#
	response <- as.character(formula(object)[2])
	mterms <- object$terms
	dterms <- object$dispersion.fit$terms
	df <- c(length(mterms),length(dterms))
#
#  Extract null mean model formula
#
	if(df[1]>0) {
		o <- attr(mterms, "offset")
		i <- attr(mterms, "intercept")
		factor.labels <- dimnames(attr(mterms,"factors"))[[1]]
		fm <- paste(response,"~")
		if(!is.null(o)) {
			fm <- paste(fm,factor.labels[o])
			if(i == 0)
				fm <- paste(fm,"-1")
			}
		else {
			if(i == 0)
				fm <- paste(fm,"-1")
			else
				fm <- paste(fm,"1")
			}
		fm <- parse(text=fm)
		mode(fm) <- "call"
		df[1] <- object$rank - i
	}
#
#  Extract null dispersion model formula
#
	if(df[2]>0) {
		o <- attr(dterms, "offset")
		i <- attr(dterms, "intercept")
		factor.labels <- dimnames(attr(dterms,"factors"))[[1]]
		if(!is.null(o)) {
			fd <- paste("~",factor.labels[o])
			if(i == 0)
				fd <- paste(fd,"-1")
			}
		else {
			if(i == 0)
				fd <- "~ -1"
			else
				fd <- "~ 1"
			}
		fd <- parse(text=fd)
		mode(fd) <- "call"
		df[2] <- object$dispersion.fit$rank - i
	}
#
#  Fit null models and store likelihoods
# 
	lik <- rep(object$m2loglik,4)
	names(lik) <- c("Null","Mean","Full","Disp")
	ncall <- object$call
	if(df[2]>0) {	
		ncall["dformula"] <- fd
		lik["Mean"] <- eval(ncall)$m2loglik
		if(df[1]>0) {
			ncall["formula"] <- fm
			lik["Null"] <- eval(ncall)$m2loglik
			ncall["dformula"] <- object$call["dformula"]
			ncall["formula"] <- fm
			lik["Disp"] <- eval(ncall)$m2loglik
			}
		else {
			lik["Null"] <- lik["Mean"]
			lik["Disp"] <- lik["Full"]
			}
		}
	else {
		lik["Mean"] <- lik["Full"]
		if(df[1]>0) {
			ncall["formula"] <- fm
			lik["Null"] <- eval(ncall)$m2loglik
			lik["Disp"] <- lik["Null"]
			}
		else
			lik["Disp"] <- lik["Null"] <- lik["Full"]
		}
	seqdev <- c(lik["Null"] - lik["Mean"], lik["Mean"] - lik["Full"])
	adjdev <- c(lik["Disp"] - lik["Full"], lik["Mean"] - lik["Full"])
#
#  Output table
#
	heading <- c("Analysis of Deviance Table",
		paste("\n",object$family$family," double generalized linear model",sep=""),
		paste("\nResponse: ", response,"\n", sep = "") )
	aod <- data.frame(row.names = c("Mean model","Dispersion model"),
		DF = df,
		Seq.Chisq = seqdev,
		Seq.P = 1-pchisq(seqdev,ifelse(df>0,df,NA)),
		Adj.Chisq=adjdev,
		Adj.P = 1-pchisq(adjdev,ifelse(df>0,df,NA)))
	structure(aod, heading=heading, class=c("anova","data.frame") )
	
}

