# Package-name marker used by the pre-NAMESPACE R packaging convention.
.packageName <- "concord"
# accepts a "score matrix" like:
#
# 		rater1	rater2	...
# object1	1	2
# object2	1	1
# ...
#
# and returns a "count matrix" like:
#
#		1	2	...
# object1	1	1
# object2	2	0
# ...
#
# as used by kappa.nom()

scores.to.counts <- function(scores) {
 # Convert an objects-by-raters score matrix (or data frame) into an
 # objects-by-categories count matrix, as used by cohen.kappa().
 # Column names of the result are the distinct score values; NAs in
 # the input are ignored when counting.
 # NOTE: the original used missing(score.names) on a local variable,
 # which is invalid (missing() works only on formal arguments), so the
 # intended error message was never reached for bad input.
 if (is.data.frame(scores))
  score.names <- levels(as.factor(as.vector(unlist(scores))))
 else if (is.matrix(scores))
  score.names <- levels(as.factor(as.vector(scores)))
 else stop("scores must be a data frame or matrix")
 score.levels <- as.numeric(score.names)
 nlevels <- length(score.levels)
 nobj <- nrow(scores)
 counts <- matrix(0, nrow = nobj, ncol = nlevels)
 colnames(counts) <- score.names
 for (i in seq_len(nobj)) {
  for (j in seq_len(nlevels))
   counts[i, j] <- sum(scores[i, ] == score.levels[j], na.rm = TRUE)
 }
 return(counts)
}

cohen.kappa <- function(classif, type = c("count", "score")) {
 # Kappa statistic for nominally classified data with multiple raters.
 # classif: an objects-by-categories count matrix/data frame, or (with
 #          type="score") an objects-by-raters score matrix that is
 #          first converted via scores.to.counts().
 # Prints the result and invisibly returns list(kappa, Z, p).
 if (!missing(classif)) {
  # resolve type: the default is the full choice vector, so the original
  # direct comparison (type == "score") tested a length-2 condition,
  # which is an error in R >= 4.2
  type <- match.arg(type)
  if (type == "score") classif.mat <- scores.to.counts(classif)
  else classif.mat <- as.matrix(classif)
  # k = number of classifications per object (row sums)
  k <- apply(classif.mat, 1, sum)
  # check that all the row sums are equal
  if (any(k != k[1])) {
   # stick on an extra column of no-classification counts
   classif.mat <- cbind(classif.mat, max(k) - k)
   # recalculate the row sums
   k <- apply(classif.mat, 1, sum)
   # let the user know
   cat("Different row sums, a no-classification category was added.\n\n")
  }
  matdim <- dim(classif.mat)
  N <- matdim[1]
  Cj <- apply(classif.mat, 2, sum)
  # overall proportion of assignments falling in each category
  pj <- Cj/(N * k[1])
  # expected agreement by chance
  PE <- sum(pj^2)
  # observed agreement: proportion of agreeing rater pairs
  Si <- 1/(k[1] * (k[1] - 1)) * sum(classif.mat * (classif.mat - 1))
  PA <- (1/N) * sum(Si)
  K <- (PA - PE)/(1 - PE)
  varK <- (2/(N * k[1] * (k[1] - 1))) *
   (PE - (2 * k[1] - 3) * PE^2 + 2 * (k[1] - 2) * sum(pj^3))/(1 - PE)^2
  Z <- K/sqrt(varK)
  # one-sided p-value for kappa > 0
  p <- 1 - pnorm(Z)
  cat("Kappa test for nominally classified data\n")
  cat(matdim[2], "categories -", k[1], "methods\n\n")
  cat("kappa =", K, ", z =", Z, ", p =", p, "\n")
  invisible(list(kappa = K, Z = Z, p = p))
 }
 else {
  cat("Usage: cohen.kappa(classif,type=c(\"count\",\"score\"))\n")
  cat("\twhere classif is a data frame or matrix of counts\n")
  cat("\tof data objects (as rows) into categories (as columns)\n")
  cat("\tor of data objects (as rows) into category scores of\n")
  cat("\traters (as columns). If the latter, specify type=\"score\"\n")
  cat("\tNote: if all row sums (number of classifiers) are not equal,\n")
  cat("\ta no-classification category will be added to make them so.\n")
 }
}

# wtpc calculates the weighted percentages using the following formula:
# <weighted pc><-(100/<n methods>)*<n ratings>/<n data objects>
# The format of the data is the same as that used for calculating the
# kappa for nominal data kappa.nom()

wtpc <- function(x, n.methods, n.objects, type = c("count", "score")) {
 # Weighted percentages: (100/n.methods) * <ratings per category>/n.objects.
 # x: vector, data frame or matrix of ratings; with type="score" it is
 #    first converted to counts via scores.to.counts().
 # Returns one weighted percentage per category (or a scalar for a vector).
 if (!missing(x) && !missing(n.methods) && !missing(n.objects)) {
  # resolve type: the original compared the length-2 default vector
  # with == , a length-2 condition (an error in R >= 4.2)
  type <- match.arg(type)
  if (type == "score") x <- scores.to.counts(x)
  # column totals; the original if/if-else chain let the trailing
  # else overwrite the data-frame result with sum(x)
  if (is.data.frame(x)) sumx <- sapply(x, sum)
  else if (is.matrix(x)) sumx <- apply(x, 2, sum)
  else sumx <- sum(x)
  return((100/n.methods) * sumx/n.objects)
 }
 else {
  cat("Usage: wtpc(x,n.methods,n.objects,type=c(\"count\",\"score\"))\n")
  cat("\twhere x is a vector, data frame or matrix of ratings,\n")
  cat("\tif x is scores rather than counts, specify type=score\n")
  cat("\tn.methods is the number of rating methods (e.g. raters)\n")
  cat("\tand n.objects is the number of data objects (e.g. subjects)\n")
 }
}
# tiecorr calculates the correction for tied ranks

tiecorr <- function (rankarray) {
 # Tie-correction term for ranked data: the sum, over all rows, of
 # (t^3 - t) for every group of t tied ranks. Candidate rank values
 # are scanned on a half-unit grid (1, 1.5, 2, ...) because midranks
 # of tied integer ranks fall on halves.
 total <- 0
 nranks <- dim(rankarray)[2]
 for (row in seq_len(dim(rankarray)[1])) {
  for (val in seq(1, nranks, by = 0.5)) {
   t <- sum(val == rankarray[row, ])
   if (t > 1) total <- total + (t^3 - t)
  }
 }
 return(total)
}

# calculates a Z score for a zero-sum contrast on the rank array

BEZ <- function (rankarray, lambda) {
 # Z score for a zero-sum contrast `lambda` applied across the columns
 # of a rank array, with a tie adjustment based on tiecorr().
 n <- dim(rankarray)[1]
 k <- dim(rankarray)[2]
 w <- lambda[seq_len(k)]
 # contrast applied to the column rank sums
 L <- sum(w * colSums(rankarray))
 Z <- L/sqrt((n * k * (k + 1) * sum(w^2))/12)
 # NOTE(review): tiecorr() returns 0 when there are no ties, which
 # makes this ratio infinite -- presumably callers only use BEZ when
 # ties are present; confirm before relying on the untied case.
 Z <- Z * sqrt(n/tiecorr(rankarray))
 return(Z)
}

# computes Kendall's W from a matrix of either scores or ranks where
# rows are scoring or ranking methods and columns are data objects

kendall.w <- function (x, lambda, descending = TRUE, ranks = FALSE) {
 # Kendall's coefficient of concordance (W).
 # x: methods (rows) by data objects (columns) matrix or data frame of
 #    scores, or of ranks if ranks=TRUE
 # lambda: optional zero-sum contrast vector, or matrix with one
 #         contrast per row, each tested with BEZ()
 # descending: if TRUE, higher scores receive lower (better) ranks
 # Prints ranks, mean ranks and contrasts as a side effect and returns
 # list(W, p.table, p.chisq). For 7 or fewer objects p.table uses the
 # critical-value table Wcrit05 (package data -- TODO confirm it is
 # available at run time); otherwise the chi-squared approximation.
 # Fixed: removed an unused `meanscore` computation and a dead
 # `exist.tie <- 0` assignment that was unconditionally overwritten.
 if (nargs() > 0) {
  if (!is.data.frame(x) && !is.matrix(x))
   stop("x must be a dataframe or matrix")
  datadim <- dim(x)
  if (is.null(colnames(x))) cnames <- as.character(1:datadim[2])
  else cnames <- colnames(x)
  # column width for aligned printing of names and contrast values
  col.width <- max(nchar(cnames))
  if (!missing(lambda)) max.lambda.len <- max(nchar(unlist(lambda)))
  else max.lambda.len <- 4
  if (col.width <= max.lambda.len) col.width <- max.lambda.len + 1
  cnames <- formatC(cnames, width = col.width)
  if (ranks) rank.mat <- x
  else {
   # rank each method's scores across objects; transpose so that
   # rank() runs down the columns, then transpose back
   rank.mat <- t(as.matrix(x))
   if (descending) rank.mat <- max(rank.mat) - rank.mat
   for (i in 1:datadim[1]) rank.mat[, i] <- rank(rank.mat[, i])
   rank.mat <- t(rank.mat)
  }
  # ties exist if any row has fewer unique values than columns
  exist.tie <- length(unlist(apply(rank.mat, 1, unique))) < length(rank.mat)
  meanranks <- apply(rank.mat, 2, mean)
  grandmean <- mean(meanranks)
  if (exist.tie) {
   # tie-corrected denominator
   Tj <- tiecorr(rank.mat)
   W <- sum((meanranks - grandmean)^2)/
    ((datadim[2] * (datadim[2]^2 - 1) - Tj/datadim[1])/12)
  }
  else W <- sum((meanranks - grandmean)^2)/(datadim[2] * (datadim[2]^2 - 1)/12)
  if (datadim[2] > 7) {
   p.table <- NA
   p.chisq <- pchisq(datadim[1] * (datadim[2] - 1) * W, datadim[2] - 1, lower.tail = FALSE)
  }
  else {
   p.table <- ifelse(W > Wcrit05[datadim[2] - 2, datadim[1] - 2], "<0.05", ">0.05")
   p.chisq <- NA
   cat("\nRanks\n")
   print(rank.mat)
  }
  cat("\nMean ranks\n")
  print(meanranks)
  cat("\n")
  if (!missing(lambda)) {
   cat("Contrasts\n")
   cat(cnames)
   cat(paste(rep(" ", 7), sep = "", collapse = ""))
   cat("Z\n")
   ldim <- dim(lambda)
   if (is.null(ldim)) {
    # single contrast vector
    zstat <- round(BEZ(rank.mat, lambda), 3)
    cat(formatC(lambda, width = col.width), rep(" ", 8 - nchar(zstat)), zstat, "\n")
   }
   else {
    # one contrast per row of the lambda matrix
    zstat <- vector("numeric", ldim[1])
    for (i in 1:ldim[1]) {
     zstat[i] <- round(BEZ(rank.mat, lambda[i, ]), 3)
     cat(formatC(lambda[i, ], width = col.width), rep(" ", 8 - nchar(zstat[i])), zstat[i], "\n")
    }
   }
   cat("\n")
  }
  return(list(W = W, p.table = p.table, p.chisq = p.chisq))
 }
 else {
  cat("Usage: kendall.w(x[,lambda,descending=TRUE,ranks=FALSE])\n")
  cat("\twhere x is a matrix of scores or ranks and lambda a matrix of contrasts\n")
 }
}
# calculates the coincidence matrix for the kripp.alpha() function

coincidence.matrix <- function(x) {
 # Build the value-by-value coincidence matrix used by kripp.alpha().
 # For every within-column (within-object) pair of non-missing values,
 # weight is added to the corresponding cell -- double weight when the
 # two values match (diagonal) -- scaled by a per-column correction
 # when the data contain missing values.
 vals <- levels(as.factor(x))
 nlev <- length(vals)
 cm <- matrix(0, nrow = nlev, ncol = nlev)
 nrater <- dim(x)[1]
 nobj <- dim(x)[2]
 # per-column weight: (non-missing raters - 1) if any value anywhere
 # is missing, otherwise 1
 if (any(is.na(x))) wt <- apply(x, 2, function(v) sum(!is.na(v))) - 1
 else wt <- rep(1, nobj)
 for (obj in 1:nobj) {
  for (a in 1:(nrater - 1)) {
   for (b in (a + 1):nrater) {
    va <- x[a, obj]
    vb <- x[b, obj]
    if (!is.na(va) && !is.na(vb)) {
     ia <- which(vals == va)
     ib <- which(vals == vb)
     # matched pairs count double (both orderings fall on the diagonal)
     cm[ia, ib] <- cm[ia, ib] + (1 + (ia == ib))/wt[obj]
     if (ia != ib) cm[ib, ia] <- cm[ia, ib]
    }
   }
  }
 }
 list(statistic = NA, coincidence.matrix = cm, data.values = vals,
      nmatchval = sum(apply(cm, 2, sum)))
}

# calculates Krippendorff's alpha

kripp.alpha <- function(x, method = "nominal") {
 # Krippendorff's alpha for a classifier-by-object matrix x.
 # method: one of "nominal", "ordinal", "interval", "ratio" -- selects
 # the squared-difference function applied to pairs of data values.
 # Returns the list from coincidence.matrix() with $statistic filled in.
 # Fixed: data values are now coerced to numeric only for the interval
 # and ratio methods (the unconditional coercion raised a spurious
 # NA-coercion warning for non-numeric nominal/ordinal data); removed
 # the unused diagcm/occ locals.
 if (!missing(x)) {
  cm <- coincidence.matrix(x)
  dimcm <- dim(cm$coincidence.matrix)
  # upper triangle of the coincidence matrix as a vector (column-major)
  utcm <- as.vector(cm$coincidence.matrix[upper.tri(cm$coincidence.matrix)])
  # the marginal sums for the coincidence matrix
  nc <- apply(cm$coincidence.matrix, 1, sum)
  # calculate this term to simplify
  ncnc <- sum(nc * (nc - 1))
  # numeric data values are only needed for interval and ratio methods
  if (method %in% c("interval", "ratio")) dv <- as.numeric(cm$data.values)
  diff2 <- rep(0, length(utcm))
  ncnk <- rep(0, length(utcm))
  ck <- 1
  # walk the upper triangle in column-major order, matching utcm
  for (k in 2:dimcm[2]) {
   for (c in 1:(k - 1)) {
    ncnk[ck] <- nc[c] * nc[k]
    if (method == "nominal") diff2[ck] <- 1
    if (method == "ordinal") {
     # squared sum of the marginals lying between the two categories,
     # counting half of each endpoint's marginal
     diff2[ck] <- nc[c]/2
     if (k > (c + 1)) {
      for (g in (c + 1):(k - 1)) {
       diff2[ck] <- diff2[ck] + nc[g]
      }
     }
     diff2[ck] <- diff2[ck] + nc[k]/2
     diff2[ck] <- diff2[ck]^2
    }
    if (method == "interval") diff2[ck] <- (dv[c] - dv[k])^2
    if (method == "ratio") {
     diff2[ck] <- (dv[c] - dv[k])^2/(dv[c] + dv[k])^2
    }
    ck <- ck + 1
   }
  }
  cm$statistic <- 1 - (cm$nmatchval - 1) * sum(utcm * diff2)/sum(ncnk * diff2)
  return(cm)
 }
 else {
  cat("Usage: kripp.alpha(x,method=c(\"nominal\",\"ordinal\",\"interval\",\"ratio\"))\n")
  cat("\twhere x is a classifier by object matrix of classifications\n\n")
 }
}
# mcnemar.mh computes the simple 2x2 McNemar test for marginal
# homogeneity.

mcnemar.mh <- function(x) {
 # Simple 2x2 McNemar test for marginal homogeneity.
 # x: either a 2x2 table of counts, or a 2xn/nx2 matrix of raw scores
 # that is first cross-tabulated. Returns list(statistic, p); the
 # statistic is chi-squared with 1 df (based on the discordant cells).
 if (is.matrix(x) || is.data.frame(x)) {
  xdim <- dim(x)
  if (length(xdim) == 2) {
   if (any(xdim > 2)) {
    # raw scores: tabulate rater 1 against rater 2
    tab <- if (xdim[1] == 2) as.matrix(table(x[1, ], x[2, ]))
           else as.matrix(table(x[, 1], x[, 2]))
   }
   else tab <- as.matrix(x)
   discordant <- tab[1, 2] + tab[2, 1]
   if (discordant < 10)
    warning("low cell counts - consider binomial test")
   stat <- (tab[1, 2] - tab[2, 1])^2/discordant
   return(list(statistic = stat, p = 1 - pchisq(stat, 1)))
  }
  else cat("Dimension higher than 2, cannot compute\n")
 }
 cat("Usage: mcnemar.mh(x)\n")
 cat("\twhere x is an nx2 matrix or data frame of scores\n")
 cat("\tor a 2x2 matrix or data frame of rater agreement on 2 categories\n")
}
# rater.bias computes a Chi-squared value for a systematic bias
# of one rater compared with another.

rater.bias <- function(x) {
 # Chi-squared test (1 df) for a systematic bias of one rater relative
 # to another, comparing the totals above and below the diagonal of
 # the agreement table. Prints the table and returns list(statistic, p).
 if (is.matrix(x) || is.data.frame(x)) {
  xdim <- dim(x)
  if (length(xdim) == 2) {
   # if dimension lengths are unequal, assume it's a raw score matrix
   if (xdim[1] != xdim[2]) {
    tab <- if (xdim[1] == 2) as.matrix(table(x[1, ], x[2, ]))
           else as.matrix(table(x[, 1], x[, 2]))
   }
   else tab <- as.matrix(x)
   print(tab)
   above <- sum(tab[upper.tri(tab)])
   below <- sum(tab[lower.tri(tab)])
   stat <- (above - below)^2/(above + below)
   return(list(statistic = stat, p = 1 - pchisq(stat, 1)))
  }
  else cat("Dimension higher than 2, cannot compute\n")  
 }
 cat("Usage: rater.bias(x)\n")
 cat("\twhere x is an nx2 or 2xn  matrix of category scores for n objects\n")
 cat("\tor a CxC matrix or data frame of rater agreement on C categories\n")
}
# stuart.maxwell.mh computes the marginal homogeneity test for
# a CxC matrix of assignments of objects to C categories or an
# nx2 or 2xn matrix of category scores for n data objects by two
# raters. The statistic is distributed as Chi-square with C-1
# degrees of freedom.

stuart.maxwell.mh <- function(x) {
 # Stuart-Maxwell test of marginal homogeneity for a CxC table of
 # agreement between two raters, or an nx2/2xn matrix of raw category
 # scores that is first cross-tabulated. The statistic is distributed
 # as chi-squared with C-1 degrees of freedom.
 # Returns list(statistic, p); the statistic is now a plain scalar
 # rather than the 1x1 matrix produced by t(d) %*% solve(S) %*% d.
 if (is.matrix(x) || is.data.frame(x)) {
  dimx <- dim(x)
  if (length(dimx) == 2) {
   if (dimx[1] != dimx[2]) {
    # if dimension lengths are unequal, assume it's a score matrix
    if (dimx[1] == 2) smx <- as.matrix(table(x[1, ], x[2, ]))
    # assume the matrix is nx2
    else smx <- as.matrix(table(x[, 1], x[, 2]))
   }
   else smx <- as.matrix(x)
   # get the marginals
   rowsums <- apply(smx, 1, sum)
   colsums <- apply(smx, 2, sum)
   equalsums <- rowsums == colsums
   if (any(equalsums)) {
    # dump any categories with perfect agreement; drop=FALSE keeps
    # smx a matrix however few categories remain
    smx <- smx[!equalsums, !equalsums, drop = FALSE]
    # bail out if too many categories have disappeared
    if (dim(smx)[1] < 2) stop("Too many equal marginals, cannot compute")
    # get new marginals
    rowsums <- apply(smx, 1, sum)
    colsums <- apply(smx, 2, sum)
   }
   # use K-1 marginals (the last is linearly dependent on the others)
   Kminus1 <- length(rowsums) - 1
   smd <- (rowsums - colsums)[1:Kminus1]
   smS <- matrix(0, nrow = Kminus1, ncol = Kminus1)
   for (i in 1:Kminus1) {
    for (j in 1:Kminus1) {
     if (i == j) smS[i, j] <- rowsums[i] + colsums[j] - 2 * smx[i, j]
     else smS[i, j] <- -(smx[i, j] + smx[j, i])
    }
   }
   smstat <- as.numeric(t(smd) %*% solve(smS) %*% smd)
   return(list(statistic = smstat, p = 1 - pchisq(smstat, Kminus1)))
  }
  else cat("Dimension higher than 2, cannot compute\n")  
 }
 else {
  cat("Usage: stuart.maxwell.mh(x)\n")
  cat("\twhere x is an nx2 matrix or data frame of category scores for n objects\n")
  cat("\tor a CxC matrix or data frame of rater agreement on C categories\n")
 }
}
