| get.otu.jgr {CADStat} | R Documentation |
This function constructs the operational taxonomic units using the results/output from get.mergedfile
get.otu.jgr(bcnt, optlist = NULL, ndc = TRUE, outputFile = "sum.otu.txt")
bcnt |
a taxonomic matrix that have merged taxonomy of the benthic count names and the itis.ttable data, it can be output from get.mergedfile function or user input |
optlist |
~~Describe optlist here~~ |
ndc |
a flag |
outputFile |
a tab-delimited text file for output of the get.otu function that constructs the operational taxonomic units |
bioinfer1.JGR,
bioinfer2.JGR,
bioinfer3.JGR,
JGRMessageBox,
get.mergedfile,
get.mismatch,
dup.sel,
get.duplicates
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (bcnt, optlist = NULL, ndc = TRUE, outputFile = "sum.otu.txt")
{
names0 <- names(bcnt)
siteid <- names0[1]
nameid <- names0[2]
abnid <- names0[3]
if (is.list(optlist)) {
optlist <- optlist$tnames
}
w <- regexpr("\.", optlist)
optlist.spec <- optlist[w != -1]
JGRMessageBox(msg = "<HTML> Please wait while R compute inferences! <br/> It may take several minutes! </HTML>",
w.title = "BiologicalInferences: Info")
if (!is.null(optlist)) {
if (length(optlist.spec) > 0) {
spec <- sort(unique(bcnt$SPECIES))
name.orig <- character(0)
name.change <- character(0)
for (i in 1:length(spec)) {
if (is.na(match(spec[i], optlist.spec))) {
w <- regexpr("\.", spec[i])
gen <- substring(spec[i], 1, w - 1)
spec.half <- substring(spec[i], w + 1, nchar(spec[i]))
opt.sel <- character(0)
speclist <- as.list(rep(NA, times = 1))
k <- 1
repeat {
w2 <- regexpr("[A-Z]+", spec.half)
if (w2 == -1)
break
speclist[[k]] <- substring(spec.half, w2,
w2 + attributes(w2)$match.length - 1)
spec.half <- substring(spec.half, w2 + attributes(w2)$match.length,
nchar(spec.half))
k <- k + 1
}
ind1 <- grep(gen, optlist.spec)
if (length(ind1) > 0) {
ind.sel <- ind1
for (k in 1:length(speclist)) {
ind2 <- grep(speclist[[k]], optlist.spec)
ind.all <- c(ind.sel, ind2)
ind.sel <- ind.all[duplicated(ind.all)]
}
opt.sel <- c(opt.sel, optlist.spec[ind.sel])
}
if (length(opt.sel) > 0) {
if (length(opt.sel) > 1) {
specnew <- select.list(c(opt.sel, "NONE"),
preselect = "NONE", title = paste(spec[i]))
}
else {
specnew <- opt.sel
}
if ((specnew != "") & (specnew != "NONE")) {
spec[i] <- gsub("\(", ".", spec[i])
incvec <- regexpr(spec[i], bcnt$SPECIES) !=
-1
incvec[is.na(incvec)] <- FALSE
name.orig <- c(name.orig, spec[i])
name.change <- c(name.change, specnew)
bcnt$SPECIES[incvec] <- toupper(specnew)
}
}
}
}
if (length(name.orig) > 0) {
cat("Review the changes in species names: \n")
dftemp <- data.frame(name.orig, name.change)
names(dftemp) <- c("Original name", "Revised name")
print(dftemp)
cat("\n")
}
}
}
tlev <- names0[4:length(names0)]
tname <- rep(NA, times = nrow(bcnt))
for (i in length(tlev):1) {
incvec <- is.na(tname)
tname[incvec] <- bcnt[incvec, tlev[i]]
}
lookup <- unique.data.frame(data.frame(tname, bcnt[, nameid]))
names(lookup) <- c("TNAME", "TAXANAME")
getocc <- function(x) length(unique(x))
numocc <- tapply(bcnt[, siteid], tname, getocc)
df1 <- data.frame(names(numocc), numocc)
names(df1) <- c("TNAME", "NUMOCC")
df2 <- unique.data.frame(data.frame(bcnt[, tlev], tname))
names(df2) <- c(tlev, "TNAME")
df2 <- merge(df2, df1, by = "TNAME")
if (!is.null(optlist)) {
otufin <- rep(NA, times = nrow(df2))
tlevel <- rep(NA, times = nrow(df2))
for (i in 1:nrow(df2)) {
j <- length(tlev)
while (is.na(df2[i, tlev[j]])) j <- j - 1
while (is.na(match(df2[i, tlev[j]], optlist)) & (j >
1)) j <- j - 1
if (!is.na(match(df2[i, tlev[j]], optlist))) {
otufin[i] <- df2[i, tlev[j]]
tlevel[i] <- j
}
}
otufin1 <- otufin
}
else {
otufin <- levels(df2$TNAME)[df2$TNAME]
otufin1 <- otufin
}
in.all <- rep(TRUE, times = nrow(df2))
otufin2 <- rep(NA, times = nrow(df2))
for (i in 1:(length(tlev) - 1)) {
taxa.all <- df2[, tlev[i]]
taxa.red <- taxa.all[in.all]
taxa.u <- sort(unique(taxa.red))
in.all.n <- in.all
for (j in 1:length(taxa.u)) {
incvec <- taxa.all == taxa.u[j]
incvec[is.na(incvec)] <- FALSE
numocc.loc <- df2$NUMOCC[incvec]
otufin.loc <- otufin1[incvec]
v <- otufin.loc == taxa.u[j]
v[is.na(v)] <- FALSE
if (sum(v) > 0) {
a <- sum(numocc.loc[v])
b <- sum(numocc.loc[!v])
c <- sum(v)
d <- sum(!v)
if ((a >= b) | ((c == 1) & (d == 1))) {
otufin2[incvec] <- taxa.u[j]
in.all.n[incvec] <- FALSE
}
else {
otufin2[incvec] <- otufin.loc
otufin2[otufin1 == taxa.u[j]] <- NA
in.all.n[otufin1 == taxa.u[j]] <- FALSE
otufin1[otufin1 == taxa.u[j]] <- NA
}
in.all <- in.all.n
}
}
}
incvec <- (!is.na(otufin1)) & in.all
otufin2[incvec] <- otufin1[incvec]
df2 <- data.frame(df2, otufin, otufin2)
df2 <- df2[do.call(order, df2[, tlev]), ]
if (is.character(outputFile)) {
write.table(df2, file = outputFile, sep = "\t", row.names = FALSE)
JGRMessageBox(msg = paste("Check OTU assignments in",
outputFile), w.title = "BiologicalInferences: info")
cat("Check OTU assignments in", outputFile, "\n")
}
if (ndc) {
df3 <- df2[, c("TNAME", "otufin2")]
}
else {
df3 <- df2[, c("TNAME", "otufin")]
}
names(df3) <- c("TNAME", "OTU")
bcnt <- data.frame(bcnt, tname)
bcnt <- merge(df3, bcnt, by.x = "TNAME", by.y = "tname")
bcnt.otu <- bcnt[, c(siteid, nameid, abnid, "TNAME", "OTU")]
return(bcnt.otu)
}