| get.mismatch {CADStat} | R Documentation |
This function compares taxa names from benthic count matrix with itis taxa table; returns a vector of unrecognized taxa names for users to correct/verify.
get.mismatch(bcnt, itis.ttable, exlocal = character(0), outputFile = NULL)
bcnt |
a benthic sample count matrix including 3 columns: Sample ID, genus/species name, and counts |
itis.ttable |
taxonomic master table downloaded from itis website |
exlocal |
temporary var |
outputFile |
no outputFile at this stage |
bioinfer1.JGR,
bioinfer2.JGR,
bioinfer3.JGR,
JGRMessageBox,
get.mergedfile,
dup.sel,
get.duplicates,
get.otu.jgr
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (bcnt, itis.ttable, exlocal = character(0), outputFile = NULL)
{
f.tname <- (names(bcnt))[2]
tlevs <- names(itis.ttable)
imatch <- match("TAXON", tlevs)
tlevs <- tlevs[-imatch]
if (is.factor(bcnt[, 2])) {
f1 <- sort(unique(levels(bcnt[, 2])[bcnt[, 2]]))
}
else {
if (is.character(bcnt[, 2])) {
f1 <- sort(unique(bcnt[, 2]))
}
else {
JGRMessageBox(w.title = "Error", msg = "2nd field is neither factor nor character")
}
}
f2 <- toupper(f1)
dfref <- data.frame(I(f1), I(f2))
itis.taxa <- itis.ttable$TAXON
substr <- as.list(rep(NA, times = 1))
i <- 1
tmiss0 <- dfref$f2
w1 <- regexpr("\(", tmiss0)
w2 <- regexpr("\)", tmiss0)
incvec <- (w1 != -1) & (w2 != -1)
tmiss0[incvec] <- paste(substring(tmiss0[incvec], 1, w1[incvec] -
1), substring(tmiss0[incvec], w2[incvec] + 1, nchar(tmiss0[incvec])))
repeat {
w <- regexpr("[A-Z]+", tmiss0)
if (sum(w != -1) == 0)
break
substr[[i]] <- substring(tmiss0, w, w + attributes(w)$match.length -
1)
w3 <- w + attributes(w)$match.length
tmiss0 <- substring(tmiss0, w3, nchar(tmiss0))
if (sum(tmiss0 != "") == 0)
break
i <- i + 1
}
exlist <- c("DUPLICATE", "SETAE", "CODE", "GROUP", "TYPE",
"GENUS", "PANEL", "SAND", "TURRET", "CASE", "LARVAE",
toupper(exlocal))
sp.name <- rep("", times = length(substr[[1]]))
if (length(substr) > 1) {
for (i in 1:length(substr[[1]])) {
for (j in 2:length(substr)) {
if ((nchar(substr[[j]][i]) > 3) & (!(substr[[j]][i] %in%
exlist))) {
if (sp.name[i] == "") {
sp.name[i] <- substr[[j]][i]
}
else {
sp.name[i] <- paste(sp.name[i], substr[[j]][i],
sep = "/")
}
}
}
}
}
dfref$f2 <- substr[[1]]
dfref$sp.name <- sp.name
imatch <- match("FAMILY", toupper(tlevs))
tlevs.loc <- tlevs[length(tlevs):imatch]
for (i in 1:nrow(dfref)) {
if (nchar(substr[[2]][i]) > 3) {
imatch1 <- match(substr[[1]][i], itis.taxa)
imatch2 <- match(substr[[2]][i], itis.taxa)
if (is.na(imatch1) | is.na(imatch2)) {
if (!is.na(imatch2)) {
dfref$f2[i] <- substr[[2]][i]
dfref$sp.name[i] <- ""
}
}
else {
comp1 <- itis.ttable[imatch1, tlevs.loc]
comp2 <- itis.ttable[imatch2, tlevs.loc]
tlev.sav <- ""
for (j in 1:length(comp1)) {
if (!is.na(comp1[j]) & !is.na(comp2[j])) {
if ((comp1[j] == comp2[j]) & (comp1[j] !=
"")) {
tlev.sav <- tlevs.loc[j]
break
}
}
if (tlev.sav != "") {
dfref$f2[i] <- comp1[, tlev.sav]
dfref$sp.name[i] <- ""
}
}
}
}
}
dfref <<- dfref
tmiss0 <- character(0)
for (i in 1:length(dfref$f2)) {
if (!(dfref$f2[i] %in% itis.taxa)) {
tmiss0 <- c(tmiss0, dfref$f2[i])
}
}
tmiss0 <- sort(unique(tmiss0))
if (length(tmiss0) > 0) {
b <- .jnew("org.neptuneinc.cadstat.plots.BiologicalInferencesTaxaNameUnrecog")
.jcall(b, "Ljavax/swing/JFrame;", "getMyGUI", length(tmiss0),
tmiss0)
}
else {
get.duplicates(bcnt, tmiss1 = character(0), itis.ttable)
}
}