R : Copyright 2005, The R Foundation for Statistical Computing Version 2.1.1 (2005-06-20), ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for a HTML browser interface to help. Type 'q()' to quit R. > ### *

> ### > attach(NULL, name = "CheckExEnv") > assign(".CheckExEnv", as.environment(2), pos = length(search())) # base > ## add some hooks to label plot pages for base and grid graphics > setHook("plot.new", ".newplot.hook") > setHook("persp", ".newplot.hook") > setHook("grid.newpage", ".gridplot.hook") > > assign("cleanEx", + function(env = .GlobalEnv) { + rm(list = ls(envir = env, all.names = TRUE), envir = env) + RNGkind("default", "default") + set.seed(1) + options(warn = 1) + delayedAssign("T", stop("T used instead of TRUE"), + assign.env = .CheckExEnv) + delayedAssign("F", stop("F used instead of FALSE"), + assign.env = .CheckExEnv) + sch <- search() + newitems <- sch[! sch %in% .oldSearch] + for(item in rev(newitems)) + eval(substitute(detach(item), list(item=item))) + missitems <- .oldSearch[! .oldSearch %in% sch] + if(length(missitems)) + warning("items ", paste(missitems, collapse=", "), + " have been removed from the search path") + }, + env = .CheckExEnv) > assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now > assign("ptime", proc.time(), env = .CheckExEnv) > grDevices::postscript("genetics-Examples.ps") > assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv) > options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly")) > options(warn = 1) > library('genetics') Loading required package: combinat Loading required package: gdata Loading required package: gtools Loading required package: MASS Attaching package: 'genetics' The following object(s) are masked from package:MASS : genotype The following object(s) are masked from package:base : as.factor > > assign(".oldSearch", search(), env = .CheckExEnv) > assign(".oldNS", loadedNamespaces(), env = .CheckExEnv) > cleanEx(); ..nameEx <- "HWE.chisq" > > ### * HWE.chisq > > flush(stderr()); flush(stdout()) > > ### Name: HWE.chisq > ### Title: Perform Chi-Square Test for Hardy-Weinberg Equilibrium > ### Aliases: HWE.chisq HWE.chisq.genotype > ### Keywords: misc > > ### ** Examples > > ## Don't show: > set.seed(4657613) > ## End Don't show > example.data <- c("D/D","D/I","D/D","I/I","D/D", + "D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data) > g1 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > HWE.chisq(g1) Pearson's Chi-squared test with simulated p-value (based on 10000 replicates) data: tab X-squared = 4.7056, df = NA, p-value = 0.01340 > # compare with > HWE.exact(g1) Exact Test for Hardy-Weinberg Equilibrium data: g1 N11 = 6, N12 = 1, N22 = 2, N1 = 13, N2 = 5, p-value = 0.05882 > # and > HWE.test(g1) Warning in diseq.ci(x, R = ci.B, conf = conf) : NAs returned from diseq call ----------------------------------- Test for Hardy-Weinberg-Equilibrium ----------------------------------- Call: HWE.test.genotype(x = g1) Raw Disequlibrium for each allele pair (D) D I D -0.1450617 I -0.1450617 Scaled Disequlibrium for each allele pair (D') D I D -1.88 I -1.88 Correlation coefficient for each allele pair (r) D I D 0.7230769 I 0.7230769 Overall Values Value D -0.1450617 D' -1.8800000 r 0.7230769 Confidence intervals computed via bootstrap using 1000 samples Observed 95% CI NA's Contains Zero? Overall D -0.14506173 (-0.24691358, 0.01234568) 0 YES Overall D' -1.88000000 (-8.00000000, 0.12500000) 24 YES Overall r 0.72307692 (-0.12500000, 1.00000000) 24 YES Significance Test: Exact Test for Hardy-Weinberg Equilibrium data: g1 N11 = 6, N12 = 1, N22 = 2, N1 = 13, N2 = 5, p-value = 0.05882 > > three.data <- c(rep("A/A",8), + rep("C/A",20), + rep("C/T",20), + rep("C/C",10), + rep("T/T",3)) > > g3 <- genotype(three.data) > g3 [1] "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "C/A" "C/A" "C/A" "C/A" [13] "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" [25] "C/A" "C/A" "C/A" "C/A" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" [37] "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" [49] "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "T/T" "T/T" [61] "T/T" Alleles: C A T > > HWE.chisq(g3, B=10000) Pearson's Chi-squared test with simulated p-value (based on 10000 replicates) data: tab X-squared = 14.9503, df = NA, p-value = 0.0039 > > > > > cleanEx(); ..nameEx <- "HWE.exact" > > ### * HWE.exact > > flush(stderr()); flush(stdout()) > > ### Name: HWE.exact > ### Title: Exact Test of Hardy-Weinberg Equilibrium for 2-Allele Markers > ### Aliases: HWE.exact > ### Keywords: misc > > ### ** Examples > > example.data <- c("D/D","D/I","D/D","I/I","D/D", + "D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data) > g1 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > HWE.exact(g1) Exact Test for Hardy-Weinberg Equilibrium data: g1 N11 = 6, N12 = 1, N22 = 2, N1 = 13, N2 = 5, p-value = 0.05882 > # compare with > HWE.chisq(g1) Pearson's Chi-squared test with simulated p-value (based on 10000 replicates) data: tab X-squared = 4.7056, df = NA, p-value = 0.01160 > > ## Don't show: > set.seed(465764) > ## End Don't show > > g2 <- genotype(sample( c("A","C"), 100, p=c(100,10), rep=TRUE), + sample( c("A","C"), 100, p=c(100,10), rep=TRUE) ) > HWE.exact(g2) Exact Test for Hardy-Weinberg Equilibrium data: g2 N11 = 80, N12 = 20, N22 = 0, N1 = 180, N2 = 20, p-value = 0.5915 > > > > > cleanEx(); ..nameEx <- "HWE.test" > > ### * HWE.test > > flush(stderr()); flush(stdout()) > > ### Name: HWE.test > ### Title: Estimate Disequilibrium and Test for Hardy-Weinberg Equilibrium > ### Aliases: HWE.test HWE.test.genotype HWE.test.data.frame print.HWE.test > ### Keywords: misc > > ### ** Examples > > ## Don't show: > set.seed(4657613) > ## End Don't show > example.data <- c("D/D","D/I","D/D","I/I","D/D", + "D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data) > g1 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > HWE.test(g1) Warning in diseq.ci(x, R = ci.B, conf = conf) : NAs returned from diseq call ----------------------------------- Test for Hardy-Weinberg-Equilibrium ----------------------------------- Call: HWE.test.genotype(x = g1) Raw Disequlibrium for each allele pair (D) D I D -0.1450617 I -0.1450617 Scaled Disequlibrium for each allele pair (D') D I D -1.88 I -1.88 Correlation coefficient for each allele pair (r) D I D 0.7230769 I 0.7230769 Overall Values Value D -0.1450617 D' -1.8800000 r 0.7230769 Confidence intervals computed via bootstrap using 1000 samples Observed 95% CI NA's Contains Zero? Overall D -0.14506173 (-0.24691358, 0.01234568) 0 YES Overall D' -1.88000000 (-8.00000000, 0.12500000) 28 YES Overall r 0.72307692 (-0.12500000, 1.00000000) 28 YES Significance Test: Exact Test for Hardy-Weinberg Equilibrium data: g1 N11 = 6, N12 = 1, N22 = 2, N1 = 13, N2 = 5, p-value = 0.05882 > > #compare with > diseq(g1) Call: diseq.genotype(x = g1) Disequlibrium for each allele pair (D) D I D -0.1450617 I -0.1450617 Disequlibrium for each allele pair (D') D I D -1.88 I -1.88 Correlation coefficient for each allele pair (r) D I D 0.7230769 I 0.7230769 Observed vs Expected frequency table Obs Exp Obs-Exp D/D 0.66666667 0.5216049 0.1450617 I/D 0.05555556 0.2006173 -0.1450617 D/I 0.05555556 0.2006173 -0.1450617 I/I 0.22222222 0.0771605 0.1450617 Overall Values D : -0.1450617 D': -1.88 r : 0.7230769 > diseq.ci(g1) Warning in diseq.ci(g1) : NAs returned from diseq call > HWE.chisq(g1) Pearson's Chi-squared test with simulated p-value (based on 10000 replicates) data: tab X-squared = 4.7056, df = NA, p-value = 0.0121 > HWE.exact(g1) Exact Test for Hardy-Weinberg Equilibrium data: g1 N11 = 6, N12 = 1, N22 = 2, N1 = 13, N2 = 5, p-value = 0.05882 > > three.data <- c(rep("A/A",8), + rep("C/A",20), + rep("C/T",20), + rep("C/C",10), + rep("T/T",3)) > > g3 <- genotype(three.data) > g3 [1] "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "C/A" "C/A" "C/A" "C/A" [13] "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" [25] "C/A" "C/A" "C/A" "C/A" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" [37] "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" [49] "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "T/T" "T/T" [61] "T/T" Alleles: C A T > > HWE.test(g3, ci.B=10000) Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated Warning in diseq.ci(x, R = ci.B, conf = conf) : For more than two alleles, overall disequlibrium statistics are bounded between [0,1]. Because of this, confidence intervals for values near 0 and 1 are ill-behaved. A rough correction has been applied, but the intervals still may not be correct for values near 0 or 1. ----------------------------------- Test for Hardy-Weinberg-Equilibrium ----------------------------------- Call: HWE.test.genotype(x = g3, ci.B = 10000) Raw Disequlibrium for each allele pair (D) C A T C 0.01881215 0.05912389 A 0.01881215 -0.06288632 T 0.05912389 -0.06288632 Scaled Disequlibrium for each allele pair (D') C A T C 0.1296296 0.5641026 A 0.1296296 -0.4186047 T 0.5641026 -0.4186047 Correlation coefficient for each allele pair (r) C A T C -0.0825061 -0.2887945 A -0.0825061 0.3367077 T -0.2887945 0.3367077 Overall Values (mean absolute-value weighted by expected allele frequency) Value D 0.04117893 D' 0.33329337 r 0.20272574 Confidence intervals computed via bootstrap using 10000 samples * WARNING: For more than two alleles, overall disequlibrium * statistics are bounded between [0,1]. Because of this, * confidence intervals for values near 0 and 1 are ill-behaved. A * rough correction has been applied, but the intervals still may * not be correct for values near 0 or 1. Observed 95% CI NA's Contains Zero? Overall D 0.04117893 (0.00000000, 0.06415079) 0 YES Overall D' 0.33329337 (0.00000000, 0.50156871) 0 YES Overall r 0.20272574 (0.00000000, 0.30460657) 0 YES Significance Test: Pearson's Chi-squared test with simulated p-value (based on 10000 replicates) data: g3 X-squared = 14.9503, df = NA, p-value = 0.004000 > > > > > cleanEx(); ..nameEx <- "LD" > > ### * LD > > flush(stderr()); flush(stdout()) > > ### Name: LD > ### Title: Pairwise linkage disequilibrium between genetic markers. > ### Aliases: LD LD.genotype LD.data.frame > ### Keywords: misc > > ### ** Examples > > > g1 <- genotype( c('T/A', NA, 'T/T', NA, 'T/A', NA, 'T/T', 'T/A', + 'T/T', 'T/T', 'T/A', 'A/A', 'T/T', 'T/A', 'T/A', 'T/T', + NA, 'T/A', 'T/A', NA) ) > > g2 <- genotype( c('C/A', 'C/A', 'C/C', 'C/A', 'C/C', 'C/A', 'C/A', 'C/A', + 'C/A', 'C/C', 'C/A', 'A/A', 'C/A', 'A/A', 'C/A', 'C/C', + 'C/A', 'C/A', 'C/A', 'A/A') ) > > g3 <- genotype( c('T/A', 'T/A', 'T/T', 'T/A', 'T/T', 'T/A', 'T/A', 'T/A', + 'T/A', 'T/T', 'T/A', 'T/T', 'T/A', 'T/A', 'T/A', 'T/T', + 'T/A', 'T/A', 'T/A', 'T/T') ) > > # Compute LD on a single pair > > LD(g1,g2) Pairwise LD ----------- D D' Corr Estimates: 0.1419402 0.8110866 0.6029553 X^2 P-value N LD Test: 10.90665 0.0009581958 15 > > # Compute LD table for all 3 genotypes > > data <- makeGenotypes(data.frame(g1,g2,g3)) > LD(data) Pairwise LD ----------- g2 g3 g1 D 0.1419402 0.0899457 g1 D' 0.8110866 0.4151342 g1 Corr. 0.6029553 0.4000333 g1 X^2 10.9066513 4.8007990 g1 P-value 0.0009581958 0.02844654 g1 n 15 15 g2 D 0.1836927 g2 D' 0.9996881 g2 Corr. 0.7712137 g2 X^2 23.7908198 g2 P-value 1.073934e-06 g2 n 20 > > > > cleanEx(); ..nameEx <- "binsearch" > > ### * binsearch > > flush(stderr()); flush(stdout()) > > ### Name: binsearch > ### Title: Binary Search > ### Aliases: binsearch > ### Keywords: optimize programming > > ### ** Examples > > > ### Toy examples > > # search for x=10 > binsearch( function(x) x-10, range=c(0,20) ) $call binsearch(fun = function(x) x - 10, range = c(0, 20)) $numiter [1] 1 $flag [1] "Found" $where [1] 10 $value [1] 0 > > # search for x=10.1 > binsearch( function(x) x-10.1, range=c(0,20) ) $call binsearch(fun = function(x) x - 10.1, range = c(0, 20)) $numiter [1] 5 $flag [1] "Between Elements" $where [1] 10 11 $value [1] -0.1 0.9 > > ### Classical toy example > > # binary search for the index of 'M' among the sorted letters > fun <- function(X) ifelse(LETTERS[X] > 'M', 1, + ifelse(LETTERS[X] < 'M', -1, 0 ) ) > > binsearch( fun, range=1:26 ) $call binsearch(fun = fun, range = 1:26) $numiter [1] 5 $flag [1] "Found" $where [1] 13 $value [1] 0 > # returns $where=13 > LETTERS[13] [1] "M" > > ### Substantive example, from genetics > > # Determine the necessary sample size to detect all alleles with > # frequency 0.07 or greater with probability 0.95. > power.fun <- function(N) 1 - gregorius(N=N, freq=0.07)$missprob > > binsearch( power.fun, range=c(0,100), target=0.95 ) $call binsearch(fun = power.fun, range = c(0, 100), target = 0.95) $numiter [1] 8 $flag [1] "Between Elements" $where [1] 76 77 $value [1] 0.9464850 0.9502088 > > # equivalent to > gregorius( freq=0.07, missprob=0.05) $call gregorius(freq = 0.07, missprob = 0.05) $method [1] "Determine minimal N given missprob and freq" $freq [1] 0.07 $N [1] 77 $missprob [1] 0.04979116 > > > > cleanEx(); ..nameEx <- "ci.balance" > > ### * ci.balance > > flush(stderr()); flush(stdout()) > > ### Name: ci.balance > ### Title: Experimental Function to Correct Confidence Intervals At or Near > ### Boundaries of the Parameter Space by 'Sliding' the Interval on the > ### Quantile Scale. > ### Aliases: ci.balance > ### Keywords: misc > > ### ** Examples > > # These are nonsensical examples which simply exercise the > # computation. See the code to diseq.ci for a real example. > # > # FIXME: Add real example using boot or bootstrap. > > set.seed(7981357) > x <- abs(rnorm(100,1)) > ci.balance(x,1, minval=0) Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated $ci 2% 98% 0.0584496 3.0860422 $overflow.upper [1] 0 $overflow.lower [1] 0 $n.above [1] 50 $n.below [1] 50 $lower.n [1] 2 $upper.n [1] 98 > ci.balance(x,1) Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated $ci 2% 98% 0.0584496 3.0860422 $overflow.upper [1] 0 $overflow.lower [1] 0 $n.above [1] 50 $n.below [1] 50 $lower.n [1] 2 $upper.n [1] 98 > > x <- rnorm(100,1) > x <- ifelse(x>1, 1, x) > ci.balance(x,1, maxval=1) Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated $ci 5% Upper Boundary -0.6396774 1.0000000 $overflow.upper [1] 25.5 $overflow.lower [1] 0 $n.above [1] 22 $n.below [1] 78 $lower.n [1] 5 $upper.n [1] "Upper Boundary" > ci.balance(x,1) Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated $ci 5% max(x) -0.6396774 1.0000000 $overflow.upper [1] 25.5 $overflow.lower [1] 0 $n.above [1] 22 $n.below [1] 78 $lower.n [1] 5 $upper.n [1] "max(x)" > > > > cleanEx(); ..nameEx <- "diseq" > > ### * diseq > > flush(stderr()); flush(stdout()) > > ### Name: diseq > ### Title: Estimate or Compute Confidence Interval for the Single-Marker > ### Disequilibrium > ### Aliases: diseq diseq.table diseq.genotype diseq.ci print.diseq > ### Keywords: misc > > ### ** Examples > > ## Don't show: > set.seed(7981357) > ## End Don't show > example.data <- c("D/D","D/I","D/D","I/I","D/D", + "D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data) > g1 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > diseq(g1) Call: diseq.genotype(x = g1) Disequlibrium for each allele pair (D) D I D -0.1450617 I -0.1450617 Disequlibrium for each allele pair (D') D I D -1.88 I -1.88 Correlation coefficient for each allele pair (r) D I D 0.7230769 I 0.7230769 Observed vs Expected frequency table Obs Exp Obs-Exp D/D 0.66666667 0.5216049 0.1450617 I/D 0.05555556 0.2006173 -0.1450617 D/I 0.05555556 0.2006173 -0.1450617 I/I 0.22222222 0.0771605 0.1450617 Overall Values D : -0.1450617 D': -1.88 r : 0.7230769 > diseq.ci(g1) Warning in diseq.ci(g1) : NAs returned from diseq call > HWE.test(g1) # does the same, plus tests D-hat=0 Warning in diseq.ci(x, R = ci.B, conf = conf) : NAs returned from diseq call ----------------------------------- Test for Hardy-Weinberg-Equilibrium ----------------------------------- Call: HWE.test.genotype(x = g1) Raw Disequlibrium for each allele pair (D) D I D -0.1450617 I -0.1450617 Scaled Disequlibrium for each allele pair (D') D I D -1.88 I -1.88 Correlation coefficient for each allele pair (r) D I D 0.7230769 I 0.7230769 Overall Values Value D -0.1450617 D' -1.8800000 r 0.7230769 Confidence intervals computed via bootstrap using 1000 samples Observed 95% CI NA's Contains Zero? Overall D -0.14506173 (-0.24691358, 0.01234568) 0 YES Overall D' -1.88000000 (-8.00000000, 0.12500000) 20 YES Overall r 0.72307692 (-0.12500000, 1.00000000) 20 YES Significance Test: Exact Test for Hardy-Weinberg Equilibrium data: g1 N11 = 6, N12 = 1, N22 = 2, N1 = 13, N2 = 5, p-value = 0.05882 > > three.data <- c(rep("A/A",8), + rep("C/A",20), + rep("C/T",20), + rep("C/C",10), + rep("T/T",3)) > > g3 <- genotype(three.data) > g3 [1] "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "A/A" "C/A" "C/A" "C/A" "C/A" [13] "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" "C/A" [25] "C/A" "C/A" "C/A" "C/A" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" [37] "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" "C/T" [49] "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "C/C" "T/T" "T/T" [61] "T/T" Alleles: C A T > > diseq(g3) Call: diseq.genotype(x = g3) Disequlibrium for each allele pair (D) C A T C 0.01881215 0.05912389 A 0.01881215 -0.06288632 T 0.05912389 -0.06288632 Disequlibrium for each allele pair (D') C A T C 0.1296296 0.5641026 A 0.1296296 -0.4186047 T 0.5641026 -0.4186047 Correlation coefficient for each allele pair (r) C A T C -0.0825061 -0.2887945 A -0.0825061 0.3367077 T -0.2887945 0.3367077 Observed vs Expected frequency table Obs Exp Obs-Exp C/C 0.16393443 0.24187046 -0.077936039 A/C 0.16393443 0.14512228 0.018812147 T/C 0.16393443 0.10481053 0.059123891 C/A 0.16393443 0.14512228 0.018812147 A/A 0.13114754 0.08707337 0.044074174 T/A 0.00000000 0.06288632 -0.062886321 C/T 0.16393443 0.10481053 0.059123891 A/T 0.00000000 0.06288632 -0.062886321 T/T 0.04918033 0.04541790 0.003762429 Overall Values (mean absolute-value weighted by expected allele frequency) D : 0.04117893 D': 0.3332934 r : 0.2027257 > diseq.ci(g3, ci.B=10000, ci.type="bca") Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated Warning in return(ci = ci, overflow.upper = overflow.upper, overflow.lower = overflow.lower, : multi-argument returns are deprecated Warning in diseq.ci(g3, ci.B = 10000, ci.type = "bca") : For more than two alleles, overall disequlibrium statistics are bounded between [0,1]. Because of this, confidence intervals for values near 0 and 1 are ill-behaved. A rough correction has been applied, but the intervals still may not be correct for values near 0 or 1. > > # only show observed vs expected table > print(diseq(g3),show='table') Call: diseq.genotype(x = g3) Observed vs Expected frequency table Obs Exp Obs-Exp C/C 0.16393443 0.24187046 -0.077936039 A/C 0.16393443 0.14512228 0.018812147 T/C 0.16393443 0.10481053 0.059123891 C/A 0.16393443 0.14512228 0.018812147 A/A 0.13114754 0.08707337 0.044074174 T/A 0.00000000 0.06288632 -0.062886321 C/T 0.16393443 0.10481053 0.059123891 A/T 0.00000000 0.06288632 -0.062886321 T/T 0.04918033 0.04541790 0.003762429 > > > > > cleanEx(); ..nameEx <- "expectedGenotypes" > > ### * expectedGenotypes > > flush(stderr()); flush(stdout()) > > ### Name: expectedGenotypes > ### Title: Construct expected genotypes according to known allele variants > ### Aliases: expectedGenotypes > ### Keywords: manip > > ### ** Examples > > ## Not run: Scrapie example > > scrapie <- c("ARQ/ARQ", "ARQ/ARQ", "ARR/ARQ", "AHQ/ARQ", "ARQ/ARQ") > expectedGenotypes(as.genotype(scrapie)) [1] "AHQ/AHQ" "AHQ/ARQ" "AHQ/ARR" "ARQ/ARQ" "ARQ/ARR" "ARR/ARR" > expectedGenotypes(alleles=c("ARR", "AHQ", "ARH", "ARQ", "VRR", "VRQ")) [1] "AHQ/AHQ" "AHQ/ARH" "AHQ/ARQ" "AHQ/ARR" "AHQ/VRQ" "AHQ/VRR" "ARH/ARH" [8] "ARH/ARQ" "ARH/ARR" "ARH/VRQ" "ARH/VRR" "ARQ/ARQ" "ARQ/ARR" "ARQ/VRQ" [15] "ARQ/VRR" "ARR/ARR" "ARR/VRQ" "ARR/VRR" "VRQ/VRQ" "VRQ/VRR" "VRR/VRR" > scrapie <- genotype(scrapie, + alleles=c("ARR", "AHQ", "ARH", "ARQ", "VRR", "VRQ"), + reorder="yes") > expectedGenotypes(scrapie) [1] "AHQ/AHQ" "AHQ/ARH" "AHQ/ARQ" "AHQ/ARR" "AHQ/VRQ" "AHQ/VRR" "ARH/ARH" [8] "ARH/ARQ" "ARH/ARR" "ARH/VRQ" "ARH/VRR" "ARQ/ARQ" "ARQ/ARR" "ARQ/VRQ" [15] "ARQ/VRR" "ARR/ARR" "ARR/VRQ" "ARR/VRR" "VRQ/VRQ" "VRQ/VRR" "VRR/VRR" > > > > > cleanEx(); ..nameEx <- "genotype" > > ### * genotype > > flush(stderr()); flush(stdout()) > > ### Name: genotype > ### Title: Genotype or Haplotype Objects. > ### Aliases: genotype haplotype is.genotype is.haplotype as.genotype > ### as.haplotype print.genotype ==.genotype ==.haplotype [.genotype > ### [.haplotype [<-.genotype [<-.haplotype heterozygote.genotype > ### homozygote.genotype print.allele.count print.allele.genotype > ### allele.count.genotype as.genotype.allele.count as.genotype.character > ### as.genotype.default as.genotype.factor as.genotype.genotype > ### as.genotype.haplotype as.genotype.table nallele > ### Keywords: misc > > ### ** Examples > > # several examples of genotype data in different formats > example.data <- c("D/D","D/I","D/D","I/I","D/D", + "D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data) > g1 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > example.data2 <- c("C-C","C-T","C-C","T-T","C-C", + "C-C","C-C","C-C","T-T","") > g2 <- genotype(example.data2,sep="-") > g2 [1] "C/C" "C/T" "C/C" "T/T" "C/C" "C/C" "C/C" "C/C" "T/T" NA Alleles: C T > > example.nosep <- c("DD", "DI", "DD", "II", "DD", + "DD", "DD", "DD", "II", "") > g3 <- genotype(example.nosep,sep="") > g3 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > example.a1 <- c("D", "D", "D", "I", "D", "D", "D", "D", "I", "") > example.a2 <- c("D", "I", "D", "I", "D", "D", "D", "D", "I", "") > g4 <- genotype(example.a1,example.a2) > g4 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > example.mat <- cbind(a1=example.a1, a1=example.a2) > g5 <- genotype(example.mat) > g5 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > example.data5 <- c("D / D","D / I","D / D","I / I", + "D / D","D / D","D / D","D / D", + "I / I","") > g5 <- genotype(example.data5,rem=TRUE) > g5 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > # show how genotype and haplotype differ > data1 <- c("C/C", "C/T", "T/C") > data2 <- c("C/C", "T/C", "T/C") > > test1 <- genotype( data1 ) > test2 <- genotype( data2 ) > > test3 <- haplotype( data1 ) > test4 <- haplotype( data2 ) > > test1==test2 [1] TRUE TRUE TRUE > test3==test4 [1] TRUE FALSE TRUE > > test1=="C/T" [1] FALSE TRUE TRUE > test1=="T/C" [1] FALSE TRUE TRUE > > test3=="C/T" [1] FALSE TRUE FALSE > test3=="T/C" [1] FALSE FALSE TRUE > > ## "Messy" example > > m3 <- c("D D/\t D D","D\tD/ I", "D D/ D D","I/ I", + "D D/ D D","D D/ D D","D D/ D D","D D/ D D", + "I/ I","/ ","/I") > > genotype(m3) [1] "DD/DD" "DD/I" "DD/DD" "I/I" "DD/DD" "DD/DD" "DD/DD" "DD/DD" "I/I" [10] NA NA Alleles: DD I > summary(genotype(m3)) Number of samples typed: 9 (81.8%) Allele Frequency: (2 alleles) Count Proportion DD 13 0.72 I 5 0.28 NA 4 NA Genotype Frequency: Count Proportion DD/DD 6 0.67 DD/I 1 0.11 I/I 2 0.22 NA 2 NA Heterozygosity (Hu) = 0.4513889 Poly. Inf. Content = 0.32074 > > m4 <- c("D D","D I","D D","I I", + "D D","D D","D D","D D", + "I I"," "," I") > > genotype(m4,sep=1) [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA NA Alleles: D I > genotype(m4,sep=" ",remove.spaces=FALSE) [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA NA NA Alleles: D I > summary(genotype(m4,sep=" ",remove.spaces=FALSE)) Number of samples typed: 9 (75%) Allele Frequency: (2 alleles) Count Proportion D 13 0.72 I 5 0.28 NA 6 NA Genotype Frequency: Count Proportion D/D 6 0.67 D/I 1 0.11 I/I 2 0.22 NA 3 NA Heterozygosity (Hu) = 0.4513889 Poly. Inf. Content = 0.32074 > > m5 <- c("DD","DI","DD","II", + "DD","DD","DD","DD", + "II"," "," I") > genotype(m5,sep=1) [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA NA Alleles: D I > haplotype(m5,sep=1,remove.spaces=FALSE) [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA NA Alleles: D I > > g5 <- genotype(m5,sep="") > h5 <- haplotype(m5,sep="") > > heterozygote(g5) [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE NA NA > homozygote(g5) [1] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA NA > carrier(g5,"D") [1] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE NA NA > > g5[9:10] <- haplotype(m4,sep=" ",remove=FALSE)[1:2] > g5 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "D/D" "D/I" NA Alleles: D I > > g5[9:10] [1] "D/D" "D/I" Alleles: D I > allele(g5[9:10],1) [1] "D" "D" attr(,"which") [1] 1 attr(,"allele.names") [1] "D" "I" > allele(g5,1)[9:10] [1] "D" "D" > > # drop unused alleles > g5[9:10,drop=TRUE] [1] "D/D" "D/I" Alleles: D I > h5[9:10,drop=TRUE] [1] "I/I" NA Alleles: I > > # Convert allele.counts into genotype > > x <- c(0,1,2,1,1,2,NA,1,2,1,2,2,2) > g <- as.genotype.allele.count(x, alleles=c("C","T") ) Warning in genotype(sapply(tmp, paste, collapse = "/"), alleles = alleles, : Found data values not matching specified alleles. Converting to NA. > g [1] "T/T" "C/T" "C/C" "C/T" "C/T" "C/C" NA "C/T" "C/C" "C/T" "C/C" "C/C" [13] "C/C" Alleles: C T > > > > > cleanEx(); ..nameEx <- "gregorius" > > ### * gregorius > > flush(stderr()); flush(stdout()) > > ### Name: gregorius > ### Title: Probability of Observing All Alleles with a Given Frequency in a > ### Sample of a Specified Size. > ### Aliases: gregorius > ### Keywords: misc > > ### ** Examples > > > # Compute the probability of missing an allele with frequency 0.15 when > # 20 genotypes are sampled: > gregorius(freq=0.15, N=20) $call gregorius(freq = 0.15, N = 20) $method [1] "Compute missprob given N and freq" $freq [1] 0.15 $N [1] 20 $missprob [1] 0.1938351 > > # Determine what sample size is required to observe all alleles with true > # frequency 0.15 with probability 0.95 > gregorius(freq=0.15, missprob=1-0.95) $call gregorius(freq = 0.15, missprob = 1 - 0.95) $method [1] "Determine minimal N given missprob and freq" $freq [1] 0.15 $N [1] 29 $missprob [1] 0.04520557 > > > > > cleanEx(); ..nameEx <- "homozygote" > > ### * homozygote > > flush(stderr()); flush(stdout()) > > ### Name: homozygote > ### Title: Extract Features of Genotype objects > ### Aliases: homozygote heterozygote carrier carrier.genotype allele > ### allele.count allele.names > ### Keywords: misc > > ### ** Examples > > > example.data <- c("D/D","D/I","D/D","I/I","D/D","D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data) > g1 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > heterozygote(g1) [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE NA > homozygote(g1) [1] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA > > carrier(g1,"D") [1] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE NA > carrier(g1,"D",na.rm=TRUE) [1] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE FALSE > > # get count of one allele > allele.count(g1,"D") [1] 2 1 2 0 2 2 2 2 0 NA attr(,"allele") [1] "D" > > # get count of each allele > allele.count(g1) # equivalent to D I [1,] 2 0 [2,] 1 1 [3,] 2 0 [4,] 0 2 [5,] 2 0 [6,] 2 0 [7,] 2 0 [8,] 2 0 [9,] 0 2 [10,] NA NA > allele.count(g1, c("D","I"), any=FALSE) D I [1,] 2 0 [2,] 1 1 [3,] 2 0 [4,] 0 2 [5,] 2 0 [6,] 2 0 [7,] 2 0 [8,] 2 0 [9,] 0 2 [10,] NA NA > > # get combined count for both alleles > allele.count(g1,c("I","D")) [1] 2 2 2 2 2 2 2 2 2 NA > > # get second allele > allele(g1,2) [1] "D" "I" "D" "I" "D" "D" "D" "D" "I" NA attr(,"which") [1] 2 attr(,"allele.names") [1] "D" "I" > > # get both alleles > allele(g1) [,1] [,2] [1,] "D" "D" [2,] "D" "I" [3,] "D" "D" [4,] "I" "I" [5,] "D" "D" [6,] "D" "D" [7,] "D" "D" [8,] "D" "D" [9,] "I" "I" [10,] NA NA attr(,"which") [1] 1 2 attr(,"allele.names") [1] "D" "I" > > > > > cleanEx(); ..nameEx <- "locus" > > ### * locus > > flush(stderr()); flush(stdout()) > > ### Name: locus > ### Title: Create and Manipulate Locus, Gene, and Marker Objects > ### Aliases: locus gene marker is.gene is.locus is.marker print.gene > ### print.locus print.marker as.character.locus as.character.gene > ### as.character.marker getlocus getmarker getgene locus<- marker<- > ### gene<- > ### Keywords: misc > > ### ** Examples > > ar2 <- gene("AR2",chromosome=7,arm="q",index.start=35) > ar2 Gene: AR2 (7q35) > > par <- locus(name="AR2 Psedogene", + chromosome=1, + arm="q", + index.start=32, + index.end=42) > par Locus: AR2 Psedogene (1q32-42) > > c109t <- marker(name="C-109T", + type="SNP", + locus.name="AR2", + chromosome=7, + arm="q", + index.start=35, + bp.start=-109, + relative.to="start of coding region") > c109t Marker: AR2:C-109T (7q35:-109) Type: SNP > > c109t <- marker(name="C-109T", + type="SNP", + locus=ar2, + bp.start=-109, + relative.to="start of coding region") > c109t Marker: AR2:C-109T (7q35:-109) Type: SNP > > > > example.data <- c("D/D","D/I","D/D","I/I","D/D", + "D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data, locus=ar2) > g1 Gene: AR2 (7q35) [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > getlocus(g1) Gene: AR2 (7q35) > > summary(g1) Gene: AR2 (7q35) Number of samples typed: 9 (90%) Allele Frequency: (2 alleles) Count Proportion D 13 0.72 I 5 0.28 NA 2 NA Genotype Frequency: Count Proportion D/D 6 0.67 D/I 1 0.11 I/I 2 0.22 NA 1 NA Heterozygosity (Hu) = 0.4513889 Poly. Inf. Content = 0.32074 > HWE.test(g1) Warning in diseq.ci(x, R = ci.B, conf = conf) : NAs returned from diseq call ----------------------------------- Test for Hardy-Weinberg-Equilibrium ----------------------------------- Call: HWE.test.genotype(x = g1) Raw Disequlibrium for each allele pair (D) D I D -0.1450617 I -0.1450617 Scaled Disequlibrium for each allele pair (D') D I D -1.88 I -1.88 Correlation coefficient for each allele pair (r) D I D 0.7230769 I 0.7230769 Overall Values Value D -0.1450617 D' -1.8800000 r 0.7230769 Confidence intervals computed via bootstrap using 1000 samples Observed 95% CI NA's Contains Zero? Overall D -0.14506173 (-0.24691358, 0.01234568) 0 YES Overall D' -1.88000000 (-8.00000000, 0.12500000) 22 YES Overall r 0.72307692 (-0.12500000, 1.00000000) 22 YES Significance Test: Exact Test for Hardy-Weinberg Equilibrium data: g1 N11 = 6, N12 = 1, N22 = 2, N1 = 13, N2 = 5, p-value = 0.05882 > > g2 <- genotype(example.data, locus=c109t) > summary(g2) Marker: AR2:C-109T (7q35:-109) Type: SNP Number of samples typed: 9 (90%) Allele Frequency: (2 alleles) Count Proportion D 13 0.72 I 5 0.28 NA 2 NA Genotype Frequency: Count Proportion D/D 6 0.67 D/I 1 0.11 I/I 2 0.22 NA 1 NA Heterozygosity (Hu) = 0.4513889 Poly. Inf. Content = 0.32074 > > getlocus(g2) Marker: AR2:C-109T (7q35:-109) Type: SNP > > heterozygote(g2) [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE NA attr(,"locus") Marker: AR2:C-109T (7q35:-109) Type: SNP > homozygote(g1) [1] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA attr(,"locus") Gene: AR2 (7q35) > > allele(g1,1) [1] "D" "D" "D" "I" "D" "D" "D" "D" "I" NA attr(,"locus") Gene: AR2 (7q35) attr(,"which") [1] 1 attr(,"allele.names") [1] "D" "I" > > carrier(g1,"I") [1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE TRUE NA attr(,"locus") Gene: AR2 (7q35) > > heterozygote(g2) [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE NA attr(,"locus") Marker: AR2:C-109T (7q35:-109) Type: SNP > > > > cleanEx(); ..nameEx <- "makeGenotypes" > > ### * makeGenotypes > > flush(stderr()); flush(stdout()) > > ### Name: makeGenotypes > ### Title: Convert columns in a dataframe to genotypes or haplotypes > ### Aliases: makeGenotypes makeHaplotypes > ### Keywords: misc > > ### ** Examples > > ## Not run: > ##D # common case > ##D data <- read.csv(file="genotype_data.csv") > ##D data <- makeGenotypes(data) > ## End(Not run) > > # Create a test data set where there are several genotypes in columns > # of the form "A/T". > test1 <- data.frame(Tmt=sample(c("Control","Trt1","Trt2"),20, replace=TRUE), + G1=sample(c("A/T","T/T","T/A",NA),20, replace=TRUE), + N1=rnorm(20), + I1=sample(1:100,20,replace=TRUE), + G2=paste(sample(c("134","138","140","142","146"),20, + replace=TRUE), + sample(c("134","138","140","142","146"),20, + replace=TRUE), + sep=" / "), + G3=sample(c("A /T","T /T","T /A"),20, replace=TRUE), + comment=sample(c("Possible Bad Data/Lab Error",""),20, + rep=TRUE) + ) > test1 Tmt G1 N1 I1 G2 G3 comment 1 Control 0.91897737 44 142 / 146 T /A Possible Bad Data/Lab Error 2 Trt1 A/T 0.78213630 72 138 / 140 T /T 3 Trt1 T/A 0.07456498 40 138 / 140 A /T Possible Bad Data/Lab Error 4 Trt2 A/T -1.98935170 33 146 / 134 A /T 5 Control T/T 0.61982575 76 142 / 142 T /A 6 Trt2 T/T -0.05612874 21 138 / 140 T /T Possible Bad Data/Lab Error 7 Trt2 A/T -0.15579551 72 134 / 140 A /T Possible Bad Data/Lab Error 8 Trt1 T/T -1.47075238 13 140 / 138 T /A Possible Bad Data/Lab Error 9 Trt1 -0.47815006 25 146 / 138 A /T 10 Control T/T 0.41794156 15 140 / 140 T /A Possible Bad Data/Lab Error 11 Control T/T 1.35867955 24 146 / 140 T /T 12 Control T/A -0.10278773 6 142 / 134 T /T 13 Trt2 T/T 0.38767161 65 138 / 134 A /T 14 Trt1 A/T -0.05380504 88 140 / 142 T /T Possible Bad Data/Lab Error 15 Trt2 -1.37705956 78 134 / 146 T /T Possible Bad Data/Lab Error 16 Trt1 T/A -0.41499456 80 134 / 140 A /T 17 Trt2 -0.39428995 46 142 / 140 T /T 18 Trt2 A/T -0.05931340 42 134 / 140 A /T 19 Trt1 T/A 1.10002537 82 140 / 146 A /T 20 Trt2 T/T 0.76317575 61 142 / 140 A /T > > # now automatically convert genotype columns > geno1 <- makeGenotypes(test1) > geno1 Tmt G1 N1 I1 G2 G3 comment 1 Control 0.91897737 44 142/146 T/A Possible Bad Data/Lab Error 2 Trt1 T/A 0.78213630 72 140/138 T/T 3 Trt1 T/A 0.07456498 40 140/138 T/A Possible Bad Data/Lab Error 4 Trt2 T/A -1.98935170 33 134/146 T/A 5 Control T/T 0.61982575 76 142/142 T/A 6 Trt2 T/T -0.05612874 21 140/138 T/T Possible Bad Data/Lab Error 7 Trt2 T/A -0.15579551 72 140/134 T/A Possible Bad Data/Lab Error 8 Trt1 T/T -1.47075238 13 140/138 T/A Possible Bad Data/Lab Error 9 Trt1 -0.47815006 25 146/138 T/A 10 Control T/T 0.41794156 15 140/140 T/A Possible Bad Data/Lab Error 11 Control T/T 1.35867955 24 140/146 T/T 12 Control T/A -0.10278773 6 142/134 T/T 13 Trt2 T/T 0.38767161 65 134/138 T/A 14 Trt1 T/A -0.05380504 88 140/142 T/T Possible Bad Data/Lab Error 15 Trt2 -1.37705956 78 134/146 T/T Possible Bad Data/Lab Error 16 Trt1 T/A -0.41499456 80 140/134 T/A 17 Trt2 -0.39428995 46 140/142 T/T 18 Trt2 T/A -0.05931340 42 140/134 T/A 19 Trt1 T/A 1.10002537 82 140/146 T/A 20 Trt2 T/T 0.76317575 61 140/142 T/A > > # Create a test data set where there are several haplotypes with alleles > # in adjacent columns. > test2 <- data.frame(Tmt=sample(c("Control","Trt1","Trt2"),20, replace=TRUE), + G1.1=sample(c("A","T",NA),20, replace=TRUE), + G1.2=sample(c("A","T",NA),20, replace=TRUE), + N1=rnorm(20), + I1=sample(1:100,20,replace=TRUE), + G2.1=sample(c("134","138","140","142","146"),20, + replace=TRUE), + G2.2=sample(c("134","138","140","142","146"),20, + replace=TRUE), + G3.1=sample(c("A ","T ","T "),20, replace=TRUE), + G3.2=sample(c("A ","T ","T "),20, replace=TRUE), + comment=sample(c("Possible Bad Data/Lab Error",""),20, + rep=TRUE) + ) > test2 Tmt G1.1 G1.2 N1 I1 G2.1 G2.2 G3.1 G3.2 1 Control A A -0.50595746 3 142 142 T A 2 Control A A 1.34303883 53 134 138 A A 3 Trt2 T A -0.21457941 89 140 140 T T 4 Trt1 A T -0.17955653 38 140 146 T A 5 Trt2 A -0.10019074 5 138 146 T T 6 Control T T 0.71266631 14 146 146 A T 7 Trt2 T A -0.07356440 33 134 146 T A 8 Trt2 A A -0.03763417 16 146 142 A T 9 Trt2 A T -0.68166048 14 134 138 T T 10 Trt1 -0.32427027 23 140 142 T T 11 Trt2 T 0.06016044 23 134 146 T T 12 Trt1 A A -0.58889449 14 134 138 A T 13 Control T 0.53149619 99 146 138 T T 14 Trt2 T -1.51839408 33 142 146 T A 15 Control T 0.30655786 51 138 134 T A 16 Trt1 A -1.53644982 69 140 138 A T 17 Control T -0.30097613 10 134 140 T T 18 Trt2 T -0.52827990 12 138 134 A T 19 Control T -0.65209478 6 146 140 T A 20 Trt2 T T -0.05689678 93 142 146 T T comment 1 Possible Bad Data/Lab Error 2 3 4 5 Possible Bad Data/Lab Error 6 Possible Bad Data/Lab Error 7 8 9 Possible Bad Data/Lab Error 10 11 Possible Bad Data/Lab Error 12 Possible Bad Data/Lab Error 13 14 15 Possible Bad Data/Lab Error 16 Possible Bad Data/Lab Error 17 18 Possible Bad Data/Lab Error 19 Possible Bad Data/Lab Error 20 Possible Bad Data/Lab Error > > # specifly the locations of the columns to be paired for haplotypes > makeHaplotypes(test2, convert=list(c("G1.1","G1.2"),6:7,8:9)) Tmt G1.1/G1.2 N1 I1 G2.1/G2.2 G3.1/G3.2 1 Control A/A -0.50595746 3 142/142 T/A 2 Control A/A 1.34303883 53 134/138 A/A 3 Trt2 T/A -0.21457941 89 140/140 T/T 4 Trt1 A/T -0.17955653 38 140/146 T/A 5 Trt2 -0.10019074 5 138/146 T/T 6 Control T/T 0.71266631 14 146/146 A/T 7 Trt2 T/A -0.07356440 33 134/146 T/A 8 Trt2 A/A -0.03763417 16 146/142 A/T 9 Trt2 A/T -0.68166048 14 134/138 T/T 10 Trt1 -0.32427027 23 140/142 T/T 11 Trt2 0.06016044 23 134/146 T/T 12 Trt1 A/A -0.58889449 14 134/138 A/T 13 Control 0.53149619 99 146/138 T/T 14 Trt2 -1.51839408 33 142/146 T/A 15 Control 0.30655786 51 138/134 T/A 16 Trt1 -1.53644982 69 140/138 A/T 17 Control -0.30097613 10 134/140 T/T 18 Trt2 -0.52827990 12 138/134 A/T 19 Control -0.65209478 6 146/140 T/A 20 Trt2 T/T -0.05689678 93 142/146 T/T comment 1 Possible Bad Data/Lab Error 2 3 4 5 Possible Bad Data/Lab Error 6 Possible Bad Data/Lab Error 7 8 9 Possible Bad Data/Lab Error 10 11 Possible Bad Data/Lab Error 12 Possible Bad Data/Lab Error 13 14 15 Possible Bad Data/Lab Error 16 Possible Bad Data/Lab Error 17 18 Possible Bad Data/Lab Error 19 Possible Bad Data/Lab Error 20 Possible Bad Data/Lab Error > > # Create a test data set where the data is coded as numeric allele > # counts (0-2). > test3 <- data.frame(Tmt=sample(c("Control","Trt1","Trt2"),20, replace=TRUE), + G1=sample(c(0:2,NA),20, replace=TRUE), + N1=rnorm(20), + I1=sample(1:100,20,replace=TRUE), + G2=sample(0:2,20, replace=TRUE), + comment=sample(c("Possible Bad Data/Lab Error",""),20, + rep=TRUE) + ) > test3 Tmt G1 N1 I1 G2 comment 1 Trt1 1 -1.733218407 77 1 2 Control 2 0.002131860 16 2 3 Trt2 2 -0.630300334 85 1 Possible Bad Data/Lab Error 4 Trt2 0 -0.340968580 95 1 Possible Bad Data/Lab Error 5 Trt2 1 -1.156572363 59 1 Possible Bad Data/Lab Error 6 Trt2 1 1.803141908 51 2 7 Trt1 0 -0.331132036 19 1 8 Trt2 NA -1.605513412 1 2 9 Control NA 0.197193439 88 2 10 Trt2 NA 0.263175646 14 1 11 Trt2 NA -0.985826700 3 2 12 Trt1 0 -2.888920672 94 1 Possible Bad Data/Lab Error 13 Trt2 1 -0.640481703 30 0 Possible Bad Data/Lab Error 14 Trt2 2 0.570507636 17 2 Possible Bad Data/Lab Error 15 Trt2 0 -0.059723276 40 2 Possible Bad Data/Lab Error 16 Trt1 NA -0.098178744 46 0 Possible Bad Data/Lab Error 17 Trt1 2 0.560820729 44 1 Possible Bad Data/Lab Error 18 Control 0 -1.186458639 52 1 Possible Bad Data/Lab Error 19 Trt2 1 1.096777044 85 1 Possible Bad Data/Lab Error 20 Trt2 NA -0.005344028 6 1 > > # specifly the locations of the columns, and a non-standard conversion > makeGenotypes(test3, convert=c('G1','G2'), method=as.genotype.allele.count) Warning in genotype(sapply(tmp, paste, collapse = "/"), alleles = alleles, : Found data values not matching specified alleles. Converting to NA. Tmt G1 N1 I1 G2 comment 1 Trt1 B/A -1.733218407 77 A/B 2 Control A/A 0.002131860 16 A/A 3 Trt2 A/A -0.630300334 85 A/B Possible Bad Data/Lab Error 4 Trt2 B/B -0.340968580 95 A/B Possible Bad Data/Lab Error 5 Trt2 B/A -1.156572363 59 A/B Possible Bad Data/Lab Error 6 Trt2 B/A 1.803141908 51 A/A 7 Trt1 B/B -0.331132036 19 A/B 8 Trt2 -1.605513412 1 A/A 9 Control 0.197193439 88 A/A 10 Trt2 0.263175646 14 A/B 11 Trt2 -0.985826700 3 A/A 12 Trt1 B/B -2.888920672 94 A/B Possible Bad Data/Lab Error 13 Trt2 B/A -0.640481703 30 B/B Possible Bad Data/Lab Error 14 Trt2 A/A 0.570507636 17 A/A Possible Bad Data/Lab Error 15 Trt2 B/B -0.059723276 40 A/A Possible Bad Data/Lab Error 16 Trt1 -0.098178744 46 B/B Possible Bad Data/Lab Error 17 Trt1 A/A 0.560820729 44 A/B Possible Bad Data/Lab Error 18 Control B/B -1.186458639 52 A/B Possible Bad Data/Lab Error 19 Trt2 B/A 1.096777044 85 A/B Possible Bad Data/Lab Error 20 Trt2 -0.005344028 6 A/B > > > > > cleanEx(); ..nameEx <- "plot.genotype" > > ### * plot.genotype > > flush(stderr()); flush(stdout()) > > ### Name: plot.genotype > ### Title: Plot genotype object > ### Aliases: plot.genotype > ### Keywords: hplot > > ### ** Examples > > set <- c("A/A", "A/B", "A/B", "B/B", "B/B", "B/B", + "B/B", "B/C", "C/C", "C/C") > set <- genotype(set, alleles=c("A", "B", "C"), reorder="yes") > plot(set) > plot(set, type="allele", what="number") > > > > cleanEx(); ..nameEx <- "power.casectl" > > ### * power.casectl > > flush(stderr()); flush(stdout()) > > ### Name: power.casectrl > ### Title: Power for case-control genetics study > ### Aliases: power.casectrl > ### Keywords: design > > ### ** Examples > > > # single calc > power.casectrl(p=0.1, N=50, gamma=1.1, kp=.1, alpha=5e-2, minh='r') [1] 0.07976395 > > # for a range of sample sizes > power.casectrl(p=0.1, N=c(25,50,100,200,500), gamma=1.1, kp=.1, + alpha=5e-2, minh='r') [1] 0.06476753 0.07976395 0.11031669 0.17285937 0.35920589 > > # create a power table > fun <- function(p) + power.casectrl(p=p, N=seq(100,1000,by=100), gamma=1.1, kp=.1, + alpha=5e-2, minh='recessive') > > m <- sapply( X=seq(0.1,0.9, by=0.1), fun ) > colnames(m) <- seq(0.1,0.9, by=0.1) > rownames(m) <- seq(100,1000,by=100) > > print(round(m,2)) 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 100 0.11 0.31 0.61 0.87 0.98 1 1 1 1 200 0.17 0.54 0.89 0.99 1.00 1 1 1 1 300 0.24 0.71 0.97 1.00 1.00 1 1 1 1 400 0.30 0.83 0.99 1.00 1.00 1 1 1 1 500 0.36 0.90 1.00 1.00 1.00 1 1 1 1 600 0.42 0.94 1.00 1.00 1.00 1 1 1 1 700 0.47 0.97 1.00 1.00 1.00 1 1 1 1 800 0.52 0.98 1.00 1.00 1.00 1 1 1 1 900 0.57 0.99 1.00 1.00 1.00 1 1 1 1 1000 0.62 1.00 1.00 1.00 1.00 1 1 1 1 > > > > cleanEx(); ..nameEx <- "print.LD" > > ### * print.LD > > flush(stderr()); flush(stdout()) > > ### Name: print.LD > ### Title: Textual and graphical display of linkage disequilibrium (LD) > ### objects > ### Aliases: print.LD print.LD.data.frame summary.LD.data.frame > ### print.summary.LD.data.frame plot.LD.data.frame LDtable LDplot > ### Keywords: misc > > ### ** Examples > > > g1 <- genotype( c('T/A', NA, 'T/T', NA, 'T/A', NA, 'T/T', 'T/A', + 'T/T', 'T/T', 'T/A', 'A/A', 'T/T', 'T/A', 'T/A', 'T/T', + NA, 'T/A', 'T/A', NA) ) > > g2 <- genotype( c('C/A', 'C/A', 'C/C', 'C/A', 'C/C', 'C/A', 'C/A', 'C/A', + 'C/A', 'C/C', 'C/A', 'A/A', 'C/A', 'A/A', 'C/A', 'C/C', + 'C/A', 'C/A', 'C/A', 'A/A') ) > > g3 <- genotype( c('T/A', 'T/A', 'T/T', 'T/A', 'T/T', 'T/A', 'T/A', 'T/A', + 'T/A', 'T/T', 'T/A', 'T/T', 'T/A', 'T/A', 'T/A', 'T/T', + 'T/A', 'T/A', 'T/A', 'T/T') ) > data <- makeGenotypes(data.frame(g1,g2,g3)) > > # Compute & display LD for one marker pair > ld <- LD(g1,g2) > print(ld) Pairwise LD ----------- D D' Corr Estimates: 0.1419402 0.8110866 0.6029553 X^2 P-value N LD Test: 10.90665 0.0009581958 15 > > # Compute LD table for all 3 genotypes > ldt <- LD(data) > > # display the results > print(ldt) # textual display Pairwise LD ----------- g2 g3 g1 D 0.1419402 0.0899457 g1 D' 0.8110866 0.4151342 g1 Corr. 0.6029553 0.4000333 g1 X^2 10.9066513 4.8007990 g1 P-value 0.0009581958 0.02844654 g1 n 15 15 g2 D 0.1836927 g2 D' 0.9996881 g2 Corr. 0.7712137 g2 X^2 23.7908198 g2 P-value 1.073934e-06 g2 n 20 > LDtable(ldt) # graphical color-coded table > LDplot(ldt, distance=c(124, 834, 927)) # LD plot vs distance > > # more markers makes prettier plots! > data <- list() > nobs <- 1000 > ngene <- 20 > s <- seq(0,1,length=ngene) > a1 <- a2 <- matrix("", nrow=nobs, ncol=ngene) > for(i in 1:length(s) ) + { + + rallele <- function(p) sample( c("A","T"), 1, p=c(p, 1-p)) + + if(i==1) + { + a1[,i] <- sample( c("A","T"), 1000, p=c(0.5,0.5), replace=TRUE) + a2[,i] <- sample( c("A","T"), 1000, p=c(0.5,0.5), replace=TRUE) + } + else + { + p1 <- pmax( pmin( 0.25 + s[i] * as.numeric(a1[,i-1]=="A"),1 ), 0 ) + p2 <- pmax( pmin( 0.25 + s[i] * as.numeric(a2[,i-1]=="A"),1 ), 0 ) + a1[,i] <- sapply(p1, rallele ) + a2[,i] <- sapply(p2, rallele ) + } + + data[[paste("G",i,sep="")]] <- genotype(a1[,i],a2[,i]) + } > data <- data.frame(data) > data <- makeGenotypes(data) > > ldt <- LD(data) > plot(ldt, digits=2, marker=19) # do LDtable & LDplot on in a single > # graphics window > > > > cleanEx(); ..nameEx <- "summary.genotype" > > ### * summary.genotype > > flush(stderr()); flush(stdout()) > > ### Name: summary.genotype > ### Title: Allele and Genotype Frequency from a Genotype or Haplotype > ### Object > ### Aliases: summary.genotype print.summary.genotype > ### Keywords: misc > > ### ** Examples > > > example.data <- c("D/D","D/I","D/D","I/I","D/D", + "D/D","D/D","D/D","I/I","") > g1 <- genotype(example.data) > g1 [1] "D/D" "D/I" "D/D" "I/I" "D/D" "D/D" "D/D" "D/D" "I/I" NA Alleles: D I > > summary(g1) Number of samples typed: 9 (90%) Allele Frequency: (2 alleles) Count Proportion D 13 0.72 I 5 0.28 NA 2 NA Genotype Frequency: Count Proportion D/D 6 0.67 D/I 1 0.11 I/I 2 0.22 NA 1 NA Heterozygosity (Hu) = 0.4513889 Poly. Inf. Content = 0.32074 > > > > cleanEx(); ..nameEx <- "write.pop.file" > > ### * write.pop.file > > flush(stderr()); flush(stdout()) > > ### Name: write.pop.file > ### Title: Create genetics data files > ### Aliases: write.pop.file write.pedigree.file write.marker.file > ### Keywords: IO > > ### ** Examples > > # TBA > > > > ### *