makeGenotypes            package:genetics            R Documentation

_C_o_n_v_e_r_t _c_o_l_u_m_n_s _i_n _a _d_a_t_a_f_r_a_m_e _t_o _g_e_n_o_t_y_p_e_s _o_r _h_a_p_l_o_t_y_p_e_s

_D_e_s_c_r_i_p_t_i_o_n:

     Convert columns in a dataframe to genotypes or haplotypes.

_U_s_a_g_e:

     makeGenotypes(data, convert, sep = "/", tol = 0.5, ..., method=as.genotype)
     makeHaplotypes(data, convert, sep = "/", tol = 0.9, ...)

_A_r_g_u_m_e_n_t_s:

    data: Dataframe containing columns to be converted

 convert: Vector or list of pairs specifying which columns contain
          genotype/haplotype data.   See below for details.

     sep: Genotype separator

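     tol: Minimum fraction of records in a column that must contain
          'sep' for the column to be converted automatically when
          'convert' is not provided.  See Details.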

     ...: Optional arguments passed on to the conversion function
          (as.genotype by default for makeGenotypes)

  method: Function used to perform the conversion.

_D_e_t_a_i_l_s:

     The functions makeGenotypes and makeHaplotypes allow the
     conversion of all of the genetic variables in a dataset to
     genotypes or haplotypes in a single step.

     The parameter 'convert' may be missing, a vector of column names,
     indexes or true/false indicators, or a list of column name or
     index pairs.

     When the argument 'convert' is not provided, the function will
     look for columns where at least 'tol'*100% of the records contain
     the separator character 'sep' ('/' by default).  These columns
     will then be assumed to contain both of the genotype/haplotype
     alleles and will be converted in-place to genotype/haplotype
     variables.

     When the argument 'convert' is a vector of column names, indexes
     or true/false indicators, the corresponding columns will be
     assumed to contain both of the genotype/haplotype alleles and will
     be converted in-place to genotype/haplotype variables.

     When the argument 'convert' is a list containing column name or
     index pairs, the two elements of each pair will be assumed to
     contain the individual alleles of a genotype/haplotype.  The first
     column specified in each pair will be replaced with the new
     genotype/haplotype variable named 'name1 + sep + name2'.  The
     second column will be removed.

     Note that the 'method' argument may be used to supply a
     non-standard conversion function, such as
     'as.genotype.allele.count', which converts allele counts (0, 1
     or 2 copies of an allele) to the corresponding two-allele
     genotypes, e.g. 'A/A', 'A/B' and 'B/B' (or the specified allele
     names).  See the example below.

_V_a_l_u_e:

     Dataframe containing converted genotype/haplotype variables. All
     other variables will be unchanged.

_A_u_t_h_o_r(_s):

     Gregory R. Warnes Gregory_R_Warnes@groton.pfizer.com 

_S_e_e _A_l_s_o:

     'genotype'

_E_x_a_m_p_l_e_s:

     ## Not run: 
     # common case
     data <- read.csv(file="genotype_data.csv")
     data <- makeGenotypes(data)
     ## End(Not run)

     # Create a test data set where there are several genotypes in columns
     # of the form "A/T".
     test1 <- data.frame(Tmt=sample(c("Control","Trt1","Trt2"),20, replace=TRUE),
                     G1=sample(c("A/T","T/T","T/A",NA),20, replace=TRUE),
                     N1=rnorm(20),
                     I1=sample(1:100,20,replace=TRUE),
                     G2=paste(sample(c("134","138","140","142","146"),20,
                                     replace=TRUE),
                              sample(c("134","138","140","142","146"),20,
                                     replace=TRUE),
                              sep=" / "),
                     G3=sample(c("A /T","T /T","T /A"),20, replace=TRUE),
                     comment=sample(c("Possible Bad Data/Lab Error",""),20,
                                    rep=TRUE)
                     )
     test1

     # now automatically convert genotype columns
     geno1 <- makeGenotypes(test1)
     geno1

     # Create a test data set where there are several haplotypes with alleles
     # in adjacent columns.
     test2 <- data.frame(Tmt=sample(c("Control","Trt1","Trt2"),20, replace=TRUE),
                         G1.1=sample(c("A","T",NA),20, replace=TRUE),
                         G1.2=sample(c("A","T",NA),20, replace=TRUE),
                         N1=rnorm(20),
                         I1=sample(1:100,20,replace=TRUE),
                         G2.1=sample(c("134","138","140","142","146"),20,
                                     replace=TRUE),
                         G2.2=sample(c("134","138","140","142","146"),20,
                                     replace=TRUE),
                         G3.1=sample(c("A ","T ","T "),20, replace=TRUE),
                         G3.2=sample(c("A ","T ","T "),20, replace=TRUE),
                         comment=sample(c("Possible Bad Data/Lab Error",""),20,
                                        rep=TRUE)
                        ) 
     test2

     # specify the locations of the columns to be paired for haplotypes
     makeHaplotypes(test2, convert=list(c("G1.1","G1.2"),6:7,8:9))

     # Create a test data set where the data is coded as numeric allele
     # counts (0-2).
     test3 <- data.frame(Tmt=sample(c("Control","Trt1","Trt2"),20, replace=TRUE),
                         G1=sample(c(0:2,NA),20, replace=TRUE),
                         N1=rnorm(20),
                         I1=sample(1:100,20,replace=TRUE),
                         G2=sample(0:2,20, replace=TRUE),
                         comment=sample(c("Possible Bad Data/Lab Error",""),20,
                                        rep=TRUE)
                        ) 
     test3

     # specify the locations of the columns, and a non-standard conversion
     makeGenotypes(test3, convert=c('G1','G2'), method=as.genotype.allele.count)

