simulateScores            package:hddplot            R Documentation

_G_e_n_e_r_a_t_e _l_i_n_e_a_r _d_i_s_c_r_i_m_i_n_a_n_t _s_c_o_r_e_s _f_r_o_m _r_a_n_d_o_m _d_a_t_a, _a_f_t_e_r _s_e_l_e_c_t_i_o_n

_D_e_s_c_r_i_p_t_i_o_n:

     Simulates the effect of generating scores from random data,
     possibly with predicted scores also calculated for additional
     'observations'.

_U_s_a_g_e:

     simulateScores(nrows = 7129, cl = rep(1:3, c(19, 10, 2)), x = NULL, cl.other = 4, x.other = NULL, nfeatures = 15, dimen=2, seed = NULL)

_A_r_g_u_m_e_n_t_s:

   nrows: number of rows of random data matrix

      cl: classifying factor

       x: data matrix, by default randomly generated

cl.other: classifying factor for additional observations

 x.other: additional observations

nfeatures: number of features to select (by default uses the aov
          F-statistic)

   dimen: number of sets of discriminant scores to retain (at most one
          less than number of levels of 'cl')

    seed: set, if required, so that calculations can be reproduced

_D_e_t_a_i_l_s:

_V_a_l_u_e:

  scores: matrix of scores

      cl: classifying factor

   other: matrix of 'other' scores

cl.other: classifying factor for the 'other' scores

nfeatures: number of features used in generating the scores

_N_o_t_e:

_A_u_t_h_o_r(_s):

     John Maindonald

_R_e_f_e_r_e_n_c_e_s:

_S_e_e _A_l_s_o:

_E_x_a_m_p_l_e_s:

     # Scores from 500 rows of random data; 31 observations in three groups
     scorelist <- simulateScores(nrows = 500,
                                 cl = rep(1:3, times = c(19, 10, 2)))
     # Colour each point by the integer code of its group
     group.col <- unclass(scorelist$cl)
     plot(scorelist$scores, col = group.col, pch = 16)

     ## The function is currently defined as
     function(nrows=7129, cl=rep(1:3, c(19,10,2)), x=NULL,
                cl.other=4, x.other=NULL, nfeatures=15,
                dimen=2, seed=NULL){
         ## Select 'nfeatures' rows (features) of 'x' using the ordering
         ## returned by orderFeatures(), fit lda() to those rows, and return
         ## the discriminant scores for 'x' together with predicted scores
         ## for 'x.other'.
         ##
         ## Arguments:
         ##   nrows     number of rows generated when 'x' is NULL; otherwise
         ##             replaced by the number of rows of 'x'
         ##   cl        group labels, one per column of 'x'
         ##   x         features-by-observations matrix; standard normal
         ##             noise when NULL
         ##   cl.other  group labels, one per column of 'x.other'
         ##   x.other   additional observations, with the same rows as 'x';
         ##             standard normal noise when NULL
         ##   nfeatures number of selected features passed to lda()
         ##   dimen     number of discriminant dimensions kept; capped at
         ##             one less than the number of levels of 'cl'
         ##   seed      if not NULL, passed to set.seed() before any
         ##             random numbers are drawn
         ##
         ## Value (returned invisibly): list with components 'scores', 'cl',
         ## 'other', 'cl.other' and 'nfeatures'.
         if (!is.null(seed)) set.seed(seed)
         m <- length(cl)
         m.other <- length(cl.other)
         if (is.null(x)) {
           x <- matrix(rnorm(nrows * m), nrow = nrows)
           rownames(x) <- paste(seq_len(nrows))
         } else {
           nrows <- dim(x)[1]
         }
         if (is.null(x.other)) {
           x.other <- matrix(rnorm(nrows * m.other), nrow = nrows)
           rownames(x.other) <- paste(seq_len(nrows))
         }
         ## Numeric labels become "Gp1", "Gp2", ... before conversion to factor
         if (is.numeric(cl)) cl <- paste0("Gp", cl)
         if (is.numeric(cl.other)) cl.other <- paste0("Gp", cl.other)
         cl <- factor(cl)
         cl.other <- factor(cl.other)
         ## At most (number of groups - 1) discriminant dimensions exist
         max.dimen <- length(levels(cl)) - 1
         if (dimen > max.dimen) dimen <- max.dimen
         ## Rank the features of 'x' itself: the selection has to be made on
         ## the same data that the scores are computed from, otherwise a
         ## user-supplied 'x' is ignored and no selection effect is simulated.
         ordfeatures <- orderFeatures(x, cl = cl, values = TRUE)
         ord.use <- ordfeatures$ord[seq_len(nfeatures)]
         ## drop=FALSE keeps the features-by-observations layout even when a
         ## single feature is selected, so that t() yields one row per
         ## observation.
         dfUse.ord <- data.frame(t(x[ord.use, , drop = FALSE]))
         dfUseOther.ord <- data.frame(t(x.other[ord.use, , drop = FALSE]))
         ordUse.lda <- lda(dfUse.ord, grouping = cl)
         ## Use the (capped) 'dimen' for both sets of scores, not a fixed 2
         scores <- predict(ordUse.lda, dimen = dimen)$x
         scores.other <- predict(ordUse.lda, newdata = dfUseOther.ord,
                                 dimen = dimen)$x
         invisible(list(scores = scores, cl = cl, other = scores.other,
                        cl.other = cl.other, nfeatures = nfeatures))
       }

