samplecube             package:sampling             R Documentation

_S_a_m_p_l_e _c_u_b_e _m_e_t_h_o_d

_D_e_s_c_r_i_p_t_i_o_n:

     Selects a balanced sample (a vector of 0 and 1) or an almost
     balanced sample. First, the flight phase is applied. Then, if
     needed, the landing phase is applied to the result of the flight
     phase.

_U_s_a_g_e:

     samplecube(X,pik,order=1,comment=TRUE,method=1)

_A_r_g_u_m_e_n_t_s:

       X: matrix of auxiliary variables on which the sample must be
          balanced.

     pik: vector of inclusion probabilities.

   order: 1, the data are randomly arranged,
           2, no change in data order,
           3, the data are sorted in decreasing order. 

 comment: a comment is written during the execution if 'comment' is
          equal to 'TRUE'.

  method: 1, for a landing phase by linear programming,
           2, for a landing phase by suppression of variables.

_R_e_f_e_r_e_n_c_e_s:

     Tillé, Y. (2006), _Sampling Algorithms_, Springer.
      Chauvet, G. and Tillé, Y. (2006). A fast algorithm of balanced
     sampling. _to appear in Computational Statistics_.
      Chauvet, G. and Tillé, Y. (2005). New SAS macros for balanced
     sampling. In INSEE, editor, _Journées de Méthodologie
     Statistique_, Paris.
      Deville, J.-C. and Tillé, Y. (2004). Efficient balanced sampling:
     the cube method. _Biometrika_, 91, 893-912.
      Deville, J.-C. and Tillé, Y. (2005). Variance approximation under
     balanced sampling. _Journal of Statistical Planning and
     Inference_, 128/2:411-425.

_S_e_e _A_l_s_o:

     'landingcube', 'fastflightcube'

_E_x_a_m_p_l_e_s:

     ############
     ## Example 1
     ############
     # matrix of balancing variables
     X=cbind(c(1,1,1,1,1,1,1,1,1),c(1.1,2.2,3.1,4.2,5.1,6.3,7.1,8.1,9.1))
     # Vector of inclusion probabilities.
     # The sample size is equal to 3.
     pik=c(1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3)
     # Selection of the sample
     s=samplecube(X,pik,order=1,comment=TRUE)
     # The selected sample
     (1:length(pik))[s==1]
     ############
     ## Example 2
     ############
     # 2 strata and 2 auxiliary variables
     # the inclusion probabilities are checked by simulation
     X=rbind(c(1,0,1,2),c(1,0,2,5),c(1,0,3,7),c(1,0,4,9),
     c(1,0,5,1),c(1,0,6,5),c(1,0,7,7),c(1,0,8,6),c(1,0,9,9),
     c(1,0,10,3),c(0,1,11,3),c(0,1,12,2),c(0,1,13,3),
     c(0,1,14,6),c(0,1,15,8),c(0,1,16,9),c(0,1,17,1),
     c(0,1,18,2),c(0,1,19,3),c(0,1,20,4))
     pik=rep(1/2,times=20)
     ppp=rep(0,times=20)
     sim=100
     for(i in (1:sim))
             ppp=ppp+samplecube(X,pik,1,FALSE) 
     ppp=ppp/sim
     print(ppp)
     print(pik)
     ############
     ## Example 3
     ############
     # unequal probability sampling by cube method
     # one auxiliary variable equal to the inclusion probability
     N=200
     pik=runif(N)
     pikfin=samplecube(array(pik,c(N,1)),pik,1,TRUE)
     ############ 
     ## Example 4
     ############
     # p auxiliary variables generated randomly
     N=1000
     p=7
     x<-rnorm(N*p,10,3)
     # random inclusion probabilities 
     pik<- runif(N)
     X=array(x,c(N,p))
     X=cbind(cbind(X,rep(1,times=N)),pik)
     pikfin=samplecube(X,pik,1,TRUE)
     ############ 
     ## Example 5
     ############
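     # strata and an auxiliary variable
     # NOTE(review): V4 is defined below but is not included in X, so only
     # the three strata indicators are balanced - confirm whether V4
     # should be added to X.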
     N=5000
     a=rep(1,times=N)
     b=rep(0,times=N)
     V1=c(a,b,b)
     V2=c(b,a,b)
     V3=c(b,b,a)
     V4=1:(3*N)
     X=cbind(V1,V2,V3)
     pik=rep(2/10,times=3*N)
     pikfin=samplecube(X,pik,1,TRUE)
     ############
     ## Example 6
     ############
     # Selection of a balanced sample using the MU284 population,
     # simulation and comparison of the variance with
     # unequal probability sampling of fixed sample size.
     ############
     data(MU284)
     # Computation of the inclusion probabilities
     pik=inclusionprobabilities(MU284$P75,50)
     # Definition of the matrix of balancing variables
     X=cbind(MU284$P75,MU284$CS82,MU284$SS82,MU284$S82,MU284$ME84,MU284$REV84)
     # Computation of the Horvitz-Thompson estimator for a balanced sample
     HTestimator(MU284$RMT85,pik,samplecube(X,pik,1,FALSE))
     # Computation of the Horvitz-Thompson estimator for an unequal probability sample
     HTestimator(MU284$RMT85,pik,samplecube(matrix(pik),pik,1,FALSE))
     # simulations; for better accuracy, set sim=50
     sim=8
     res1=rep(0,times=sim)
     res2=rep(0,times=sim)
     for(i in 1:sim)
     {
     cat("Simulation number ",i,"\n")
     res1[i]=HTestimator(MU284$RMT85,pik,samplecube(X,pik,1,FALSE))
     res2[i]=HTestimator(MU284$RMT85,pik,samplecube(matrix(pik),pik,1,FALSE) )
     }
     # summary and boxplots
     summary(res1)
     summary(res2)
     ss=cbind(res1,res2)
     colnames(ss) <- c("balanced sampling","uneq prob sampling")
     boxplot(data.frame(ss), las=1)

