% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DataGeneration.R
\name{DataGeneration}
\alias{DataGeneration}
\title{Generating an artificial item response dataset}
\usage{
DataGeneration(
  seed = 1,
  N = 2000,
  nitem_D = 0,
  nitem_P = 0,
  nitem_C = 0,
  model_D = "2PL",
  model_P = "GPCM",
  latent_dist = "Normal",
  item_D = NULL,
  item_P = NULL,
  item_C = NULL,
  theta = NULL,
  prob = 0.5,
  d = 1.7,
  sd_ratio = 1,
  m = 0,
  s = 1,
  a_l = 0.8,
  a_u = 2.5,
  b_m = NULL,
  b_sd = NULL,
  c_l = 0,
  c_u = 0.2,
  categ = 5,
  possible_ans = seq(0.1, 0.9, length = 5)
)
}
\arguments{
\item{seed}{A numeric value used as the seed for random sampling.
Setting the seed guarantees that the result is reproducible.}

\item{N}{A numeric value of the number of examinees.}

\item{nitem_D}{A numeric value of the number of dichotomous items.}

\item{nitem_P}{A numeric value of the number of polytomous items.}

\item{nitem_C}{A numeric value of the number of continuous response items.}

\item{model_D}{A vector or a character string that represents the probability model for the dichotomous items.}

\item{model_P}{A character string that represents the probability model for the polytomous items.}

\item{latent_dist}{A character string that determines the type of latent distribution.
Currently available options are \code{"beta"} (four-parameter beta distribution; \code{\link{rBeta.4P}}),
\code{"chi"} (\eqn{\chi^2} distribution; \code{\link{rchisq}}),
\code{"normal"}, \code{"Normal"}, or \code{"N"} (standard normal distribution; \code{\link{rnorm}}),
and \code{"Mixture"} or \code{"2NM"} (two-component Gaussian mixture distribution; see Li (2021) for details).}

\item{item_D}{An item parameter matrix for using fixed parameter values. The number of columns should be 3: \code{a} parameter for the first, \code{b} parameter for the second, and \code{c} parameter for the third column. Default is \code{NULL}.}

\item{item_P}{An item parameter matrix for using fixed parameter values. The number of columns should be 7: \code{a} parameter for the first, and \code{b} parameters for the rest of the columns. Default is \code{NULL}.}

\item{item_C}{An item parameter matrix for using fixed parameter values. The number of columns should be 3: \code{a} parameter for the first, \code{b} parameter for the second, and \code{nu} parameter for the third column. Default is \code{NULL}.}

\item{theta}{An ability parameter vector for using fixed parameter values. Default is \code{NULL}.}

\item{prob}{A numeric value used when \code{latent_dist = "2NM"}.
It is the \eqn{\pi = \frac{n_1}{N}} parameter of two-component Gaussian mixture distribution, where \eqn{n_1} is the estimated number of examinees belonging to the first Gaussian component and \eqn{N} is the total number of examinees (Li, 2021).}

\item{d}{A numeric value used when \code{latent_dist = "2NM"}.
It is the \eqn{\delta = \frac{\mu_2 - \mu_1}{\bar{\sigma}}} parameter of the two-component Gaussian mixture distribution,
where \eqn{\mu_1} and \eqn{\mu_2} are the estimated means of the first and second Gaussian components, respectively,
and \eqn{\bar{\sigma}} is the overall standard deviation of the latent distribution (Li, 2021).
Without loss of generality, \eqn{\mu_2 \ge \mu_1} is assumed, thus \eqn{\delta \ge 0}.}

\item{sd_ratio}{A numeric value used when \code{latent_dist = "2NM"}.
It is the \eqn{\zeta = \frac{\sigma_2}{\sigma_1}} parameter of two-component Gaussian mixture distribution, where \eqn{\sigma_1} and \eqn{\sigma_2} are the estimated standard deviations of the first and second Gaussian components, respectively (Li, 2021).}

\item{m}{A numeric value of the overall mean of the latent distribution. The default is 0.}

\item{s}{A numeric value of the overall standard deviation of the latent distribution. The default is 1.}

\item{a_l}{A numeric value. The lower bound of item discrimination parameters (\emph{a}).}

\item{a_u}{A numeric value. The upper bound of item discrimination parameters (\emph{a}).}

\item{b_m}{A numeric value. The mean of item difficulty parameters (\emph{b}).
If unspecified, the value of \code{m} is used.}

\item{b_sd}{A numeric value. The standard deviation of item difficulty parameters (\emph{b}).
If unspecified, the value of \code{s} is used.}

\item{c_l}{A numeric value. The lower bound of item guessing parameters (\emph{c}).}

\item{c_u}{A numeric value. The upper bound of item guessing parameters (\emph{c}).}

\item{categ}{A scalar or a numeric vector of length \code{nitem_P} giving the number of categories of the polytomous items. The default is 5.
If \code{length(categ)>1}, the \emph{i}th element equals the number of categories of the \emph{i}th polytomous item.}

\item{possible_ans}{Possible options for continuous items (e.g., 0.1, 0.3, 0.5, 0.7, 0.9)}
}
\value{
This function returns a \code{list} of several objects:
\item{theta}{A vector of ability parameters (\eqn{\theta}).}
\item{item_D}{A matrix of dichotomous item parameters.}
\item{initialitem_D}{A matrix that contains initial item parameter values for dichotomous items.}
\item{data_D}{A matrix of dichotomous item responses where rows indicate examinees and columns indicate items.}
\item{item_P}{A matrix of polytomous item parameters.}
\item{initialitem_P}{A matrix that contains initial item parameter values for polytomous items.}
\item{data_P}{A matrix of polytomous item responses where rows indicate examinees and columns indicate items.}
\item{item_C}{A matrix of continuous response item parameters.}
\item{initialitem_C}{A matrix that contains initial item parameter values for continuous response items.}
\item{data_C}{A matrix of continuous response item responses where rows indicate examinees and columns indicate items.}
}
\description{
This function generates an artificial item response dataset allowing various options.
}
\examples{
# Dichotomous item responses

Alldata <- DataGeneration(N = 500,
                          nitem_D = 10)


# Polytomous item responses

Alldata <- DataGeneration(N = 1000,
                          nitem_P = 10)


# Mixed-format items

Alldata <- DataGeneration(N = 1000,
                          nitem_D = 20,
                          nitem_P = 10)

# Continuous items

AllData <- DataGeneration(N = 1000,
                          nitem_C = 10)

# Dataset from non-normal latent density using two-component Gaussian mixture distribution

Alldata <- DataGeneration(N=1000,
                          nitem_P = 10,
                          latent_dist = "2NM",
                          d = 1.664,
                          sd_ratio = 2,
                          prob = 0.3)

}
\references{
Li, S. (2021). Using a two-component normal mixture distribution as a latent distribution in estimating parameters of item response models. \emph{Journal of Educational Evaluation, 34}(4), 759-789.
}
\author{
Seewoo Li \email{cu@yonsei.ac.kr}
}
