| PSID1976 {AER} | R Documentation |
Cross-section data originating from the 1976 Panel Study of Income Dynamics (PSID), based on data for the previous year, 1975.
data("PSID1976")
A data frame containing 753 observations on 22 variables.
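Notes on selected variables (the variable list itself is missing from this rendering):
participation: labor force participation in 1975. (This is essentially wage > 0 or hours > 0.)
earnings: (Same as wage. This variable is currently kept for consistency with earlier releases. It will be removed in the near future.)
repwage: to use the subsample with this wage, first select the 1975 workers with participation == "yes", then select only those women with non-zero wage. Only 325 women work in 1975 and have a non-zero wage in 1976.

This data set is also known as the Mroz (1987) data.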
Warning: Typical applications using these data employ the variable
wage (also known as earnings) as the dependent variable. The variable
repwage is the wage reported in a 1976 interview, named RPWG by Greene (2003).
Online complements to Greene (2003). Table F4.1.
http://pages.stern.nyu.edu/~wgreene/Text/tables/tablelist5.htm
Greene, W.H. (2003). Econometric Analysis, 5th edition. Upper Saddle River, NJ: Prentice Hall.
McCullough, B.D. (2004). Some Details of Nonlinear Estimation. In: Altman, M., Gill, J., and McDonald, M.P.: Numerical Issues in Statistical Computing for the Social Scientist. Hoboken, NJ: John Wiley, Ch. 8, 199–218.
Mroz, T.A. (1987). The Sensitivity of an Empirical Model of Married Women's Hours of Work to Economic and Statistical Assumptions. Econometrica, 55, 765–799.
Wooldridge, J.M. (2002). Econometric Analysis of Cross-Section and Panel Data. Cambridge, MA: MIT Press.
## data and transformations
## Attach AER explicitly so that the example also runs as a standalone script:
## AER provides the PSID1976 data and tobit(), and attaching it also attaches
## its dependencies lmtest and sandwich, which supply coeftest(), lrtest() and
## vcovOPG() used further below.
library("AER")
data("PSID1976", package = "AER")
## kids: "yes" when youngkids + oldkids is positive, "no" otherwise
PSID1976$kids <- with(PSID1976, factor((youngkids + oldkids) > 0,
  levels = c(FALSE, TRUE), labels = c("no", "yes")))
## nwincome: fincome net of hours * wage, rescaled by a factor of 1/1000
PSID1976$nwincome <- with(PSID1976, (fincome - hours * wage) / 1000)
## partnum: numeric 0/1 version of participation; comparing against "yes"
## avoids relying on the internal ordering of the factor levels
PSID1976$partnum <- as.numeric(PSID1976$participation == "yes")
## ------------------------------------------------------------------
## Greene (2003)
## ------------------------------------------------------------------

## Example 4.1, Table 4.2
## (reproduced in Example 7.1, Table 7.1)
## OLS for log(hours * wage), fitted on the participating women only.
gr_lm <- lm(
  log(hours * wage) ~ age + I(age^2) + education + kids,
  data = PSID1976,
  subset = participation == "yes"
)
summary(gr_lm)
vcov(gr_lm)

## Example 4.5
## The model summary is shown once more for this example ...
summary(gr_lm)
## ... or equivalently, compare the model against the intercept-only fit
## estimated on the same subsample.
gr_lm1 <- lm(
  log(hours * wage) ~ 1,
  data = PSID1976,
  subset = participation == "yes"
)
anova(gr_lm1, gr_lm)
## Example 21.4, p. 681
## Three probit models for participation:
##   gr_probit1 - all regressors including kids
##   gr_probit2 - the same regressors without kids
##   gr_probit3 - regressors nested within kids (kids/(...))
gr_probit1 <- glm(
  participation ~ age + I(age^2) + fincome + education + kids,
  data = PSID1976,
  family = binomial(link = "probit")
)
gr_probit2 <- glm(
  participation ~ age + I(age^2) + fincome + education,
  data = PSID1976,
  family = binomial(link = "probit")
)
gr_probit3 <- glm(
  participation ~ kids/(age + I(age^2) + fincome + education),
  data = PSID1976,
  family = binomial(link = "probit")
)

## LR test of all coefficients
lrtest(gr_probit1)

## Chow-type test: model without kids vs. model nested within kids
lrtest(gr_probit2, gr_probit3)
## equivalently:
anova(gr_probit2, gr_probit3, test = "Chisq")

## Table 21.3
summary(gr_probit1)
## Example 22.8, Table 22.7, p. 786
## sampleSelection is needed for this example only, so it is treated as an
## optional dependency: when it is not installed the selection models are
## skipped with a message instead of aborting the remainder of the script.
if (requireNamespace("sampleSelection", quietly = TRUE)) {
  library("sampleSelection")
  ## selection equation: participation; outcome equation: wage
  gr_2step <- selection(
    participation ~ age + I(age^2) + fincome + education + kids,
    wage ~ experience + I(experience^2) + education + city,
    data = PSID1976, method = "2step"
  )
  gr_ml <- selection(
    participation ~ age + I(age^2) + fincome + education + kids,
    wage ~ experience + I(experience^2) + education + city,
    data = PSID1976, method = "ml"
  )
  ## explicit print(): values are not auto-printed one by one inside a block
  print(summary(gr_2step))
  print(summary(gr_ml))
} else {
  message("Package 'sampleSelection' is not installed; skipping Example 22.8.")
}
## OLS for wage on the participating women only, for comparison
gr_ols <- lm(wage ~ experience + I(experience^2) + education + city,
  data = PSID1976, subset = participation == "yes")
summary(gr_ols)
## NOTE: ML estimates agree with Greene, 5e errata.
## Standard errors are based on the Hessian (here), while Greene has BHHH/OPG.
## ------------------------------------------------------------------
## Wooldridge (2002)
## ------------------------------------------------------------------

## Table 15.1, p. 468
## The same set of regressors is used for a linear probability model
## (numeric 0/1 response partnum), a logit and a probit.
wl_lpm <- lm(
  partnum ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  data = PSID1976
)
wl_logit <- glm(
  participation ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  family = binomial,
  data = PSID1976
)
wl_probit <- glm(
  participation ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  family = binomial(link = "probit"),
  data = PSID1976
)
## (same as Altman et al.)
## convenience functions

## McFadden-type pseudo R-squared: one minus the ratio of the model's
## log-likelihood to that of the intercept-only refit of the same model.
##
## obj: a fitted model supporting logLik() and update().
## Returns a plain numeric scalar (logLik attributes are dropped).
pseudoR2 <- function(obj) {
  ll_model <- logLik(obj)
  ll_null <- logLik(update(obj, . ~ 1))
  1 - as.vector(ll_model / ll_null)
}
## Misclassification rate of a fitted binary-response model.
##
## obj: a fitted model with a binary response (e.g. a binomial glm) for which
##   model.frame() and fitted() are available.
## Returns the share of observations whose fitted value, rounded to 0/1,
##   differs from the observed 0/1 response.
##
## Observed and predicted classes are compared element-wise instead of via
## the diagonal of table(observed, predicted): that table is only square when
## both classes are actually predicted, so diag() would pick the wrong cells
## whenever all fitted values round to the same class.
misclass <- function(obj) {
  observed <- model.response(model.frame(obj))
  if (is.factor(observed)) {
    ## binomial glm convention: first factor level is 0, any other level is 1
    observed <- as.numeric(observed != levels(observed)[1L])
  }
  predicted <- round(fitted(obj))
  mean(predicted != observed)
}
## For the logit and then the probit fit, print in turn: the coefficient
## tests, the log-likelihood, the misclassification rate and the pseudo
## R-squared. print() is explicit because values are not auto-printed inside
## a function body.
invisible(lapply(list(wl_logit, wl_probit), function(fit) {
  print(coeftest(fit))
  print(logLik(fit))
  print(misclass(fit))
  print(pseudoR2(fit))
}))
## Table 16.2, p. 528
## One formula for hours, estimated by OLS and by a tobit model.
form <- hours ~ nwincome + education + experience + I(experience^2) +
  age + youngkids + oldkids

wl_ols <- lm(form, data = PSID1976)
wl_tobit <- tobit(form, data = PSID1976)

summary(wl_ols)
summary(wl_tobit)
## ------------------------------------------------------------------
## McCullough (2004)
## ------------------------------------------------------------------

## p. 203
## Probit for participation and tobit for hours on the same regressors.
mc_probit <- glm(
  participation ~ nwincome + education + experience + I(experience^2) +
    age + youngkids + oldkids,
  family = binomial(link = "probit"),
  data = PSID1976
)
mc_tobit <- tobit(
  hours ~ nwincome + education + experience + I(experience^2) + age +
    youngkids + oldkids,
  data = PSID1976
)

coeftest(mc_probit)
coeftest(mc_tobit)
## tobit coefficient tests again, this time with the vcovOPG covariance
coeftest(mc_tobit, vcov = vcovOPG)