Last data update: 2014.03.03
R: Data and Examples from Stock and Watson (2007)
StockWatson2007 R Documentation
Data and Examples from Stock and Watson (2007)
Description
This manual page collects a list of examples from the book. Some
solutions might not be exact and the list is certainly not complete.
If you have suggestions for improvement (preferably in the form of code),
please contact the package maintainer.
References
Stock, J.H. and Watson, M.W. (2007). Introduction to Econometrics , 2nd ed. Boston: Addison Wesley.
URL http://wps.aw.com/aw_stock_ie_2/0,12040,3332253-,00.html .
See Also
CartelStability
, CASchools
, CigarettesSW
,
CollegeDistance
, CPSSW04
, CPSSW3
, CPSSW8
,
CPSSW9298
, CPSSW9204
, CPSSWEducation
,
Fatalities
, Fertility
, Fertility2
, FrozenJuice
,
GrowthSW
, Guns
, HealthInsurance
, HMDA
,
Journals
, MASchools
, NYSESW
, ResumeNames
,
SmokeBan
, SportsCards
, STAR
, TeachingRatings
,
USMacroSW
, USMacroSWM
, USMacroSWQ
, USSeatBelts
,
USStocksSW
, WeakInstrument
Examples
###############################
## Current Population Survey ##
###############################
## p. 165
data("CPSSWEducation", package = "AER")
plot(earnings ~ education, data = CPSSWEducation)
fm <- lm(earnings ~ education, data = CPSSWEducation)
coeftest(fm, vcov = sandwich)
abline(fm)
############################
## California test scores ##
############################
## data and transformations
data("CASchools", package = "AER")
CASchools$stratio <- with(CASchools, students/teachers)
CASchools$score <- with(CASchools, (math + read)/2)
## p. 152
fm1 <- lm(score ~ stratio, data = CASchools)
coeftest(fm1, vcov = sandwich)
## p. 159
fm2 <- lm(score ~ I(stratio < 20), data = CASchools)
## p. 199
fm3 <- lm(score ~ stratio + english, data = CASchools)
## p. 224
fm4 <- lm(score ~ stratio + expenditure + english, data = CASchools)
## Table 7.1, p. 242 (numbers refer to columns)
fmc3 <- lm(score ~ stratio + english + lunch, data = CASchools)
fmc4 <- lm(score ~ stratio + english + calworks, data = CASchools)
fmc5 <- lm(score ~ stratio + english + lunch + calworks, data = CASchools)
## Equation 8.2, p. 258
fmquad <- lm(score ~ income + I(income^2), data = CASchools)
## Equation 8.11, p. 266
fmcub <- lm(score ~ income + I(income^2) + I(income^3), data = CASchools)
## Equation 8.23, p. 272
fmloglog <- lm(log(score) ~ log(income), data = CASchools)
## Equation 8.24, p. 274
fmloglin <- lm(log(score) ~ income, data = CASchools)
## Equation 8.26, p. 275
fmlinlogcub <- lm(score ~ log(income) + I(log(income)^2) + I(log(income)^3),
data = CASchools)
## Table 8.3, p. 292 (numbers refer to columns)
fmc2 <- lm(score ~ stratio + english + lunch + log(income), data = CASchools)
fmc7 <- lm(score ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch + log(income),
data = CASchools)
#####################################
## Economics journal Subscriptions ##
#####################################
## data and transformed variables
data("Journals", package = "AER")
journals <- Journals[, c("subs", "price")]
journals$citeprice <- Journals$price/Journals$citations
journals$age <- 2000 - Journals$foundingyear
journals$chars <- Journals$charpp*Journals$pages/10^6
## Figure 8.9 (a) and (b)
plot(subs ~ citeprice, data = journals, pch = 19)
plot(log(subs) ~ log(citeprice), data = journals, pch = 19)
fm1 <- lm(log(subs) ~ log(citeprice), data = journals)
abline(fm1)
## Table 8.2, use HC1 for comparability with Stata
fm1 <- lm(subs ~ citeprice, data = log(journals))
fm2 <- lm(subs ~ citeprice + age + chars, data = log(journals))
fm3 <- lm(subs ~ citeprice + I(citeprice^2) + I(citeprice^3) +
age + I(age * citeprice) + chars, data = log(journals))
fm4 <- lm(subs ~ citeprice + age + I(age * citeprice) + chars, data = log(journals))
coeftest(fm1, vcov = vcovHC(fm1, type = "HC1"))
coeftest(fm2, vcov = vcovHC(fm2, type = "HC1"))
coeftest(fm3, vcov = vcovHC(fm3, type = "HC1"))
coeftest(fm4, vcov = vcovHC(fm4, type = "HC1"))
waldtest(fm3, fm4, vcov = vcovHC(fm3, type = "HC1"))
###############################
## Massachusetts test scores ##
###############################
## compare Massachusetts with California
data("MASchools", package = "AER")
data("CASchools", package = "AER")
CASchools$stratio <- with(CASchools, students/teachers)
CASchools$score4 <- with(CASchools, (math + read)/2)
## parts of Table 9.1, p. 330
vars <- c("score4", "stratio", "english", "lunch", "income")
cbind(
CA_mean = sapply(CASchools[, vars], mean),
CA_sd = sapply(CASchools[, vars], sd),
MA_mean = sapply(MASchools[, vars], mean),
MA_sd = sapply(MASchools[, vars], sd))
## Table 9.2, pp. 332--333, numbers refer to columns
MASchools$higheng <- with(MASchools, english > median(english))
fm1 <- lm(score4 ~ stratio, data = MASchools)
fm2 <- lm(score4 ~ stratio + english + lunch + log(income), data = MASchools)
fm3 <- lm(score4 ~ stratio + english + lunch + income + I(income^2) + I(income^3),
data = MASchools)
fm4 <- lm(score4 ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch +
income + I(income^2) + I(income^3), data = MASchools)
fm5 <- lm(score4 ~ stratio + higheng + I(higheng * stratio) + lunch +
income + I(income^2) + I(income^3), data = MASchools)
fm6 <- lm(score4 ~ stratio + lunch + income + I(income^2) + I(income^3),
data = MASchools)
## for comparability with Stata use HC1 below
coeftest(fm1, vcov = vcovHC(fm1, type = "HC1"))
coeftest(fm2, vcov = vcovHC(fm2, type = "HC1"))
coeftest(fm3, vcov = vcovHC(fm3, type = "HC1"))
coeftest(fm4, vcov = vcovHC(fm4, type = "HC1"))
coeftest(fm5, vcov = vcovHC(fm5, type = "HC1"))
coeftest(fm6, vcov = vcovHC(fm6, type = "HC1"))
## Testing exclusion of groups of variables
fm3r <- update(fm3, . ~ . - I(income^2) - I(income^3))
waldtest(fm3, fm3r, vcov = vcovHC(fm3, type = "HC1"))
fm4r_str1 <- update(fm4, . ~ . - stratio - I(stratio^2) - I(stratio^3))
waldtest(fm4, fm4r_str1, vcov = vcovHC(fm4, type = "HC1"))
fm4r_str2 <- update(fm4, . ~ . - I(stratio^2) - I(stratio^3))
waldtest(fm4, fm4r_str2, vcov = vcovHC(fm4, type = "HC1"))
fm4r_inc <- update(fm4, . ~ . - I(income^2) - I(income^3))
waldtest(fm4, fm4r_inc, vcov = vcovHC(fm4, type = "HC1"))
fm5r_str <- update(fm5, . ~ . - stratio - I(higheng * stratio))
waldtest(fm5, fm5r_str, vcov = vcovHC(fm5, type = "HC1"))
fm5r_inc <- update(fm5, . ~ . - I(income^2) - I(income^3))
waldtest(fm5, fm5r_inc, vcov = vcovHC(fm5, type = "HC1"))
fm5r_high <- update(fm5, . ~ . - higheng - I(higheng * stratio))
waldtest(fm5, fm5r_high, vcov = vcovHC(fm5, type = "HC1"))
fm6r_inc <- update(fm6, . ~ . - I(income^2) - I(income^3))
waldtest(fm6, fm6r_inc, vcov = vcovHC(fm6, type = "HC1"))
##################################
## Home mortgage disclosure act ##
##################################
## data
data("HMDA", package = "AER")
## 11.1, 11.3, 11.7, 11.8 and 11.10, pp. 387--395
fm1 <- lm(I(as.numeric(deny) - 1) ~ pirat, data = HMDA)
fm2 <- lm(I(as.numeric(deny) - 1) ~ pirat + afam, data = HMDA)
fm3 <- glm(deny ~ pirat, family = binomial(link = "probit"), data = HMDA)
fm4 <- glm(deny ~ pirat + afam, family = binomial(link = "probit"), data = HMDA)
fm5 <- glm(deny ~ pirat + afam, family = binomial(link = "logit"), data = HMDA)
## Table 11.1, p. 401
mean(HMDA$pirat)
mean(HMDA$hirat)
mean(HMDA$lvrat)
mean(as.numeric(HMDA$chist))
mean(as.numeric(HMDA$mhist))
mean(as.numeric(HMDA$phist)-1)
prop.table(table(HMDA$insurance))
prop.table(table(HMDA$selfemp))
prop.table(table(HMDA$single))
prop.table(table(HMDA$hschool))
mean(HMDA$unemp)
prop.table(table(HMDA$condomin))
prop.table(table(HMDA$afam))
prop.table(table(HMDA$deny))
## Table 11.2, pp. 403--404, numbers refer to columns
HMDA$lvrat <- factor(ifelse(HMDA$lvrat < 0.8, "low",
ifelse(HMDA$lvrat >= 0.8 & HMDA$lvrat <= 0.95, "medium", "high")),
levels = c("low", "medium", "high"))
HMDA$mhist <- as.numeric(HMDA$mhist)
HMDA$chist <- as.numeric(HMDA$chist)
fm1 <- lm(I(as.numeric(deny) - 1) ~ afam + pirat + hirat + lvrat + chist + mhist +
phist + insurance + selfemp, data = HMDA)
fm2 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
selfemp, family = binomial, data = HMDA)
fm3 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
selfemp, family = binomial(link = "probit"), data = HMDA)
fm4 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
selfemp + single + hschool + unemp, family = binomial(link = "probit"), data = HMDA)
fm5 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
selfemp + single + hschool + unemp + condomin +
I(mhist==3) + I(mhist==4) + I(chist==3) + I(chist==4) + I(chist==5) + I(chist==6),
family = binomial(link = "probit"), data = HMDA)
fm6 <- glm(deny ~ afam * (pirat + hirat) + lvrat + chist + mhist + phist + insurance +
selfemp + single + hschool + unemp, family = binomial(link = "probit"), data = HMDA)
coeftest(fm1, vcov = sandwich)
fm4r <- update(fm4, . ~ . - single - hschool - unemp)
waldtest(fm4, fm4r, vcov = sandwich)
fm5r <- update(fm5, . ~ . - single - hschool - unemp)
waldtest(fm5, fm5r, vcov = sandwich)
fm6r <- update(fm6, . ~ . - single - hschool - unemp)
waldtest(fm6, fm6r, vcov = sandwich)
fm5r2 <- update(fm5, . ~ . - I(mhist==3) - I(mhist==4) - I(chist==3) - I(chist==4) -
I(chist==5) - I(chist==6))
waldtest(fm5, fm5r2, vcov = sandwich)
fm6r2 <- update(fm6, . ~ . - afam * (pirat + hirat) + pirat + hirat)
waldtest(fm6, fm6r2, vcov = sandwich)
fm6r3 <- update(fm6, . ~ . - afam * (pirat + hirat) + pirat + hirat + afam)
waldtest(fm6, fm6r3, vcov = sandwich)
#########################################################
## Shooting down the "More Guns Less Crime" hypothesis ##
#########################################################
## data
data("Guns", package = "AER")
## Empirical Exercise 10.1
fm1 <- lm(log(violent) ~ law, data = Guns)
fm2 <- lm(log(violent) ~ law + prisoners + density + income +
population + afam + cauc + male, data = Guns)
fm3 <- lm(log(violent) ~ law + prisoners + density + income +
population + afam + cauc + male + state, data = Guns)
fm4 <- lm(log(violent) ~ law + prisoners + density + income +
population + afam + cauc + male + state + year, data = Guns)
coeftest(fm1, vcov = sandwich)
coeftest(fm2, vcov = sandwich)
printCoefmat(coeftest(fm3, vcov = sandwich)[1:9,])
printCoefmat(coeftest(fm4, vcov = sandwich)[1:9,])
###########################
## US traffic fatalities ##
###########################
## data from Stock and Watson (2007)
data("Fatalities")
## add fatality rate (number of traffic deaths
## per 10,000 people living in that state in that year)
Fatalities$frate <- with(Fatalities, fatal/pop * 10000)
## add discretized version of minimum legal drinking age
Fatalities$drinkagec <- cut(Fatalities$drinkage,
breaks = 18:22, include.lowest = TRUE, right = FALSE)
Fatalities$drinkagec <- relevel(Fatalities$drinkagec, ref = 4)
## any punishment?
Fatalities$punish <- with(Fatalities,
factor(jail == "yes" | service == "yes", labels = c("no", "yes")))
## plm package
library("plm")
## for comparability with Stata we use HC1 below
## p. 351, Eq. (10.2)
f1982 <- subset(Fatalities, year == "1982")
fm_1982 <- lm(frate ~ beertax, data = f1982)
coeftest(fm_1982, vcov = vcovHC(fm_1982, type = "HC1"))
## p. 353, Eq. (10.3)
f1988 <- subset(Fatalities, year == "1988")
fm_1988 <- lm(frate ~ beertax, data = f1988)
coeftest(fm_1988, vcov = vcovHC(fm_1988, type = "HC1"))
## pp. 355, Eq. (10.8)
fm_diff <- lm(I(f1988$frate - f1982$frate) ~ I(f1988$beertax - f1982$beertax))
coeftest(fm_diff, vcov = vcovHC(fm_diff, type = "HC1"))
## pp. 360, Eq. (10.15)
## (1) via formula
fm_sfe <- lm(frate ~ beertax + state - 1, data = Fatalities)
## (2) by hand
fat <- with(Fatalities,
data.frame(frates = frate - ave(frate, state),
beertaxs = beertax - ave(beertax, state)))
fm_sfe2 <- lm(frates ~ beertaxs - 1, data = fat)
## (3) via plm()
fm_sfe3 <- plm(frate ~ beertax, data = Fatalities,
index = c("state", "year"), model = "within")
coeftest(fm_sfe, vcov = vcovHC(fm_sfe, type = "HC1"))[1,]
## uses different df in sd and p-value
coeftest(fm_sfe2, vcov = vcovHC(fm_sfe2, type = "HC1"))[1,]
## uses different df in p-value
coeftest(fm_sfe3, vcov = vcovHC(fm_sfe3, type = "HC1", method = "white1"))[1,]
## pp. 363, Eq. (10.21)
## via lm()
fm_stfe <- lm(frate ~ beertax + state + year - 1, data = Fatalities)
coeftest(fm_stfe, vcov = vcovHC(fm_stfe, type = "HC1"))[1,]
## via plm()
fm_stfe2 <- plm(frate ~ beertax, data = Fatalities,
index = c("state", "year"), model = "within", effect = "twoways")
coeftest(fm_stfe2, vcov = vcovHC) ## different
## p. 368, Table 10.1, numbers refer to cols.
fm1 <- plm(frate ~ beertax, data = Fatalities, index = c("state", "year"),
model = "pooling")
fm2 <- plm(frate ~ beertax, data = Fatalities, index = c("state", "year"),
model = "within")
fm3 <- plm(frate ~ beertax, data = Fatalities, index = c("state", "year"),
model = "within", effect = "twoways")
fm4 <- plm(frate ~ beertax + drinkagec + jail + service + miles + unemp + log(income),
data = Fatalities, index = c("state", "year"), model = "within", effect = "twoways")
fm5 <- plm(frate ~ beertax + drinkagec + jail + service + miles,
data = Fatalities, index = c("state", "year"), model = "within", effect = "twoways")
fm6 <- plm(frate ~ beertax + drinkage + punish + miles + unemp + log(income),
data = Fatalities, index = c("state", "year"), model = "within", effect = "twoways")
fm7 <- plm(frate ~ beertax + drinkagec + jail + service + miles + unemp + log(income),
data = Fatalities, index = c("state", "year"), model = "within", effect = "twoways")
## summaries not too close, s.e.s generally too small
coeftest(fm1, vcov = vcovHC)
coeftest(fm2, vcov = vcovHC)
coeftest(fm3, vcov = vcovHC)
coeftest(fm4, vcov = vcovHC)
coeftest(fm5, vcov = vcovHC)
coeftest(fm6, vcov = vcovHC)
coeftest(fm7, vcov = vcovHC)
######################################
## Cigarette consumption panel data ##
######################################
## data and transformations
data("CigarettesSW", package = "AER")
CigarettesSW$rprice <- with(CigarettesSW, price/cpi)
CigarettesSW$rincome <- with(CigarettesSW, income/population/cpi)
CigarettesSW$tdiff <- with(CigarettesSW, (taxs - tax)/cpi)
c1985 <- subset(CigarettesSW, year == "1985")
c1995 <- subset(CigarettesSW, year == "1995")
## convenience function: HC1 covariances
hc1 <- function(x) vcovHC(x, type = "HC1")
## Equations 12.9--12.11
fm_s1 <- lm(log(rprice) ~ tdiff, data = c1995)
coeftest(fm_s1, vcov = hc1)
fm_s2 <- lm(log(packs) ~ fitted(fm_s1), data = c1995)
fm_ivreg <- ivreg(log(packs) ~ log(rprice) | tdiff, data = c1995)
coeftest(fm_ivreg, vcov = hc1)
## Equation 12.15
fm_ivreg2 <- ivreg(log(packs) ~ log(rprice) + log(rincome) | log(rincome) + tdiff,
data = c1995)
coeftest(fm_ivreg2, vcov = hc1)
## Equation 12.16
fm_ivreg3 <- ivreg(log(packs) ~ log(rprice) + log(rincome) | log(rincome) + tdiff + I(tax/cpi),
data = c1995)
coeftest(fm_ivreg3, vcov = hc1)
## Table 12.1, p. 448
ydiff <- log(c1995$packs) - log(c1985$packs)
pricediff <- log(c1995$price/c1995$cpi) - log(c1985$price/c1985$cpi)
incdiff <- log(c1995$income/c1995$population/c1995$cpi) -
log(c1985$income/c1985$population/c1985$cpi)
taxsdiff <- (c1995$taxs - c1995$tax)/c1995$cpi - (c1985$taxs - c1985$tax)/c1985$cpi
taxdiff <- c1995$tax/c1995$cpi - c1985$tax/c1985$cpi
fm_diff1 <- ivreg(ydiff ~ pricediff + incdiff | incdiff + taxsdiff)
fm_diff2 <- ivreg(ydiff ~ pricediff + incdiff | incdiff + taxdiff)
fm_diff3 <- ivreg(ydiff ~ pricediff + incdiff | incdiff + taxsdiff + taxdiff)
coeftest(fm_diff1, vcov = hc1)
coeftest(fm_diff2, vcov = hc1)
coeftest(fm_diff3, vcov = hc1)
## checking instrument relevance
fm_rel1 <- lm(pricediff ~ taxsdiff + incdiff)
fm_rel2 <- lm(pricediff ~ taxdiff + incdiff)
fm_rel3 <- lm(pricediff ~ incdiff + taxsdiff + taxdiff)
linearHypothesis(fm_rel1, "taxsdiff = 0", vcov = hc1)
linearHypothesis(fm_rel2, "taxdiff = 0", vcov = hc1)
linearHypothesis(fm_rel3, c("taxsdiff = 0", "taxdiff = 0"), vcov = hc1)
## testing overidentifying restrictions (J test)
fm_or <- lm(residuals(fm_diff3) ~ incdiff + taxsdiff + taxdiff)
(fm_or_test <- linearHypothesis(fm_or, c("taxsdiff = 0", "taxdiff = 0"), test = "Chisq"))
## warning: df (and hence p-value) invalid above.
## correct df: # instruments - # endogenous variables
pchisq(fm_or_test[2,5], df.residual(fm_diff3) - df.residual(fm_or), lower.tail = FALSE)
#####################################################
## Project STAR: Student-teacher achievement ratio ##
#####################################################
## data
data("STAR", package = "AER")
## p. 488
fmk <- lm(I(readk + mathk) ~ stark, data = STAR)
fm1 <- lm(I(read1 + math1) ~ star1, data = STAR)
fm2 <- lm(I(read2 + math2) ~ star2, data = STAR)
fm3 <- lm(I(read3 + math3) ~ star3, data = STAR)
coeftest(fm3, vcov = sandwich)
## p. 489
fmke <- lm(I(readk + mathk) ~ stark + experiencek, data = STAR)
coeftest(fmke, vcov = sandwich)
## equivalently:
## - reshape data from wide into long format
## - fit a single model nested in grade
## (a) variables and their levels
nam <- c("star", "read", "math", "lunch", "school", "degree", "ladder",
"experience", "tethnicity", "system", "schoolid")
lev <- c("k", "1", "2", "3")
## (b) reshaping
star <- reshape(STAR, idvar = "id", ids = row.names(STAR),
times = lev, timevar = "grade", direction = "long",
varying = lapply(nam, function(x) paste(x, lev, sep = "")))
## (c) improve variable names and type
names(star)[5:15] <- nam
star$id <- factor(star$id)
star$grade <- factor(star$grade, levels = lev,
labels = c("kindergarten", "1st", "2nd", "3rd"))
rm(nam, lev)
## (d) model fitting
fm <- lm(I(read + math) ~ 0 + grade/star, data = star)
#################################################
## Quarterly US macroeconomic data (1957-2005) ##
#################################################
## data
data("USMacroSW", package = "AER")
library("dynlm")
usm <- ts.intersect(USMacroSW, 4 * 100 * diff(log(USMacroSW[, "cpi"])))
colnames(usm) <- c(colnames(USMacroSW), "infl")
## Equation 14.7, p. 536
fm_ar1 <- dynlm(d(infl) ~ L(d(infl)),
data = usm, start = c(1962,1), end = c(2004,4))
coeftest(fm_ar1, vcov = sandwich)
## Equation 14.13, p. 538
fm_ar4 <- dynlm(d(infl) ~ L(d(infl), 1:4),
data = usm, start = c(1962,1), end = c(2004,4))
coeftest(fm_ar4, vcov = sandwich)
## Equation 14.16, p. 542
fm_adl41 <- dynlm(d(infl) ~ L(d(infl), 1:4) + L(unemp),
data = usm, start = c(1962,1), end = c(2004,4))
coeftest(fm_adl41, vcov = sandwich)
## Equation 14.17, p. 542
fm_adl44 <- dynlm(d(infl) ~ L(d(infl), 1:4) + L(unemp, 1:4),
data = usm, start = c(1962,1), end = c(2004,4))
coeftest(fm_adl44, vcov = sandwich)
## Granger causality test mentioned on p. 547
waldtest(fm_ar4, fm_adl44, vcov = sandwich)
## Equation 14.28, p. 559
fm_sp1 <- dynlm(infl ~ log(gdpjp), start = c(1965,1), end = c(1981,4), data = usm)
coeftest(fm_sp1, vcov = sandwich)
## Equation 14.29, p. 559
fm_sp2 <- dynlm(infl ~ log(gdpjp), start = c(1982,1), end = c(2004,4), data = usm)
coeftest(fm_sp2, vcov = sandwich)
## Equation 14.34, p. 563: ADF by hand
fm_adf <- dynlm(d(infl) ~ L(infl) + L(d(infl), 1:4),
data = usm, start = c(1962,1), end = c(2004,4))
coeftest(fm_adf)
## Figure 14.5, p. 570
## SW perform partial break test of unemp coefs
## here full model is used
library("strucchange")
infl <- usm[, "infl"]
unemp <- usm[, "unemp"]
usm <- ts.intersect(diff(infl), lag(diff(infl), k = -1), lag(diff(infl), k = -2),
lag(diff(infl), k = -3), lag(diff(infl), k = -4), lag(unemp, k = -1),
lag(unemp, k = -2), lag(unemp, k = -3), lag(unemp, k = -4))
colnames(usm) <- c("dinfl", paste("dinfl", 1:4, sep = ""), paste("unemp", 1:4, sep = ""))
usm <- window(usm, start = c(1962, 1), end = c(2004, 4))
fs <- Fstats(dinfl ~ ., data = usm)
sctest(fs, type = "supF")
plot(fs)
## alternatively: re-use fm_adl44
mf <- model.frame(fm_adl44)
mf <- ts(as.matrix(mf), start = c(1962, 1), freq = 4)
colnames(mf) <- c("y", paste("x", 1:8, sep = ""))
ff <- as.formula(paste("y", "~", paste("x", 1:8, sep = "", collapse = " + ")))
fs <- Fstats(ff, data = mf, from = 0.1)
plot(fs)
lines(boundary(fs, alpha = 0.01), lty = 2, col = 2)
lines(boundary(fs, alpha = 0.1), lty = 3, col = 2)
##########################################
## Monthly US stock returns (1931-2002) ##
##########################################
## package and data
library("dynlm")
data("USStocksSW", package = "AER")
## Table 14.3, p. 540
fm1 <- dynlm(returns ~ L(returns), data = USStocksSW, start = c(1960,1))
coeftest(fm1, vcov = sandwich)
fm2 <- dynlm(returns ~ L(returns, 1:2), data = USStocksSW, start = c(1960,1))
waldtest(fm2, vcov = sandwich)
fm3 <- dynlm(returns ~ L(returns, 1:4), data = USStocksSW, start = c(1960,1))
waldtest(fm3, vcov = sandwich)
## Table 14.7, p. 574
fm4 <- dynlm(returns ~ L(returns) + L(d(dividend)),
data = USStocksSW, start = c(1960, 1))
fm5 <- dynlm(returns ~ L(returns, 1:2) + L(d(dividend), 1:2),
data = USStocksSW, start = c(1960, 1))
fm6 <- dynlm(returns ~ L(returns) + L(dividend),
data = USStocksSW, start = c(1960, 1))
##################################
## Price of frozen orange juice ##
##################################
## load data
data("FrozenJuice")
## Stock and Watson, p. 594
library("dynlm")
fm_dyn <- dynlm(d(100 * log(price/ppi)) ~ fdd, data = FrozenJuice)
coeftest(fm_dyn, vcov = vcovHC(fm_dyn, type = "HC1"))
## equivalently, returns can be computed 'by hand'
## (reducing the complexity of the formula notation)
fj <- ts.union(fdd = FrozenJuice[, "fdd"],
ret = 100 * diff(log(FrozenJuice[,"price"]/FrozenJuice[,"ppi"])))
fm_dyn <- dynlm(ret ~ fdd, data = fj)
## Stock and Watson, p. 595
fm_dl <- dynlm(ret ~ L(fdd, 0:6), data = fj)
coeftest(fm_dl, vcov = vcovHC(fm_dl, type = "HC1"))
## Stock and Watson, Table 15.1, p. 620, numbers refer to columns
## (1) Dynamic Multipliers
fm1 <- dynlm(ret ~ L(fdd, 0:18), data = fj)
coeftest(fm1, vcov = NeweyWest(fm1, lag = 7, prewhite = FALSE))
## (2) Cumulative Multipliers
fm2 <- dynlm(ret ~ L(d(fdd), 0:17) + L(fdd, 18), data = fj)
coeftest(fm2, vcov = NeweyWest(fm2, lag = 7, prewhite = FALSE))
## (3) Cumulative Multipliers, more lags in NW
coeftest(fm2, vcov = NeweyWest(fm2, lag = 14, prewhite = FALSE))
## (4) Cumulative Multipliers with monthly indicators
fm4 <- dynlm(ret ~ L(d(fdd), 0:17) + L(fdd, 18) + season(fdd), data = fj)
coeftest(fm4, vcov = NeweyWest(fm4, lag = 7, prewhite = FALSE))
## monthly indicators needed?
fm4r <- update(fm4, . ~ . - season(fdd))
waldtest(fm4, fm4r, vcov= NeweyWest(fm4, lag = 7, prewhite = FALSE)) ## close ...
#############################################
## New York Stock Exchange composite index ##
#############################################
## returns
data("NYSESW", package = "AER")
ret <- 100 * diff(log(NYSESW))
plot(ret)
## fit GARCH(1,1)
library("tseries")
fm <- garch(coredata(ret))
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(AER)
Loading required package: car
Loading required package: lmtest
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
Loading required package: sandwich
Loading required package: survival
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/AER/StockWatson2007.Rd_%03d_medium.png", width=480, height=480)
> ### Name: StockWatson2007
> ### Title: Data and Examples from Stock and Watson (2007)
> ### Aliases: StockWatson2007
> ### Keywords: datasets
>
> ### ** Examples
>
> ###############################
> ## Current Population Survey ##
> ###############################
>
> ## p. 165
> data("CPSSWEducation", package = "AER")
> plot(earnings ~ education, data = CPSSWEducation)
> fm <- lm(earnings ~ education, data = CPSSWEducation)
> coeftest(fm, vcov = sandwich)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.134371 0.925571 -3.3864 0.0007174 ***
education 1.466925 0.071918 20.3972 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> abline(fm)
>
>
> ############################
> ## California test scores ##
> ############################
>
> ## data and transformations
> data("CASchools", package = "AER")
> CASchools$stratio <- with(CASchools, students/teachers)
> CASchools$score <- with(CASchools, (math + read)/2)
>
> ## p. 152
> fm1 <- lm(score ~ stratio, data = CASchools)
> coeftest(fm1, vcov = sandwich)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 698.93295 10.33966 67.597 < 2.2e-16 ***
stratio -2.27981 0.51825 -4.399 1.382e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> ## p. 159
> fm2 <- lm(score ~ I(stratio < 20), data = CASchools)
> ## p. 199
> fm3 <- lm(score ~ stratio + english, data = CASchools)
> ## p. 224
> fm4 <- lm(score ~ stratio + expenditure + english, data = CASchools)
>
> ## Table 7.1, p. 242 (numbers refer to columns)
> fmc3 <- lm(score ~ stratio + english + lunch, data = CASchools)
> fmc4 <- lm(score ~ stratio + english + calworks, data = CASchools)
> fmc5 <- lm(score ~ stratio + english + lunch + calworks, data = CASchools)
>
> ## Equation 8.2, p. 258
> fmquad <- lm(score ~ income + I(income^2), data = CASchools)
> ## Equation 8.11, p. 266
> fmcub <- lm(score ~ income + I(income^2) + I(income^3), data = CASchools)
> ## Equation 8.23, p. 272
> fmloglog <- lm(log(score) ~ log(income), data = CASchools)
> ## Equation 8.24, p. 274
> fmloglin <- lm(log(score) ~ income, data = CASchools)
> ## Equation 8.26, p. 275
> fmlinlogcub <- lm(score ~ log(income) + I(log(income)^2) + I(log(income)^3),
+ data = CASchools)
>
> ## Table 8.3, p. 292 (numbers refer to columns)
> fmc2 <- lm(score ~ stratio + english + lunch + log(income), data = CASchools)
> fmc7 <- lm(score ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch + log(income),
+ data = CASchools)
>
>
> #####################################
> ## Economics journal Subscriptions ##
> #####################################
>
> ## data and transformed variables
> data("Journals", package = "AER")
> journals <- Journals[, c("subs", "price")]
> journals$citeprice <- Journals$price/Journals$citations
> journals$age <- 2000 - Journals$foundingyear
> journals$chars <- Journals$charpp*Journals$pages/10^6
>
> ## Figure 8.9 (a) and (b)
> plot(subs ~ citeprice, data = journals, pch = 19)
> plot(log(subs) ~ log(citeprice), data = journals, pch = 19)
> fm1 <- lm(log(subs) ~ log(citeprice), data = journals)
> abline(fm1)
>
> ## Table 8.2, use HC1 for comparability with Stata
> fm1 <- lm(subs ~ citeprice, data = log(journals))
> fm2 <- lm(subs ~ citeprice + age + chars, data = log(journals))
> fm3 <- lm(subs ~ citeprice + I(citeprice^2) + I(citeprice^3) +
+ age + I(age * citeprice) + chars, data = log(journals))
> fm4 <- lm(subs ~ citeprice + age + I(age * citeprice) + chars, data = log(journals))
> coeftest(fm1, vcov = vcovHC(fm1, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.766212 0.055258 86.253 < 2.2e-16 ***
citeprice -0.533053 0.033959 -15.697 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm2, vcov = vcovHC(fm2, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.206648 0.379725 8.4447 1.102e-14 ***
citeprice -0.407718 0.043717 -9.3262 < 2.2e-16 ***
age 0.423649 0.119064 3.5581 0.0004801 ***
chars 0.205614 0.097751 2.1035 0.0368474 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm3, vcov = vcovHC(fm3, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.4075956 0.3735992 9.1210 < 2.2e-16 ***
citeprice -0.9609365 0.1601349 -6.0008 1.121e-08 ***
I(citeprice^2) 0.0165099 0.0254886 0.6477 0.518015
I(citeprice^3) 0.0036666 0.0055147 0.6649 0.507008
age 0.3730539 0.1176966 3.1696 0.001805 **
I(age * citeprice) 0.1557773 0.0518947 3.0018 0.003081 **
chars 0.2346178 0.0977318 2.4006 0.017428 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm4, vcov = vcovHC(fm4, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.433521 0.367471 9.3436 < 2.2e-16 ***
citeprice -0.898910 0.144648 -6.2144 3.656e-09 ***
age 0.373515 0.117527 3.1781 0.0017529 **
I(age * citeprice) 0.140959 0.040199 3.5065 0.0005769 ***
chars 0.229466 0.096493 2.3781 0.0184822 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> waldtest(fm3, fm4, vcov = vcovHC(fm3, type = "HC1"))
Wald test
Model 1: subs ~ citeprice + I(citeprice^2) + I(citeprice^3) + age + I(age *
citeprice) + chars
Model 2: subs ~ citeprice + age + I(age * citeprice) + chars
Res.Df Df F Pr(>F)
1 173
2 175 -2 0.249 0.7799
>
>
> ###############################
> ## Massachusetts test scores ##
> ###############################
>
> ## compare Massachusetts with California
> data("MASchools", package = "AER")
> data("CASchools", package = "AER")
> CASchools$stratio <- with(CASchools, students/teachers)
> CASchools$score4 <- with(CASchools, (math + read)/2)
>
> ## parts of Table 9.1, p. 330
> vars <- c("score4", "stratio", "english", "lunch", "income")
> cbind(
+ CA_mean = sapply(CASchools[, vars], mean),
+ CA_sd = sapply(CASchools[, vars], sd),
+ MA_mean = sapply(MASchools[, vars], mean),
+ MA_sd = sapply(MASchools[, vars], sd))
CA_mean CA_sd MA_mean MA_sd
score4 654.15655 19.053347 709.827273 15.126474
stratio 19.64043 1.891812 17.344091 2.276666
english 15.76816 18.285927 1.117676 2.900940
lunch 44.70524 27.123381 15.315909 15.060068
income 15.31659 7.225890 18.746764 5.807637
>
> ## Table 9.2, pp. 332--333, numbers refer to columns
> MASchools$higheng <- with(MASchools, english > median(english))
> fm1 <- lm(score4 ~ stratio, data = MASchools)
> fm2 <- lm(score4 ~ stratio + english + lunch + log(income), data = MASchools)
> fm3 <- lm(score4 ~ stratio + english + lunch + income + I(income^2) + I(income^3),
+ data = MASchools)
> fm4 <- lm(score4 ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch +
+ income + I(income^2) + I(income^3), data = MASchools)
> fm5 <- lm(score4 ~ stratio + higheng + I(higheng * stratio) + lunch +
+ income + I(income^2) + I(income^3), data = MASchools)
> fm6 <- lm(score4 ~ stratio + lunch + income + I(income^2) + I(income^3),
+ data = MASchools)
>
> ## for comparability with Stata use HC1 below
> coeftest(fm1, vcov = vcovHC(fm1, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 739.62113 8.60727 85.9298 < 2.2e-16 ***
stratio -1.71781 0.49906 -3.4421 0.000692 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm2, vcov = vcovHC(fm2, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 682.431602 11.497244 59.3561 < 2.2e-16 ***
stratio -0.689179 0.269973 -2.5528 0.01138 *
english -0.410745 0.306377 -1.3407 0.18145
lunch -0.521465 0.077659 -6.7148 1.653e-10 ***
log(income) 16.529359 3.145722 5.2546 3.566e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm3, vcov = vcovHC(fm3, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.4403e+02 2.1318e+01 34.9016 < 2.2e-16 ***
stratio -6.4091e-01 2.6848e-01 -2.3872 0.01785 *
english -4.3712e-01 3.0332e-01 -1.4411 0.15103
lunch -5.8182e-01 9.7353e-02 -5.9764 9.488e-09 ***
income -3.0667e+00 2.3525e+00 -1.3036 0.19379
I(income^2) 1.6369e-01 8.5330e-02 1.9183 0.05641 .
I(income^3) -2.1793e-03 9.7033e-04 -2.2459 0.02574 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm4, vcov = vcovHC(fm4, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 665.4960529 81.3317629 8.1825 2.600e-14 ***
stratio 12.4259758 14.0101690 0.8869 0.37613
I(stratio^2) -0.6803029 0.7365191 -0.9237 0.35671
I(stratio^3) 0.0114737 0.0126663 0.9058 0.36605
english -0.4341659 0.2997883 -1.4482 0.14903
lunch -0.5872165 0.1040207 -5.6452 5.283e-08 ***
income -3.3815370 2.4906830 -1.3577 0.17602
I(income^2) 0.1741018 0.0892596 1.9505 0.05244 .
I(income^3) -0.0022883 0.0010078 -2.2706 0.02418 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm5, vcov = vcovHC(fm5, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 759.9142249 23.2331213 32.7082 < 2.2e-16 ***
stratio -1.0176806 0.3703923 -2.7476 0.006521 **
highengTRUE -12.5607339 9.7934588 -1.2826 0.201046
I(higheng * stratio) 0.7986123 0.5552225 1.4384 0.151805
lunch -0.7085098 0.0908442 -7.7992 2.785e-13 ***
income -3.8665072 2.4884002 -1.5538 0.121721
I(income^2) 0.1841250 0.0898247 2.0498 0.041612 *
I(income^3) -0.0023364 0.0010153 -2.3013 0.022349 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm6, vcov = vcovHC(fm6, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.4736e+02 2.0278e+01 36.8563 < 2e-16 ***
stratio -6.7188e-01 2.7128e-01 -2.4767 0.01404 *
lunch -6.5308e-01 7.2980e-02 -8.9487 < 2e-16 ***
income -3.2180e+00 2.3057e+00 -1.3956 0.16427
I(income^2) 1.6479e-01 8.4634e-02 1.9471 0.05284 .
I(income^3) -2.1550e-03 9.6995e-04 -2.2218 0.02735 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> ## Testing exclusion of groups of variables
> fm3r <- update(fm3, . ~ . - I(income^2) - I(income^3))
> waldtest(fm3, fm3r, vcov = vcovHC(fm3, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + english + lunch + income + I(income^2) + I(income^3)
Model 2: score4 ~ stratio + english + lunch + income
Res.Df Df F Pr(>F)
1 213
2 215 -2 7.7448 0.0005664 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> fm4r_str1 <- update(fm4, . ~ . - stratio - I(stratio^2) - I(stratio^3))
> waldtest(fm4, fm4r_str1, vcov = vcovHC(fm4, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch +
income + I(income^2) + I(income^3)
Model 2: score4 ~ english + lunch + income + I(income^2) + I(income^3)
Res.Df Df F Pr(>F)
1 211
2 214 -3 2.8565 0.03809 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> fm4r_str2 <- update(fm4, . ~ . - I(stratio^2) - I(stratio^3))
> waldtest(fm4, fm4r_str2, vcov = vcovHC(fm4, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch +
income + I(income^2) + I(income^3)
Model 2: score4 ~ stratio + english + lunch + income + I(income^2) + I(income^3)
Res.Df Df F Pr(>F)
1 211
2 213 -2 0.4463 0.6406
> fm4r_inc <- update(fm4, . ~ . - I(income^2) - I(income^3))
> waldtest(fm4, fm4r_inc, vcov = vcovHC(fm4, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch +
income + I(income^2) + I(income^3)
Model 2: score4 ~ stratio + I(stratio^2) + I(stratio^3) + english + lunch +
income
Res.Df Df F Pr(>F)
1 211
2 213 -2 7.7487 0.0005657 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> fm5r_str <- update(fm5, . ~ . - stratio - I(higheng * stratio))
> waldtest(fm5, fm5r_str, vcov = vcovHC(fm5, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + higheng + I(higheng * stratio) + lunch + income +
I(income^2) + I(income^3)
Model 2: score4 ~ higheng + lunch + income + I(income^2) + I(income^3)
Res.Df Df F Pr(>F)
1 212
2 214 -2 4.0062 0.0196 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> fm5r_inc <- update(fm5, . ~ . - I(income^2) - I(income^3))
> waldtest(fm5, fm5r_inc, vcov = vcovHC(fm5, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + higheng + I(higheng * stratio) + lunch + income +
I(income^2) + I(income^3)
Model 2: score4 ~ stratio + higheng + I(higheng * stratio) + lunch + income
Res.Df Df F Pr(>F)
1 212
2 214 -2 5.8468 0.003375 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> fm5r_high <- update(fm5, . ~ . - higheng - I(higheng * stratio))
> waldtest(fm5, fm5r_high, vcov = vcovHC(fm5, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + higheng + I(higheng * stratio) + lunch + income +
I(income^2) + I(income^3)
Model 2: score4 ~ stratio + lunch + income + I(income^2) + I(income^3)
Res.Df Df F Pr(>F)
1 212
2 214 -2 1.5835 0.2077
>
> fm6r_inc <- update(fm6, . ~ . - I(income^2) - I(income^3))
> waldtest(fm6, fm6r_inc, vcov = vcovHC(fm6, type = "HC1"))
Wald test
Model 1: score4 ~ stratio + lunch + income + I(income^2) + I(income^3)
Model 2: score4 ~ stratio + lunch + income
Res.Df Df F Pr(>F)
1 214
2 216 -2 6.5479 0.001737 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
>
> ##################################
> ## Home mortgage disclosure act ##
> ##################################
>
> ## data
> data("HMDA", package = "AER")
>
> ## 11.1, 11.3, 11.7, 11.8 and 11.10, pp. 387--395
> fm1 <- lm(I(as.numeric(deny) - 1) ~ pirat, data = HMDA)
> fm2 <- lm(I(as.numeric(deny) - 1) ~ pirat + afam, data = HMDA)
> fm3 <- glm(deny ~ pirat, family = binomial(link = "probit"), data = HMDA)
> fm4 <- glm(deny ~ pirat + afam, family = binomial(link = "probit"), data = HMDA)
> fm5 <- glm(deny ~ pirat + afam, family = binomial(link = "logit"), data = HMDA)
>
> ## Table 11.1, p. 401
> mean(HMDA$pirat)
[1] 0.3308136
> mean(HMDA$hirat)
[1] 0.2553461
> mean(HMDA$lvrat)
[1] 0.7377759
> mean(as.numeric(HMDA$chist))
[1] 2.116387
> mean(as.numeric(HMDA$mhist))
[1] 1.721008
> mean(as.numeric(HMDA$phist)-1)
[1] 0.07352941
> prop.table(table(HMDA$insurance))
no yes
0.97983193 0.02016807
> prop.table(table(HMDA$selfemp))
no yes
0.8836134 0.1163866
> prop.table(table(HMDA$single))
no yes
0.6067227 0.3932773
> prop.table(table(HMDA$hschool))
no yes
0.01638655 0.98361345
> mean(HMDA$unemp)
[1] 3.774496
> prop.table(table(HMDA$condomin))
no yes
0.7117647 0.2882353
> prop.table(table(HMDA$afam))
no yes
0.857563 0.142437
> prop.table(table(HMDA$deny))
no yes
0.8802521 0.1197479
>
> ## Table 11.2, pp. 403--404, numbers refer to columns
> HMDA$lvrat <- factor(ifelse(HMDA$lvrat < 0.8, "low",
+ ifelse(HMDA$lvrat >= 0.8 & HMDA$lvrat <= 0.95, "medium", "high")),
+ levels = c("low", "medium", "high"))
> HMDA$mhist <- as.numeric(HMDA$mhist)
> HMDA$chist <- as.numeric(HMDA$chist)
>
> fm1 <- lm(I(as.numeric(deny) - 1) ~ afam + pirat + hirat + lvrat + chist + mhist +
+ phist + insurance + selfemp, data = HMDA)
> fm2 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
+ selfemp, family = binomial, data = HMDA)
> fm3 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
+ selfemp, family = binomial(link = "probit"), data = HMDA)
> fm4 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
+ selfemp + single + hschool + unemp, family = binomial(link = "probit"), data = HMDA)
> fm5 <- glm(deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist + insurance +
+ selfemp + single + hschool + unemp + condomin +
+ I(mhist==3) + I(mhist==4) + I(chist==3) + I(chist==4) + I(chist==5) + I(chist==6),
+ family = binomial(link = "probit"), data = HMDA)
> fm6 <- glm(deny ~ afam * (pirat + hirat) + lvrat + chist + mhist + phist + insurance +
+ selfemp + single + hschool + unemp, family = binomial(link = "probit"), data = HMDA)
> coeftest(fm1, vcov = sandwich)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.1829933 0.0276089 -6.6281 4.197e-11 ***
afamyes 0.0836967 0.0225101 3.7182 0.0002053 ***
pirat 0.4487963 0.1133333 3.9600 7.717e-05 ***
hirat -0.0480227 0.1093055 -0.4393 0.6604528
lvratmedium 0.0314498 0.0127097 2.4745 0.0134125 *
lvrathigh 0.1890511 0.0500520 3.7771 0.0001626 ***
chist 0.0307716 0.0045737 6.7279 2.150e-11 ***
mhist 0.0209104 0.0112637 1.8565 0.0635134 .
phistyes 0.1970876 0.0348005 5.6634 1.664e-08 ***
insuranceyes 0.7018841 0.0450008 15.5972 < 2.2e-16 ***
selfempyes 0.0598438 0.0204759 2.9227 0.0035035 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> fm4r <- update(fm4, . ~ . - single - hschool - unemp)
> waldtest(fm4, fm4r, vcov = sandwich)
Wald test
Model 1: deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist +
insurance + selfemp + single + hschool + unemp
Model 2: deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist +
insurance + selfemp
Res.Df Df F Pr(>F)
1 2366
2 2369 -3 5.9463 0.0004893 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> fm5r <- update(fm5, . ~ . - single - hschool - unemp)
> waldtest(fm5, fm5r, vcov = sandwich)
Wald test
Model 1: deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist +
insurance + selfemp + single + hschool + unemp + condomin +
I(mhist == 3) + I(mhist == 4) + I(chist == 3) + I(chist ==
4) + I(chist == 5) + I(chist == 6)
Model 2: deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist +
insurance + selfemp + condomin + I(mhist == 3) + I(mhist ==
4) + I(chist == 3) + I(chist == 4) + I(chist == 5) + I(chist ==
6)
Res.Df Df F Pr(>F)
1 2359
2 2362 -3 5.1601 0.001482 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> fm6r <- update(fm6, . ~ . - single - hschool - unemp)
> waldtest(fm6, fm6r, vcov = sandwich)
Wald test
Model 1: deny ~ afam * (pirat + hirat) + lvrat + chist + mhist + phist +
insurance + selfemp + single + hschool + unemp
Model 2: deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist +
insurance + selfemp + afam:pirat + afam:hirat
Res.Df Df F Pr(>F)
1 2364
2 2367 -3 5.8876 0.0005316 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> fm5r2 <- update(fm5, . ~ . - I(mhist==3) - I(mhist==4) - I(chist==3) - I(chist==4) -
+ I(chist==5) - I(chist==6))
> waldtest(fm5, fm5r2, vcov = sandwich)
Wald test
Model 1: deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist +
insurance + selfemp + single + hschool + unemp + condomin +
I(mhist == 3) + I(mhist == 4) + I(chist == 3) + I(chist ==
4) + I(chist == 5) + I(chist == 6)
Model 2: deny ~ afam + pirat + hirat + lvrat + chist + mhist + phist +
insurance + selfemp + single + hschool + unemp + condomin
Res.Df Df F Pr(>F)
1 2359
2 2365 -6 1.2305 0.2873
>
> fm6r2 <- update(fm6, . ~ . - afam * (pirat + hirat) + pirat + hirat)
> waldtest(fm6, fm6r2, vcov = sandwich)
Wald test
Model 1: deny ~ afam * (pirat + hirat) + lvrat + chist + mhist + phist +
insurance + selfemp + single + hschool + unemp
Model 2: deny ~ lvrat + chist + mhist + phist + insurance + selfemp +
single + hschool + unemp + pirat + hirat
Res.Df Df F Pr(>F)
1 2364
2 2367 -3 4.9217 0.00207 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> fm6r3 <- update(fm6, . ~ . - afam * (pirat + hirat) + pirat + hirat + afam)
> waldtest(fm6, fm6r3, vcov = sandwich)
Wald test
Model 1: deny ~ afam * (pirat + hirat) + lvrat + chist + mhist + phist +
insurance + selfemp + single + hschool + unemp
Model 2: deny ~ lvrat + chist + mhist + phist + insurance + selfemp +
single + hschool + unemp + pirat + hirat + afam
Res.Df Df F Pr(>F)
1 2364
2 2366 -2 0.2634 0.7685
>
>
>
> #########################################################
> ## Shooting down the "More Guns Less Crime" hypothesis ##
> #########################################################
>
> ## data
> data("Guns", package = "AER")
>
> ## Empirical Exercise 10.1
> fm1 <- lm(log(violent) ~ law, data = Guns)
> fm2 <- lm(log(violent) ~ law + prisoners + density + income +
+ population + afam + cauc + male, data = Guns)
> fm3 <- lm(log(violent) ~ law + prisoners + density + income +
+ population + afam + cauc + male + state, data = Guns)
> fm4 <- lm(log(violent) ~ law + prisoners + density + income +
+ population + afam + cauc + male + state + year, data = Guns)
> coeftest(fm1, vcov = sandwich)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.134919 0.019287 318.078 < 2.2e-16 ***
lawyes -0.442965 0.047488 -9.328 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> coeftest(fm2, vcov = sandwich)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.9817e+00 6.0668e-01 4.9149 1.016e-06 ***
lawyes -3.6839e-01 3.4654e-02 -10.6304 < 2.2e-16 ***
prisoners 1.6126e-03 1.8000e-04 8.9591 < 2.2e-16 ***
density 2.6688e-02 1.4294e-02 1.8671 0.062142 .
income 1.2051e-06 7.2498e-06 0.1662 0.868007
population 4.2710e-02 3.1345e-03 13.6255 < 2.2e-16 ***
afam 8.0853e-02 1.9916e-02 4.0598 5.241e-05 ***
cauc 3.1200e-02 9.6897e-03 3.2200 0.001317 **
male 8.8709e-03 1.2014e-02 0.7384 0.460435
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> printCoefmat(coeftest(fm3, vcov = sandwich)[1:9,])
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.0368e+00 3.7479e-01 10.7708 < 2.2e-16 ***
lawyes -4.6141e-02 1.9435e-02 -2.3741 0.01776 *
prisoners -7.1008e-05 9.4831e-05 -0.7488 0.45414
density -1.7229e-01 1.0221e-01 -1.6857 0.09213 .
income -9.2037e-06 6.5619e-06 -1.4026 0.16102
population 1.1525e-02 9.4572e-03 1.2186 0.22325
afam 1.0428e-01 1.6133e-02 6.4636 1.526e-10 ***
cauc 4.0861e-02 5.2487e-03 7.7850 1.585e-14 ***
male -5.0273e-02 7.5923e-03 -6.6215 5.518e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> printCoefmat(coeftest(fm4, vcov = sandwich)[1:9,])
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.9720e+00 4.3322e-01 9.1685 < 2.2e-16 ***
lawyes -2.7994e-02 1.8692e-02 -1.4976 0.1345
prisoners 7.5994e-05 8.0008e-05 0.9498 0.3424
density -9.1555e-02 6.2588e-02 -1.4628 0.1438
income 9.5859e-07 6.9440e-06 0.1380 0.8902
population -4.7545e-03 6.4673e-03 -0.7351 0.4624
afam 2.9186e-02 2.0298e-02 1.4379 0.1507
cauc 9.2500e-03 8.2188e-03 1.1255 0.2606
male 7.3326e-02 1.8116e-02 4.0475 5.542e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
>
> ###########################
> ## US traffic fatalities ##
> ###########################
>
> ## data from Stock and Watson (2007)
> data("Fatalities")
> ## add fatality rate (number of traffic deaths
> ## per 10,000 people living in that state in that year)
> Fatalities$frate <- with(Fatalities, fatal/pop * 10000)
> ## add discretized version of minimum legal drinking age
> Fatalities$drinkagec <- cut(Fatalities$drinkage,
+ breaks = 18:22, include.lowest = TRUE, right = FALSE)
> Fatalities$drinkagec <- relevel(Fatalities$drinkagec, ref = 4)
> ## any punishment?
> Fatalities$punish <- with(Fatalities,
+ factor(jail == "yes" | service == "yes", labels = c("no", "yes")))
> ## plm package
> library("plm")
Loading required package: Formula
>
> ## for comparability with Stata we use HC1 below
> ## p. 351, Eq. (10.2)
> f1982 <- subset(Fatalities, year == "1982")
> fm_1982 <- lm(frate ~ beertax, data = f1982)
> coeftest(fm_1982, vcov = vcovHC(fm_1982, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.01038 0.14957 13.4408 <2e-16 ***
beertax 0.14846 0.13261 1.1196 0.2687
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> ## p. 353, Eq. (10.3)
> f1988 <- subset(Fatalities, year == "1988")
> fm_1988 <- lm(frate ~ beertax, data = f1988)
> coeftest(fm_1988, vcov = vcovHC(fm_1988, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.85907 0.11461 16.2205 < 2.2e-16 ***
beertax 0.43875 0.12786 3.4314 0.001279 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> ## pp. 355, Eq. (10.8)
> fm_diff <- lm(I(f1988$frate - f1982$frate) ~ I(f1988$beertax - f1982$beertax))
> coeftest(fm_diff, vcov = vcovHC(fm_diff, type = "HC1"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.072037 0.065355 -1.1022 0.276091
I(f1988$beertax - f1982$beertax) -1.040973 0.355006 -2.9323 0.005229 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> ## pp. 360, Eq. (10.15)
> ## (1) via formula
> fm_sfe <- lm(frate ~ beertax + state - 1, data = Fatalities)
> ## (2) by hand
> fat <- with(Fatalities,
+ data.frame(frates = frate - ave(frate, state),
+ beertaxs = beertax - ave(beertax, state)))
> fm_sfe2 <- lm(frates ~ beertaxs - 1, data = fat)
> ## (3) via plm()
> fm_sfe3 <- plm(frate ~ beertax, data = Fatalities,
+ index = c("state", "year"), model = "within")
>
> coeftest(fm_sfe, vcov = vcovHC(fm_sfe, type = "HC1"))[1,]
Estimate Std. Error t value Pr(>|t|)
-0.655873722 0.203279719 -3.226459220 0.001398372
>
> ## uses different df in sd and p-value
> coeftest(fm_sfe2, vcov = vcovHC(fm_sfe2, type = "HC1"))[1,]
Estimate Std. Error t value Pr(>|t|)
-0.655873722 0.188153627 -3.485841496 0.000555894
>
> ## uses different df in p-value
> coeftest(fm_sfe3, vcov = vcovHC(fm_sfe3, type = "HC1", method = "white1"))[1,]
Estimate Std. Error t value Pr(>|t|)
-0.6558737222 0.1881536274 -3.4858414958 0.0005673213
>
>
> ## pp. 363, Eq. (10.21)
> ## via lm()
> fm_stfe <- lm(frate ~ beertax + state + year - 1, data = Fatalities)
> coeftest(fm_stfe, vcov = vcovHC(fm_stfe, type = "HC1"))[1,]
Estimate Std. Error t value Pr(>|t|)
-0.6399800 0.2547149 -2.5125346 0.0125470
> ## via plm()
> fm_stfe2 <- plm(frate ~ beertax, data = Fatalities,
+ index = c("state", "year"), model = "within", effect = "twoways")
> coeftest(fm_stfe2, vcov = vcovHC) ## different
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
beertax -0.63998 0.34963 -1.8305 0.06824 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
>
> ## p. 368, Table 10.1, numbers refer to cols.
> fm1 <- plm(frate ~ beertax, data = Fatalities, index = c("state", "year"),
+ model = "pooling")
> fm2 <- plm(frate ~ beertax, data = Fatalities, index = c("state", "year"),
+ model = "within")
> fm3 <- plm(frate ~ beertax, data = Fatalities, index = c("state", "year"),
+ model = "within", effect = "twoways")
> fm4 <- plm(frate ~ beertax + drinkagec + jail + service + miles + unemp + log(income),
+ data = Fatalities, index = c("state", "year"), model = "within", effect = "twoways")
> fm