'data.frame': 2756 obs. of 20 variables: $ school : int 1 1 1 1 1 1 1 1 1 1 ... $ alc7 : int 1 1 1 7 3 6 1 5 4 3 ... $ rskreb71: int 1 3 1 2 1 NA 1 2 1 2 ... $ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ... $ rskreb73: int NA NA NA NA NA NA NA 2 1 2 ... $ rskreb74: int NA NA NA NA NA NA NA 3 2 4 ... $ likepa71: int 4 2 3 3 2 NA 1 4 3 3 ... $ likepa72: int 5 2 4 2 2 NA 5 3 3 2 ... $ likepa73: int 4 1 3 3 2 NA 1 3 2 3 ... $ likepa74: int 5 3 1 5 4 4 3 4 3 2 ... $ likepa75: int 4 4 4 4 3 3 4 4 3 3 ... $ posatt71: int 1 1 1 1 1 2 1 NA NA NA ... $ posatt72: int 1 2 1 1 1 2 4 NA NA NA ... $ posatt73: int 1 1 1 1 1 2 1 NA NA NA ... $ alc8 : int 1 8 4 8 5 7 1 3 5 3 ... $ rskreb81: int 1 4 1 2 2 3 2 3 1 4 ... $ rskreb82: int NA NA NA NA NA NA NA 3 1 4 ... $ rskreb83: int NA NA NA NA NA NA NA 2 1 2 ... $ rskreb84: int NA NA NA NA NA NA NA 3 2 4 ... $ alc9 : int 3 NA 7 NA 5 7 NA 6 6 7 ...
Dataset data.graham.ex6:
'data.frame': 2756 obs. of 9 variables: $ school : int 1 1 1 1 1 1 1 1 1 1 ... $ program : int 0 0 0 0 0 0 0 0 0 0 ... $ alc7 : int 1 1 1 7 3 6 1 5 4 3 ... $ riskreb7: int 1 3 1 2 1 NA 1 2 1 2 ... $ likepar7: int 4 2 3 3 2 NA 1 4 3 3 ... $ posatt7 : int 1 1 1 1 1 2 1 NA NA NA ... $ alc8 : int 1 8 4 8 5 7 1 3 5 3 ... $ riskreb8: int 1 4 1 2 2 3 2 3 1 4 ... $ alc9 : int 3 NA 7 NA 5 7 NA 6 6 7 ...
Dataset data.graham.ex8a:
'data.frame': 1023 obs. of 20 variables: $ skill1 : int 28 29 27 29 29 NA NA NA 29 NA ... $ skill2 : int NA NA 29 29 NA NA NA NA NA 21 ... $ skill3 : int NA NA 29 29 29 NA 28 10 29 25 ... $ skill4 : int NA 29 25 29 29 28 29 NA NA NA ... $ skill5 : int 29 29 28 28 29 NA 29 10 NA 25 ... $ iplanV1: int 14 18 15 17 16 NA NA NA 18 NA ... $ iplanV2: int NA NA 17 16 NA NA NA NA NA 16 ... $ iplanV3: int NA NA 16 18 18 NA 17 1 18 16 ... $ iplanV4: int NA 18 14 18 14 6 18 NA NA NA ... $ iplanV5: int 13 18 12 18 18 NA 18 3 NA 5 ... $ planA1 : int 1 0 2 8 3 NA NA NA 7 NA ... $ planA2 : int NA NA 0 4 NA NA NA NA NA 6 ... $ planA3 : int NA NA 1 4 7 NA 2 0 1 7 ... $ planA4 : int NA 8 0 4 6 0 0 NA NA NA ... $ planA5 : int 0 7 1 5 7 NA 2 0 NA 6 ... $ planV1 : int NA NA NA NA NA NA NA NA NA NA ... $ planV2 : int NA NA NA NA NA NA NA NA NA 1 ... $ planV3 : int NA NA 1 NA NA NA NA 0 NA 1 ... $ planV4 : int NA NA NA NA 2 NA NA NA NA NA ... $ planV5 : int 2 NA 2 NA NA NA NA 0 NA NA ...
Dataset data.graham.ex8b:
'data.frame': 2570 obs. of 6 variables: $ rskreb71: int 1 3 1 2 1 NA 1 2 1 2 ... $ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ... $ posatt71: int 1 1 1 1 1 2 1 NA NA NA ... $ posatt72: int 1 2 1 1 1 2 4 NA NA NA ... $ posatt73: int 1 1 1 1 1 2 1 NA NA NA ... $ posatt : int 3 4 3 3 3 6 6 NA NA NA ...
Dataset data.graham.ex8c:
'data.frame': 2756 obs. of 16 variables: $ s1 : int 1 1 1 1 1 1 1 1 1 1 ... $ s2 : int 0 0 0 0 0 0 0 0 0 0 ... $ s3 : int 0 0 0 0 0 0 0 0 0 0 ... $ s4 : int 0 0 0 0 0 0 0 0 0 0 ... $ s5 : int 0 0 0 0 0 0 0 0 0 0 ... $ s6 : int 0 0 0 0 0 0 0 0 0 0 ... $ s7 : int 0 0 0 0 0 0 0 0 0 0 ... $ s8 : int 0 0 0 0 0 0 0 0 0 0 ... $ s9 : int 0 0 0 0 0 0 0 0 0 0 ... $ s10 : int 0 0 0 0 0 0 0 0 0 0 ... $ s11 : int 0 0 0 0 0 0 0 0 0 0 ... $ xalc7 : int 1 1 1 7 3 6 1 5 4 3 ... $ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ... $ likepa71: int 4 2 3 3 2 NA 1 4 3 3 ... $ posatt71: int 1 1 1 1 1 2 1 NA NA NA ... $ alc8 : int 1 8 4 8 5 7 1 3 5 3 ...
Source
The datasets were downloaded from
http://methodology.psu.edu/pubs/books/missing.
References
Graham, J. W. (2012). Missing data. New York: Springer.
Examples
## Not run:
library(mitools)
library(mice)
library(Amelia)
library(jomo)
#############################################################################
# EXAMPLE 1: data.graham.ex8a | Imputation under multivariate normal model
#############################################################################
# Multiply imputes the first ten columns of data.graham.ex8a with three
# different packages (mice, Amelia, jomo) and converts each result into a
# list of M completed datasets.
data(data.graham.ex8a)
dat <- data.graham.ex8a
dat <- dat[, 1:10]
vars <- colnames(dat)
V <- length(vars)
# remove persons with completely missing data
# (a row is dropped when the proportion of NA entries equals 1)
dat <- dat[rowMeans(is.na(dat)) < 1, ]
summary(dat)
# some descriptive statistics
psych::describe(dat)
#**************
# imputation under a multivariate normal model
M <- 7 # number of imputations
#--------- mice package
# define imputation method: one "norm" entry per variable, named by column
impM <- rep("norm", V)
names(impM) <- vars
# mice imputation
# The per-variable methods must be supplied through 'method'; the former
# alias 'imputationMethod' is not an argument of current mice::mice().
imp1a <- mice::mice(dat, method = impM, m = M, maxit = 4)
summary(imp1a)
# convert into a list of datasets
datlist1a <- miceadds::mids2datlist(imp1a)
#--------- Amelia package
imp1b <- Amelia::amelia(dat, m = M)
summary(imp1b)
# the completed datasets are stored in the 'imputations' element
datlist1b <- imp1b$imputations
#--------- jomo package
imp1c <- jomo::jomo1con(Y = dat, nburn = 100, nbetween = 10, nimp = M)
str(imp1c)
# convert into a list of datasets
datlist1c <- miceadds::jomo2datlist(imp1c)
# alternatively one can use the jomo wrapper function
imp1c1 <- jomo::jomo(Y = dat, nburn = 100, nbetween = 10, nimp = M)
#############################################################################
# EXAMPLE 2: data.graham.ex8b | Imputation with categorical variables
#############################################################################
# Imputes data.graham.ex8b once with mice (default methods) and once with
# jomo treating every variable as categorical, so that the resulting
# frequency tables can be compared afterwards.
data(data.graham.ex8b)
dat <- data.graham.ex8b
vars <- colnames(dat)
V <- length(vars)
# descriptive statistics
psych::describe(dat)
#*******************************
# imputation in mice using predictive mean matching
imp1a <- mice::mice(dat, m = 5, maxit = 10)
datlist1a <- mitools::imputationList(miceadds::mids2datlist(imp1a))
print(datlist1a)
#*******************************
# imputation in jomo treating all variables as categorical
# Note that variables must have values from 1 to N
# use categorize function from sirt package here
dat.categ <- sirt::categorize(dat, categorical = colnames(dat), lowest = 1)
dat0 <- dat.categ$data
# number of categories per variable = largest observed (recoded) value
Y_numcat <- apply(dat0, 2, max, na.rm = TRUE)
imp1b <- jomo::jomo1cat(
  Y.cat = dat0, Y.numcat = Y_numcat, nburn = 100,
  nbetween = 10, nimp = 5
)
# recode original categories
datlist1b <- sirt::decategorize(imp1b, categ_design = dat.categ$categ_design)
# convert into a list of datasets
datlist1b <- miceadds::jomo2datlist(datlist1b)
datlist1b <- mitools::imputationList(datlist1b)
# Alternatively, jomo can be used but categorical variables must be
# declared as factors
dat <- dat0
# define two variables as factors
# (scan.vec is qualified with its namespace because miceadds itself is not
#  attached by the library() calls of this example)
vars <- miceadds::scan.vec(" rskreb71 rskreb72")
for (vv in vars) {
  dat[, vv] <- as.factor(dat[, vv])
}
# use jomo
imp1b1 <- jomo::jomo(Y = dat, nburn = 30, nbetween = 10, nimp = 5)
#****************************
# Relative frequency table of a vector: the counts from table() divided by
# their total. Missing values are not counted, as table() drops them by default.
fun_prop <- function(variable) {
  prop.table(table(variable))
}
# --- rskreb71: category proportions in every imputed dataset, then pooled
res1a <- with(datlist1a, fun_prop(rskreb71))
res1b <- with(datlist1b, fun_prop(rskreb71))
summary(
  miceadds::NMIcombine(qhat = res1a, NMI = FALSE)
)
summary(
  miceadds::NMIcombine(qhat = res1b, NMI = FALSE)
)
# --- posatt: same comparison for the second variable
res2a <- with(datlist1a, fun_prop(posatt))
res2b <- with(datlist1b, fun_prop(posatt))
summary(
  miceadds::NMIcombine(qhat = res2a, NMI = FALSE)
)
summary(
  miceadds::NMIcombine(qhat = res2b, NMI = FALSE)
)
## End(Not run)