Last data update: 2014.03.03

R: Dataset Reading
data.readR Documentation

Dataset Reading

Description

This dataset contains N=328 students and I=12 items measuring reading competence. All 12 items are arranged into 3 testlets (items with common text stimulus) labeled as A, B and C. The allocation of items to testlets is indicated by their variable names.

Usage

data(data.read)

Format

A data frame with 328 persons on the following 12 variables. Rows correspond to persons and columns to items. The following items are included in data.read:

Testlet A: A1, A2, A3, A4

Testlet B: B1, B2, B3, B4

Testlet C: C1, C2, C3, C4

Examples

## Not run: 
data(data.read)
dat <- data.read
I <- ncol(dat)

# list of needed packages for the following examples
packages <- scan(what="character")
     eRm  ltm  TAM mRm  CDM  mirt psychotools  IsingFit  igraph  qgraph  pcalg 
     poLCA  randomLCA psychomix MplusAutomation lavaan
        
# load packages. make an installation if necessary     
miceadds::library_install(packages)

#*****************************************************
# Model 1: Rasch model
#*****************************************************

#----  M1a: rasch.mml2 (in sirt)
mod1a <- sirt::rasch.mml2(dat)
summary(mod1a)

#----  M1b: smirt (in sirt)
Qmatrix <- matrix(1,nrow=I , ncol=1)
mod1b <- sirt::smirt(dat,Qmatrix=Qmatrix)
summary(mod1b)

#----  M1c: gdm (in CDM)
theta.k <- seq(-6,6,len=21)
mod1c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="1PL", skillspace="normal")
summary(mod1c)

#----  M1d: tam.mml (in TAM)
mod1d <- TAM::tam.mml( resp=dat )
summary(mod1d)

#----  M1e: RM (in eRm) 
mod1e <- eRm::RM( dat )
  # eRm uses Conditional Maximum Likelihood (CML) as the estimation method.
summary(mod1e)
eRm::plotPImap(mod1e)

#----  M1f: mrm (in mRm)
mod1f <- mRm::mrm( dat , cl=1)   # CML estimation
mod1f$beta  # item parameters

#----  M1g: mirt (in mirt)
mod1g <- mirt::mirt( dat , model=1 , itemtype="Rasch" , verbose=TRUE )
print(mod1g)
summary(mod1g)
coef(mod1g)
    # arrange coefficients in nicer layout
mirt.wrapper.coef(mod1g)$coef  

#----  M1h: rasch (in ltm)
mod1h <- ltm::rasch( dat  , control=list(verbose=TRUE ) )
summary(mod1h)
coef(mod1h)

#----  M1i: RaschModel.fit (in psychotools)
mod1i <- psychotools::RaschModel.fit(dat)  # CML estimation
summary(mod1i)
plot(mod1i)

#----  M1j: noharm.sirt (in sirt)
Fpatt <- matrix( 0 , I , 1 )
Fval <- 1 + 0*Fpatt
Ppatt <- Pval <- matrix(1,1,1)
mod1j <- sirt::noharm.sirt( dat=dat , Ppatt=Ppatt,Fpatt=Fpatt , Fval=Fval , Pval=Pval )
summary(mod1j)
  #   Normal-ogive model, multiply item discriminations with constant D=1.7. 
  #   The same holds for other examples with noharm.sirt and R2noharm.
plot(mod1j) 

#----  M1k: rasch.pml3 (in sirt)
mod1k <- sirt::rasch.pml3( dat=dat)
  #         pairwise marginal maximum likelihood estimation
summary(mod1k)

#----  M1l: running Mplus (using MplusAutomation package)
mplus_path <- "c:/Mplus7/Mplus.exe"    # locate Mplus executable
  # specify Mplus object
mplusmod <- MplusAutomation::mplusObject(
    TITLE = "1PL in Mplus ;" , 
    VARIABLE = paste0( "CATEGORICAL ARE " , paste0(colnames(dat),collapse=" ") ) , 
    MODEL = "
       ! fix all item loadings to 1
       F1 BY A1@1 A2@1 A3@1 A4@1 ;
       F1 BY B1@1 B2@1 B3@1 B4@1 ;
       F1 BY C1@1 C2@1 C3@1 C4@1 ;
       ! estimate variance
       F1 ;
            ",
    ANALYSIS = "ESTIMATOR=MLR;" , 
    OUTPUT = "stand;" ,
    usevariables = colnames(dat)  ,  rdata = dat )
  # write Mplus syntax
filename <- "mod1u"   # specify file name
  # create Mplus syntaxes
res2 <- MplusAutomation::mplusModeler(object = mplusmod , dataout = paste0(filename,".dat") , 
               modelout= paste0(filename,".inp"), run = 0 )         
  # run Mplus model
MplusAutomation::runModels( filefilter = paste0(filename,".inp"), Mplus_command = mplus_path)
  # alternatively, the system() command can also be used
  # get results
mod1l <- MplusAutomation::readModels(target = getwd() , filefilter = filename )
mod1l$summaries    # summaries
mod1l$parameters$unstandardized   # parameter estimates

#*****************************************************
# Model 2: 2PL model
#*****************************************************

#----  M2a: rasch.mml2 (in sirt)
mod2a <- sirt::rasch.mml2(dat , est.a=1:I)
summary(mod2a)

#----  M2b: smirt (in sirt)
mod2b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL")
summary(mod2b)

#----  M2c: gdm (in CDM)
mod2c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="2PL", skillspace="normal")
summary(mod2c)

#----  M2d: tam.mml (in TAM)
mod2d <- TAM::tam.mml.2pl( resp=dat )
summary(mod2d)

#----  M2e: mirt (in mirt)
mod2e <- mirt::mirt( dat , model=1 , itemtype="2PL" )
print(mod2e)
summary(mod2e)
mirt.wrapper.coef(mod1g)$coef

#----  M2f: ltm (in ltm)
mod2f <- ltm::ltm( dat ~ z1 , control=list(verbose=TRUE ) )
summary(mod2f)
coef(mod2f)
plot(mod2f)

#----  M2g: R2noharm (in NOHARM, running from within R using sirt package)
  # define noharm.path where 'NoharmCL.exe' is located
noharm.path <- "c:/NOHARM"
  # covariance matrix
P.pattern <- matrix( 1 , ncol=1 , nrow=1 )
P.init <- P.pattern
P.init[1,1] <- 1
  # loading matrix
F.pattern <- matrix(1,I,1)
F.init <- F.pattern
  # estimate model
mod2g <- sirt::R2noharm( dat = dat , model.type="CFA" , F.pattern = F.pattern , 
             F.init = F.init , P.pattern = P.pattern , P.init = P.init , 
             writename = "ex2g" , noharm.path = noharm.path  , dec ="," )
summary(mod2g)

#----  M2h: noharm.sirt (in sirt)
mod2h <- sirt::noharm.sirt( dat=dat , Ppatt=P.pattern,Fpatt=F.pattern , 
              Fval=F.init , Pval=P.init )
summary(mod2h)
plot(mod2h)

#----  M2i: rasch.pml2 (in sirt)
mod2i <- sirt::rasch.pml2(dat, est.a=1:I)
summary(mod2i)

#----  M2j: WLSMV estimation with cfa (in lavaan)
lavmodel <- "F =~ A1+A2+A3+A4+B1+B2+B3+B4+
                        C1+C2+C3+C4"
mod2j <- lavaan::cfa( data=dat , model=lavmodel, std.lv = TRUE, ordered=colnames(dat))
summary(mod2j , standardized=TRUE , fit.measures=TRUE , rsquare=TRUE)

#*****************************************************
# Model 3: 3PL model (note that results can be quite unstable!)
#*****************************************************

#----  M3a: rasch.mml2 (in sirt)
mod3a <- sirt::rasch.mml2(dat , est.a=1:I, est.c=1:I)
summary(mod3a)

#----  M3b: smirt (in sirt)
mod3b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL" , est.c=1:I)
summary(mod3b)

#----  M3c: mirt (in mirt)
mod3c <- mirt::mirt( dat , model=1 , itemtype="3PL" , verbose=TRUE)
summary(mod3c)
coef(mod3c)
  # stabilize parameter estimating using informative priors for guessing parameters
mirtmodel <- mirt::mirt.model("
            F = 1-12
            PRIOR = (1-12, g, norm, -1.38, 0.25)
            ")
  # a prior N(-1.38,.25) is specified for transformed guessing parameters: qlogis(g)
  # simulate values from this prior for illustration
N <- 100000
logit.g <- stats::rnorm(N, mean=-1.38 , sd=sqrt(.5) )
graphics::plot( stats::density(logit.g) )  # transformed qlogis(g)
graphics::plot( stats::density( stats::plogis(logit.g)) )  # g parameters
  # estimate 3PL with priors
mod3c1 <- mirt::mirt(dat, mirtmodel, itemtype = "3PL",verbose=TRUE)
coef(mod3c1)
  # In addition, set upper bounds for g parameters of .35
mirt.pars <- mirt::mirt( dat , mirtmodel , itemtype = "3PL" ,  pars="values")
ind <- which( mirt.pars$name == "g" )
mirt.pars[ ind , "value" ] <- stats::plogis(-1.38)
mirt.pars[ ind , "ubound" ] <- .35
  # prior distribution for slopes
ind <- which( mirt.pars$name == "a1" )
mirt.pars[ ind , "prior_1" ] <- 1.3
mirt.pars[ ind , "prior_2" ] <- 2
mod3c2 <- mirt::mirt(dat, mirtmodel, itemtype = "3PL",
                pars=mirt.pars,verbose=TRUE , technical=list(NCYCLES=100) )
coef(mod3c2)
mirt.wrapper.coef(mod3c2)

#----  M3d: ltm (in ltm)
mod3d <- ltm::tpm( dat , control=list(verbose=TRUE ) , max.guessing=.3)
summary(mod3d)
coef(mod3d) # => numerical instabilities

#*****************************************************
# Model 4: 3-dimensional Rasch model
#*****************************************************

# define Q-matrix
Q <- matrix( 0 , nrow=12 , ncol=3 )
Q[ cbind(1:12 , rep(1:3,each=4) ) ] <- 1
rownames(Q) <- colnames(dat)
colnames(Q) <- c("A","B","C")

# define nodes
theta.k <- seq(-6,6,len=13 )

#----  M4a: smirt (in sirt)
mod4a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp" , theta.k=theta.k , maxiter=30)
summary(mod4a)

#----  M4b: rasch.mml2 (in sirt)
mod4b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k , mmliter=30)
summary(mod4b)

#----  M4c: gdm (in CDM)
mod4c <- CDM::gdm( dat , irtmodel="1PL" , theta.k=theta.k , skillspace="normal" , 
            Qmatrix=Q , maxiter=30 , centered.latent=TRUE )
summary(mod4c)

#----  M4d: tam.mml (in TAM)
mod4d <- TAM::tam.mml( resp=dat , Q=Q , control=list(nodes=theta.k , maxiter=30) )
summary(mod4d)

#----  M4e: R2noharm (in NOHARM, running from within R using sirt package)
noharm.path <- "c:/NOHARM"
  # covariance matrix
P.pattern <- matrix( 1 , ncol=3 , nrow=3 )
P.init <- 0.8+0*P.pattern
diag(P.init) <- 1
  # loading matrix
F.pattern <- 0*Q
F.init <- Q
  # estimate model
mod4e <- sirt::R2noharm( dat = dat , model.type="CFA" , F.pattern = F.pattern , 
    F.init = F.init , P.pattern = P.pattern , P.init = P.init , 
    writename = "ex4e" , noharm.path = noharm.path  , dec ="," )
summary(mod4e)

#----  M4f: mirt (in mirt)
cmodel <- mirt::mirt.model("
     F1 = 1-4
     F2 = 5-8 
     F3 = 9-12
     # equal item slopes correspond to the Rasch model
     CONSTRAIN = (1-4, a1), (5-8, a2) , (9-12,a3)
     COV = F1*F2, F1*F3 , F2*F3 
     " )
mod4f <- mirt::mirt(dat, cmodel , verbose=TRUE)
summary(mod4f)

#*****************************************************
# Model 5: 3-dimensional 2PL model
#*****************************************************

#----  M5a: smirt (in sirt)
mod5a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp" , est.a="2PL" , theta.k=theta.k , 
                 maxiter=30)
summary(mod5a)

#----  M5b: rasch.mml2 (in sirt)
mod5b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k ,est.a=1:12, mmliter=30)
summary(mod5b)

#----  M5c: gdm (in CDM)
mod5c <- CDM::gdm( dat , irtmodel="2PL" , theta.k=theta.k , skillspace="loglinear" , 
            Qmatrix=Q , maxiter=30 , centered.latent=TRUE ,
            standardized.latent=TRUE)
summary(mod5c)

#----  M5d: tam.mml (in TAM)
mod5d <- TAM::tam.mml.2pl( resp=dat , Q=Q , control=list(nodes=theta.k , maxiter=30) )
summary(mod5d)

#----  M5e: R2noharm (in NOHARM, running from within R using sirt package)
noharm.path <- "c:/NOHARM"
  # covariance matrix
P.pattern <- matrix( 1 , ncol=3 , nrow=3 )
diag(P.pattern) <- 0
P.init <- 0.8+0*P.pattern
diag(P.init) <- 1
  # loading matrix
F.pattern <- Q
F.init <- Q
  # estimate model
mod5e <- sirt::R2noharm( dat = dat , model.type="CFA" , F.pattern = F.pattern , 
    F.init = F.init , P.pattern = P.pattern , P.init = P.init , 
    writename = "ex5e" , noharm.path = noharm.path  , dec ="," )
summary(mod5e)

#----  M5f: mirt (in mirt)
cmodel <- mirt::mirt.model("
   F1 = 1-4
   F2 = 5-8
   F3 = 9-12
   COV = F1*F2, F1*F3 , F2*F3 
   "  )
mod5f <- mirt::mirt(dat, cmodel , verbose=TRUE)
summary(mod5f)

#*****************************************************
# Model 6: Network models (Graphical models)
#*****************************************************

#----  M6a: Ising model using the IsingFit package (undirected graph)
#        - fit Ising model using the "OR rule" (AND=FALSE)
mod6a <- IsingFit::IsingFit(x=dat, family="binomial" , AND=FALSE)
summary(mod6a)
##           Network Density:                 0.29 
##    Gamma:                  0.25 
##    Rule used:              Or-rule 
# plot results
qgraph::qgraph(mod6a$weiadj,fade = FALSE)

#**-- graph estimation using pcalg package

# some packages from Bioconductor must be downloaded at first (if not yet done)
if (FALSE){  # set 'if (TRUE)' if packages should be downloaded 
     source("http://bioconductor.org/biocLite.R")
     biocLite("RBGL")
     biocLite("Rgraphviz")
	}

#----  M6b: graph estimation based on Pearson correlations
V <- colnames(dat)
n <- nrow(dat)
mod6b <- pcalg::pc(suffStat = list(C = stats::cor(dat), n = n ),
             indepTest = gaussCItest, ## indep.test: partial correlations
             alpha=0.05, labels = V, verbose = TRUE)
plot(mod6b)
# plot in qgraph package
qgraph::qgraph(mod6b , label.color= rep( c( "red" , "blue","darkgreen" ) , each=4 ) ,
         edge.color="black")
summary(mod6b)

#----  M6c: graph estimation based on tetrachoric correlations
mod6c <- pcalg::pc(suffStat = list(C = tetrachoric2(dat)$rho, n = n ),
             indepTest = gaussCItest, alpha=0.05, labels = V, verbose = TRUE)
plot(mod6c)
summary(mod6c)

#----  M6d: Statistical implicative analysis (in sirt)
mod6d <- sirt::sia.sirt(dat , significance=.85 )
  # plot results with igraph and qgraph package
plot( mod6d$igraph.obj  , vertex.shape="rectangle" , vertex.size=30 )
qgraph::qgraph( mod6d$adj.matrix )

#*****************************************************
# Model 7: Latent class analysis with 3 classes
#*****************************************************

#----  M7a: randomLCA (in randomLCA)
  #        - use two trials of starting values
mod7a <- randomLCA::randomLCA(dat,nclass=3, notrials=2, verbose=TRUE)
summary(mod7a)
plot(mod7a,type="l" , xlab="Item")

#----  M7b: rasch.mirtlc (in sirt)
mod7b <- sirt::rasch.mirtlc( dat , Nclasses = 3 ,seed= -30 ,  nstarts=2 )   
summary(mod7b)
matplot( t(mod7b$pjk) , type="l" , xlab="Item" )

#----  M7c: poLCA (in poLCA)
  #   define formula for outcomes
f7c <- paste0( "cbind(" , paste0(colnames(dat),collapse=",") , ") ~ 1 " )
dat1 <- as.data.frame( dat + 1 ) # poLCA needs integer values from 1,2,..
mod7c <- poLCA::poLCA( stats::as.formula(f7c),dat1,nclass=3 , verbose=TRUE)
plot(mod7c)

#----  M7d: gom.em (in sirt) 
  #    - the latent class model is a special grade of membership model
mod7d <- sirt::gom.em( dat , K=3 , problevels=c(0,1) ,  model="GOM"  )            
summary(mod7d)

#---- - M7e: mirt (in mirt)
  # define three latent classes
Theta <- diag(3)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        C1 = 1-12
        C2 = 1-12 
        C3 = 1-12
        ")
  # get initial parameter values
mod.pars <- mirt::mirt(dat, model=mirtmodel ,  pars = "values")   
  # modify parameters: only slopes refer to item-class probabilities
set.seed(9976)
  # set starting values for class specific item probabilities
mod.pars[ mod.pars$name == "d" ,"value" ]  <- 0
mod.pars[ mod.pars$name == "d" ,"est" ]  <- FALSE
b1 <- stats::qnorm( colMeans( dat ) )
mod.pars[ mod.pars$name == "a1" ,"value" ]  <- b1
  # random starting values for other classes
mod.pars[ mod.pars$name %in% c("a2","a3") ,"value" ]  <- b1 + stats::runif( 12*2 , -1 ,1 )
mod.pars
  #** define prior for latent class analysis
lca_prior <- function(Theta,Etable){
  # number of latent Theta classes
  TP <- nrow(Theta)
  # prior in initial iteration
  if ( is.null(Etable) ){ prior <- rep( 1/TP , TP ) }    
  # process Etable (this is correct for datasets without missing data)
  if ( ! is.null(Etable) ){  
    # sum over correct and incorrect expected responses 
    prior <- ( rowSums(Etable[ , seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I
                 }
  prior <- prior / sum(prior)  
  return(prior)
}
  #** estimate model
mod7e <- mirt::mirt(dat, mirtmodel , pars = mod.pars , verbose=TRUE , 
            technical = list( customTheta=Theta , customPriorFun = lca_prior) )
  # compare estimated results
print(mod7e)
summary(mod7b)
  # The number of estimated parameters is incorrect because mirt does not correctly count
  # estimated parameters from the user customized  prior distribution.
mod7e@nest <- as.integer(sum(mod.pars$est) + 2)  # two additional class probabilities
  # extract log-likelihood
mod7e@logLik
  # compute AIC and BIC
( AIC <- -2*mod7e@logLik+2*mod7e@nest )
( BIC <- -2*mod7e@logLik+log(mod7e@Data$N)*mod7e@nest )
  # RMSEA and SRMSR fit statistic
mirt::M2(mod7e)     # TLI and CFI does not make sense in this example            
  #** extract item parameters
mirt.wrapper.coef(mod7e)
  #** extract class-specific item-probabilities
probs <- apply( coef1[ , c("a1","a2","a3") ] , 2 , stats::plogis )
matplot( probs , type="l" , xlab="Item" , main="mirt::mirt")
  #** inspect estimated distribution
mod7e@Theta
mod7e@Prior[[1]]

#*****************************************************
# Model 8: Mixed Rasch model with two classes
#*****************************************************

#----  M8a: raschmix (in psychomix)
mod8a <- psychomix::raschmix(data= as.matrix(dat) , k = 2, scores = "saturated")
summary(mod8a)

#----  M8b: mrm (in mRm)
mod8b <- mRm::mrm(data.matrix=dat, cl=2)
mod8b$conv.to.bound
plot(mod8b)
print(mod8b)

#----  M8c: mirt (in mirt)
  #* define theta grid
theta.k <- seq( -5 , 5 , len=9 )
TP <- length(theta.k)
Theta <- matrix( 0 , nrow=2*TP , ncol=4)
Theta[1:TP,1:2] <- cbind(theta.k , 1 )
Theta[1:TP + TP,3:4] <- cbind(theta.k , 1 )
Theta
  # define model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        F1a = 1-12  # slope Class 1
        F1b = 1-12  # difficulty Class 1
        F2a = 1-12  # slope Class 2
        F2b = 1-12  # difficulty Class 2
        CONSTRAIN = (1-12,a1),(1-12,a3)
        ")
  # get initial parameter values
mod.pars <- mirt::mirt(dat, model=mirtmodel ,  pars = "values")   
  # set starting values for class specific item probabilities
mod.pars[ mod.pars$name == "d" ,"value" ]  <- 0
mod.pars[ mod.pars$name == "d" ,"est" ]  <- FALSE
mod.pars[ mod.pars$name == "a1" ,"value" ]  <- 1
mod.pars[ mod.pars$name == "a3" ,"value" ]  <- 1
  # initial values difficulties
b1 <-  stats::qlogis( colMeans(dat) )
mod.pars[ mod.pars$name == "a2" ,"value" ]  <- b1
mod.pars[ mod.pars$name == "a4" ,"value" ]  <- b1 + stats::runif(I , -1 , 1)
  #* define prior for mixed Rasch analysis
mixed_prior <- function(Theta,Etable){
  NC <- 2   # number of theta classes
  TP <- nrow(Theta) / NC
  prior1 <- stats::dnorm( Theta[1:TP,1] )
  prior1 <- prior1 / sum(prior1) 
  if ( is.null(Etable) ){   prior <- c( prior1 , prior1 ) }
  if ( ! is.null(Etable) ){  
    prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + 
                   rowSums( Etable[,seq(2,2*I,2)]) )/I
    a1 <- stats::aggregate( prior , list( rep(1:NC , each=TP) ) , sum )
    a1[,2] <- a1[,2] / sum( a1[,2])
    # print some information during estimation
    cat( paste0( " Class proportions: " , 
              paste0( round(a1[,2] , 3 ) , collapse= " " ) ) , "\n")
    a1 <- rep( a1[,2] , each=TP )
    # specify mixture of two normal distributions
    prior <- a1*c(prior1,prior1)
         }  
  prior <- prior / sum(prior)  
  return(prior)
     }
  #* estimate model
mod8c <- mirt::mirt(dat, mirtmodel , pars=mod.pars , verbose=TRUE ,
        technical = list(  customTheta=Theta , customPriorFun = mixed_prior ) )
  # Like in Model 7e, the number of estimated parameters must be included.
mod8c@nest <- as.integer(sum(mod.pars$est) + 1)  
      # two class proportions and therefore one probability is freely estimated.
  #* extract item parameters
mirt.wrapper.coef(mod8c)
  #* estimated distribution
mod8c@Theta
mod8c@Prior

#----  M8d: tamaan (in TAM)

tammodel <- "
ANALYSIS:
  TYPE=MIXTURE ;
  NCLASSES(2);
  NSTARTS(7,20);
LAVAAN MODEL:
  F =~ A1__C4
  F ~~ F
ITEM TYPE:
  ALL(Rasch);
    "    
mod8d <- TAM::tamaan( tammodel , resp=dat )
summary(mod8d)
# plot item parameters
I <- 12
ipars <- mod8d$itempartable_MIXTURE[ 1:I , ]
plot( 1:I , ipars[,3] , type="o" , ylim= range( ipars[,3:4] ) , pch=16 ,
        xlab="Item" , ylab="Item difficulty")
lines( 1:I , ipars[,4] , type="l", col=2 , lty=2)
points( 1:I , ipars[,4] ,  col=2 , pch=2)

#*****************************************************
# Model 9: Mixed 2PL model with two classes
#*****************************************************

#----  M9a: tamaan (in TAM)

tammodel <- "
ANALYSIS:
  TYPE=MIXTURE ;
  NCLASSES(2);
  NSTARTS(10,30);
LAVAAN MODEL:
  F =~ A1__C4
  F ~~ F
ITEM TYPE:
  ALL(2PL);
    "       
mod9a <- TAM::tamaan( tammodel , resp=dat )
summary(mod9a)

#*****************************************************
# Model 10: Rasch testlet model
#*****************************************************

#----  M10a: tam.fa (in TAM)
dims <- substring( colnames(dat),1,1 )  # define dimensions
mod10a <- TAM::tam.fa( resp=dat , irtmodel="bifactor1" , dims=dims ,
                control=list(maxiter=60) )
summary(mod10a)

#----  M10b: mirt (in mirt)
cmodel <- mirt::mirt.model("
        G = 1-12 
        A = 1-4
        B = 5-8
        C = 9-12
        CONSTRAIN = (1-12,a1), (1-4, a2), (5-8, a3) , (9-12,a4)    
      ")
mod10b <- mirt::mirt(dat, model=cmodel , verbose=TRUE)
summary(mod10b)
coef(mod10b)
mod10b@logLik   # equivalent is slot( mod10b , "logLik")

#alternatively, using a dimensional reduction approach (faster and better accuracy)
cmodel <- mirt::mirt.model("
      G = 1-12
      CONSTRAIN = (1-12,a1), (1-4, a2), (5-8, a3) , (9-12,a4)
     ")
item_bundles <- rep(c(1,2,3), each = 4)
mod10b1 <- mirt::bfactor(dat, model=item_bundles, model2=cmodel , verbose=TRUE)
coef(mod10b1)

#----  M10c: smirt (in sirt)
  # define Q-matrix
Qmatrix <- matrix(0,12,4)
Qmatrix[,1] <- 1
Qmatrix[ cbind( 1:12 , match( dims , unique(dims)) +1 ) ]  <- 1
  # uncorrelated factors
variance.fixed <- cbind( c(1,1,1,2,2,3) , c(2,3,4,3,4,4) , 0 )
  # estimate model
mod10c <- sirt::smirt( dat , Qmatrix=Qmatrix , irtmodel="comp" ,
              variance.fixed=variance.fixed , qmcnodes=1000 , maxiter=60)
summary(mod10c)

#*****************************************************
# Model 11: Bifactor model
#*****************************************************

#----  M11a: tam.fa (in TAM)
dims <- substring( colnames(dat),1,1 )  # define dimensions
mod11a <- TAM::tam.fa( resp=dat , irtmodel="bifactor2" , dims=dims ,
                 control=list(maxiter=60) )
summary(mod11a)

#----  M11b: bfactor (in mirt)
dims1 <- match( dims , unique(dims) )
mod11b <- mirt::bfactor(dat, model=dims1 , verbose=TRUE)
summary(mod11b)
coef(mod11b)
mod11b@logLik

#----  M11c: smirt (in sirt)
  # define Q-matrix
Qmatrix <- matrix(0,12,4)
Qmatrix[,1] <- 1
Qmatrix[ cbind( 1:12 , match( dims , unique(dims)) +1 ) ]  <- 1
  # uncorrelated factors
variance.fixed <- cbind( c(1,1,1,2,2,3) , c(2,3,4,3,4,4) , 0 )
  # estimate model
mod11c <- sirt::smirt( dat , Qmatrix=Qmatrix , irtmodel="comp" , est.a="2PL" ,  
                variance.fixed=variance.fixed , qmcnodes=1000 , maxiter=60)
summary(mod11c)

#*****************************************************
# Model 12: Located latent class model: Rasch model with three theta classes
#*****************************************************

# use 10th item as the reference item
ref.item <- 10
# ability grid
theta.k <- seq(-4,4,len=9)

#----  M12a: rasch.mirtlc (in sirt)
mod12a <- sirt::rasch.mirtlc(dat , Nclasses=3, modeltype="MLC1" , ref.item=ref.item )
summary(mod12a)

#----  M12b: gdm (in CDM)
theta.k <- seq(-1 , 1 , len=3)      # initial matrix
b.constraint <- matrix( c(10,1,0) , nrow=1,ncol=3)
  # estimate model
mod12b <- CDM::gdm( dat , theta.k = theta.k , skillspace="est" , irtmodel="1PL",
              b.constraint=b.constraint , maxiter=200)
summary(mod12b)

#----  M12c: mirt (in mirt)
items <- colnames(dat)
  # define three latent classes
Theta <- diag(3)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        C1 = 1-12
        C2 = 1-12 
        C3 = 1-12
        CONSTRAIN = (1-12,a1),(1-12,a2),(1-12,a3)
        ")
  # get parameters
mod.pars <- mirt(dat, model=mirtmodel ,  pars = "values")   
 # set starting values for class specific item probabilities
mod.pars[ mod.pars$name == "d" ,"value" ]  <- stats::qlogis( colMeans(dat,na.rm=TRUE) )
  # set item difficulty of reference item to zero
ind <- which( ( paste(mod.pars$item) == items[ref.item] ) & 
               ( ( paste(mod.pars$name) == "d" ) ) )                        
mod.pars[ ind ,"value" ]  <- 0
mod.pars[ ind ,"est" ]  <- FALSE
  # initial values for a1, a2 and a3
mod.pars[ mod.pars$name %in% c("a1","a2","a3") ,"value" ]  <- c(-1,0,1)
mod.pars
  #* define prior for latent class analysis
lca_prior <- function(Theta,Etable){
  # number of latent Theta classes
  TP <- nrow(Theta)
  # prior in initial iteration
  if ( is.null(Etable) ){
    prior <- rep( 1/TP , TP )
              }    
  # process Etable (this is correct for datasets without missing data)
  if ( ! is.null(Etable) ){  
    # sum over correct and incorrect expected responses 
    prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + rowSums( Etable[ , seq(2,2*I,2)] ) )/I
            }
  prior <- prior / sum(prior)  
  return(prior)
   }
 #* estimate model
mod12c <- mirt(dat, mirtmodel , technical = list(
            customTheta=Theta , customPriorFun = lca_prior) ,
            pars = mod.pars , verbose=TRUE )
  # estimated parameters from the user customized  prior distribution.
mod12c@nest <- as.integer(sum(mod.pars$est) + 2)            
  #* extract item parameters
coef1 <- mirt.wrapper.coef(mod12c)
  #* inspect estimated distribution
mod12c@Theta
coef1$coef[1,c("a1","a2","a3")]
mod12c@Prior[[1]]

#*****************************************************
# Model 13: Multidimensional model with discrete traits
#*****************************************************
# define Q-Matrix
Q <- matrix( 0 , nrow=12,ncol=3)
Q[1:4,1] <- 1
Q[5:8,2] <- 1
Q[9:12,3] <- 1
# define discrete theta distribution with 3 dimensions
Theta <- scan(what="character",nlines=1)
  000 100 010 001 110 101 011 111
Theta <- as.numeric( unlist( lapply( Theta , strsplit , split="")   ) )
Theta <- matrix(Theta , 8 , 3 , byrow=TRUE )
Theta

#----  Model 13a: din (in CDM)
mod13a <- CDM::din( dat , q.matrix=Q , rule="DINA")
summary(mod13a)
# compare used Theta distributions
cbind( Theta , mod13a$attribute.patt.splitted)

#----  Model 13b: gdm (in CDM)
mod13b <- CDM::gdm( dat , Qmatrix=Q , theta.k=Theta , skillspace="full")
summary(mod13b)

#----  Model 13c: mirt (in mirt)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        F1 = 1-4
        F2 = 5-8
        F3 = 9-12
        ")
  # get parameters
mod.pars <- mirt(dat, model=mirtmodel ,  pars = "values")   
# starting values d parameters (transformed guessing parameters)
ind <- which(  mod.pars$name == "d"  )
mod.pars[ind,"value"] <- stats::qlogis(.2)
# starting values transformed slipping parameters
ind <- which( ( mod.pars$name %in% paste0("a",1:3)  ) &  ( mod.pars$est ) )
mod.pars[ind,"value"] <- stats::qlogis(.8) - stats::qlogis(.2)
mod.pars

  #* define prior for latent class analysis
lca_prior <- function(Theta,Etable){
  TP <- nrow(Theta)
  if ( is.null(Etable) ){
    prior <- rep( 1/TP , TP )
              }    
  if ( ! is.null(Etable) ){  
    prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + rowSums( Etable[ , seq(2,2*I,2)] ) )/I
            }
  prior <- prior / sum(prior)  
  return(prior)
}
 #* estimate model
mod13c <- mirt(dat, mirtmodel , technical = list(
            customTheta=Theta , customPriorFun = lca_prior) ,
            pars = mod.pars , verbose=TRUE )
  # estimated parameters from the user customized  prior distribution.
mod13c@nest <- as.integer(sum(mod.pars$est) + 2)            
  #* extract item parameters
coef13c <- mirt.wrapper.coef(mod13c)$coef
  #* inspect estimated distribution
mod13c@Theta
mod13c@Prior[[1]]

 #-* comparisons of estimated  parameters
# extract guessing and slipping parameters from din
dfr <- coef(mod13a)[ , c("guess","slip") ]
colnames(dfr) <- paste0("din.",c("guess","slip") )
# estimated parameters from gdm
dfr$gdm.guess <- stats::plogis(mod13b$item$b)
dfr$gdm.slip <- 1 - stats::plogis( rowSums(mod13b$item[,c("b.Cat1","a.F1","a.F2","a.F3")] ) )
# estimated parameters from mirt
dfr$mirt.guess <- stats::plogis( coef13c$d )
dfr$mirt.slip <- 1 - stats::plogis( rowSums(coef13c[,c("d","a1","a2","a3")]) )
# comparison
round(dfr[, c(1,3,5,2,4,6)],3)
  ##      din.guess gdm.guess mirt.guess din.slip gdm.slip mirt.slip
  ##   A1     0.691     0.684      0.686    0.000    0.000     0.000
  ##   A2     0.491     0.489      0.489    0.031    0.038     0.036
  ##   A3     0.302     0.300      0.300    0.184    0.193     0.190
  ##   A4     0.244     0.239      0.240    0.337    0.340     0.339
  ##   B1     0.568     0.579      0.577    0.163    0.148     0.151
  ##   B2     0.329     0.344      0.340    0.344    0.326     0.329
  ##   B3     0.817     0.827      0.825    0.014    0.007     0.009
  ##   B4     0.431     0.463      0.456    0.104    0.089     0.092
  ##   C1     0.188     0.191      0.189    0.013    0.013     0.013
  ##   C2     0.050     0.050      0.050    0.239    0.238     0.239
  ##   C3     0.000     0.002      0.001    0.065    0.065     0.065
  ##   C4     0.000     0.004      0.000    0.212    0.212     0.212

# estimated class sizes
dfr <- data.frame( "Theta" = Theta , "din"=mod13a$attribute.patt$class.prob ,
                   "gdm"=mod13b$pi.k , "mirt" = mod13c@Prior[[1]])
# comparison
round(dfr,3)
  ##     Theta.1 Theta.2 Theta.3   din   gdm  mirt
  ##   1       0       0       0 0.039 0.041 0.040
  ##   2       1       0       0 0.008 0.009 0.009
  ##   3       0       1       0 0.009 0.007 0.008
  ##   4       0       0       1 0.394 0.417 0.412
  ##   5       1       1       0 0.011 0.011 0.011
  ##   6       1       0       1 0.017 0.042 0.037
  ##   7       0       1       1 0.042 0.008 0.016
  ##   8       1       1       1 0.480 0.465 0.467   

#*****************************************************
# Model 14: DINA model with two skills
#*****************************************************

# define some simple Q-Matrix (does not really make in this application)
Q <- matrix( 0 , nrow=12,ncol=2)
Q[1:4,1] <- 1
Q[5:8,2] <- 1
Q[9:12,1:2] <- 1
# define discrete theta distribution with 3 dimensions
Theta <- scan(what="character",nlines=1)
  00 10 01 11
Theta <- as.numeric( unlist( lapply( Theta , strsplit , split="")   ) )
Theta <- matrix(Theta , 4 , 2 , byrow=TRUE )
Theta

#----  Model 14a: din (in CDM)
mod14a <- CDM::din( dat , q.matrix=Q , rule="DINA")
summary(mod14a)
# compare used Theta distributions
cbind( Theta , mod14a$attribute.patt.splitted)

#----  Model 14b: mirt (in mirt)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        F1 = 1-4
        F2 = 5-8
        (F1*F2) = 9-12
        ")    
#-> constructions like (F1*F2*F3) are also allowed in mirt.model
  # get parameters
mod.pars <- mirt(dat, model=mirtmodel ,  pars = "values")   
# starting values d parameters (transformed guessing parameters)
ind <- which(  mod.pars$name == "d"  )
mod.pars[ind,"value"] <- stats::qlogis(.2)
# starting values transformed slipping parameters
ind <- which( ( mod.pars$name %in% paste0("a",1:3)  ) &  ( mod.pars$est ) )
mod.pars[ind,"value"] <- stats::qlogis(.8) - stats::qlogis(.2)
mod.pars
 #* use above defined prior lca_prior
 # lca_prior <- function(prior,Etable) ...
 #* estimate model
mod14b <- mirt(dat, mirtmodel , technical = list(
            customTheta=Theta , customPriorFun = lca_prior) ,
            pars = mod.pars , verbose=TRUE )
  # estimated parameters from the user customized  prior distribution.
mod14b@nest <- as.integer(sum(mod.pars$est) + 2)            
  #* extract item parameters
coef14b <- mirt.wrapper.coef(mod14b)$coef

 #-* comparisons of estimated  parameters
# extract guessing and slipping parameters from din
dfr <- coef(mod14a)[ , c("guess","slip") ]
colnames(dfr) <- paste0("din.",c("guess","slip") )
# estimated parameters from mirt
dfr$mirt.guess <- stats::plogis( coef14b$d )
dfr$mirt.slip <- 1 - stats::plogis( rowSums(coef14b[,c("d","a1","a2","a3")]) )
# comparison
round(dfr[, c(1,3,2,4)],3)
  ##      din.guess mirt.guess din.slip mirt.slip
  ##   A1     0.674      0.671    0.030     0.030
  ##   A2     0.423      0.420    0.049     0.050
  ##   A3     0.258      0.255    0.224     0.225
  ##   A4     0.245      0.243    0.394     0.395
  ##   B1     0.534      0.543    0.166     0.164
  ##   B2     0.338      0.347    0.382     0.380
  ##   B3     0.796      0.802    0.016     0.015
  ##   B4     0.421      0.436    0.142     0.140
  ##   C1     0.850      0.851    0.000     0.000
  ##   C2     0.480      0.480    0.097     0.097
  ##   C3     0.746      0.746    0.026     0.026
  ##   C4     0.575      0.577    0.136     0.137

# estimated class sizes
dfr <- data.frame( "Theta" = Theta , "din"=mod13a$attribute.patt$class.prob ,
                    "mirt" = mod14b@Prior[[1]])
# comparison
round(dfr,3)
  ##     Theta.1 Theta.2   din  mirt
  ##   1       0       0 0.357 0.369
  ##   2       1       0 0.044 0.049
  ##   3       0       1 0.047 0.031
  ##   4       1       1 0.553 0.551  
  
#*****************************************************
# Model 15: Rasch model with non-normal distribution
#*****************************************************

# A non-normal theta distributed is specified by log-linear smoothing
# the distribution as described in
# Xu, X., & von Davier, M. (2008). Fitting the structured general diagnostic model 
# to NAEP data. ETS Research Report ETS RR-08-27. Princeton, ETS. 

# define theta grid
theta.k <- matrix( seq(-4,4,len=15) , ncol=1 )
# define design matrix for smoothing (up to cubic moments)
delta.designmatrix <- cbind( 1 , theta.k , theta.k^2 , theta.k^3 )
# constrain item difficulty of fifth item (item B1) to zero
b.constraint <- matrix( c(5,1,0) , ncol=3 )

#----  Model 15a: gdm (in CDM)
mod15a <- CDM::gdm( dat , irtmodel="1PL" , theta.k=theta.k , 
               b.constraint=b.constraint  )
summary(mod15a)
 # plot estimated distribution
barplot( mod15a$pi.k[,1] , space=0 , names.arg = round(theta.k[,1],2) ,
           main="Estimated Skewed Distribution (gdm function)")

#----  Model 15b: mirt (in mirt)
 # define mirt model
mirtmodel <- mirt::mirt.model("
    F = 1-12
    ")   
 # get parameters
mod.pars <- mirt(dat, model=mirtmodel , pars = "values" , itemtype="Rasch")
  # fix variance (just for correct counting of parameters)
mod.pars[ mod.pars$name=="COV_11" , "est"] <- FALSE
  # fix item difficulty
ind <- which( ( mod.pars$item == "B1" ) & ( mod.pars$name == "d" ) )
mod.pars[ ind , "value"] <- 0
mod.pars[ ind , "est"] <- FALSE

 # define prior
loglinear_prior <- function(Theta,Etable){
    TP <- nrow(Theta)
    if ( is.null(Etable) ){
    prior <- rep( 1/TP , TP )
           }
    # process Etable (this is correct for datasets without missing data)
    if ( ! is.null(Etable) ){
          # sum over correct and incorrect expected responses
       prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + rowSums( Etable[ , seq(2,2*I,2)] ) )/I
       # smooth prior using the above design matrix and a log-linear model
       # see Xu & von Davier (2008).
       y <- log( prior + 1E-15 )
       lm1 <- lm( y ~ 0 + delta.designmatrix , weights = prior )
       prior <- exp(fitted(lm1))   # smoothed prior
           }
    prior <- prior / sum(prior)
    return(prior)
}

#* estimate model
mod15b <- mirt(dat, mirtmodel , technical = list(
                customTheta= theta.k , customPriorFun = loglinear_prior ) ,
                pars = mod.pars , verbose=TRUE )                                              
# estimated parameters from the user customized prior distribution.
mod15b@nest <- as.integer(sum(mod.pars$est) + 3)
#* extract item parameters
coef1 <- mirt.wrapper.coef(mod15b)$coef

#** compare estimated item parameters
dfr <- data.frame( "gdm"=mod15a$item$b.Cat1 , "mirt"=coef1$d )
rownames(dfr) <- colnames(dat)
round(t(dfr),4)
  ##            A1     A2      A3      A4 B1      B2     B3     B4     C1    C2     C3    C4
  ##   gdm  0.9818 0.1538 -0.7837 -1.3197  0 -1.0902 1.6088 -0.170 1.9778 0.006 1.1859 0.135
  ##   mirt 0.9829 0.1548 -0.7826 -1.3186  0 -1.0892 1.6099 -0.169 1.9790 0.007 1.1870 0.136
# compare estimated theta distribution
dfr <- data.frame( "gdm"=mod15a$pi.k , "mirt"= mod15b@Prior[[1]] )
round(t(dfr),4)
  ##        1 2     3     4      5      6      7      8      9     10     11     12     13
  ##   gdm  0 0 1e-04 9e-04 0.0056 0.0231 0.0652 0.1299 0.1881 0.2038 0.1702 0.1129 0.0612
  ##   mirt 0 0 1e-04 9e-04 0.0056 0.0232 0.0653 0.1300 0.1881 0.2038 0.1702 0.1128 0.0611
  ##            14    15
  ##   gdm  0.0279 0.011
  ##   mirt 0.0278 0.011

## End(Not run)

Results