Formula whose left hand side specifies the event
history, i.e., either via Surv() or Hist().
data
Data frame in which the formula is interpreted
unspecialsDesign
Passed as is to
model.design.
specials
Character vector of special function names.
Usually the body of a special function is function(x)x, i.e., it returns
its argument unchanged, but e.g., strata from the survival package does
transform the values (it returns a factor).
specialsFactor
Passed as is to model.design.
specialsDesign
Passed as is to model.design
stripSpecials
Passed as specials to
strip.terms
stripArguments
Passed as arguments to
strip.terms
stripAlias
Passed as alias.names to
strip.terms
stripUnspecials
Passed as unspecials to
strip.terms
dropIntercept
Passed as is to model.design
check.formula
If TRUE, check that the left hand side of the formula is a Surv or Hist
object.
response
If FALSE, do not extract the response data (event.history).
Details
Obtain a list with the data used for event history regression analysis. This
function is not intended to be called directly at the user level; rather, it
is meant to be used inside another function to prepare data for survival
analysis.
Value
A list which contains
- the event.history (see Hist)
- the design matrix (see model.design)
- one entry for each special (see model.design)
Author(s)
Thomas A. Gerds <tag@biostat.ku.dk>
See Also
model.frame model.design Hist
Examples
## Example data: one event time without competing risks, plus covariates.
## Assume that X1 needs a different treatment than X2, e.g., because X1
## is a cluster variable or because X1 should have a proportional effect
## on the outcome.
dsurv <- data.frame(
  time = seq_len(7),
  status = c(0, 1, 1, 0, 0, 0, 1),
  X2 = c(2.24, 3.22, 9.59, 4.4, 3.54, 6.81, 5.05),
  X3 = c(1, 1, 1, 1, 0, 0, 1),
  X4 = c(44.69, 37.41, 68.54, 38.85, 35.9, 27.02, 41.84),
  ## the reference level of X1 is "c"
  X1 = factor(
    c("a", "b", "a", "c", "c", "a", "b"),
    levels = c("c", "a", "b")
  )
)
## Hand the formula and the data over to EventHistory.frame
e <- EventHistory.frame(
  Hist(time, status) ~ prop(X1) + X2 + cluster(X3) + X4,
  data = dsurv,
  specials = c("prop", "cluster"),
  stripSpecials = c("prop", "cluster")
)
names(e)
## The first element is the event.history, which is the result of the
## left hand side of the formula:
e$event.history
## this is the same as
with(dsurv, Hist(time, status))
## to inspect its structure:
colnames(e$event.history)
unclass(e$event.history)
## With competing risks there is an additional column called event,
## see help(Hist) for more details.
## The remaining elements are the design, i.e., the model.matrix for the
## non-special covariates
e$design
## and a data.frame for the special covariates
e$prop
## With specialsDesign=TRUE the special covariates are returned as a
## model.matrix
e2 <- EventHistory.frame(
  Hist(time, status) ~ prop(X1) + X2 + cluster(X3) + X4,
  data = dsurv,
  specials = c("prop", "cluster"),
  stripSpecials = c("prop", "cluster"),
  specialsDesign = TRUE
)
e2$prop
## With unspecialsDesign=FALSE the non-special covariates are returned as
## a data.frame
e3 <- EventHistory.frame(
  Hist(time, status) ~ prop(X1) + X2 + cluster(X3) + X4,
  data = dsurv,
  specials = c("prop", "cluster"),
  stripSpecials = c("prop", "cluster"),
  specialsDesign = TRUE,
  unspecialsDesign = FALSE
)
e3$design
## the general idea is that the function is used to parse the combination of
## formula and data inside another function. Here is an example with
## competing risks
## Parse the combination of formula and data for a regression function.
##
## Arguments:
##   formula  formula whose left hand side specifies the event history
##            (via Hist() or Surv()); the right hand side may use the
##            specials prop(), cluster() and timevar()
##   data     data in which the formula is interpreted; defaults to the
##            frame of the caller
## Value: a list with the elements
##   response  data.frame with the columns time and status and, in case
##             of competing risks, additionally event and Event (the
##             event as a factor)
##   X         all elements of the EventHistory.frame result except the
##             first one (the event.history)
SampleRegression <- function(formula, data = parent.frame()) {
  ehf <- EventHistory.frame(formula = formula,
                            data = data,
                            stripSpecials = c("prop", "cluster", "timevar"),
                            specials = c("prop", "timevar", "cluster"))
  history <- ehf$event.history
  time <- history[, "time"]
  status <- history[, "status"]
  ## isTRUE() keeps the condition a single TRUE/FALSE even when the
  ## model attribute is missing (a bare == would then yield logical(0)
  ## and make if() fail)
  if (isTRUE(attr(history, "model") == "competing.risks")) {
    event <- history[, "event"]
    ## event as a factor
    Event <- getEvent(history)
    list(response = data.frame(time, status, event, Event), X = ehf[-1])
  } else { # no competing risks
    list(response = data.frame(time, status), X = ehf[-1])
  }
}
## add an outcome with the values "0", "cause1" and "cause2"
dsurv$outcome <- c("cause1", "0", "cause2", "cause1", "cause2", "cause2", "0")
SampleRegression(Hist(time, outcome) ~ prop(X1) + X2 + cluster(X3) + X4, dsurv)
## check that the parsing works for different formulas
form1 <- Hist(time, outcome != "0") ~ prop(X1) + X2 + cluster(X3) + X4
form2 <- Hist(time, outcome) ~ prop(X1) + cluster(X3) + X4
ff <- list(form1, form2)
lapply(ff, function(fml) SampleRegression(formula = fml, data = dsurv))
## here is what the riskRegression package uses to
## distinguish between covariates with
## time-proportional effects and covariates with
## time-varying effects:
## Not run:
library(riskRegression)
data(Melanoma)
f <- Hist(time,status)~prop(thick)+strata(sex)+age+prop(ulcer,power=1)+timevar(invasion,test=1)
## here the unspecial terms, i.e., the term age, are treated as prop
## (stripUnspecials="prop"); also, strata is an alias for timevar and
## tp, const are aliases for prop (stripAlias)
## NOTE(review): Melanoma[1:10] selects the first 10 columns of the data
## frame, not the first 10 rows -- confirm whether Melanoma[1:10,] was
## intended
## The formula stored in f is passed here; the bare name formula is not
## defined in this example and would resolve to the function
## stats::formula.
EHF <- prodlim::EventHistory.frame(f,
                                   Melanoma[1:10],
                                   specials=c("timevar","strata","prop","const","tp"),
                                   stripSpecials=c("timevar","prop"),
                                   stripArguments=list("prop"=list("power"=0),
                                                       "timevar"=list("test"=0)),
                                   stripAlias=list("timevar"=c("strata"),
                                                   "prop"=c("tp","const")),
                                   stripUnspecials="prop",
                                   specialsDesign=TRUE,
                                   dropIntercept=TRUE)
EHF$prop
EHF$timevar
## End(Not run)