Last data update: 2014.03.03

R: Pruning the Maximal tree
best.tree.BIC.AIC    R Documentation

Pruning the Maximal tree

Description

This function prunes back the maximal tree using either the BIC or the AIC criterion.

Usage

best.tree.BIC.AIC(xtree, xdata, Y.name, X.names,
                  family = "binomial", verbose = TRUE)

Arguments

xtree

a tree to prune

xdata

the dataset used to build the tree

Y.name

the name of the dependent variable

X.names

the names of independent confounding variables to consider in the linear part of the glm

family

the glm family to use, chosen according to the type of the dependent variable.

verbose

Logical; TRUE for printing progress during the computation (helpful for debugging)

Value

a list of four elements:

best_index

The sizes of the trees selected by BIC and AIC

tree

The trees selected by BIC and AIC

fit_glm

The fitted pltr models selected with BIC and AIC

Timediff

The execution time of the selection procedure

Author(s)

Cyprien Mbogning and Wilson Toussile

References

Mbogning, C., Perdry, H., Toussile, W., Broet, P.: A novel tree-based procedure for deciphering the genomic spectrum of clinical disease entities. Journal of Clinical Bioinformatics 4:6, (2014)

Akaike, H.: A new look at the statistical model identification. IEEE Trans. Automat. Control AC-19, 716-723 (1974)

Schwarz, G.: Estimating the dimension of a model. The Annals of Statistics 6, 461-464 (1978)

See Also

best.tree.CV, pltr.glm

Examples

data(burn)

args.rpart <- list(minbucket = 10, maxdepth = 4, cp = 0, maxcompete = 0, 
                    maxsurrogate = 0)
 family <- "binomial"
 X.names = "Z2"
 Y.name = "D2"
 G.names = c('Z1','Z3','Z4','Z5','Z6','Z7','Z8','Z9','Z10','Z11')
 
pltr.burn <- pltr.glm(burn, Y.name, X.names, G.names, args.rpart = args.rpart,
                   family = family, iterMax = 4, iterMin = 3, verbose = FALSE)

## Prune back the maximal tree using either the BIC or the AIC criterion

pltr.burn_prun <- best.tree.BIC.AIC(xtree = pltr.burn$tree, burn, Y.name, 
                                    X.names, family = family)

## plot the BIC selected tree

plot(pltr.burn_prun$tree$BIC, main = 'BIC selected tree')
text(pltr.burn_prun$tree$BIC, xpd = TRUE, cex = .6, col = 'blue')

## Not run: 
##load the data set

data(data_pltr)

## Set the parameters

args.rpart <- list(minbucket = 40, maxdepth = 10, cp = 0)
family <- "binomial"
Y.name <- "Y"
X.names <- "G1"
G.names <- paste("G", 2:15, sep="")

## build a maximal tree

fit_pltr <- pltr.glm(data_pltr, Y.name, X.names, G.names, args.rpart = args.rpart, 
                     family = family,iterMax = 5, iterMin = 3)
                     
## prune back the maximal tree by the BIC or AIC criterion

tree_select <- best.tree.BIC.AIC(xtree = fit_pltr$tree,data_pltr,Y.name, 
                                 X.names, family = family)
                            
plot(tree_select$tree$BIC, main = 'BIC TREE')
text(tree_select$tree$BIC, minlength = 0L, xpd = TRUE, cex = .6)


## End(Not run)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(GPLTR)
Loading required package: rpart
Loading required package: parallel
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/GPLTR/best.tree.BIC.AIC.Rd_%03d_medium.png", width=480, height=480)
> ### Name: best.tree.BIC.AIC
> ### Title: Prunning the Maximal tree
> ### Aliases: best.tree.BIC.AIC
> ### Keywords: documentation tree
> 
> ### ** Examples
> 
> data(burn)
> 
> args.rpart <- list(minbucket = 10, maxdepth = 4, cp = 0, maxcompete = 0, 
+                     maxsurrogate = 0)
>  family <- "binomial"
>  X.names = "Z2"
>  Y.name = "D2"
>  G.names = c('Z1','Z3','Z4','Z5','Z6','Z7','Z8','Z9','Z10','Z11')
>  
> pltr.burn <- pltr.glm(burn, Y.name, X.names, G.names, args.rpart = args.rpart,
+                    family = family, iterMax = 4, iterMin = 3, verbose = FALSE)
> 
> ## Prunned back the maximal tree using either the BIC or the AIC criterion
> 
> pltr.burn_prun <- best.tree.BIC.AIC(xtree = pltr.burn$tree, burn, Y.name, 
+                                     X.names, family = family)
Number of leaves in the max tree =  10 
Best sub-tree 
 2 3 

 Number of sub-trees =  2 
Best sub-tree 
 2 6 7 

 Number of sub-trees =  3 
Best sub-tree 
 6 7 4 5 

 Number of sub-trees =  4 
Best sub-tree 
 7 4 5 12 13 

 Number of sub-trees =  5 
Best sub-tree 
 7 4 5 13 24 25 

 Number of sub-trees =  6 
Best sub-tree 
 4 5 13 24 25 14 15 

 Number of sub-trees =  7 
Best sub-tree 
 4 13 24 25 14 15 10 11 

 Number of sub-trees =  8 
Best sub-tree 
 4 13 24 25 14 15 10 22 23 

 Number of sub-trees =  9 
> 
> ## plot the BIC selected tree
> 
> plot(pltr.burn_prun$tree$BIC, main = 'BIC selected tree')
> text(pltr.burn_prun$tree$BIC, xpd = TRUE, cex = .6, col = 'blue')
> 
> ## Not run: 
> ##D ##load the data set
> ##D 
> ##D data(data_pltr)
> ##D 
> ##D ## Set the parameters
> ##D 
> ##D args.rpart <- list(minbucket = 40, maxdepth = 10, cp = 0)
> ##D family <- "binomial"
> ##D Y.name <- "Y"
> ##D X.names <- "G1"
> ##D G.names <- paste("G", 2:15, sep="")
> ##D 
> ##D ## build a maximal tree
> ##D 
> ##D fit_pltr <- pltr.glm(data_pltr, Y.name, X.names, G.names, args.rpart = args.rpart, 
> ##D                      family = family,iterMax = 5, iterMin = 3)
> ##D                      
> ##D ##prunned back the maximal tree by BIC or AIC criterion
> ##D 
> ##D tree_select <- best.tree.BIC.AIC(xtree = fit_pltr$tree,data_pltr,Y.name, 
> ##D                                  X.names, family = family)
> ##D                             
> ##D plot(tree_select$tree$BIC, main = 'BIC TREE')
> ##D text(tree_select$tree$BIC, minlength = 0L, xpd = TRUE, cex = .6)
> ##D 
> ## End(Not run)
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>