a named list. Restrict enrichment analysis to these category variables
verbose
A list to control verbosity:
output.step:
number of category-2 variables to process between successive progress messages
show.cat1:
show current level of category 1
show.cat2:
show current level of category 2
show.cat3:
show current level of category 3
Details
This function applies a test for association to every combination of variables across all categories to be tested. Depending on the settings in the concubfilter-object, a one-sided or two-sided test is performed: the exact hypergeometric test as implemented in the hypergea package is used if the smallest expected value is smaller than 5; otherwise the chi-squared test as implemented in the loglm function of the MASS package is used. The minimum expected value can be changed by the user in the concub-object (approx-parameter).
In this function, only those filter settings are used that cause tests to be skipped.
Examples
##
## a completely artificial example run
## through the routines of the package
##
R <- 500

# generate R random gene ids, each built from 10 distinct upper-case letters;
# vapply() guarantees a character vector regardless of R
ID <- vapply(
  seq_len(R),
  function(r) paste(sample(LETTERS, 10), collapse = ""),
  character(1)
)
# drop accidental duplicates so every id is unique
ID <- unique(ID)

# assign artificial differentially expressed genes randomly
# (argument spelled out as `replace`; `rep` only worked via partial matching)
category1 <- list(
  deg.smallFC = sample(ID, 100, replace = FALSE),
  deg.hughFC = sample(ID, 100, replace = FALSE)
)

# assign artificial GO terms of genes randomly
category2 <- list(
  go1 = sample(ID, 50, replace = FALSE),
  go2 = sample(ID, 166, replace = FALSE),
  go3 = sample(ID, 74, replace = FALSE),
  go4 = sample(ID, 68, replace = FALSE)
)

# assign artificial sequence length of genes randomly:
# one value from 100, 200, ..., 1000 per gene id, then group the ids by length
LEN <- setNames(sample(seq(100, 1000, 100), length(ID), replace = TRUE), ID)
category3 <- split(ID, f = factor(LEN, levels = seq(100, 1000, 100)))

# named list of the three categories; the names are reused for the filter object
CatList <- list(deg = category1, go = category2, len = category3)

ConCubFilter.obj <- new("concubfilter", names = names(CatList))
ConCub.obj <- new("concub", fact = CatList)

# run the enrichment analysis with nthreads = 1 and print the resulting object
ConCub.obj.2 <- runConCub(obj = ConCub.obj, filter = ConCubFilter.obj, nthreads = 1)
ConCub.obj.2
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(geecc)
geecc 1.6.0 loaded
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/geecc/runConCub.Rd_%03d_medium.png", width=480, height=480)
> ### Name: runConCub
> ### Title: Enrichment analysis on two- or three-way contingency tables.
> ### Aliases: runConCub
>
> ### ** Examples
>
> ##
> ## a completely artificial example run
> ## through the routines of the package
> ##
> R <- 500
> #generate R random gene-ids
> ID <- sapply(1:R, function(r){paste( sample(LETTERS, 10), collapse="" ) } )
> ID <- unique(ID)
>
> #assign artificial differentially expressed genes randomly
> category1 <- list( deg.smallFC=sample(ID, 100, rep=FALSE),
+ deg.hughFC=sample(ID, 100, rep=FALSE) )
> #assign artificial GO terms of genes randomly
> category2 <- list( go1=sample(ID, 50, replace=FALSE),
+ go2=sample(ID, 166, replace=FALSE),
+ go3=sample(ID, 74, replace=FALSE),
+ go4=sample(ID, 68, replace=FALSE) )
> #assign artificial sequence length of genes randomly
> LEN <- setNames(sample(seq(100, 1000, 100), length(ID), replace=TRUE), ID)
> category3 <- split( ID, f=factor(LEN, levels=seq(100, 1000, 100)) )
> CatList <- list(deg=category1, go=category2, len=category3)
>
> ConCubFilter.obj <- new("concubfilter", names=names(CatList))
> ConCub.obj <- new("concub", fact=CatList)
> ConCub.obj.2 <- runConCub( obj=ConCub.obj, filter=ConCubFilter.obj, nthreads=1 )
Testing: counts ~ deg + go + len (mi)
> ConCub.obj.2
####################
# settings
####################
Comparing null-model 'count ~ deg + go + len' against alternative model 'count ~ deg*go*len'
Using chi-squared approximation
Category 1 (deg) with 2 variables
$deg.smallFC
[1] "MORFNCGVEB" "XGRZMJDVPI" "PKVJBFANZQ" "UCIRGABYHQ" "ESYMZITWVF"
[6] "DAVRUSIGZL"
$deg.hughFC
[1] "EKBSVRMGLJ" "TLAEWGSCIV" "BNWQCPHDMF" "JWRHTAXSPG" "PVNTBLQSED"
[6] "BNUSJEIWYX"
deg.smallFC deg.hughFC
100 100
Category 2 (go) with 4 variables
$go1
[1] "ZUEDSAJIXV" "KFXTINYLOG" "MJQWOTNPGR" "SZDFPVBQJL" "RUXOHSFJVA"
[6] "YQISKXDCFA"
$go2
[1] "BNUSJEIWYX" "XSDUMPKOFY" "JZYCRUBSQG" "SDBTWZFQKP" "LDAPBHWOIF"
[6] "QXMEUBPYRJ"
$go3
[1] "UXIZQWLOGR" "TQERXMBIJN" "KAOUHEDBFC" "OFHBVRWYKX" "MORFNCGVEB"
[6] "DUHGESYMCA"
$go4
[1] "XZKIABYNFE" "EKBSVRMGLJ" "LHOXTBYJPV" "YMLOBJVCZR" "EBKXMTPWDJ"
[6] "JQZKHADUMV"
go1 go2 go3 go4
50 166 74 68
Category 3 (len) with 10 variables
$`100`
[1] "BEXLPFGCMD" "QKEJUFOTYI" "SNGDCOXIBE" "IENQRDOKZB" "EKBXQVZMOL"
[6] "JYHQFIVZLB"
$`200`
[1] "VGASBFZOEM" "NOFAIEMLJU" "HVNPOGQWKA" "QSGRFNMOLW" "BMGFTVPKRC"
[6] "SCGQLETVPH"
$`300`
[1] "DGUFKPENHZ" "XBUJEPGFLD" "OEZCNRDJVM" "UOWXGJHFKL" "XQORMKNECH"
[6] "LDAPBHWOIF"
$`400`
[1] "XWAPQTKCYZ" "MWISHFNXYJ" "KVRNTJCDHS" "XZKIABYNFE" "ZNGLJHSFXU"
[6] "NMLSDGKPIV"
$`500`
[1] "TQEBFKIOYN" "JWTDZPNVRA" "MSVUCQWGTX" "IYVZSOTGJL" "ARLTKEJUGS"
[6] "VUQHWIDJPS"
100 200 300 400 500
36 61 45 49 55
[... output truncated after 5 items]
Population provided or guessed from categories (500 items):
[1] "MORFNCGVEB" "XGRZMJDVPI" "PKVJBFANZQ" "UCIRGABYHQ" "ESYMZITWVF"
[6] "DAVRUSIGZL" "BSEAIZHXDG" "SWEHNOXDAG" "VOTDSAWJFK" "IQCODFVTNL"
[11] "OWPNDTIQHB" "YHIPWRBFUJ" "XENHYJFQSG" "TBGOARMIEQ" "GEPSRJFMLY"
[16] "SGMNDALCTQ" "XZRWCUGSVL" "ESDRCKYMGQ" "LOVWYNADPQ" "QYJABTEUVH"
[... output truncated after 20 items]
>
>
>
>
>
> dev.off()
null device
1
>