Last data update: 2014.03.03

R: Enrichment analysis on two- or three-way contingency tables.
runConCub    R Documentation

Enrichment analysis on two- or three-way contingency tables.

Description

Perform the enrichment analysis on two- or three-way contingency tables.

Usage

runConCub(obj, filter, nthreads = 2, rng = NULL,
	verbose=list(output.step=0, show.cat1=FALSE,
	show.cat2=FALSE, show.cat3=FALSE))

Arguments

obj

an object with class concub

filter

an object with class concubfilter

nthreads

number of threads to use in hypergeom.test

rng

a named list. Restrict enrichment analysis to these category variables

verbose

A list to control verbosity:

output.step:

the number of category 2 variables that have to be processed before a control output is printed

show.cat1:

show current level of category 1

show.cat2:

show current level of category 2

show.cat3:

show current level of category 3

Details

This function applies a test for association to all combinations of all variables of all categories to be tested. Depending on the settings in the concubfilter-object, a one-sided or two-sided test is performed. If the smallest expected value is smaller than 5, the exact hypergeometric test as implemented in the hypergea-package is used; otherwise the chi-squared test as implemented in the loglm-function of the MASS-package is used. The minimum expected value can be changed in the concub-object by the user (approx-parameter). In this function, only those filter settings are used which cause tests to be skipped.

Examples

##
## A completely artificial example run
## through the routines of the package.
##
R <- 500
# Generate R random gene IDs, each built from 10 letters drawn from LETTERS.
# vapply() guarantees a character vector regardless of input length, and
# seq_len() stays correct even when R is 0 (1:R would yield c(1, 0)).
ID <- vapply(
  seq_len(R),
  function(r) paste(sample(LETTERS, 10), collapse = ""),
  character(1)
)
# Remove any duplicated IDs so that each gene ID occurs only once.
ID <- unique(ID)

# Assign artificial differentially expressed genes randomly.
# The 'replace' argument is spelled out in full instead of relying on
# partial matching of 'rep', consistent with the calls further below.
category1 <- list(
  deg.smallFC = sample(ID, 100, replace = FALSE),
  deg.hughFC = sample(ID, 100, replace = FALSE)
)
# Assign artificial GO terms of genes randomly.
category2 <- list(
  go1 = sample(ID, 50, replace = FALSE),
  go2 = sample(ID, 166, replace = FALSE),
  go3 = sample(ID, 74, replace = FALSE),
  go4 = sample(ID, 68, replace = FALSE)
)
# Assign artificial sequence lengths (100, 200, ..., 1000) to genes randomly,
# then group the gene IDs by their assigned length.
LEN <- setNames(sample(seq(100, 1000, 100), length(ID), replace = TRUE), ID)
category3 <- split(ID, f = factor(LEN, levels = seq(100, 1000, 100)))
# Bundle the three categories into one named list; the names (deg, go, len)
# are also handed to the filter object below.
CatList <- list(deg = category1, go = category2, len = category3)

# Create the filter and data objects and run the enrichment analysis
# with a single thread.
ConCubFilter.obj <- new("concubfilter", names = names(CatList))
ConCub.obj <- new("concub", fact = CatList)
ConCub.obj.2 <- runConCub(obj = ConCub.obj, filter = ConCubFilter.obj, nthreads = 1)
ConCub.obj.2

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(geecc)
geecc 1.6.0 loaded
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/geecc/runConCub.Rd_%03d_medium.png", width=480, height=480)
> ### Name: runConCub
> ### Title: Enrichment analysis on two- or three-way contingency tables.
> ### Aliases: runConCub
> 
> ### ** Examples
> 
> ##
> ## a completely artificial example run
> ## through the routines of the package
> ##
> R <- 500
> #generate R random gene-ids
> ID <- sapply(1:R, function(r){paste( sample(LETTERS, 10), collapse="" ) } )
> ID <- unique(ID)
> 
> #assign artificial differentially expressed genes randomly
> category1 <- list( deg.smallFC=sample(ID, 100, rep=FALSE),
+ 	deg.hughFC=sample(ID, 100, rep=FALSE) )
> #assign artificial GO terms of genes randomly
> category2 <- list( go1=sample(ID, 50, replace=FALSE),
+ 	go2=sample(ID, 166, replace=FALSE),
+ 	go3=sample(ID, 74, replace=FALSE),
+ 	go4=sample(ID, 68, replace=FALSE) )
> #assign artificial sequence length of genes randomly
> LEN <- setNames(sample(seq(100, 1000, 100), length(ID), replace=TRUE), ID)
> category3 <- split( ID, f=factor(LEN, levels=seq(100, 1000, 100)) )
> CatList <- list(deg=category1, go=category2, len=category3)
> 
> ConCubFilter.obj <- new("concubfilter", names=names(CatList))
> ConCub.obj <- new("concub", fact=CatList)
> ConCub.obj.2 <- runConCub( obj=ConCub.obj, filter=ConCubFilter.obj, nthreads=1 )
Testing: counts ~ deg + go + len (mi)

> ConCub.obj.2

####################
# settings
####################
Comparing null-model 'count ~ deg + go + len' against alternative model 'count ~ deg*go*len' 
Using chi-squared approximation

Category 1 (deg) with 2 variables
$deg.smallFC
[1] "MORFNCGVEB" "XGRZMJDVPI" "PKVJBFANZQ" "UCIRGABYHQ" "ESYMZITWVF"
[6] "DAVRUSIGZL"

$deg.hughFC
[1] "EKBSVRMGLJ" "TLAEWGSCIV" "BNWQCPHDMF" "JWRHTAXSPG" "PVNTBLQSED"
[6] "BNUSJEIWYX"

deg.smallFC  deg.hughFC 
        100         100 

Category 2 (go) with 4 variables
$go1
[1] "ZUEDSAJIXV" "KFXTINYLOG" "MJQWOTNPGR" "SZDFPVBQJL" "RUXOHSFJVA"
[6] "YQISKXDCFA"

$go2
[1] "BNUSJEIWYX" "XSDUMPKOFY" "JZYCRUBSQG" "SDBTWZFQKP" "LDAPBHWOIF"
[6] "QXMEUBPYRJ"

$go3
[1] "UXIZQWLOGR" "TQERXMBIJN" "KAOUHEDBFC" "OFHBVRWYKX" "MORFNCGVEB"
[6] "DUHGESYMCA"

$go4
[1] "XZKIABYNFE" "EKBSVRMGLJ" "LHOXTBYJPV" "YMLOBJVCZR" "EBKXMTPWDJ"
[6] "JQZKHADUMV"

go1 go2 go3 go4 
 50 166  74  68 

Category 3 (len) with 10 variables
$`100`
[1] "BEXLPFGCMD" "QKEJUFOTYI" "SNGDCOXIBE" "IENQRDOKZB" "EKBXQVZMOL"
[6] "JYHQFIVZLB"

$`200`
[1] "VGASBFZOEM" "NOFAIEMLJU" "HVNPOGQWKA" "QSGRFNMOLW" "BMGFTVPKRC"
[6] "SCGQLETVPH"

$`300`
[1] "DGUFKPENHZ" "XBUJEPGFLD" "OEZCNRDJVM" "UOWXGJHFKL" "XQORMKNECH"
[6] "LDAPBHWOIF"

$`400`
[1] "XWAPQTKCYZ" "MWISHFNXYJ" "KVRNTJCDHS" "XZKIABYNFE" "ZNGLJHSFXU"
[6] "NMLSDGKPIV"

$`500`
[1] "TQEBFKIOYN" "JWTDZPNVRA" "MSVUCQWGTX" "IYVZSOTGJL" "ARLTKEJUGS"
[6] "VUQHWIDJPS"

100 200 300 400 500 
 36  61  45  49  55 
[... output truncated after 5 items]

Population provided or guessed from categories (500 items):
 [1] "MORFNCGVEB" "XGRZMJDVPI" "PKVJBFANZQ" "UCIRGABYHQ" "ESYMZITWVF"
 [6] "DAVRUSIGZL" "BSEAIZHXDG" "SWEHNOXDAG" "VOTDSAWJFK" "IQCODFVTNL"
[11] "OWPNDTIQHB" "YHIPWRBFUJ" "XENHYJFQSG" "TBGOARMIEQ" "GEPSRJFMLY"
[16] "SGMNDALCTQ" "XZRWCUGSVL" "ESDRCKYMGQ" "LOVWYNADPQ" "QYJABTEUVH"
[... output truncated after 20 items]


> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>