Last data update: 2014.03.03

R: Run CLUster Evaluation
runClueR Documentation

Run CLUster Evaluation

Description

Takes in a time-course matrix and test for enrichment of the clustering using cmeans or kmeans clustering algorithm with a reference annotation.

Usage

runClue(Tc, annotation, rep = 10, kRange, clustAlg = "cmeans",
  effectiveSize = c(5, 100), pvalueCutoff = 0.05, alpha = 0.5)

Arguments

Tc

a numeric matrix to be clustered. The columns correspond to the time-course and the rows correspond to phosphorylation sites.

annotation

a list with names correspond to kinases and elements correspond to substrates belong to each kinase.

rep

number of times the clustering is to be applied. This is to account for variability in the clustering algorithm.

kRange

the range of k to be tested for clustering.

clustAlg

the clustering algorithm to be used. The default is cmeans clustering.

effectiveSize

the size of annotation groups to be considered for calculating enrichment. Groups that are too small or too large will be removed from calculating overall enrichment of the clustering.

pvalueCutoff

a pvalue cutoff for determining which kinase-substrate groups to be included in calculating overall enrichment of the clustering.

alpha

a penalty factor for penalizing large number of clusters.

Value

a clue output that contains the input parameters used for evaluation and the evaluation results. Use ls(x) to see details of output. 'x' be the output here.

Examples

# load the human ES phosphoprotoemics data (Rigbolt et al. Sci Signal. 4(164):rs3, 2011)
data(hES)
# load the PhosphoSitePlus annotations (Hornbeck et al. Nucleic Acids Res. 40:D261-70, 2012)
data(PhosphoSite)

# make a subset of hES dataset for demonstrating the example in a short time frame
ids <- c("CK2A1", "ERK1", "ERK2", "CDK7",
"p90RSK", "p70S6K", "PKACA", "CDK1", "DNAPK", "ATM", "CDK2")
hESs <- hES[rownames(hES) %in% unlist(PhosphoSite.human[ids]),]

# run CLUE with a repeat of 3 times and a range from 2 to 13
set.seed(2)
clueObj <- runClue(Tc=hESs, annotation=PhosphoSite.human, rep=2, kRange=13)

# visualize the evaluation outcome
Ms <- apply(clueObj$evlMat, 2, mean, na.rm=TRUE)
Ss <- apply(clueObj$evlMat, 2, sd, na.rm=TRUE)
library(Hmisc)
errbar(1:length(Ms), Ms, Ms+Ss, Ms-Ss, cex=1.2, type="b", xaxt="n", xlab="k", ylab="E")
axis(1, at=1:12, labels=paste("k=", 2:13, sep=""))

# generate the optimal clustering results
best <- clustOptimal(clueObj, rep=10, mfrow=c(3, 4))

# list enriched clusters
best$enrichList

# obtain the optimal clustering object (not run)
# best$clustObj

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(ClueR)
Loading required package: e1071
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/ClueR/runClue.Rd_%03d_medium.png", width=480, height=480)
> ### Name: runClue
> ### Title: Run CLUster Evaluation
> ### Aliases: runClue
> 
> ### ** Examples
> 
> # load the human ES phosphoprotoemics data (Rigbolt et al. Sci Signal. 4(164):rs3, 2011)
> data(hES)
> # load the PhosphoSitePlus annotations (Hornbeck et al. Nucleic Acids Res. 40:D261-70, 2012)
> data(PhosphoSite)
> 
> # make a subset of hES dataset for demonstrating the example in a short time frame
> ids <- c("CK2A1", "ERK1", "ERK2", "CDK7",
+ "p90RSK", "p70S6K", "PKACA", "CDK1", "DNAPK", "ATM", "CDK2")
> hESs <- hES[rownames(hES) %in% unlist(PhosphoSite.human[ids]),]
> 
> # run CLUE with a repeat of 3 times and a range from 2 to 13
> set.seed(2)
> clueObj <- runClue(Tc=hESs, annotation=PhosphoSite.human, rep=2, kRange=13)
repeat 1 
repeat 2 
> 
> # visualize the evaluation outcome
> Ms <- apply(clueObj$evlMat, 2, mean, na.rm=TRUE)
> Ss <- apply(clueObj$evlMat, 2, sd, na.rm=TRUE)
> library(Hmisc)
Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Loading required package: ggplot2

Attaching package: 'Hmisc'

The following object is masked from 'package:e1071':

    impute

The following objects are masked from 'package:base':

    format.pval, round.POSIXt, trunc.POSIXt, units

> errbar(1:length(Ms), Ms, Ms+Ss, Ms-Ss, cex=1.2, type="b", xaxt="n", xlab="k", ylab="E")
> axis(1, at=1:12, labels=paste("k=", 2:13, sep=""))
> 
> # generate the optimal clustering results
> best <- clustOptimal(clueObj, rep=10, mfrow=c(3, 4))
> 
> # list enriched clusters
> best$enrichList
$`cluster 1`
     kinase pvalue                size
[1,] "CDK7" "0.00141500579716477" "5" 
     substrates                                 
[1,] "MCM2;4;|MCM2;5;|MCM2;7;|MCM2;13;|MCM2;27;"

$`cluster 2`
     kinase pvalue              size
[1,] "CDK2" "0.004316754611606" "15"
     substrates                                                                                                                                                                        
[1,] "HNRNPUL1;716;|HNRNPUL1;718;|MCM4;32;|PAICS;27;|MARCKS;118;|MARCKS;131;|MARCKS;132;|MARCKS;134;|MARCKS;135;|MARCKS;26;|MARCKS;27;|ZC3HC1;394;|ZC3HC1;395;|ELAVL1;202;|MARCKS;133;"

$`cluster 4`
     kinase pvalue                size
[1,] "ERK2" "0.00124540506797472" "5" 
[2,] "ERK1" "0.00267650411136892" "5" 
     substrates                                                
[1,] "NUP50;221;|CTTN;418;|NUP153;516;|NUP153;522;|ZC3HC1;358;"
[2,] "NUP50;221;|CTTN;418;|NUP153;516;|NUP153;522;|ZC3HC1;358;"

$`cluster 5`
     kinase  pvalue                 size
[1,] "CK2A1" "0.000331182736008614" "8" 
     substrates                                                                                 
[1,] "HMGA2;101;|EIF2S2;67;|CREB1;143;|MYH10;1952;|MYH10;1956;|HMGA2;100;|MYH10;1960;|MDC1;378;"

$`cluster 6`
     kinase pvalue                size
[1,] "ATM"  "0.00244506260354681" "5" 
[2,] "CDK1" "0.0109288389890909"  "11"
     substrates                                                                                                               
[1,] "PRKDC;2612;|TP53BP1;1219;|PRKDC;2638;|PRKDC;2647;|PRKDC;2609;"                                                          
[2,] "HMGCS1;495;|LMNA;390;|LMNA;392;|NSFL1C;140;|HMGA1;36;|PPP1R12A;473;|LMNB1;23;|DUT;11;|GIGYF2;30;|HMGA1;53;|HIST1H1E;18;"

$`cluster 7`
     kinase pvalue               size
[1,] "CDK2" "0.0236393247525251" "10"
     substrates                                                                                                            
[1,] "ADRBK1;670;|CTTN;405;|MCM2;53;|MCM2;40;|ANKRD17;2042;|ANKRD17;2044;|ANKRD17;2045;|NUFIP2;219;|NUFIP2;220;|SF3B1;211;"

$`cluster 8`
     kinase   pvalue                 size
[1,] "p70S6K" "8.84512603831859e-06" "7" 
[2,] "PKACA"  "0.00601962416391702"  "5" 
     substrates                                                              
[1,] "NCBP1;22;|RPS6;235;|RPS6;236;|RPS6;240;|RPS6;244;|EIF4B;422;|NCBP1;21;"
[2,] "CTNNB1;552;|RPS6;236;|FLNA;2152;|ARHGEF7;516;|STMN1;16;"               

> 
> # obtain the optimal clustering object (not run)
> # best$clustObj
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>