Last data update: 2014.03.03

R: Generate optimal clustering
clustOptimal R Documentation

Generate optimal clustering

Description

Takes a clue output and generates the optimal clustering of the time-course data.

Usage

clustOptimal(clueObj, rep, user.maxK = NULL, visualize = TRUE, ...)

Arguments

clueObj

the output from runClue.

rep

number of times the clustering is to be applied to find the best clustering result.

user.maxK

user-defined optimal k value for generating the optimal clustering. If not provided, the optimal k identified by clue will be used.

visualize

a boolean parameter indicating whether to visualize the clustering results.

...

additional parameters passed on to control the plot if visualize is TRUE.

Value

returns a list containing the optimal clustering object and the enriched kinases or gene sets.

Examples

# simulate time-series data with 4 distinct profile groups, each group with
# a size of 50 phosphorylation sites.
simuData <- temporalSimu(seed=1, groupSize=50, sdd=1, numGroups=4)

# create an artificial annotation database. Generate 20 kinase-substrate groups, each
# comprising 10 substrates assigned to a kinase.
# The first 4 groups each contain phosphorylation sites defined to have the
# same temporal profile (the first 10 sites of each simulated group of 50).
kinaseAnno <- list()
groupSize <- 50
for (i in 1:4) {
 kinaseAnno[[i]] <- paste("p", (groupSize*(i-1)+1):(groupSize*(i-1)+10), sep="_")
}

# The remaining 16 groups each get 10 randomly sampled sites; the seed is set
# per group so that the sampled sets are reproducible.
for (i in 5:20) {
 set.seed(i)
 kinaseAnno[[i]] <- paste("p", sample.int(nrow(simuData), size = 10), sep="_")
}
names(kinaseAnno) <- paste("KS", 1:20, sep="_")

# run CLUE with 5 repeats (rep=5) and a range of k from 2 to 7 (kRange=7)
set.seed(1)
clueObj <- runClue(Tc=simuData, annotation=kinaseAnno, rep=5, kRange=7)

# visualize the evaluation outcome: column-wise mean and standard deviation
# of the evaluation matrix, drawn as points with error bars
Ms <- apply(clueObj$evlMat, 2, mean, na.rm=TRUE)
Ss <- apply(clueObj$evlMat, 2, sd, na.rm=TRUE)
library(Hmisc)
errbar(1:length(Ms), Ms, Ms+Ss, Ms-Ss, cex=1.2, type="b", xaxt="n", xlab="k", ylab="E")
# the 6 x positions are labelled k=2 ... k=7; these hard-coded values match kRange=7 above
axis(1, at=1:6, labels=paste("k=", 2:7, sep=""))

# generate optimal clustering results using the optimal k determined by CLUE
# (mfrow is an additional plotting parameter passed through `...`)
best <- clustOptimal(clueObj, rep=3, mfrow=c(2, 3))

# list enriched clusters
best$enrichList

# obtain the optimal clustering object (not run)
# best$clustObj

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(ClueR)
Loading required package: e1071
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/ClueR/clustOptimal.Rd_%03d_medium.png", width=480, height=480)
> ### Name: clustOptimal
> ### Title: Generate optimal clustering
> ### Aliases: clustOptimal
> 
> ### ** Examples
> 
> # simulate a time-series data with 4 distinctive profile groups and each group with
> # a size of 50 phosphorylation sites.
> simuData <- temporalSimu(seed=1, groupSize=50, sdd=1, numGroups=4)
> 
> # create an artificial annotation database. Generate 20 kinase-substrate groups each
> # comprising 10 substrates assigned to a kinase.
> # among them, create 4 groups each contains phosphorylation sites defined to have the
> # same temporal profile.
> kinaseAnno <- list()
> groupSize <- 50
> for (i in 1:4) {
+  kinaseAnno[[i]] <- paste("p", (groupSize*(i-1)+1):(groupSize*(i-1)+10), sep="_")
+ }
> 
> for (i in 5:20) {
+  set.seed(i)
+  kinaseAnno[[i]] <- paste("p", sample.int(nrow(simuData), size = 10), sep="_")
+ }
> names(kinaseAnno) <- paste("KS", 1:20, sep="_")
> 
> # run CLUE with a repeat of 5 times and a range from 2 to 7
> set.seed(1)
> clueObj <- runClue(Tc=simuData, annotation=kinaseAnno, rep=5, kRange=7)
repeat 1 
repeat 2 
repeat 3 
repeat 4 
repeat 5 
> 
> # visualize the evaluation outcome
> Ms <- apply(clueObj$evlMat, 2, mean, na.rm=TRUE)
> Ss <- apply(clueObj$evlMat, 2, sd, na.rm=TRUE)
> library(Hmisc)
Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Loading required package: ggplot2

Attaching package: 'Hmisc'

The following object is masked from 'package:e1071':

    impute

The following objects are masked from 'package:base':

    format.pval, round.POSIXt, trunc.POSIXt, units

> errbar(1:length(Ms), Ms, Ms+Ss, Ms-Ss, cex=1.2, type="b", xaxt="n", xlab="k", ylab="E")
> axis(1, at=1:6, labels=paste("k=", 2:7, sep=""))
> 
> # generate optimal clustering results using the optimal k determined by CLUE
> best <- clustOptimal(clueObj, rep=3, mfrow=c(2, 3))
> 
> # list enriched clusters
> best$enrichList
$`cluster 1`
     kinase  pvalue                 size
[1,] "KS_1"  "2.32414913991406e-06" "10"
[2,] "KS_12" "0.0364491355691728"   "6" 
     substrates                                
[1,] "p_1|p_2|p_3|p_4|p_5|p_6|p_7|p_8|p_9|p_10"
[2,] "p_2|p_5|p_7|p_14|p_34|p_35"              

$`cluster 2`
     kinase pvalue                 size
[1,] "KS_4" "5.69137651667111e-07" "10"
     substrates                                                   
[1,] "p_151|p_152|p_153|p_154|p_155|p_156|p_157|p_158|p_159|p_160"

$`cluster 3`
     kinase  pvalue                 size substrates                          
[1,] "KS_2"  "0.000348551790215745" "7"  "p_51|p_52|p_53|p_54|p_55|p_57|p_58"
[2,] "KS_14" "0.0207823584031702"   "5"  "p_51|p_73|p_83|p_94|p_100"         

$`cluster 4`
     kinase pvalue                 size
[1,] "KS_3" "3.56487129089672e-05" "9" 
     substrates                                             
[1,] "p_101|p_102|p_103|p_105|p_106|p_107|p_108|p_109|p_110"

> 
> # obtain the optimal clustering object (not run)
> # best$clustObj
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>