Last data update: 2014.03.03

R: A Function to Perform Characteristic Direction Analysis.
chdirAnalysis {GeoDE} R Documentation

A Function to Perform Characteristic Direction Analysis.

Description

This function takes genome-wide expression data as input and returns the characteristic direction - a unit vector in expression space which characterizes the differential expression. Also produced are 2D projections of the data and the characteristic direction. Optionally, this function will produce an evaluation of the significance of the result.

Usage

chdirAnalysis(datain, sampleclass, gammas = list(1), nnull = 10, CalculateSig = FALSE)

Arguments

datain

A data frame whose first column contains the common gene names and whose remaining columns contain the expression profiles, one column per sample.

sampleclass

A factor with levels "1" and "2" indicating the class of the samples in the data. For each column of the data frame (excluding the gene names) this factor should contain an entry indicating the class from which the sample derives (e.g. control sample, "1", or perturbed sample, "2").

gammas

A set of values for the shrinkage parameter. The default value is gammas = list(1).

nnull

If a significance estimate is to be made the number of random directions used is set with this value.

CalculateSig

A logical value which determines whether a significance estimate is to be calculated.

Value

chdirprops

This is a list of properties of the characteristic direction. The first element is chdirprops$chdir, the vector in expression space whose direction characterises the differential expression. The second element, chdirprops$pca2d, is the 2D PCA projection of the data. The third element, chdirprops$chdir_pca2d, is the 2D projection of the characteristic direction vector into PCA space.

results

A list with an element corresponding to each of the shrinkage parameter values, giving the sorted list of genes and their characteristic direction coefficients. If a significance estimate has been made then only the significant genes are returned here.

plots

For each value of the shrinkage parameter a 2D PCA projection of the data and the characteristic direction is generated. If a significance estimate is made then the significance curve is also produced. A positive peak indicates that the two classes of samples are significantly different.

Author(s)

Neil R Clark and Avi Ma'ayan

References

Clark, Neil R., et al. "The characteristic direction: a geometrical approach to identify differentially expressed genes." BMC bioinformatics 15.1 (2014): 79.

See Also

chdirSig

Examples

##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

##################################
#
# An example characteristic direction analysis
#
##################################

# Load the three example data sets shipped with the package:
# the expression table, the sample class factor and the gamma values.
data(example_expression_data, example_sampleclass, example_gammas)

# Preview the first rows of the expression data
head(example_expression_data)

# Print the sample class factor that labels the expression columns
example_sampleclass

# Run the analysis with a significance estimate based on 10 random directions
chdir_analysis_example <- chdirAnalysis(
  example_expression_data,
  example_sampleclass,
  example_gammas,
  CalculateSig = TRUE,
  nnull = 10
)

# For each value of the shrinkage parameter (gamma), show the first ten
# genes of the sorted results together with their coefficients.
lapply(chdir_analysis_example$results, function(genes) genes[1:10])

# The output of the chdirSig function is also returned: for example,
# chdir_analysis_example$chdirprops[[1]] gives the whole characteristic
# direction vector for each value of gamma.
lapply(chdir_analysis_example$chdirprops[[1]], head)

# The estimated number of significant genes is stored in number_sig_genes
chdir_analysis_example$chdirprops$number_sig_genes


## The function is currently defined as
function (datain, sampleclass, gammas = list(1), nnull = 3, CalculateSig = FALSE) 
{
    # Validate the inputs, run chdirSig and chdirplots on them, and return
    # the characteristic direction coefficients ordered by squared size.
    #
    # Arguments:
    #   datain        data frame; gene names in the first column followed by
    #                 one expression column per sample.
    #   sampleclass   factor with levels "1" and "2", one entry per
    #                 expression column of datain.
    #   gammas        shrinkage parameter values, handed to chdirSig and
    #                 chdirplots unchanged.
    #   nnull         handed to chdirSig unchanged.
    #   CalculateSig  logical; when TRUE each result is truncated to the
    #                 number of genes given by the sixth element of the
    #                 chdirSig output.
    #
    # Value:
    #   list(chdirprops = <chdirSig output>,
    #        results    = <list with one sorted result per element of
    #                      chdirprops[[1]]>)
    #
    # Stops with an error when any of the input checks below fails.

    # Test the type first: the length comparison that follows is only
    # meaningful for a data frame, where length() is the column count.
    if (!is.data.frame(datain)) 
        stop("Input data is not in the form of a data frame")
    if (length(sampleclass) != (length(datain) - 1)) 
        stop("number of elements in sampleclass is inconsistent with input data")
    if (!all(c("1", "2") %in% levels(sampleclass))) 
        stop("sample class does not include '1' and '2'")
    # Count the samples from sampleclass itself. Indexing datain with the
    # logical vector would recycle it across the extra gene-name column
    # and report the wrong number of samples.
    if (sum(sampleclass == "1", na.rm = TRUE) < 2) 
        stop("too few controll samples")
    if (sum(sampleclass == "2", na.rm = TRUE) < 2) 
        stop("too few samples")
    chdirresults <- chdirSig(datain, sampleclass, gammas, nnull = nnull, 
        CalculateSig = CalculateSig)
    chdirplots(chdirresults, sampleclass, gammas, CalculateSig)
    if (CalculateSig) {
        # seq_len() yields an empty selection when ns is 0 (1:ns would
        # pick the first gene), and SIMPLIFY = FALSE keeps the result a
        # list even when every element has the same length.
        results <- mapply(function(x, ns) {
            x[sort.list(x^2, decreasing = TRUE)[seq_len(ns)], ]
        }, chdirresults[[1]], chdirresults[[6]], SIMPLIFY = FALSE)
    }
    else {
        results <- lapply(chdirresults[[1]], function(x) {
            x[sort.list(x^2, decreasing = TRUE), ]
        })
    }
    list(chdirprops = chdirresults, results = results)
}

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(GeoDE)
Loading required package: Matrix
Loading required package: MASS
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/GeoDE/chdirAnalysis.Rd_%03d_medium.png", width=480, height=480)
> ### Name: chdirAnalysis
> ### Title: A Function to Perform Characteristic Direction Analysis.
> ### Aliases: chdirAnalysis
> ### Keywords: differential expression DE genes
> 
> ### ** Examples
> 
> ##---- Should be DIRECTLY executable !! ----
> ##-- ==>  Define data, use random,
> ##--	or do  help(data=index)  for the standard data sets.
> 
> ##################################
> #
> # An example characteristic direction analysis
> #
> ##################################
> 
> # Load the example data
> 
> data(example_expression_data)
> data(example_sampleclass)
> data(example_gammas)
> 
> # Examine the expression data
> head(example_expression_data)
  genenames  Controll Controll.1 Controll.2      Pert.    Pert..1   Pert..2
1   MTERFD2 138.64200 167.130000 156.199000 186.640000 122.005000 161.38200
2     SCRIB  52.65380  38.977800  68.963200  94.300900  60.634300  99.01180
3      ZXDC  59.37390  53.952500  55.103300  82.780500  52.770000  80.15000
4    MRPL32 333.80200 375.288000 475.200000 477.085000 327.193000 468.31600
5     WDR69   0.33557   0.614205   0.989874   0.421603   0.890432   1.05624
6     FOXL1   1.03177   0.901720   1.644250   1.170400   1.337110   1.31237
> 
> # Examine the corresponding sample class factor
> example_sampleclass
[1] 1 1 1 2 2 2
Levels: 1 2
> 
> # Run the analysis
> chdir_analysis_example <- chdirAnalysis(example_expression_data,example_sampleclass,example_gammas
+ ,CalculateSig=TRUE,nnull=10)
   |                                                                               |                                                                      |   0%   |                                                                               |=======                                                               |  10%   |                                                                               |==============                                                        |  20%   |                                                                               |=====================                                                 |  30%   |                                                                               |============================                                          |  40%   |                                                                               |===================================                                   |  50%   |                                                                               |==========================================                            |  60%   |                                                                               |=================================================                     |  70%   |                                                                               |========================================================              |  80%   |                                                                               |===============================================================       |  90%   |                                                                               |======================================================================| 100%
> 
> # Examine the results with the first value of the shrinkage parameter (gamma)
> 
> # show the first few of the most important genes.
> 
> lapply(chdir_analysis_example$results, function(x) x[1:10])
[[1]]
      MCL1      LIMD2      RPL27    MRPS18A      TBL1X       SOD1       DPP4 
-0.6078472  0.3791251 -0.3477567  0.2718869 -0.2083958  0.1979625  0.1789876 
      NOX4     POLR2I    ZDHHC20 
 0.1386403 -0.1214708 -0.1193214 

> 
> # We can also extract the results of the code{chdirSig} function
> # for example chdir_analysis_example$chdirprops[[1]] gives the whole
> # characteristic direction vector for each value of gamma:
> 
> lapply(chdir_analysis_example$chdirprops[[1]],head)
[[1]]
                    1
MTERFD2 -0.0005105981
SCRIB    0.0148638842
ZXDC     0.0198058553
MRPL32  -0.0986935062
WDR69   -0.0002376169
FOXL1   -0.0006025896

> 
> # and the estimated number of significant genes can be recovered with
> 
> chdir_analysis_example$chdirprops$number_sig_genes
[[1]]
[1] 87

> 
> 
> ## The function is currently defined as
> function (datain, sampleclass, gammas = list(1), nnull = 3, CalculateSig = FALSE) 
+ {
+     if (length(sampleclass) != (length(datain) - 1)) 
+         stop("number of elements in sampleclass is inconsistent with input data")
+     if (!is.data.frame(datain)) 
+         stop("Input data is not in the form of a data frame")
+     if (FALSE %in% (c("1", "2") %in% levels(sampleclass))) 
+         stop("sample class does not include '1' and '2'")
+     if (length(datain[sampleclass == 1]) < 2) 
+         stop("too few controll samples")
+     if (length(datain[sampleclass == 2]) < 2) 
+         stop("too few samples")
+     chdirresults <- chdirSig(datain, sampleclass, gammas, nnull = nnull, 
+         CalculateSig = CalculateSig)
+     chdirplots(chdirresults, sampleclass, gammas, CalculateSig)
+     outAll <- lapply(chdirresults[[1]], function(x) {
+         x[sort.list(x^2, decreasing = TRUE), ]
+     })
+     if (CalculateSig) {
+         outSig <- mapply(function(x, ns) {
+             x[sort.list(x^2, decreasing = TRUE)[1:ns], ]
+         }, chdirresults[[1]], chdirresults[[6]])
+         list(chdirprops = chdirresults, results = outSig)
+     }
+     else {
+         list(chdirprops = chdirresults, results = outAll)
+     }
+   }
function (datain, sampleclass, gammas = list(1), nnull = 3, CalculateSig = FALSE) 
{
    if (length(sampleclass) != (length(datain) - 1)) 
        stop("number of elements in sampleclass is inconsistent with input data")
    if (!is.data.frame(datain)) 
        stop("Input data is not in the form of a data frame")
    if (FALSE %in% (c("1", "2") %in% levels(sampleclass))) 
        stop("sample class does not include '1' and '2'")
    if (length(datain[sampleclass == 1]) < 2) 
        stop("too few controll samples")
    if (length(datain[sampleclass == 2]) < 2) 
        stop("too few samples")
    chdirresults <- chdirSig(datain, sampleclass, gammas, nnull = nnull, 
        CalculateSig = CalculateSig)
    chdirplots(chdirresults, sampleclass, gammas, CalculateSig)
    outAll <- lapply(chdirresults[[1]], function(x) {
        x[sort.list(x^2, decreasing = TRUE), ]
    })
    if (CalculateSig) {
        outSig <- mapply(function(x, ns) {
            x[sort.list(x^2, decreasing = TRUE)[1:ns], ]
        }, chdirresults[[1]], chdirresults[[6]])
        list(chdirprops = chdirresults, results = outSig)
    }
    else {
        list(chdirprops = chdirresults, results = outAll)
    }
}
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>