Last data update: 2014.03.03

R: Sample data for differential splicing analysis
data_dmDSdataR Documentation

Sample data for differential splicing analysis

Description

We use a subset of kallisto transcript counts from PasillaTranscriptExpr package.

Usage

data_dmDSdata

Format

data_dmDSdata is a dmDSdata object. See Examples.

Value

data_dmDSdata

Source

Brooks AN, Yang L, Duff MO, et al. Conservation of an RNA regulatory map between Drosophila and mammals. Genome Res. 2011;21(2):193-202

PasillaTranscriptExpr package

Examples


#############################
### Create dmDSdata object 
#############################
### Get kallisto transcript counts from 'PasillaTranscriptExpr' package

library(PasillaTranscriptExpr)

data_dir  <- system.file("extdata", package = "PasillaTranscriptExpr")

metadata <- read.table(file.path(data_dir, "metadata.txt"), 
 header = TRUE, as.is = TRUE)
metadata

counts <- read.table(file.path(data_dir, "counts.txt"), 
 header = TRUE, as.is = TRUE)
head(counts)

# Create a dmDSdata object
d <- dmDSdata(counts = counts[, metadata$SampleName], 
 gene_id = counts$gene_id, feature_id = counts$feature_id, 
 sample_id = metadata$SampleName, group = metadata$condition)

plotData(d)

# Use a subset of genes, which is defined in the following file
gene_id_subset <- readLines(file.path(data_dir, "gene_id_subset.txt"))
d <- d[names(d) %in% gene_id_subset, ]

plotData(d)

data_dmDSdata <- d

###################################
### Differential splicing analysis
###################################
# If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers

d <- data_dmDSdata

head(counts(d))
samples(d)
head(names(d))
length(d)
d[1:20, ]
d[1:20, 1:3]

### Filtering
# Check what is the minimal number of replicates per condition 
table(samples(d)$group)

d <- dmFilter(d, min_samps_gene_expr = 7, min_samps_feature_expr = 3, 
 min_samps_feature_prop = 0)
plotData(d)

### Calculate dispersion
d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
plotDispersion(d)

head(mean_expression(d))
common_dispersion(d)
head(genewise_dispersion(d))

### Fit full model proportions
d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())

head(proportions(d))
head(statistics(d))

### Fit null model proportions and test for DS
d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
plotTest(d)

head(proportions(d))
head(statistics(d))
head(results(d))

### Plot feature proportions for top DS gene
res <- results(d)
res <- res[order(res$pvalue, decreasing = FALSE), ]

gene_id <- res$gene_id[1]

plotFit(d, gene_id = gene_id)
plotFit(d, gene_id = gene_id, plot_type = "lineplot")
plotFit(d, gene_id = gene_id, plot_type = "ribbonplot")

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(DRIMSeq)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/DRIMSeq/data_dmDSdata.Rd_%03d_medium.png", width=480, height=480)
> ### Name: data_dmDSdata
> ### Title: Sample data for differential splicing analysis
> ### Aliases: data_dmDSdata
> ### Keywords: datasets
> 
> ### ** Examples
> 
> 
> #############################
> ### Create dmDSdata object 
> #############################
> ### Get kallisto transcript counts from 'PasillaTranscriptExpr' package
> 
> library(PasillaTranscriptExpr)
> 
> data_dir  <- system.file("extdata", package = "PasillaTranscriptExpr")
> 
> metadata <- read.table(file.path(data_dir, "metadata.txt"), 
+  header = TRUE, as.is = TRUE)
> metadata
    LibraryName LibraryLayout SampleName condition
1   Untreated-1        SINGLE  GSM461176       CTL
2   Untreated-3        PAIRED  GSM461177       CTL
3   Untreated-4        PAIRED  GSM461178       CTL
4 CG8144_RNAi-1        SINGLE  GSM461179        KD
5 CG8144_RNAi-3        PAIRED  GSM461180        KD
6 CG8144_RNAi-4        PAIRED  GSM461181        KD
7   Untreated-6        SINGLE  GSM461182       CTL
> 
> counts <- read.table(file.path(data_dir, "counts.txt"), 
+  header = TRUE, as.is = TRUE)
> head(counts)
   feature_id     gene_id   GSM461176   GSM461177   GSM461178 GSM461179
1 FBtr0300689 FBgn0031208 0.00000e+00 0.00000e+00 0.00000e+00  27.04866
2 FBtr0300690 FBgn0031208 5.01688e+00 4.00000e+00 1.00518e+00   0.00000
3 FBtr0330654 FBgn0031208 0.00000e+00 0.00000e+00 0.00000e+00   0.00000
4 FBtr0078166 FBgn0002121 1.19845e+02 1.65861e-06 9.54854e-02 685.96100
5 FBtr0078167 FBgn0002121 6.13896e-04 3.66545e-04 6.66691e-01 420.15300
6 FBtr0078168 FBgn0002121 9.76956e+01 2.73017e-06 5.50984e-06 279.84050
    GSM461180   GSM461181   GSM461182
1 0.00000e+00 0.00000e+00 0.00000e+00
2 2.00000e+00 2.00464e+00 0.00000e+00
3 0.00000e+00 0.00000e+00 0.00000e+00
4 1.37563e-03 6.25907e+01 1.86834e+02
5 1.44138e+00 9.81068e-07 5.66896e-05
6 1.32354e-05 0.00000e+00 1.51869e+02
> 
> # Create a dmDSdata object
> d <- dmDSdata(counts = counts[, metadata$SampleName], 
+  gene_id = counts$gene_id, feature_id = counts$feature_id, 
+  sample_id = metadata$SampleName, group = metadata$condition)
> 
> plotData(d)
> 
> # Use a subset of genes, which is defined in the following file
> gene_id_subset <- readLines(file.path(data_dir, "gene_id_subset.txt"))
> d <- d[names(d) %in% gene_id_subset, ]
> 
> plotData(d)
> 
> data_dmDSdata <- d
> 
> ###################################
> ### Differential splicing analysis
> ###################################
> # If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers
> 
> d <- data_dmDSdata
> 
> head(counts(d))
      gene_id  feature_id GSM461176 GSM461177 GSM461178 GSM461182 GSM461179
1 FBgn0000256 FBtr0290077 1100.5800 714.85000  633.5960  638.7270  370.1460
2 FBgn0000256 FBtr0290078  126.0320  37.74080   74.1007  122.9000 1553.8400
3 FBgn0000256 FBtr0290082   12.9458   9.06533    0.0000   17.9112   38.0889
4 FBgn0000256 FBtr0077511 1265.0700 452.53000  349.5160  639.7500 2090.0460
5 FBgn0000256 FBtr0290080    0.0000   0.00000    0.0000    0.0000    0.0000
6 FBgn0000256 FBtr0290081   49.5323  17.05450   17.4484   27.8115  166.6237
  GSM461180 GSM461181
1  109.4460   95.8060
2  283.4050  447.4100
3    0.0000    0.0000
4  191.8950  263.3610
5    0.0000    0.0000
6   44.8433   46.0622
> samples(d)
  sample_id group
1 GSM461176   CTL
2 GSM461177   CTL
3 GSM461178   CTL
4 GSM461182   CTL
5 GSM461179    KD
6 GSM461180    KD
7 GSM461181    KD
> head(names(d))
[1] "FBgn0000256" "FBgn0032003" "FBgn0020309" "FBgn0259735" "FBgn0032785"
[6] "FBgn0040297"
> length(d)
[1] 42
> d[1:20, ]
An object of class dmDSdata 
with 20 genes and 7 samples
* data accessors: counts(), samples()
> d[1:20, 1:3]
An object of class dmDSdata 
with 20 genes and 3 samples
* data accessors: counts(), samples()
> 
> ### Filtering
> # Check what is the minimal number of replicates per condition 
> table(samples(d)$group)

CTL  KD 
  4   3 
> 
> d <- dmFilter(d, min_samps_gene_expr = 7, min_samps_feature_expr = 3, 
+  min_samps_feature_prop = 0)
> plotData(d)
> 
> ### Calculate dispersion
> d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
! Using common_dispersion = 10.7 as disp_init !
> plotDispersion(d)
> 
> head(mean_expression(d))
      gene_id mean_expression
1 FBgn0000256       2622.2863
2 FBgn0020309      13217.7136
3 FBgn0259735      11992.9028
4 FBgn0032785        317.7085
5 FBgn0040297       2568.8159
6 FBgn0032979       4080.7465
> common_dispersion(d)
[1] 10.69633
> head(genewise_dispersion(d))
      gene_id genewise_dispersion
1 FBgn0000256           66.824633
2 FBgn0020309            6.394084
3 FBgn0259735           42.450876
4 FBgn0032785           11.353434
5 FBgn0040297           12.928444
6 FBgn0032979           36.305322
> 
> ### Fit full model proportions
> d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())
> 
> head(proportions(d))
      gene_id  feature_id         CTL          KD
1 FBgn0000256 FBtr0290077 0.355151653 0.077817640
2 FBgn0000256 FBtr0290078 0.044908202 0.268952808
3 FBgn0000256 FBtr0290082 0.007009802 0.002253173
4 FBgn0000256 FBtr0077511 0.285034501 0.221457537
5 FBgn0000256 FBtr0290081 0.018487583 0.038609367
6 FBgn0000256 FBtr0077513 0.278530792 0.376075341
> head(statistics(d))
      gene_id    lik_CTL     lik_KD
1 FBgn0000256 -11836.779 -12928.587
2 FBgn0020309 -37232.882 -59448.103
3 FBgn0259735 -10730.324 -19654.756
4 FBgn0032785  -1084.363  -1085.801
5 FBgn0040297  -8799.471 -11620.110
6 FBgn0032979  -2783.728  -2753.305
> 
> ### Fit null model proportions and test for DS
> d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
Running comparison between groups: CTL, KD
> plotTest(d)
> 
> head(proportions(d))
      gene_id  feature_id         CTL          KD        null
1 FBgn0000256 FBtr0290077 0.355151653 0.077817640 0.207148460
2 FBgn0000256 FBtr0290078 0.044908202 0.268952808 0.106036514
3 FBgn0000256 FBtr0290082 0.007009802 0.002253173 0.004951749
4 FBgn0000256 FBtr0077511 0.285034501 0.221457537 0.286658150
5 FBgn0000256 FBtr0290081 0.018487583 0.038609367 0.027199214
6 FBgn0000256 FBtr0077513 0.278530792 0.376075341 0.354810707
> head(statistics(d))
      gene_id        CTL         KD   lik_null df
1 FBgn0000256 -11836.779 -12928.587 -24818.028  6
2 FBgn0020309 -37232.882 -59448.103 -96690.269  4
3 FBgn0259735 -10730.324 -19654.756 -30385.356  2
4 FBgn0032785  -1084.363  -1085.801  -2171.895  2
5 FBgn0040297  -8799.471 -11620.110 -20422.589  4
6 FBgn0032979  -2783.728  -2753.305  -5537.344  1
> head(results(d))
      gene_id          lr df       pvalue   adj_pvalue
1 FBgn0000256 105.3224556  6 1.940713e-20 5.045854e-19
2 FBgn0020309  18.5696496  4 9.546600e-04 3.545880e-03
3 FBgn0259735   0.5526324  2 7.585730e-01 8.575173e-01
4 FBgn0032785   3.4627070  2 1.770446e-01 3.518222e-01
5 FBgn0040297   6.0150383  4 1.980280e-01 3.518222e-01
6 FBgn0032979   0.6207793  1 4.307578e-01 6.588061e-01
> 
> ### Plot feature proportions for top DS gene
> res <- results(d)
> res <- res[order(res$pvalue, decreasing = FALSE), ]
> 
> gene_id <- res$gene_id[1]
> 
> plotFit(d, gene_id = gene_id)
> plotFit(d, gene_id = gene_id, plot_type = "lineplot")
> plotFit(d, gene_id = gene_id, plot_type = "ribbonplot")
> 
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>