Last data update: 2014.03.03
|
R: Sample data for differential splicing analysis
data_dmDSdata | R Documentation |
Sample data for differential splicing analysis
Description
We use a subset of kallisto transcript counts from
PasillaTranscriptExpr package.
Usage
data_dmDSdata
Format
data_dmDSdata is a dmDSdata object. See
Examples.
Value
data_dmDSdata
Source
Brooks AN, Yang L, Duff MO, et al. Conservation of an RNA regulatory
map between Drosophila and mammals. Genome Res. 2011;21(2):193-202
PasillaTranscriptExpr package
Examples
#############################
### Create dmDSdata object
#############################
### Get kallisto transcript counts from 'PasillaTranscriptExpr' package
library(PasillaTranscriptExpr)
data_dir <- system.file("extdata", package = "PasillaTranscriptExpr")
metadata <- read.table(file.path(data_dir, "metadata.txt"),
header = TRUE, as.is = TRUE)
metadata
counts <- read.table(file.path(data_dir, "counts.txt"),
header = TRUE, as.is = TRUE)
head(counts)
# Create a dmDSdata object
d <- dmDSdata(counts = counts[, metadata$SampleName],
gene_id = counts$gene_id, feature_id = counts$feature_id,
sample_id = metadata$SampleName, group = metadata$condition)
plotData(d)
# Use a subset of genes, which is defined in the following file
gene_id_subset <- readLines(file.path(data_dir, "gene_id_subset.txt"))
d <- d[names(d) %in% gene_id_subset, ]
plotData(d)
data_dmDSdata <- d
###################################
### Differential splicing analysis
###################################
# If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers
d <- data_dmDSdata
head(counts(d))
samples(d)
head(names(d))
length(d)
d[1:20, ]
d[1:20, 1:3]
### Filtering
# Check what is the minimal number of replicates per condition
table(samples(d)$group)
d <- dmFilter(d, min_samps_gene_expr = 7, min_samps_feature_expr = 3,
min_samps_feature_prop = 0)
plotData(d)
### Calculate dispersion
d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
plotDispersion(d)
head(mean_expression(d))
common_dispersion(d)
head(genewise_dispersion(d))
### Fit full model proportions
d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())
head(proportions(d))
head(statistics(d))
### Fit null model proportions and test for DS
d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
plotTest(d)
head(proportions(d))
head(statistics(d))
head(results(d))
### Plot feature proportions for top DS gene
res <- results(d)
res <- res[order(res$pvalue, decreasing = FALSE), ]
gene_id <- res$gene_id[1]
plotFit(d, gene_id = gene_id)
plotFit(d, gene_id = gene_id, plot_type = "lineplot")
plotFit(d, gene_id = gene_id, plot_type = "ribbonplot")
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(DRIMSeq)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/DRIMSeq/data_dmDSdata.Rd_%03d_medium.png", width=480, height=480)
> ### Name: data_dmDSdata
> ### Title: Sample data for differential splicing analysis
> ### Aliases: data_dmDSdata
> ### Keywords: datasets
>
> ### ** Examples
>
>
> #############################
> ### Create dmDSdata object
> #############################
> ### Get kallisto transcript counts from 'PasillaTranscriptExpr' package
>
> library(PasillaTranscriptExpr)
>
> data_dir <- system.file("extdata", package = "PasillaTranscriptExpr")
>
> metadata <- read.table(file.path(data_dir, "metadata.txt"),
+ header = TRUE, as.is = TRUE)
> metadata
LibraryName LibraryLayout SampleName condition
1 Untreated-1 SINGLE GSM461176 CTL
2 Untreated-3 PAIRED GSM461177 CTL
3 Untreated-4 PAIRED GSM461178 CTL
4 CG8144_RNAi-1 SINGLE GSM461179 KD
5 CG8144_RNAi-3 PAIRED GSM461180 KD
6 CG8144_RNAi-4 PAIRED GSM461181 KD
7 Untreated-6 SINGLE GSM461182 CTL
>
> counts <- read.table(file.path(data_dir, "counts.txt"),
+ header = TRUE, as.is = TRUE)
> head(counts)
feature_id gene_id GSM461176 GSM461177 GSM461178 GSM461179
1 FBtr0300689 FBgn0031208 0.00000e+00 0.00000e+00 0.00000e+00 27.04866
2 FBtr0300690 FBgn0031208 5.01688e+00 4.00000e+00 1.00518e+00 0.00000
3 FBtr0330654 FBgn0031208 0.00000e+00 0.00000e+00 0.00000e+00 0.00000
4 FBtr0078166 FBgn0002121 1.19845e+02 1.65861e-06 9.54854e-02 685.96100
5 FBtr0078167 FBgn0002121 6.13896e-04 3.66545e-04 6.66691e-01 420.15300
6 FBtr0078168 FBgn0002121 9.76956e+01 2.73017e-06 5.50984e-06 279.84050
GSM461180 GSM461181 GSM461182
1 0.00000e+00 0.00000e+00 0.00000e+00
2 2.00000e+00 2.00464e+00 0.00000e+00
3 0.00000e+00 0.00000e+00 0.00000e+00
4 1.37563e-03 6.25907e+01 1.86834e+02
5 1.44138e+00 9.81068e-07 5.66896e-05
6 1.32354e-05 0.00000e+00 1.51869e+02
>
> # Create a dmDSdata object
> d <- dmDSdata(counts = counts[, metadata$SampleName],
+ gene_id = counts$gene_id, feature_id = counts$feature_id,
+ sample_id = metadata$SampleName, group = metadata$condition)
>
> plotData(d)
>
> # Use a subset of genes, which is defined in the following file
> gene_id_subset <- readLines(file.path(data_dir, "gene_id_subset.txt"))
> d <- d[names(d) %in% gene_id_subset, ]
>
> plotData(d)
>
> data_dmDSdata <- d
>
> ###################################
> ### Differential splicing analysis
> ###################################
> # If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers
>
> d <- data_dmDSdata
>
> head(counts(d))
gene_id feature_id GSM461176 GSM461177 GSM461178 GSM461182 GSM461179
1 FBgn0000256 FBtr0290077 1100.5800 714.85000 633.5960 638.7270 370.1460
2 FBgn0000256 FBtr0290078 126.0320 37.74080 74.1007 122.9000 1553.8400
3 FBgn0000256 FBtr0290082 12.9458 9.06533 0.0000 17.9112 38.0889
4 FBgn0000256 FBtr0077511 1265.0700 452.53000 349.5160 639.7500 2090.0460
5 FBgn0000256 FBtr0290080 0.0000 0.00000 0.0000 0.0000 0.0000
6 FBgn0000256 FBtr0290081 49.5323 17.05450 17.4484 27.8115 166.6237
GSM461180 GSM461181
1 109.4460 95.8060
2 283.4050 447.4100
3 0.0000 0.0000
4 191.8950 263.3610
5 0.0000 0.0000
6 44.8433 46.0622
> samples(d)
sample_id group
1 GSM461176 CTL
2 GSM461177 CTL
3 GSM461178 CTL
4 GSM461182 CTL
5 GSM461179 KD
6 GSM461180 KD
7 GSM461181 KD
> head(names(d))
[1] "FBgn0000256" "FBgn0032003" "FBgn0020309" "FBgn0259735" "FBgn0032785"
[6] "FBgn0040297"
> length(d)
[1] 42
> d[1:20, ]
An object of class dmDSdata
with 20 genes and 7 samples
* data accessors: counts(), samples()
> d[1:20, 1:3]
An object of class dmDSdata
with 20 genes and 3 samples
* data accessors: counts(), samples()
>
> ### Filtering
> # Check what is the minimal number of replicates per condition
> table(samples(d)$group)
CTL KD
4 3
>
> d <- dmFilter(d, min_samps_gene_expr = 7, min_samps_feature_expr = 3,
+ min_samps_feature_prop = 0)
> plotData(d)
>
> ### Calculate dispersion
> d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
! Using common_dispersion = 10.7 as disp_init !
> plotDispersion(d)
>
> head(mean_expression(d))
gene_id mean_expression
1 FBgn0000256 2622.2863
2 FBgn0020309 13217.7136
3 FBgn0259735 11992.9028
4 FBgn0032785 317.7085
5 FBgn0040297 2568.8159
6 FBgn0032979 4080.7465
> common_dispersion(d)
[1] 10.69633
> head(genewise_dispersion(d))
gene_id genewise_dispersion
1 FBgn0000256 66.824633
2 FBgn0020309 6.394084
3 FBgn0259735 42.450876
4 FBgn0032785 11.353434
5 FBgn0040297 12.928444
6 FBgn0032979 36.305322
>
> ### Fit full model proportions
> d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())
>
> head(proportions(d))
gene_id feature_id CTL KD
1 FBgn0000256 FBtr0290077 0.355151653 0.077817640
2 FBgn0000256 FBtr0290078 0.044908202 0.268952808
3 FBgn0000256 FBtr0290082 0.007009802 0.002253173
4 FBgn0000256 FBtr0077511 0.285034501 0.221457537
5 FBgn0000256 FBtr0290081 0.018487583 0.038609367
6 FBgn0000256 FBtr0077513 0.278530792 0.376075341
> head(statistics(d))
gene_id lik_CTL lik_KD
1 FBgn0000256 -11836.779 -12928.587
2 FBgn0020309 -37232.882 -59448.103
3 FBgn0259735 -10730.324 -19654.756
4 FBgn0032785 -1084.363 -1085.801
5 FBgn0040297 -8799.471 -11620.110
6 FBgn0032979 -2783.728 -2753.305
>
> ### Fit null model proportions and test for DS
> d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
Running comparison between groups: CTL, KD
> plotTest(d)
>
> head(proportions(d))
gene_id feature_id CTL KD null
1 FBgn0000256 FBtr0290077 0.355151653 0.077817640 0.207148460
2 FBgn0000256 FBtr0290078 0.044908202 0.268952808 0.106036514
3 FBgn0000256 FBtr0290082 0.007009802 0.002253173 0.004951749
4 FBgn0000256 FBtr0077511 0.285034501 0.221457537 0.286658150
5 FBgn0000256 FBtr0290081 0.018487583 0.038609367 0.027199214
6 FBgn0000256 FBtr0077513 0.278530792 0.376075341 0.354810707
> head(statistics(d))
gene_id CTL KD lik_null df
1 FBgn0000256 -11836.779 -12928.587 -24818.028 6
2 FBgn0020309 -37232.882 -59448.103 -96690.269 4
3 FBgn0259735 -10730.324 -19654.756 -30385.356 2
4 FBgn0032785 -1084.363 -1085.801 -2171.895 2
5 FBgn0040297 -8799.471 -11620.110 -20422.589 4
6 FBgn0032979 -2783.728 -2753.305 -5537.344 1
> head(results(d))
gene_id lr df pvalue adj_pvalue
1 FBgn0000256 105.3224556 6 1.940713e-20 5.045854e-19
2 FBgn0020309 18.5696496 4 9.546600e-04 3.545880e-03
3 FBgn0259735 0.5526324 2 7.585730e-01 8.575173e-01
4 FBgn0032785 3.4627070 2 1.770446e-01 3.518222e-01
5 FBgn0040297 6.0150383 4 1.980280e-01 3.518222e-01
6 FBgn0032979 0.6207793 1 4.307578e-01 6.588061e-01
>
> ### Plot feature proportions for top DS gene
> res <- results(d)
> res <- res[order(res$pvalue, decreasing = FALSE), ]
>
> gene_id <- res$gene_id[1]
>
> plotFit(d, gene_id = gene_id)
> plotFit(d, gene_id = gene_id, plot_type = "lineplot")
> plotFit(d, gene_id = gene_id, plot_type = "ribbonplot")
>
>
>
>
>
>
> dev.off()
null device
1
>
|