A subset of data from GEUVADIS project where 462 RNA-Seq samples from
lymphoblastoid cell lines were obtained. The genome sequencing data of the
same individuals is provided by the 1000 Genomes Project. The samples in this
project come from five populations: CEPH (CEU), Finns (FIN), British (GBR),
Toscani (TSI) and Yoruba (YRI). Here, we use a subset of CEPH data from
chromosome 19 available in GeuvadisTranscriptExpr package.
Usage
data_dmSQTLdata
Format
data_dmSQTLdata is a dmSQTLdata object.
See Examples.
Value
data_dmSQTLdata
Source
Lappalainen T, Sammeth M, Friedlander MR, et al. Transcriptome and
genome sequencing uncovers functional variation in humans. Nature.
2013;501(7468):506-11
GeuvadisTranscriptExpr package
Examples
#############################
### Create dmSQTLdata object
#############################
# Use subsets of data defined in GeuvadisTranscriptExpr package
library(GeuvadisTranscriptExpr)
counts <- GeuvadisTranscriptExpr::counts
genotypes <- GeuvadisTranscriptExpr::genotypes
gene_ranges <- GeuvadisTranscriptExpr::gene_ranges
snp_ranges <- GeuvadisTranscriptExpr::snp_ranges
# Make sure that samples in counts and genotypes are in the same order
sample_id <- colnames(counts[, -(1:2)])
d <- dmSQTLdataFromRanges(counts = counts[, sample_id],
gene_id = counts$Gene_Symbol, feature_id = counts$TargetID,
gene_ranges = gene_ranges, genotypes = genotypes[, sample_id],
snp_id = genotypes$snpId, snp_ranges = snp_ranges, sample_id = sample_id,
window = 5e3, BPPARAM = BiocParallel::SerialParam())
plotData(d)
data_dmSQTLdata <- d
#############################
### sQTL analysis
#############################
# If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers
d <- data_dmSQTLdata
head(names(d))
length(d)
d[1:10, ]
d[1:10, 1:10]
### Filtering
d <- dmFilter(d, min_samps_gene_expr = 70, min_samps_feature_expr = 5,
min_samps_feature_prop = 0, minor_allele_freq = 5,
BPPARAM = BiocParallel::SerialParam())
plotData(d)
### Calculate dispersion
d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
plotDispersion(d)
### Fit full model proportions
d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())
### Fit null model proportions and test for sQTLs
d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
plotTest(d)
head(results(d))
### Plot feature proportions for top sQTL
res <- results(d)
res <- res[order(res$pvalue, decreasing = FALSE), ]
gene_id <- res$gene_id[1]
snp_id <- res$snp_id[1]
plotFit(d, gene_id, snp_id)
plotFit(d, gene_id, snp_id, plot_type = "boxplot2", order = FALSE)
plotFit(d, gene_id, snp_id, plot_type = "ribbonplot")
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(DRIMSeq)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/DRIMSeq/data_dmSQTLdata.Rd_%03d_medium.png", width=480, height=480)
> ### Name: data_dmSQTLdata
> ### Title: Sample data for sQTL analysis
> ### Aliases: data_dmSQTLdata
> ### Keywords: datasets
>
> ### ** Examples
>
>
> #############################
> ### Create dmSQTLdata object
> #############################
>
> # Use subsets of data defined in GeuvadisTranscriptExpr package
> library(GeuvadisTranscriptExpr)
>
> counts <- GeuvadisTranscriptExpr::counts
> genotypes <- GeuvadisTranscriptExpr::genotypes
> gene_ranges <- GeuvadisTranscriptExpr::gene_ranges
> snp_ranges <- GeuvadisTranscriptExpr::snp_ranges
>
> # Make sure that samples in counts and genotypes are in the same order
> sample_id <- colnames(counts[, -(1:2)])
>
> d <- dmSQTLdataFromRanges(counts = counts[, sample_id],
+ gene_id = counts$Gene_Symbol, feature_id = counts$TargetID,
+ gene_ranges = gene_ranges, genotypes = genotypes[, sample_id],
+ snp_id = genotypes$snpId, snp_ranges = snp_ranges, sample_id = sample_id,
+ window = 5e3, BPPARAM = BiocParallel::SerialParam())
>
> plotData(d)
[[1]]
[[2]]
[[3]]
>
> data_dmSQTLdata <- d
>
> #############################
> ### sQTL analysis
> #############################
> # If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers
>
> d <- data_dmSQTLdata
>
> head(names(d))
[1] "ENSG00000221983.2" "ENSG00000095066.6" "ENSG00000167604.7"
[4] "ENSG00000105290.4" "ENSG00000079462.2" "ENSG00000152443.8"
> length(d)
[1] 50
> d[1:10, ]
An object of class dmSQTLdata
with 10 genes and 91 samples
> d[1:10, 1:10]
An object of class dmSQTLdata
with 10 genes and 10 samples
>
> ### Filtering
> d <- dmFilter(d, min_samps_gene_expr = 70, min_samps_feature_expr = 5,
+ min_samps_feature_prop = 0, minor_allele_freq = 5,
+ BPPARAM = BiocParallel::SerialParam())
> plotData(d)
[[1]]
[[2]]
[[3]]
>
>
> ### Calculate dispersion
> d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
! Using common_dispersion = 4 as disp_init !
> plotDispersion(d)
>
> ### Fit full model proportions
> d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())
>
> ### Fit null model proportions and test for sQTLs
> d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
> plotTest(d)
>
> head(results(d))
gene_id block_id snp_id lr df pvalue adj_pvalue
1 ENSG00000221983.2 block_3 snp_19_18678970 1.262112 2 0.5320296 0.999976
1.1 ENSG00000221983.2 block_3 snp_19_18685964 1.262112 2 0.5320296 0.999976
1.2 ENSG00000221983.2 block_3 snp_19_18687175 1.262112 2 0.5320296 0.999976
1.3 ENSG00000221983.2 block_3 snp_19_18689904 1.262112 2 0.5320296 0.999976
2 ENSG00000221983.2 block_4 snp_19_18679155 1.075932 2 0.5839347 0.999976
2.1 ENSG00000221983.2 block_4 snp_19_18679379 1.075932 2 0.5839347 0.999976
>
> ### Plot feature proportions for top sQTL
> res <- results(d)
> res <- res[order(res$pvalue, decreasing = FALSE), ]
>
> gene_id <- res$gene_id[1]
> snp_id <- res$snp_id[1]
>
> plotFit(d, gene_id, snp_id)
> plotFit(d, gene_id, snp_id, plot_type = "boxplot2", order = FALSE)
> plotFit(d, gene_id, snp_id, plot_type = "ribbonplot")
>
>
>
>
>
>
> dev.off()
null device
1
>