Last data update: 2014.03.03

R: Sample data for sQTL analysis
data_dmSQTLdataR Documentation

Sample data for sQTL analysis

Description

A subset of data from GEUVADIS project where 462 RNA-Seq samples from lymphoblastoid cell lines were obtained. The genome sequencing data of the same individuals is provided by the 1000 Genomes Project. The samples in this project come from five populations: CEPH (CEU), Finns (FIN), British (GBR), Toscani (TSI) and Yoruba (YRI). Here, we use a subset of CEPH data from chromosome 19 available in GeuvadisTranscriptExpr package.

Usage

data_dmSQTLdata

Format

data_dmSQTLdata is a dmSQTLdata object. See Examples.

Value

data_dmSQTLdata

Source

Lappalainen T, Sammeth M, Friedlander MR, et al. Transcriptome and genome sequencing uncovers functional variation in humans. Nature. 2013;501(7468):506-11

GeuvadisTranscriptExpr package

Examples


#############################
### Create dmSQTLdata object
#############################

# Use subsets of data defined in GeuvadisTranscriptExpr package
library(GeuvadisTranscriptExpr)

counts <- GeuvadisTranscriptExpr::counts
genotypes <- GeuvadisTranscriptExpr::genotypes
gene_ranges <- GeuvadisTranscriptExpr::gene_ranges
snp_ranges <- GeuvadisTranscriptExpr::snp_ranges

# Make sure that samples in counts and genotypes are in the same order
sample_id <- colnames(counts[, -(1:2)])

d <- dmSQTLdataFromRanges(counts = counts[, sample_id], 
   gene_id = counts$Gene_Symbol, feature_id = counts$TargetID, 
   gene_ranges = gene_ranges, genotypes = genotypes[, sample_id], 
   snp_id = genotypes$snpId, snp_ranges = snp_ranges, sample_id = sample_id, 
   window = 5e3, BPPARAM = BiocParallel::SerialParam())

plotData(d)

data_dmSQTLdata <- d

#############################
### sQTL analysis
#############################
# If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers

d <- data_dmSQTLdata

head(names(d))
length(d)
d[1:10, ]
d[1:10, 1:10]

### Filtering
d <- dmFilter(d, min_samps_gene_expr = 70, min_samps_feature_expr = 5, 
   min_samps_feature_prop = 0, minor_allele_freq = 5, 
   BPPARAM = BiocParallel::SerialParam())
plotData(d)


### Calculate dispersion
d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
plotDispersion(d)

### Fit full model proportions
d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())

### Fit null model proportions and test for sQTLs
d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
plotTest(d)

head(results(d))

### Plot feature proportions for top sQTL
res <- results(d)
res <- res[order(res$pvalue, decreasing = FALSE), ]

gene_id <- res$gene_id[1]
snp_id <- res$snp_id[1]

plotFit(d, gene_id, snp_id)
plotFit(d, gene_id, snp_id, plot_type = "boxplot2", order = FALSE)
plotFit(d, gene_id, snp_id, plot_type = "ribbonplot")

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(DRIMSeq)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/DRIMSeq/data_dmSQTLdata.Rd_%03d_medium.png", width=480, height=480)
> ### Name: data_dmSQTLdata
> ### Title: Sample data for sQTL analysis
> ### Aliases: data_dmSQTLdata
> ### Keywords: datasets
> 
> ### ** Examples
> 
> 
> #############################
> ### Create dmSQTLdata object
> #############################
> 
> # Use subsets of data defined in GeuvadisTranscriptExpr package
> library(GeuvadisTranscriptExpr)
> 
> counts <- GeuvadisTranscriptExpr::counts
> genotypes <- GeuvadisTranscriptExpr::genotypes
> gene_ranges <- GeuvadisTranscriptExpr::gene_ranges
> snp_ranges <- GeuvadisTranscriptExpr::snp_ranges
> 
> # Make sure that samples in counts and genotypes are in the same order
> sample_id <- colnames(counts[, -(1:2)])
> 
> d <- dmSQTLdataFromRanges(counts = counts[, sample_id], 
+    gene_id = counts$Gene_Symbol, feature_id = counts$TargetID, 
+    gene_ranges = gene_ranges, genotypes = genotypes[, sample_id], 
+    snp_id = genotypes$snpId, snp_ranges = snp_ranges, sample_id = sample_id, 
+    window = 5e3, BPPARAM = BiocParallel::SerialParam())
> 
> plotData(d)
[[1]]

[[2]]

[[3]]

> 
> data_dmSQTLdata <- d
> 
> #############################
> ### sQTL analysis
> #############################
> # If possible, use BPPARAM = BiocParallel::MulticoreParam() with more workers
> 
> d <- data_dmSQTLdata
> 
> head(names(d))
[1] "ENSG00000221983.2" "ENSG00000095066.6" "ENSG00000167604.7"
[4] "ENSG00000105290.4" "ENSG00000079462.2" "ENSG00000152443.8"
> length(d)
[1] 50
> d[1:10, ]
An object of class dmSQTLdata 
with 10 genes and 91 samples
> d[1:10, 1:10]
An object of class dmSQTLdata 
with 10 genes and 10 samples
> 
> ### Filtering
> d <- dmFilter(d, min_samps_gene_expr = 70, min_samps_feature_expr = 5, 
+    min_samps_feature_prop = 0, minor_allele_freq = 5, 
+    BPPARAM = BiocParallel::SerialParam())
> plotData(d)
[[1]]

[[2]]

[[3]]

> 
> 
> ### Calculate dispersion
> d <- dmDispersion(d, BPPARAM = BiocParallel::SerialParam())
! Using common_dispersion = 4 as disp_init !
> plotDispersion(d)
> 
> ### Fit full model proportions
> d <- dmFit(d, BPPARAM = BiocParallel::SerialParam())
> 
> ### Fit null model proportions and test for sQTLs
> d <- dmTest(d, BPPARAM = BiocParallel::SerialParam())
> plotTest(d)
> 
> head(results(d))
              gene_id block_id          snp_id       lr df    pvalue adj_pvalue
1   ENSG00000221983.2  block_3 snp_19_18678970 1.262112  2 0.5320296   0.999976
1.1 ENSG00000221983.2  block_3 snp_19_18685964 1.262112  2 0.5320296   0.999976
1.2 ENSG00000221983.2  block_3 snp_19_18687175 1.262112  2 0.5320296   0.999976
1.3 ENSG00000221983.2  block_3 snp_19_18689904 1.262112  2 0.5320296   0.999976
2   ENSG00000221983.2  block_4 snp_19_18679155 1.075932  2 0.5839347   0.999976
2.1 ENSG00000221983.2  block_4 snp_19_18679379 1.075932  2 0.5839347   0.999976
> 
> ### Plot feature proportions for top sQTL
> res <- results(d)
> res <- res[order(res$pvalue, decreasing = FALSE), ]
> 
> gene_id <- res$gene_id[1]
> snp_id <- res$snp_id[1]
> 
> plotFit(d, gene_id, snp_id)
> plotFit(d, gene_id, snp_id, plot_type = "boxplot2", order = FALSE)
> plotFit(d, gene_id, snp_id, plot_type = "ribbonplot")
> 
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>