Last data update: 2014.03.03

R: REPTILE sample data (rsd)
rsdR Documentation

REPTILE sample data (rsd)

Description

Sample data for testing REPTILE training, prediction, and evaluation.

Usage

data(rsd)

Format

A list containing two lists.

training_data is the data used for training the REPTILE enhancer model. This list has four elements: region_epimark, DMR_epimark, region_label and DMR_label. The former two store the epigenomic signatures of query regions and DMRs, respectively. The latter two indicate whether each query region or DMR is an enhancer (1) or a negative instance (0).

test_data is the data used for testing the REPTILE enhancer model. This list has three elements: region_epimark, DMR_epimark and region_label. The former two store the epigenomic signatures of query regions and DMRs, respectively. region_label indicates whether each query region is an enhancer (1) or a negative instance (0).

Author(s)

Yupeng He yupeng.he.bioinfo@gmail.com

Source

training_data was based on the EP300 binding sites (positives), promoters (negatives) and randomly chosen genomic loci (negatives) in mouse embryonic stem cells.

test_data was constructed based on in vivo validated mouse sequences from the VISTA enhancer browser (Oct 24th, 2015). The labels indicate enhancer activity in mouse heart tissue from E11.5 embryos.

See the papers included in References for details.

References

He, Yupeng et al., REPTILE: Regulatory Element Prediction based on TIssue-specific Local Epigenetic marks, in preparation

Visel, Axel et al. (2007), VISTA Enhancer Browser - a database of tissue-specific human enhancers. Nucleic Acids Research 35 (suppl 1). http://enhancer.lbl.gov/

Examples

## Visualizing rsd data
library("REPTILE")
data(rsd)
training <- rsd$training_data

## One colour per class, listed in the order the boxes are drawn within each
## feature (negative first, enhancer second). The boxes and the legend both
## read from this single vector so that they cannot disagree.
class_col <- c(negative = rgb(65, 105, 225, maxColorValue = 255),
               enhancer = rgb(250, 128, 114, maxColorValue = 255))

## Draw, for every feature (column) of `epimark`, a pair of horizontal
## boxplots: instances labelled 0 (negative) and instances labelled 1
## (enhancer).
##   epimark: epigenomic signatures, one row per instance and one column
##            per feature
##   label:   0/1 labels, one per row of `epimark`
##   main:    plot title
##   col:     named colours, negative class first, enhancer class second
plot_feature_distribution <- function(epimark, label, main, col = class_col) {
    ## Epigenomic signature grouped by labels; drop = FALSE keeps the
    ## column structure even if a class has a single instance
    ind_pos <- label == 1
    pos <- epimark[ind_pos, , drop = FALSE]
    neg <- epimark[!ind_pos, , drop = FALSE]

    ## Number of features of THIS data set (not of some other one)
    n <- ncol(epimark)

    ## Prepare the data format required for plotting: per feature the
    ## negative box, the enhancer box and two empty (NA) slots that act as
    ## a spacer between neighbouring features
    feature_data <- unlist(
        lapply(seq_len(n), function(i) list(neg[, i], pos[, i], NA, NA)),
        recursive = FALSE)

    ## `col` has two entries and the slots repeat with period four, so the
    ## recycled colours always land on negative, enhancer, spacer, spacer
    boxplot(feature_data,
            xlab = "feature value",
            notch = TRUE, outline = FALSE, yaxt = "n",
            xlim = c(1, n * 4 - 2), ylim = c(-7, 7),
            horizontal = TRUE,
            col = unname(col),
            main = main)

    ## Feature names, centred between the two boxes of each pair and drawn
    ## in the left margin (xpd = TRUE allows drawing outside the plot area)
    text(par("usr")[1] - 0.2, seq(1.5, n * 4 - 2, by = 4),
         labels = gsub("_", "-", colnames(epimark)),
         xpd = TRUE, adj = 1)

    ## Legend above the plot region, using the same colours as the boxes
    legend(-8, 4 * n + 4, names(col), ncol = 2,
           fill = unname(col),
           xpd = TRUE, bty = "n")
    invisible(NULL)
}

## Plot the feature distribution
## Widen the left margin for the feature names; the previous graphical
## parameters are restored once both plots are drawn
old_par <- par(mar = c(4, 8, 4, 4))

## - query region
plot_feature_distribution(training$region_epimark, training$region_label,
                          main = "Feature value distribution in query regions")

## - DMR
plot_feature_distribution(training$DMR_epimark, training$DMR_label,
                          main = "Feature value distribution in DMRs")

par(old_par)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(REPTILE)
Loading required package: foreach
Loading required package: doParallel
Loading required package: iterators
Loading required package: parallel
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/REPTILE/rsd.Rd_%03d_medium.png", width=480, height=480)
> ### Name: rsd
> ### Title: REPTILE sample data (rsd)
> ### Aliases: rsd
> ### Keywords: dataset
> 
> ### ** Examples
> 
> ## Visualizing rsd data
> library("REPTILE")
> data(rsd)
> 
> ## Epigenomic signature of query region grouped by labels
> ind_pos = rsd$training_data$region_label == 1
> pos_region = rsd$training_data$region_epimark[ind_pos,]
> neg_region = rsd$training_data$region_epimark[!ind_pos,]
> 
> ## Epigenomic signature of DMRs grouped by labels
> ind_pos = rsd$training_data$DMR_label == 1
> pos_DMR = rsd$training_data$DMR_epimark[ind_pos,]
> neg_DMR = rsd$training_data$DMR_epimark[!ind_pos,]
> 
> ## Prepare the data format required for plotting
> n = ncol(rsd$training_data$DMR_epimark) ## Number of features
> feature_data_DMR = list()
> feature_data_region = list()
> for(i in 1:n){
+     feature_data_DMR <- append(feature_data_DMR,
+                                list(neg_DMR[,i],pos_DMR[,i],
+                                     NA,NA))
+     feature_data_region <- append(feature_data_region,
+                                   list(neg_region[,i],pos_region[,i],
+                                        NA,NA))
+ }
> 
> ## Plot the feature distribution
> par(mar=c(4,8,4,4))
> ## - query region
> b <- boxplot(feature_data_region,
+              xlab = "feature value",
+              notch=TRUE,outline=FALSE,yaxt='n',
+              xlim = c(1,n*4-2),ylim=c(-7,7),
+              horizontal=TRUE,
+              col=c(rgb(65,105,225,max=255),rgb(250,128,114,max=255)),
+              main = "Feature value distribution in query regions"
+              )
> text(par("usr")[1]-0.2, seq(1.5,n*4-2,by=4),
+      labels=gsub("_","-",colnames(rsd$training_data$region_epimark)),
+      xpd = TRUE,adj=1)
> legend(-8,4*n+4,c("negative","enhancer"),ncol=2,
+        fill = c(rgb(250,128,114,max=255),rgb(65,105,225,max=255)),
+        xpd=TRUE,bty='n')
> 
> ## - DMR
> b <- boxplot(feature_data_DMR,
+              xlab = "feature value",
+              notch=TRUE,outline=FALSE,yaxt='n',
+              xlim = c(1,n*4-2),ylim=c(-7,7),
+              horizontal=TRUE,
+              col=c(rgb(65,105,225,max=255),rgb(250,128,114,max=255)),
+              main = "Feature value distribution in DMRs"
+              )
> text(par("usr")[1]-0.2, seq(1.5,n*4-2,by=4),
+      labels=gsub("_","-",colnames(rsd$training_data$DMR_epimark)),
+      xpd = TRUE,adj=1)
> legend(-8,4*n+4,c("negative","enhancer"),ncol=2,
+        fill = c(rgb(250,128,114,max=255),rgb(65,105,225,max=255)),
+        xpd=TRUE,bty='n')
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>