A name of CDF package to use in RMA algorithm. If cdf_name=NULL, the package name is inferred from the name of probe_mapping_file ("HGU133Plus2_Hs_ENSG_mapping.txt" -> "hgu133plus2hsensgcdf")
cluster
A cluster object created using "makeCluster" function from "parellel" package. If cluster=NULL, no parallelization is used.
output_eset
If set to TRUE, the output of calc_prebs will be ExpressionSet object. Otherwise, the output will be a data frame.
paired_ended_reads
Set it to TRUE if your data contains paired-ended reads. Otherwise, the two read mates will be treated as independent units.
ignore_strand
If set to TRUE, then the strand is ignored while counting read overlaps with probe regions. If you use strand-specific RNA-seq protocol, set to FALSE, otherwise set it to TRUE.
sum.method
Microarray summarization method to be used. Can be either rpa or rma. The default mode is rpa.
Details
calc_prebs is the main function of prebs package that implements the whole
pipeline. The function takes mapped reads in BAM format and probe sequence
mappings as an input.
calc_prebs can run in two modes: rpa and rma. RMA is the classical
microarray summarization algorithm developed by R. A. Irizarry et al. (2003), while RPA is a newer algorithm that was developed by
L. Lahti et al. (2011). The default mode is rpa. NOTE: before prebs version 1.7.1 only RMA mode was available.
The output format depends on output_eset option. If output_eset=TRUE then
calc_prebs returns ExpressionSet object (ExpressionSet object is defined in
affy package). Otherwise, it returns a data frame containing PREBS values.
For running calc_prebs with custom CDF, the custom CDF package has to be
downloaded and installed from Custom CDF website:
http://brainarray.mbni.med.umich.edu/CustomCDF
For running calc_prebs with manufacturer's CDF, the manufacturer's CDF package
can be installed from Bioconductor, for example:
biocLite("GenomicRanges");
biocLite("hgu133plus2cdf")
For a detailed input specification, please refer to the prebs vignette.
Value
ExpressionSet object or a data frame containing PREBS values
Examples
if (require(prebsdata)) {
# Get full paths to data files in code{prebsdata} package
bam_file1 <- system.file(file.path("sample_bam_files", "input1.bam"), package="prebsdata")
bam_file2 <- system.file(file.path("sample_bam_files", "input2.bam"), package="prebsdata")
bam_files <- c(bam_file1, bam_file2)
custom_cdf_mapping1 <- system.file(file.path("custom-cdf", "HGU133Plus2_Hs_ENSG_mapping.txt"),
package="prebsdata")
custom_cdf_mapping2 <- system.file(file.path("custom-cdf", "HGU133A2_Hs_ENSG_mapping.txt"),
package="prebsdata")
manufacturer_cdf_mapping <- system.file(file.path("manufacturer-cdf", "HGU133Plus2_mapping.txt"),
package="prebsdata")
if (interactive()) {
# Run PREBS using custom CDF without parallelization ("rpa" mode)
prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1)
head(exprs(prebs_values))
# Run PREBS using custom CDF without parallelization ("rma" mode)
prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1, sum.method="rma")
head(exprs(prebs_values))
# Run PREBS using custom CDF with parallelization
library(parallel)
N_CORES = 2
CLUSTER <- makeCluster(N_CORES)
prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1, cluster=CLUSTER)
stopCluster(CLUSTER)
# Run PREBS using another custom CDF
prebs_values <- calc_prebs(bam_files, custom_cdf_mapping2)
# Run PREBS and return data frame instead of ExpressionSet object
prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1, output_eset=FALSE)
head(prebs_values)
}
# Run PREBS using Manufacturer's CDF (outputs probe set expressions)
prebs_values <- calc_prebs(bam_files, manufacturer_cdf_mapping)
head(exprs(prebs_values))
# Same as above, but state CDF package name explicitly
prebs_values <- calc_prebs(bam_files, manufacturer_cdf_mapping, cdf_name="hgu133plus2cdf")
}
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(prebs)
Loading required package: GenomicAlignments
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from 'package:stats':
IQR, mad, xtabs
The following objects are masked from 'package:base':
Filter, Find, Map, Position, Reduce, anyDuplicated, append,
as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
rbind, rownames, sapply, setdiff, sort, table, tapply, union,
unique, unsplit
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: 'S4Vectors'
The following objects are masked from 'package:base':
colMeans, colSums, expand.grid, rowMeans, rowSums
Loading required package: IRanges
Loading required package: GenomeInfoDb
Loading required package: GenomicRanges
Loading required package: SummarizedExperiment
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
Loading required package: Biostrings
Loading required package: XVector
Loading required package: Rsamtools
Loading required package: affy
Loading required package: RPA
Loading required package: phyloseq
Attaching package: 'phyloseq'
The following object is masked from 'package:affy':
sampleNames
The following object is masked from 'package:SummarizedExperiment':
distance
The following object is masked from 'package:Biobase':
sampleNames
The following object is masked from 'package:GenomicRanges':
distance
The following object is masked from 'package:IRanges':
distance
RPA Copyright (C) 2008-2016 Leo Lahti.
This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it under the FreeBSD open source license.
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/prebs/calc_prebs.Rd_%03d_medium.png", width=480, height=480)
> ### Name: calc_prebs
> ### Title: Calculate PREBS values
> ### Aliases: calc_prebs
>
> ### ** Examples
>
> if (require(prebsdata)) {
+ # Get full paths to data files in code{prebsdata} package
+ bam_file1 <- system.file(file.path("sample_bam_files", "input1.bam"), package="prebsdata")
+ bam_file2 <- system.file(file.path("sample_bam_files", "input2.bam"), package="prebsdata")
+ bam_files <- c(bam_file1, bam_file2)
+ custom_cdf_mapping1 <- system.file(file.path("custom-cdf", "HGU133Plus2_Hs_ENSG_mapping.txt"),
+ package="prebsdata")
+ custom_cdf_mapping2 <- system.file(file.path("custom-cdf", "HGU133A2_Hs_ENSG_mapping.txt"),
+ package="prebsdata")
+ manufacturer_cdf_mapping <- system.file(file.path("manufacturer-cdf", "HGU133Plus2_mapping.txt"),
+ package="prebsdata")
+ # if (interactive()) {
+ # Run PREBS using custom CDF without parallelization ("rpa" mode)
+ prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1)
+ head(exprs(prebs_values))
+
+ # Run PREBS using custom CDF without parallelization ("rma" mode)
+ prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1, sum.method="rma")
+ head(exprs(prebs_values))
+
+ # Run PREBS using custom CDF with parallelization
+ library(parallel)
+ N_CORES = 2
+ CLUSTER <- makeCluster(N_CORES)
+ prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1, cluster=CLUSTER)
+ stopCluster(CLUSTER)
+
+ # Run PREBS using another custom CDF
+ prebs_values <- calc_prebs(bam_files, custom_cdf_mapping2)
+
+ # Run PREBS and return data frame instead of ExpressionSet object
+ prebs_values <- calc_prebs(bam_files, custom_cdf_mapping1, output_eset=FALSE)
+ head(prebs_values)
+ # }
+
+ # Run PREBS using Manufacturer's CDF (outputs probe set expressions)
+ prebs_values <- calc_prebs(bam_files, manufacturer_cdf_mapping)
+ head(exprs(prebs_values))
+
+ # Same as above, but state CDF package name explicitly
+ prebs_values <- calc_prebs(bam_files, manufacturer_cdf_mapping, cdf_name="hgu133plus2cdf")
+ }
Loading required package: prebsdata
Inferred name for CDF package: HGU133Plus2_Hs_ENSG_mapping.txt -> hgu133plus2hsensgcdf
Error in check_input(bam_files, probe_mapping_file, cdf_name) :
CDF package `hgu133plus2hsensgcdf` is not installed. Please, install the package before running `calc_prebs`
Calls: calc_prebs -> check_input
Execution halted