Last data update: 2014.03.03

R: Coverage of a GAlignments, GAlignmentPairs, or...
coverage-methodsR Documentation

Coverage of a GAlignments, GAlignmentPairs, or GAlignmentsList object

Description

coverage methods for GAlignments, GAlignmentPairs, GAlignmentsList, and BamFile objects.

NOTE: The coverage generic function and methods for Ranges and RangesList objects are defined and documented in the IRanges package. Methods for GRanges and GRangesList objects are defined and documented in the GenomicRanges package.

Usage

## S4 method for signature 'GAlignments'
coverage(x, shift=0L, width=NULL, weight=1L,
         method=c("auto", "sort", "hash"), drop.D.ranges=FALSE)

## S4 method for signature 'GAlignmentPairs'
coverage(x, shift=0L, width=NULL, weight=1L,
         method=c("auto", "sort", "hash"), drop.D.ranges=FALSE)

## S4 method for signature 'GAlignmentsList'
coverage(x, shift=0L, width=NULL, weight=1L, ...)

## S4 method for signature 'BamFile'
coverage(x, shift=0L, width=NULL, weight=1L, ...,
         param=ScanBamParam())

## S4 method for signature 'character'
coverage(x, shift=0L, width=NULL, weight=1L, ...,
         yieldSize=2500000L)

Arguments

x

A GAlignments, GAlignmentPairs, GAlignmentsList, or BamFile object, or the path to a BAM file.

shift, width, weight

See coverage method for GRanges objects in the GenomicRanges package.

method

See ?coverage in the IRanges package for a description of this argument.

drop.D.ranges

Whether the coverage calculation should ignore ranges corresponding to D (deletion) in the CIGAR string.

...

Additional arguments passed to the coverage method for GAlignments objects.

param

An optional ScanBamParam object passed to readGAlignments.

yieldSize

An optional argument controlling how many records are input when iterating through a BamFile.

Details

The methods for GAlignments and GAlignmentPairs objects do:

  coverage(grglist(x, drop.D.ranges=drop.D.ranges), ...)

The method for GAlignmentsList objects does:

  coverage(unlist(x), ...)

The method for BamFile objects iterates through a BAM file, reading yieldSize(x) records (or all records, if is.na(yieldSize(x))) and calculating:

  gal <- readGAlignments(x, param=param)
  coverage(gal, shift=shift, width=width, weight=weight, ...)

The method for character vectors of length 1 creates a BamFile object from x and performs the calculation for coverage,BamFile-method.

Value

A named RleList object with one coverage vector per seqlevel in x.

See Also

  • coverage in the IRanges package.

  • coverage-methods in the GenomicRanges package.

  • RleList objects in the IRanges package.

  • GAlignments and GAlignmentPairs objects.

  • readGAlignments.

  • BamFile objects in the Rsamtools package.

Examples

## ---------------------------------------------------------------------
## A. EXAMPLE WITH TOY DATA
## ---------------------------------------------------------------------

ex1_file <- system.file("extdata", "ex1.bam", package="Rsamtools")

## Coverage of a GAlignments object:
gal <- readGAlignments(ex1_file)
cvg1 <- coverage(gal)
cvg1

## Coverage of a GAlignmentPairs object:
galp <- readGAlignmentPairs(ex1_file)
cvg2 <- coverage(galp)
cvg2

## Coverage of a GAlignmentsList object:
galist <- readGAlignmentsList(ex1_file)
cvg3 <- coverage(galist)
cvg3

table(mcols(galist)$mate_status)
mated_idx <- which(mcols(galist)$mate_status == "mated")
mated_galist <- galist[mated_idx]
mated_cvg3 <- coverage(mated_galist)
mated_cvg3

## Sanity checks:
stopifnot(identical(cvg1, cvg3))
stopifnot(identical( cvg2, mated_cvg3))

## ---------------------------------------------------------------------
## B. EXAMPLE WITH REAL DATA
## ---------------------------------------------------------------------

library(pasillaBamSubset)
## See '?pasillaBamSubset' for more information about the 2 BAM files
## included in this package.
reads <- readGAlignments(untreated3_chr4())
table(njunc(reads))  # data contains junction reads

## Junctions do NOT contribute to the coverage:
read1 <- reads[which(njunc(reads) != 0L)[1]]  # 1st read with a junction
read1  # cigar shows a "skipped region" of length 15306
grglist(read1)[[1]]  # the junction is between pos 4500 and 19807
coverage(read1)$chr4  # junction is not covered

## Sanity checks:
cvg <- coverage(reads)
read_chunks <- unlist(grglist(reads), use.names=FALSE)
read_chunks_per_chrom <- split(read_chunks, seqnames(read_chunks))
stopifnot(identical(sum(cvg), sum(width(read_chunks_per_chrom))))

galist <- readGAlignmentsList(untreated3_chr4())
stopifnot(identical(cvg, coverage(galist)))

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(GenomicAlignments)
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from 'package:stats':

    IQR, mad, xtabs

The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
    get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
    rbind, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit

Loading required package: S4Vectors
Loading required package: stats4

Attaching package: 'S4Vectors'

The following objects are masked from 'package:base':

    colMeans, colSums, expand.grid, rowMeans, rowSums

Loading required package: IRanges
Loading required package: GenomeInfoDb
Loading required package: GenomicRanges
Loading required package: SummarizedExperiment
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: Biostrings
Loading required package: XVector
Loading required package: Rsamtools
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/GenomicAlignments/coverage-methods.Rd_%03d_medium.png", width=480, height=480)
> ### Name: coverage-methods
> ### Title: Coverage of a GAlignments, GAlignmentPairs, or GAlignmentsList
> ###   object
> ### Aliases: coverage-methods coverage coverage,GAlignments-method
> ###   coverage,GAlignmentPairs-method coverage,GAlignmentsList-method
> ###   coverage,BamFile-method coverage,character-method
> ### Keywords: methods utilities
> 
> ### ** Examples
> 
> ## ---------------------------------------------------------------------
> ## A. EXAMPLE WITH TOY DATA
> ## ---------------------------------------------------------------------
> 
> ex1_file <- system.file("extdata", "ex1.bam", package="Rsamtools")
> 
> ## Coverage of a GAlignments object:
> gal <- readGAlignments(ex1_file)
> cvg1 <- coverage(gal)
> cvg1
RleList of length 2
$seq1
integer-Rle of length 1575 with 1056 runs
  Lengths:  2  2  1  3  4  2  3  4  2  4  1 ...  2  1  1  1  1  1  1  1  1  6
  Values :  1  2  3  4  5  7  8  9 11 12 13 ... 12 10  9  7  6  5  3  2  1  0

$seq2
integer-Rle of length 1584 with 1092 runs
  Lengths:  1  3  1  1  1  3  1  4  1  1  6 ...  1  1  1  1  2  1  4  4  1 17
  Values :  3  4  5  8 12 14 15 16 17 18 19 ... 14 13 10  8  7  6  3  2  1  0

> 
> ## Coverage of a GAlignmentPairs object:
> galp <- readGAlignmentPairs(ex1_file)
> cvg2 <- coverage(galp)
> cvg2
RleList of length 2
$seq1
integer-Rle of length 1575 with 1032 runs
  Lengths: 35  1  6  6  1  1  1  7  1  1  2 ...  2  1  1  1  1  1  1  1  1  6
  Values :  0  1  2  3  4  5  6  7  8 10 12 ... 12 10  9  7  6  5  3  2  1  0

$seq2
integer-Rle of length 1584 with 1056 runs
  Lengths: 36  2  1  1  2  1  1  1  1  1  2 ...  2  4  7  2  1  5  7  4  1 17
  Values :  0  2  3  4  6  9 10 12 14 15 16 ... 13 11 10  8  5  4  3  2  1  0

> 
> ## Coverage of a GAlignmentsList object:
> galist <- readGAlignmentsList(ex1_file)
> cvg3 <- coverage(galist)
> cvg3
RleList of length 2
$seq1
integer-Rle of length 1575 with 1056 runs
  Lengths:  2  2  1  3  4  2  3  4  2  4  1 ...  2  1  1  1  1  1  1  1  1  6
  Values :  1  2  3  4  5  7  8  9 11 12 13 ... 12 10  9  7  6  5  3  2  1  0

$seq2
integer-Rle of length 1584 with 1092 runs
  Lengths:  1  3  1  1  1  3  1  4  1  1  6 ...  1  1  1  1  2  1  4  4  1 17
  Values :  3  4  5  8 12 14 15 16 17 18 19 ... 14 13 10  8  7  6  3  2  1  0

> 
> table(mcols(galist)$mate_status)

    mated ambiguous   unmated 
     1572         0       127 
> mated_idx <- which(mcols(galist)$mate_status == "mated")
> mated_galist <- galist[mated_idx]
> mated_cvg3 <- coverage(mated_galist)
> mated_cvg3
RleList of length 2
$seq1
integer-Rle of length 1575 with 1032 runs
  Lengths: 35  1  6  6  1  1  1  7  1  1  2 ...  2  1  1  1  1  1  1  1  1  6
  Values :  0  1  2  3  4  5  6  7  8 10 12 ... 12 10  9  7  6  5  3  2  1  0

$seq2
integer-Rle of length 1584 with 1056 runs
  Lengths: 36  2  1  1  2  1  1  1  1  1  2 ...  2  4  7  2  1  5  7  4  1 17
  Values :  0  2  3  4  6  9 10 12 14 15 16 ... 13 11 10  8  5  4  3  2  1  0

> 
> ## Sanity checks:
> stopifnot(identical(cvg1, cvg3))
> stopifnot(identical( cvg2, mated_cvg3))
> 
> ## ---------------------------------------------------------------------
> ## B. EXAMPLE WITH REAL DATA
> ## ---------------------------------------------------------------------
> 
> library(pasillaBamSubset)
> ## See '?pasillaBamSubset' for more information about the 2 BAM files
> ## included in this package.
> reads <- readGAlignments(untreated3_chr4())
> table(njunc(reads))  # data contains junction reads

     0      1 
172529   2817 
> 
> ## Junctions do NOT contribute to the coverage:
> read1 <- reads[which(njunc(reads) != 0L)[1]]  # 1st read with a junction
> read1  # cigar shows a "skipped region" of length 15306
GAlignments object with 1 alignment and 0 metadata columns:
      seqnames strand       cigar    qwidth     start       end     width
         <Rle>  <Rle> <character> <integer> <integer> <integer> <integer>
  [1]     chr4      - 29M15306N8M        37      4472     19814     15343
          njunc
      <integer>
  [1]         1
  -------
  seqinfo: 8 sequences from an unspecified genome
> grglist(read1)[[1]]  # the junction is between pos 4500 and 19807
GRanges object with 2 ranges and 0 metadata columns:
      seqnames         ranges strand
         <Rle>      <IRanges>  <Rle>
  [1]     chr4 [ 4472,  4500]      -
  [2]     chr4 [19807, 19814]      -
  -------
  seqinfo: 8 sequences from an unspecified genome
> coverage(read1)$chr4  # junction is not covered
integer-Rle of length 1351857 with 5 runs
  Lengths:    4471      29   15306       8 1332043
  Values :       0       1       0       1       0
> 
> ## Sanity checks:
> cvg <- coverage(reads)
> read_chunks <- unlist(grglist(reads), use.names=FALSE)
> read_chunks_per_chrom <- split(read_chunks, seqnames(read_chunks))
> stopifnot(identical(sum(cvg), sum(width(read_chunks_per_chrom))))
> 
> galist <- readGAlignmentsList(untreated3_chr4())
> stopifnot(identical(cvg, coverage(galist)))
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>