Last data update: 2014.03.03

R: tabulate counts of alignments occurring in specified genomic...
tabulateReadsR Documentation

tabulate counts of alignments occurring in specified genomic regions

Description

tabulate counts of alignments occurring in specified genomic regions

Usage

tabulateReads(bv, strandmarker=NULL, as.GRanges=FALSE, applier=lapply)

Arguments

bv

BamViews-class instance

strandmarker

character atom: ‘+’ or ‘-’; if missing, ignore strand

as.GRanges

logical directive to return a GRanges instance instead of a matrix

applier

lapply-like function; if unspecified and multicore is attached will use mclapply

Details

readGAlignments is the basic engine for this task

Value

annotated matrix with start, end, and samples as rows, regions as columns, and read counts as cell entries

Author(s)

VJ Carey <stvjc@channing.harvard.edu>

Examples

example(bs1)
#
# counts in a partition
#
myrn = GRanges(IRanges(start=seq(861250, 862750, 100), width=100),
    seqnames="Scchr13", strand="+")

values(myrn)$name = paste("til", 1:length(myrn), sep=".")
bamRanges(bs1) = myrn
tabulateReads(bs1, "+")
#
# a related computation based on countBam
lapply(bamPaths(bs1)[1:2], function(x) 
    countBam(x,  param=ScanBamParam(which=bamRanges(bs1))))

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(leeBamViews)
Loading required package: Biobase
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from 'package:stats':

    IQR, mad, xtabs

The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
    get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
    rbind, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit

Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: Rsamtools
Loading required package: GenomeInfoDb
Loading required package: stats4
Loading required package: S4Vectors

Attaching package: 'S4Vectors'

The following objects are masked from 'package:base':

    colMeans, colSums, expand.grid, rowMeans, rowSums

Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package: Biostrings
Loading required package: XVector
Loading required package: BSgenome
Loading required package: rtracklayer
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/leeBamViews/tabulateReads.Rd_%03d_medium.png", width=480, height=480)
> ### Name: tabulateReads
> ### Title: tabulate counts of alignments occurring in specified genomic
> ###   regions
> ### Aliases: tabulateReads
> ###   tabulateReads,BamViews,characterORNULL,logical,function-method
> ###   tabulateReads,BamViews,characterORNULL,missing,missing-method
> ###   tabulateReads,BamViews,missing,missing,missing-method
> ### Keywords: models
> 
> ### ** Examples
> 
> example(bs1)

bs1> library(leeBamViews)  # bam files stored in package

bs1> bpaths = dir(system.file("bam", package="leeBamViews"), full=TRUE, patt="bam$")

bs1> #
bs1> # extract genotype and lane information from filenames
bs1> #
bs1> gt = gsub(".*/", "", bpaths)

bs1> gt = gsub("_.*", "", gt)

bs1> lane = gsub(".*(.)$", "\1", gt)

bs1> geno = gsub(".$", "", gt)

bs1> #
bs1> # format the sample-level information appropriately
bs1> #
bs1> pd = DataFrame(geno=geno, lane=lane, row.names=paste(geno,lane,sep="."))

bs1> prd = new("DataFrame")  # protocol data could go here

bs1> #
bs1> # create the views object, adding some arbitrary experiment-level information
bs1> #
bs1> bs1 = BamViews(bamPaths=bpaths, bamSamples=pd,
bs1+         bamExperiment=list(annotation="org.Sc.sgd.db"))

bs1> bs1
BamViews dim: 0 ranges x 8 samples 
names: isowt.5 isowt.6 ... xrn.1 xrn.2 
detail: use bamPaths(), bamSamples(), bamRanges(), ... 

bs1> # add ranges and tabulate reads
bs1> 
bs1> START=c(861250, 863000)

bs1> END=c(862750, 864000)

bs1> exc = GRanges(IRanges(start=START, end=END), seqnames="Scchr13", strand="+")

bs1> values(exc)$name = c("intv1", "intv2")  # necessary

bs1> bamRanges(bs1) = exc

bs1> bs1
BamViews dim: 2 ranges x 8 samples 
names: isowt.5 isowt.6 ... xrn.1 xrn.2 
detail: use bamPaths(), bamSamples(), bamRanges(), ... 

bs1> tabulateReads(bs1, "+")
         intv1  intv2
start   861250 863000
end     862750 864000
isowt.5   3673   2697
isowt.6   3770   2653
rlp.5     1532   1047
rlp.6     1567   1140
ssr.1     4304   3065
ssr.2     4627   3388
xrn.1     2841   1695
xrn.2     3477   2199
> #
> # counts in a partition
> #
> myrn = GRanges(IRanges(start=seq(861250, 862750, 100), width=100),
+     seqnames="Scchr13", strand="+")
> 
> values(myrn)$name = paste("til", 1:length(myrn), sep=".")
> bamRanges(bs1) = myrn
> tabulateReads(bs1, "+")
         til.1  til.2  til.3  til.4  til.5  til.6  til.7  til.8  til.9 til.10
start   861250 861350 861450 861550 861650 861750 861850 861950 862050 862150
end     861349 861449 861549 861649 861749 861849 861949 862049 862149 862249
isowt.5      1      1      3      6      2      7    299    605    408    380
isowt.6      2      6      9     12      7      4    306    666    458    382
rlp.5        1      5     65     53     36     11    158    247    186    145
rlp.6        3      2     47     48     37     16    123    238    163    159
ssr.1        2      6     35     27     21      8    423    700    541    496
ssr.2        2      6     43     37     26     13    443    839    616    509
xrn.1        7      8     75     78     24      5    180    446    357    288
xrn.2        4      9     96    110     31      8    225    611    465    356
        til.11 til.12 til.13 til.14 til.15 til.16
start   862250 862350 862450 862550 862650 862750
end     862349 862449 862549 862649 862749 862849
isowt.5    482    554    895    631    643    702
isowt.6    446    517    870    689    691    701
rlp.5      174    180    316    251    239    277
rlp.6      190    215    336    270    269    281
ssr.1      573    596    966    737    669    771
ssr.2      576    606    987    775    742    811
xrn.1      349    484    678    549    396    342
xrn.2      430    578    837    643    453    420
> #
> # a related computation based on countBam
> lapply(bamPaths(bs1)[1:2], function(x) 
+     countBam(x,  param=ScanBamParam(which=bamRanges(bs1))))
$isowt.5
     space  start    end width           file records nucleotides
1  Scchr13 861250 861349   100 isowt5_13e.bam       6         216
2  Scchr13 861350 861449   100 isowt5_13e.bam       4         144
3  Scchr13 861450 861549   100 isowt5_13e.bam      10         360
4  Scchr13 861550 861649   100 isowt5_13e.bam      12         432
5  Scchr13 861650 861749   100 isowt5_13e.bam       5         180
6  Scchr13 861750 861849   100 isowt5_13e.bam      18         648
7  Scchr13 861850 861949   100 isowt5_13e.bam     431       15516
8  Scchr13 861950 862049   100 isowt5_13e.bam    1202       43272
9  Scchr13 862050 862149   100 isowt5_13e.bam     973       35028
10 Scchr13 862150 862249   100 isowt5_13e.bam     770       27720
11 Scchr13 862250 862349   100 isowt5_13e.bam     956       34416
12 Scchr13 862350 862449   100 isowt5_13e.bam     969       34884
13 Scchr13 862450 862549   100 isowt5_13e.bam    1588       57168
14 Scchr13 862550 862649   100 isowt5_13e.bam    1192       42912
15 Scchr13 862650 862749   100 isowt5_13e.bam    1220       43920
16 Scchr13 862750 862849   100 isowt5_13e.bam    1348       48528

$isowt.6
     space  start    end width           file records nucleotides
1  Scchr13 861250 861349   100 isowt6_13e.bam       4         144
2  Scchr13 861350 861449   100 isowt6_13e.bam       8         288
3  Scchr13 861450 861549   100 isowt6_13e.bam      10         360
4  Scchr13 861550 861649   100 isowt6_13e.bam      17         612
5  Scchr13 861650 861749   100 isowt6_13e.bam      14         504
6  Scchr13 861750 861849   100 isowt6_13e.bam      11         396
7  Scchr13 861850 861949   100 isowt6_13e.bam     436       15696
8  Scchr13 861950 862049   100 isowt6_13e.bam    1203       43308
9  Scchr13 862050 862149   100 isowt6_13e.bam    1007       36252
10 Scchr13 862150 862249   100 isowt6_13e.bam     758       27288
11 Scchr13 862250 862349   100 isowt6_13e.bam     940       33840
12 Scchr13 862350 862449   100 isowt6_13e.bam     898       32328
13 Scchr13 862450 862549   100 isowt6_13e.bam    1517       54612
14 Scchr13 862550 862649   100 isowt6_13e.bam    1323       47628
15 Scchr13 862650 862749   100 isowt6_13e.bam    1265       45540
16 Scchr13 862750 862849   100 isowt6_13e.bam    1300       46800

> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>