Last data update: 2014.03.03

R: bsapply
bsapply    R Documentation

bsapply

Description

Apply a function to each chromosome in a genome.

Usage

bsapply(BSParams, ...)

Arguments

BSParams

a BSParams object that holds the various parameters needed to configure the bsapply function

...

optional arguments to 'FUN'.

Details

By default the exclude parameter is set to not exclude anything. A popular option will probably be to set this to "rand" so that random bits of unassigned contigs are filtered out.

Value

If BSParams sets simplify=FALSE, an ordinary list is returned containing the results generated using the remaining BSParams specifications. If BSParams sets simplify=TRUE, an sapply-like simplification is performed on the results.

Author(s)

Marc Carlson

See Also

BSParams-class, BSgenome-class, BSgenome-utils

Examples

  ## Load the Worm genome:
  library("BSgenome.Celegans.UCSC.ce2")

  ## Count the alphabet frequencies for every chromosome but exclude
  ## mitochondrial and scaffold ones:
  params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
  exclude = c("M", "_"))
  bsapply(params)

  ## Or we can do this same function with simplify = TRUE:
  params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
  exclude = c("M", "_"), simplify = TRUE)
  bsapply(params)


  ## Examples to show how we might look for a string (in this case an
  ## E-box motif) across the whole genome.
  Ebox <- DNAStringSet("CACGTG")
  pdict0 <- PDict(Ebox)

  params <- new("BSParams", X = Celegans, FUN = countPDict, simplify = TRUE)
  bsapply(params, pdict = pdict0)

  params@FUN <- matchPDict
  bsapply(params, pdict = pdict0)

  ## And since it's really overkill to use matchPDict to find a single pattern:
  params@FUN <- matchPattern
  bsapply(params, pattern = "CACGTG")


  ## Examples on how to use the masks
  library(BSgenome.Hsapiens.UCSC.hg38.masked)
  genome <- BSgenome.Hsapiens.UCSC.hg38.masked
  ## I can make things verbose if I want to see the chromosomes getting processed.
  options(verbose=TRUE)
  ## For the 1st example, let's use default masks
  params <- new("BSParams", X = genome, FUN = alphabetFrequency,
  exclude = c(1:8,"M","X","_"), simplify = TRUE)
  bsapply(params)

  if (interactive()) {
    ## Set up the motifList to filter out all double T's and all double C's
    params@motifList <-c("TT","CC")
    bsapply(params)

    ## Get rid of the motifList
    params@motifList=as.character()
  }

  ##Enable all standard masks
  params@maskList <- c(RM=TRUE,TRF=TRUE)
  bsapply(params)

  ##Disable all standard masks
  params@maskList <- c(AGAPS=FALSE,AMB=FALSE)
  bsapply(params)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(BSgenome)
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from 'package:stats':

    IQR, mad, xtabs

The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
    get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
    rbind, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit

Loading required package: S4Vectors
Loading required package: stats4

Attaching package: 'S4Vectors'

The following objects are masked from 'package:base':

    colMeans, colSums, expand.grid, rowMeans, rowSums

Loading required package: IRanges
Loading required package: GenomeInfoDb
Loading required package: GenomicRanges
Loading required package: Biostrings
Loading required package: XVector
Loading required package: rtracklayer
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/BSgenome/bsapply.Rd_%03d_medium.png", width=480, height=480)
> ### Name: bsapply
> ### Title: bsapply
> ### Aliases: bsapply
> ### Keywords: manip
> 
> ### ** Examples
> 
>   ## Load the Worm genome:
>   library("BSgenome.Celegans.UCSC.ce2")
> 
>   ## Count the alphabet frequencies for every chromosome but exclude
>   ## mitochrondrial and scaffold ones:
>   params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
+   exclude = c("M", "_"))
>   bsapply(params)
$chrI
      A       C       G       T       M       R       W       S       Y       K 
4838561 2697177 2693544 4851201       0       0       0       0       0       0 
      V       H       D       B       N       -       +       . 
      0       0       0       0       0       0       0       0 

$chrII
      A       C       G       T       M       R       W       S       Y       K 
4878194 2769208 2762193 4869710       0       0       0       0       0       0 
      V       H       D       B       N       -       +       . 
      0       0       0       0       3       0       0       0 

$chrIII
      A       C       G       T       M       R       W       S       Y       K 
4444527 2449074 2466260 4423447       0       0       0       0       0       0 
      V       H       D       B       N       -       +       . 
      0       0       0       0       5       0       0       0 

$chrIV
      A       C       G       T       M       R       W       S       Y       K 
5711041 3034771 3017009 5730970       0       0       0       0       0       0 
      V       H       D       B       N       -       +       . 
      0       0       0       0       0       0       0       0 

$chrV
      A       C       G       T       M       R       W       S       Y       K 
6749806 3711722 3700959 6759744       0       0       0       0       0       0 
      V       H       D       B       N       -       +       . 
      0       0       0       0       0       0       0       0 

$chrX
      A       C       G       T       M       R       W       S       Y       K 
5746418 3119282 3118284 5734865       0       0       0       0       0       0 
      V       H       D       B       N       -       +       . 
      0       0       0       0       0       0       0       0 

> 
>   ## Or we can do this same function with simplify = TRUE:
>   params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
+   exclude = c("M", "_"), simplify = TRUE)
>   bsapply(params)
     chrI   chrII  chrIII   chrIV    chrV    chrX
A 4838561 4878194 4444527 5711041 6749806 5746418
C 2697177 2769208 2449074 3034771 3711722 3119282
G 2693544 2762193 2466260 3017009 3700959 3118284
T 4851201 4869710 4423447 5730970 6759744 5734865
M       0       0       0       0       0       0
R       0       0       0       0       0       0
W       0       0       0       0       0       0
S       0       0       0       0       0       0
Y       0       0       0       0       0       0
K       0       0       0       0       0       0
V       0       0       0       0       0       0
H       0       0       0       0       0       0
D       0       0       0       0       0       0
B       0       0       0       0       0       0
N       0       3       5       0       0       0
-       0       0       0       0       0       0
+       0       0       0       0       0       0
.       0       0       0       0       0       0
> 
> 
>   ## Examples to show how we might look for a string (in this case an
>   ## ebox motif) across the whole genome.  
>   Ebox <- DNAStringSet("CACGTG")
>   pdict0 <- PDict(Ebox)
> 
>   params <- new("BSParams", X = Celegans, FUN = countPDict, simplify = TRUE)
>   bsapply(params, pdict = pdict0)
  chrI  chrII chrIII  chrIV   chrV   chrX   chrM 
  3369   2698   2612   2997   3588   2466      1 
> 
>   params@FUN <- matchPDict
>   bsapply(params, pdict = pdict0)
$chrI
MIndex object of length 1
[[1]]
IRanges object with 3369 ranges and 0 metadata columns:
             start       end     width
         <integer> <integer> <integer>
     [1]      2260      2265         6
     [2]      4920      4925         6
     [3]      8074      8079         6
     [4]     17729     17734         6
     [5]     18576     18581         6
     ...       ...       ...       ...
  [3365]  15062401  15062406         6
  [3366]  15065488  15065493         6
  [3367]  15066587  15066592         6
  [3368]  15066627  15066632         6
  [3369]  15074448  15074453         6


$chrII
MIndex object of length 1
[[1]]
IRanges object with 2698 ranges and 0 metadata columns:
             start       end     width
         <integer> <integer> <integer>
     [1]     28734     28739         6
     [2]     29032     29037         6
     [3]     35849     35854         6
     [4]     39084     39089         6
     [5]     43244     43249         6
     ...       ...       ...       ...
  [2694]  15271300  15271305         6
  [2695]  15273056  15273061         6
  [2696]  15273761  15273766         6
  [2697]  15275492  15275497         6
  [2698]  15277856  15277861         6


$chrIII
MIndex object of length 1
[[1]]
IRanges object with 2612 ranges and 0 metadata columns:
             start       end     width
         <integer> <integer> <integer>
     [1]      7323      7328         6
     [2]     11819     11824         6
     [3]     17156     17161         6
     [4]     24300     24305         6
     [5]     26324     26329         6
     ...       ...       ...       ...
  [2608]  13760197  13760202         6
  [2609]  13766607  13766612         6
  [2610]  13768348  13768353         6
  [2611]  13769745  13769750         6
  [2612]  13772637  13772642         6


$chrIV
MIndex object of length 1
[[1]]
IRanges object with 2997 ranges and 0 metadata columns:
             start       end     width
         <integer> <integer> <integer>
     [1]      5606      5611         6
     [2]      6675      6680         6
     [3]      8409      8414         6
     [4]      9438      9443         6
     [5]     18765     18770         6
     ...       ...       ...       ...
  [2993]  17458897  17458902         6
  [2994]  17479117  17479122         6
  [2995]  17488757  17488762         6
  [2996]  17490208  17490213         6
  [2997]  17490673  17490678         6


$chrV
MIndex object of length 1
[[1]]
IRanges object with 3588 ranges and 0 metadata columns:
             start       end     width
         <integer> <integer> <integer>
     [1]       605       610         6
     [2]      3531      3536         6
     [3]      5078      5083         6
     [4]      5128      5133         6
     [5]      6038      6043         6
     ...       ...       ...       ...
  [3584]  20891341  20891346         6
  [3585]  20894142  20894147         6
  [3586]  20897746  20897751         6
  [3587]  20899040  20899045         6
  [3588]  20921084  20921089         6


$chrX
MIndex object of length 1
[[1]]
IRanges object with 2466 ranges and 0 metadata columns:
             start       end     width
         <integer> <integer> <integer>
     [1]      7656      7661         6
     [2]     26993     26998         6
     [3]     37580     37585         6
     [4]     39506     39511         6
     [5]     56687     56692         6
     ...       ...       ...       ...
  [2462]  17708492  17708497         6
  [2463]  17708649  17708654         6
  [2464]  17709026  17709031         6
  [2465]  17712705  17712710         6
  [2466]  17716966  17716971         6


$chrM
MIndex object of length 1
[[1]]
IRanges object with 1 range and 0 metadata columns:
          start       end     width
      <integer> <integer> <integer>
  [1]      8767      8772         6


> 
>   ## And since its really overkill to use matchPDict to find a single pattern:
>   params@FUN <- matchPattern
>   bsapply(params, pattern = "CACGTG")
$chrI
  Views on a 15080483-letter DNAString subject
subject: GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT...AGGCTTAGGCTTAGGCTTAGGTTTAGGCTTAGGC
views:
          start      end width
   [1]     2260     2265     6 [CACGTG]
   [2]     4920     4925     6 [CACGTG]
   [3]     8074     8079     6 [CACGTG]
   [4]    17729    17734     6 [CACGTG]
   [5]    18576    18581     6 [CACGTG]
   ...      ...      ...   ... ...
[3365] 15062401 15062406     6 [CACGTG]
[3366] 15065488 15065493     6 [CACGTG]
[3367] 15066587 15066592     6 [CACGTG]
[3368] 15066627 15066632     6 [CACGTG]
[3369] 15074448 15074453     6 [CACGTG]

$chrII
  Views on a 15279308-letter DNAString subject
subject: CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA...TAGGCTGAGACTTAGGCTTAGGCTTAGGCTTAGT
views:
          start      end width
   [1]    28734    28739     6 [CACGTG]
   [2]    29032    29037     6 [CACGTG]
   [3]    35849    35854     6 [CACGTG]
   [4]    39084    39089     6 [CACGTG]
   [5]    43244    43249     6 [CACGTG]
   ...      ...      ...   ... ...
[2694] 15271300 15271305     6 [CACGTG]
[2695] 15273056 15273061     6 [CACGTG]
[2696] 15273761 15273766     6 [CACGTG]
[2697] 15275492 15275497     6 [CACGTG]
[2698] 15277856 15277861     6 [CACGTG]

$chrIII
  Views on a 13783313-letter DNAString subject
subject: CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA...TAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGG
views:
          start      end width
   [1]     7323     7328     6 [CACGTG]
   [2]    11819    11824     6 [CACGTG]
   [3]    17156    17161     6 [CACGTG]
   [4]    24300    24305     6 [CACGTG]
   [5]    26324    26329     6 [CACGTG]
   ...      ...      ...   ... ...
[2608] 13760197 13760202     6 [CACGTG]
[2609] 13766607 13766612     6 [CACGTG]
[2610] 13768348 13768353     6 [CACGTG]
[2611] 13769745 13769750     6 [CACGTG]
[2612] 13772637 13772642     6 [CACGTG]

$chrIV
  Views on a 17493791-letter DNAString subject
subject: CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA...TAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGG
views:
          start      end width
   [1]     5606     5611     6 [CACGTG]
   [2]     6675     6680     6 [CACGTG]
   [3]     8409     8414     6 [CACGTG]
   [4]     9438     9443     6 [CACGTG]
   [5]    18765    18770     6 [CACGTG]
   ...      ...      ...   ... ...
[2993] 17458897 17458902     6 [CACGTG]
[2994] 17479117 17479122     6 [CACGTG]
[2995] 17488757 17488762     6 [CACGTG]
[2996] 17490208 17490213     6 [CACGTG]
[2997] 17490673 17490678     6 [CACGTG]

$chrV
  Views on a 20922231-letter DNAString subject
subject: GAATTCCTAAGCCTAAGCCTAAGCCTAAGCCTAA...CTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTA
views:
          start      end width
   [1]      605      610     6 [CACGTG]
   [2]     3531     3536     6 [CACGTG]
   [3]     5078     5083     6 [CACGTG]
   [4]     5128     5133     6 [CACGTG]
   [5]     6038     6043     6 [CACGTG]
   ...      ...      ...   ... ...
[3584] 20891341 20891346     6 [CACGTG]
[3585] 20894142 20894147     6 [CACGTG]
[3586] 20897746 20897751     6 [CACGTG]
[3587] 20899040 20899045     6 [CACGTG]
[3588] 20921084 20921089     6 [CACGTG]

$chrX
  Views on a 17718849-letter DNAString subject
subject: CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA...TAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGG
views:
          start      end width
   [1]     7656     7661     6 [CACGTG]
   [2]    26993    26998     6 [CACGTG]
   [3]    37580    37585     6 [CACGTG]
   [4]    39506    39511     6 [CACGTG]
   [5]    56687    56692     6 [CACGTG]
   ...      ...      ...   ... ...
[2462] 17708492 17708497     6 [CACGTG]
[2463] 17708649 17708654     6 [CACGTG]
[2464] 17709026 17709031     6 [CACGTG]
[2465] 17712705 17712710     6 [CACGTG]
[2466] 17716966 17716971     6 [CACGTG]

$chrM
  Views on a 13794-letter DNAString subject
subject: CAGTAAATAGTTTAATAAAAATATAGCATTTGGG...TTTATAGATATATACTTTGTATATATCTATATTA
views:
    start  end width
[1]  8767 8772     6 [CACGTG]

> 
> 
>   ## Examples on how to use the masks
>   library(BSgenome.Hsapiens.UCSC.hg38.masked)
Loading required package: BSgenome.Hsapiens.UCSC.hg38
>   genome <- BSgenome.Hsapiens.UCSC.hg38.masked
>   ## I can make things verbose if I want to see the chromosomes getting processed.
>   options(verbose=TRUE)
>   ## For the 1st example, lets use default masks
>   params <- new("BSParams", X = genome, FUN = alphabetFrequency,
+   exclude = c(1:8,"M","X","_"), simplify = TRUE)
>   bsapply(params)
uncaching chrV
uncaching chrIV
uncaching chrIII
uncaching chrII
uncaching chrI
uncaching chrM
uncaching chrX
caching chr9
caching chrY
      chr9    chrY
A 35736329 7886192
C 25099811 5285789
G 25170662 5286894
T 35783748 7956168
M        0       0
R        0       0
W        0       0
S        0       0
Y        0       0
K        0       0
V        0       0
H        0       0
D        0       0
B        0       0
N        0       0
-        0       0
+        0       0
.        0       0
> 
> #  if (interactive()) {
>     ## Set up the motifList to filter out all double T's and all double C's
>     params@motifList <-c("TT","CC")
>     bsapply(params)
uncaching chrY
caching chrY
uncaching chr9
      chr9    chrY
A 35736329 7886192
C 13903490 2988946
G 25170662 5286894
T 16755064 3698131
M        0       0
R        0       0
W        0       0
S        0       0
Y        0       0
K        0       0
V        0       0
H        0       0
D        0       0
B        0       0
N        0       0
-        0       0
+        0       0
.        0       0
> 
>     ## Get rid of the motifList
>     params@motifList=as.character()
> #  }
> 
>   ##Enable all standard masks
>   params@maskList <- c(RM=TRUE,TRF=TRUE)
>   bsapply(params)
caching chr9
      chr9    chrY
A 17364609 2994088
C 12067803 1876822
G 12063390 1889305
T 17335711 3002884
M        0       0
R        0       0
W        0       0
S        0       0
Y        0       0
K        0       0
V        0       0
H        0       0
D        0       0
B        0       0
N        0       0
-        0       0
+        0       0
.        0       0
> 
>   ##Disable all standard masks
>   params@maskList <- c(AGAPS=FALSE,AMB=FALSE)
>   bsapply(params)
      chr9     chrY
A 35736329  7886192
C 25099811  5285789
G 25170662  5286894
T 35783748  7956168
M        0        0
R        0        0
W        0        0
S        0        0
Y        0        0
K        0        0
V        0        0
H        0        0
D        0        0
B        0        0
N 16604167 30812372
-        0        0
+        0        0
.        0        0
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>