a BSParams object that holds the various parameters needed to
configure the bsapply function
...
optional arguments to 'FUN'.
Details
By default, the exclude parameter is empty, so nothing is excluded. A
popular option is to set it to "rand" so that the "random" sequences
(bits of unassigned contigs) are filtered out.
Value
If BSParams sets simplify=FALSE, an ordinary list is returned
containing the results generated using the remaining BSParams specifications.
If BSParams sets simplify=TRUE, an sapply-like
simplification is performed on the results.
Author(s)
Marc Carlson
See Also
BSParams-class, BSgenome-class, BSgenome-utils
Examples
## Load the Worm genome:
library("BSgenome.Celegans.UCSC.ce2")
## Count the alphabet frequencies for every chromosome but exclude
## mitochondrial and scaffold ones:
params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
exclude = c("M", "_"))
bsapply(params)
## Or we can apply the same function with simplify = TRUE:
params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
exclude = c("M", "_"), simplify = TRUE)
bsapply(params)
## Examples to show how we might look for a string (in this case an
## E-box motif) across the whole genome.
Ebox <- DNAStringSet("CACGTG")
pdict0 <- PDict(Ebox)
params <- new("BSParams", X = Celegans, FUN = countPDict, simplify = TRUE)
bsapply(params, pdict = pdict0)
params@FUN <- matchPDict
bsapply(params, pdict = pdict0)
## And since it's really overkill to use matchPDict to find a single pattern:
params@FUN <- matchPattern
bsapply(params, pattern = "CACGTG")
## Examples on how to use the masks
library(BSgenome.Hsapiens.UCSC.hg38.masked)
genome <- BSgenome.Hsapiens.UCSC.hg38.masked
## I can make things verbose if I want to see the chromosomes getting processed.
options(verbose=TRUE)
## For the 1st example, let's use the default masks
params <- new("BSParams", X = genome, FUN = alphabetFrequency,
exclude = c(1:8,"M","X","_"), simplify = TRUE)
bsapply(params)
if (interactive()) {
## Set up the motifList to filter out all double T's and all double C's
params@motifList <-c("TT","CC")
bsapply(params)
## Get rid of the motifList
params@motifList=as.character()
}
## Enable all standard masks (RM and TRF are not active by default)
params@maskList <- c(RM=TRUE,TRF=TRUE)
bsapply(params)
##Disable all standard masks
params@maskList <- c(AGAPS=FALSE,AMB=FALSE)
bsapply(params)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(BSgenome)
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from 'package:stats':
IQR, mad, xtabs
The following objects are masked from 'package:base':
Filter, Find, Map, Position, Reduce, anyDuplicated, append,
as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
rbind, rownames, sapply, setdiff, sort, table, tapply, union,
unique, unsplit
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: 'S4Vectors'
The following objects are masked from 'package:base':
colMeans, colSums, expand.grid, rowMeans, rowSums
Loading required package: IRanges
Loading required package: GenomeInfoDb
Loading required package: GenomicRanges
Loading required package: Biostrings
Loading required package: XVector
Loading required package: rtracklayer
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/BSgenome/bsapply.Rd_%03d_medium.png", width=480, height=480)
> ### Name: bsapply
> ### Title: bsapply
> ### Aliases: bsapply
> ### Keywords: manip
>
> ### ** Examples
>
> ## Load the Worm genome:
> library("BSgenome.Celegans.UCSC.ce2")
>
> ## Count the alphabet frequencies for every chromosome but exclude
> ## mitochrondrial and scaffold ones:
> params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
+ exclude = c("M", "_"))
> bsapply(params)
$chrI
A C G T M R W S Y K
4838561 2697177 2693544 4851201 0 0 0 0 0 0
V H D B N - + .
0 0 0 0 0 0 0 0
$chrII
A C G T M R W S Y K
4878194 2769208 2762193 4869710 0 0 0 0 0 0
V H D B N - + .
0 0 0 0 3 0 0 0
$chrIII
A C G T M R W S Y K
4444527 2449074 2466260 4423447 0 0 0 0 0 0
V H D B N - + .
0 0 0 0 5 0 0 0
$chrIV
A C G T M R W S Y K
5711041 3034771 3017009 5730970 0 0 0 0 0 0
V H D B N - + .
0 0 0 0 0 0 0 0
$chrV
A C G T M R W S Y K
6749806 3711722 3700959 6759744 0 0 0 0 0 0
V H D B N - + .
0 0 0 0 0 0 0 0
$chrX
A C G T M R W S Y K
5746418 3119282 3118284 5734865 0 0 0 0 0 0
V H D B N - + .
0 0 0 0 0 0 0 0
>
> ## Or we can do this same function with simplify = TRUE:
> params <- new("BSParams", X = Celegans, FUN = alphabetFrequency,
+ exclude = c("M", "_"), simplify = TRUE)
> bsapply(params)
chrI chrII chrIII chrIV chrV chrX
A 4838561 4878194 4444527 5711041 6749806 5746418
C 2697177 2769208 2449074 3034771 3711722 3119282
G 2693544 2762193 2466260 3017009 3700959 3118284
T 4851201 4869710 4423447 5730970 6759744 5734865
M 0 0 0 0 0 0
R 0 0 0 0 0 0
W 0 0 0 0 0 0
S 0 0 0 0 0 0
Y 0 0 0 0 0 0
K 0 0 0 0 0 0
V 0 0 0 0 0 0
H 0 0 0 0 0 0
D 0 0 0 0 0 0
B 0 0 0 0 0 0
N 0 3 5 0 0 0
- 0 0 0 0 0 0
+ 0 0 0 0 0 0
. 0 0 0 0 0 0
>
>
> ## Examples to show how we might look for a string (in this case an
> ## ebox motif) across the whole genome.
> Ebox <- DNAStringSet("CACGTG")
> pdict0 <- PDict(Ebox)
>
> params <- new("BSParams", X = Celegans, FUN = countPDict, simplify = TRUE)
> bsapply(params, pdict = pdict0)
chrI chrII chrIII chrIV chrV chrX chrM
3369 2698 2612 2997 3588 2466 1
>
> params@FUN <- matchPDict
> bsapply(params, pdict = pdict0)
$chrI
MIndex object of length 1
[[1]]
IRanges object with 3369 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 2260 2265 6
[2] 4920 4925 6
[3] 8074 8079 6
[4] 17729 17734 6
[5] 18576 18581 6
... ... ... ...
[3365] 15062401 15062406 6
[3366] 15065488 15065493 6
[3367] 15066587 15066592 6
[3368] 15066627 15066632 6
[3369] 15074448 15074453 6
$chrII
MIndex object of length 1
[[1]]
IRanges object with 2698 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 28734 28739 6
[2] 29032 29037 6
[3] 35849 35854 6
[4] 39084 39089 6
[5] 43244 43249 6
... ... ... ...
[2694] 15271300 15271305 6
[2695] 15273056 15273061 6
[2696] 15273761 15273766 6
[2697] 15275492 15275497 6
[2698] 15277856 15277861 6
$chrIII
MIndex object of length 1
[[1]]
IRanges object with 2612 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 7323 7328 6
[2] 11819 11824 6
[3] 17156 17161 6
[4] 24300 24305 6
[5] 26324 26329 6
... ... ... ...
[2608] 13760197 13760202 6
[2609] 13766607 13766612 6
[2610] 13768348 13768353 6
[2611] 13769745 13769750 6
[2612] 13772637 13772642 6
$chrIV
MIndex object of length 1
[[1]]
IRanges object with 2997 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 5606 5611 6
[2] 6675 6680 6
[3] 8409 8414 6
[4] 9438 9443 6
[5] 18765 18770 6
... ... ... ...
[2993] 17458897 17458902 6
[2994] 17479117 17479122 6
[2995] 17488757 17488762 6
[2996] 17490208 17490213 6
[2997] 17490673 17490678 6
$chrV
MIndex object of length 1
[[1]]
IRanges object with 3588 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 605 610 6
[2] 3531 3536 6
[3] 5078 5083 6
[4] 5128 5133 6
[5] 6038 6043 6
... ... ... ...
[3584] 20891341 20891346 6
[3585] 20894142 20894147 6
[3586] 20897746 20897751 6
[3587] 20899040 20899045 6
[3588] 20921084 20921089 6
$chrX
MIndex object of length 1
[[1]]
IRanges object with 2466 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 7656 7661 6
[2] 26993 26998 6
[3] 37580 37585 6
[4] 39506 39511 6
[5] 56687 56692 6
... ... ... ...
[2462] 17708492 17708497 6
[2463] 17708649 17708654 6
[2464] 17709026 17709031 6
[2465] 17712705 17712710 6
[2466] 17716966 17716971 6
$chrM
MIndex object of length 1
[[1]]
IRanges object with 1 range and 0 metadata columns:
start end width
<integer> <integer> <integer>
[1] 8767 8772 6
>
> ## And since its really overkill to use matchPDict to find a single pattern:
> params@FUN <- matchPattern
> bsapply(params, pattern = "CACGTG")
$chrI
Views on a 15080483-letter DNAString subject
subject: GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT...AGGCTTAGGCTTAGGCTTAGGTTTAGGCTTAGGC
views:
start end width
[1] 2260 2265 6 [CACGTG]
[2] 4920 4925 6 [CACGTG]
[3] 8074 8079 6 [CACGTG]
[4] 17729 17734 6 [CACGTG]
[5] 18576 18581 6 [CACGTG]
... ... ... ... ...
[3365] 15062401 15062406 6 [CACGTG]
[3366] 15065488 15065493 6 [CACGTG]
[3367] 15066587 15066592 6 [CACGTG]
[3368] 15066627 15066632 6 [CACGTG]
[3369] 15074448 15074453 6 [CACGTG]
$chrII
Views on a 15279308-letter DNAString subject
subject: CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA...TAGGCTGAGACTTAGGCTTAGGCTTAGGCTTAGT
views:
start end width
[1] 28734 28739 6 [CACGTG]
[2] 29032 29037 6 [CACGTG]
[3] 35849 35854 6 [CACGTG]
[4] 39084 39089 6 [CACGTG]
[5] 43244 43249 6 [CACGTG]
... ... ... ... ...
[2694] 15271300 15271305 6 [CACGTG]
[2695] 15273056 15273061 6 [CACGTG]
[2696] 15273761 15273766 6 [CACGTG]
[2697] 15275492 15275497 6 [CACGTG]
[2698] 15277856 15277861 6 [CACGTG]
$chrIII
Views on a 13783313-letter DNAString subject
subject: CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA...TAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGG
views:
start end width
[1] 7323 7328 6 [CACGTG]
[2] 11819 11824 6 [CACGTG]
[3] 17156 17161 6 [CACGTG]
[4] 24300 24305 6 [CACGTG]
[5] 26324 26329 6 [CACGTG]
... ... ... ... ...
[2608] 13760197 13760202 6 [CACGTG]
[2609] 13766607 13766612 6 [CACGTG]
[2610] 13768348 13768353 6 [CACGTG]
[2611] 13769745 13769750 6 [CACGTG]
[2612] 13772637 13772642 6 [CACGTG]
$chrIV
Views on a 17493791-letter DNAString subject
subject: CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA...TAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGG
views:
start end width
[1] 5606 5611 6 [CACGTG]
[2] 6675 6680 6 [CACGTG]
[3] 8409 8414 6 [CACGTG]
[4] 9438 9443 6 [CACGTG]
[5] 18765 18770 6 [CACGTG]
... ... ... ... ...
[2993] 17458897 17458902 6 [CACGTG]
[2994] 17479117 17479122 6 [CACGTG]
[2995] 17488757 17488762 6 [CACGTG]
[2996] 17490208 17490213 6 [CACGTG]
[2997] 17490673 17490678 6 [CACGTG]
$chrV
Views on a 20922231-letter DNAString subject
subject: GAATTCCTAAGCCTAAGCCTAAGCCTAAGCCTAA...CTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTA
views:
start end width
[1] 605 610 6 [CACGTG]
[2] 3531 3536 6 [CACGTG]
[3] 5078 5083 6 [CACGTG]
[4] 5128 5133 6 [CACGTG]
[5] 6038 6043 6 [CACGTG]
... ... ... ... ...
[3584] 20891341 20891346 6 [CACGTG]
[3585] 20894142 20894147 6 [CACGTG]
[3586] 20897746 20897751 6 [CACGTG]
[3587] 20899040 20899045 6 [CACGTG]
[3588] 20921084 20921089 6 [CACGTG]
$chrX
Views on a 17718849-letter DNAString subject
subject: CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA...TAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGG
views:
start end width
[1] 7656 7661 6 [CACGTG]
[2] 26993 26998 6 [CACGTG]
[3] 37580 37585 6 [CACGTG]
[4] 39506 39511 6 [CACGTG]
[5] 56687 56692 6 [CACGTG]
... ... ... ... ...
[2462] 17708492 17708497 6 [CACGTG]
[2463] 17708649 17708654 6 [CACGTG]
[2464] 17709026 17709031 6 [CACGTG]
[2465] 17712705 17712710 6 [CACGTG]
[2466] 17716966 17716971 6 [CACGTG]
$chrM
Views on a 13794-letter DNAString subject
subject: CAGTAAATAGTTTAATAAAAATATAGCATTTGGG...TTTATAGATATATACTTTGTATATATCTATATTA
views:
start end width
[1] 8767 8772 6 [CACGTG]
>
>
> ## Examples on how to use the masks
> library(BSgenome.Hsapiens.UCSC.hg38.masked)
Loading required package: BSgenome.Hsapiens.UCSC.hg38
> genome <- BSgenome.Hsapiens.UCSC.hg38.masked
> ## I can make things verbose if I want to see the chromosomes getting processed.
> options(verbose=TRUE)
> ## For the 1st example, lets use default masks
> params <- new("BSParams", X = genome, FUN = alphabetFrequency,
+ exclude = c(1:8,"M","X","_"), simplify = TRUE)
> bsapply(params)
uncaching chrV
uncaching chrIV
uncaching chrIII
uncaching chrII
uncaching chrI
uncaching chrM
uncaching chrX
caching chr9
caching chrY
chr9 chrY
A 35736329 7886192
C 25099811 5285789
G 25170662 5286894
T 35783748 7956168
M 0 0
R 0 0
W 0 0
S 0 0
Y 0 0
K 0 0
V 0 0
H 0 0
D 0 0
B 0 0
N 0 0
- 0 0
+ 0 0
. 0 0
>
> # if (interactive()) {
> ## Set up the motifList to filter out all double T's and all double C's
> params@motifList <-c("TT","CC")
> bsapply(params)
uncaching chrY
caching chrY
uncaching chr9
chr9 chrY
A 35736329 7886192
C 13903490 2988946
G 25170662 5286894
T 16755064 3698131
M 0 0
R 0 0
W 0 0
S 0 0
Y 0 0
K 0 0
V 0 0
H 0 0
D 0 0
B 0 0
N 0 0
- 0 0
+ 0 0
. 0 0
>
> ## Get rid of the motifList
> params@motifList=as.character()
> # }
>
> ##Enable all standard masks
> params@maskList <- c(RM=TRUE,TRF=TRUE)
> bsapply(params)
caching chr9
chr9 chrY
A 17364609 2994088
C 12067803 1876822
G 12063390 1889305
T 17335711 3002884
M 0 0
R 0 0
W 0 0
S 0 0
Y 0 0
K 0 0
V 0 0
H 0 0
D 0 0
B 0 0
N 0 0
- 0 0
+ 0 0
. 0 0
>
> ##Disable all standard masks
> params@maskList <- c(AGAPS=FALSE,AMB=FALSE)
> bsapply(params)
chr9 chrY
A 35736329 7886192
C 25099811 5285789
G 25170662 5286894
T 35783748 7956168
M 0 0
R 0 0
W 0 0
S 0 0
Y 0 0
K 0 0
V 0 0
H 0 0
D 0 0
B 0 0
N 16604167 30812372
- 0 0
+ 0 0
. 0 0
>
>
>
>
>
> dev.off()
null device
1
>