Last data update: 2014.03.03
|
R: function performs TFBS prediction using the package rtfbs
function performs TFBS prediction using the package rtfbs
Description
Performs transcription factor binding site (TFBS) prediction on the sequences of a "cobindr" object using the package rtfbs.
Usage
## S4 method for signature 'cobindr'
rtfbs(x, append = F, background_scan = FALSE, n.cpu = NA)
Arguments
x |
an object of the class "cobindr", which will hold all necessary
information about the sequences and the hits.
|
append |
logical flag, if append=TRUE the binding sites will be appended to
already existing results
|
background_scan |
logical flag, if background_scan=TRUE the background sequences will
be searched for transcription factor binding sites
|
n.cpu |
number of CPUs to be used for parallelization. Default value is 'NA',
in which case the number of available CPUs is checked and then used.
|
Value
x |
an object of the class "cobindr" including the predicted
transcription factor binding sites
|
Author(s)
Yue-Hien Lee <>
References
uses the package "rtfbs" (http://cran.r-project.org/web/packages/rtfbs/index.html)
See Also
search.pwm , search.gadem
Examples
############################################################
# Example: predict TFBS with rtfbs on simulated sequences.
# Random sequences are generated, motif instances sampled from a PFM are
# planted around two positions, the sequences are written to a temporary
# FASTA file, and cobindR is configured to read that file.
library(Biostrings)
n <- 400 # number of input sequences
l <- 500 # length of sequences
n.hits <- 250 # number of 'true' binding sites
bases <- c("A", "C", "G", "T") # alphabet
# generate random input sequences with two groups with differing GC content
# (vapply keeps the result a character vector regardless of input size)
seqs <- vapply(
  seq_len(3 * n / 4),
  function(x) {
    paste(sample(bases, l, replace = TRUE, prob = c(.3, .22, .2, .28)),
          collapse = "")
  },
  character(1)
)
seqs <- append(seqs, vapply(
  seq_len(n / 4),
  function(x) {
    paste(sample(bases, l, replace = TRUE, prob = c(.25, .25, .25, .25)),
          collapse = "")
  },
  character(1)
))
path <- system.file('extdata/pfms/myod.tfpfm', package = 'cobindR')
motif <- read.transfac.pfm(path)[[1]] # get PFM of binding site
# each PFM column supplies the base probabilities for one motif position,
# so a sampled hit is ncol(motif) characters long
motif.width <- ncol(motif)
# add binding sites with distance specificity
for (position in c(110, 150)) {
  # sample n.hits motif instances, one base per PFM column
  hits <- apply(
    apply(motif, 2, function(x) {
      sample(x = bases, size = n.hits, prob = x, replace = TRUE)
    }),
    1, paste, collapse = ''
  )
  # start positions scatter around 'position'; the names hold the index of
  # the sequence that receives the hit
  pos.hits <- round(rnorm(n.hits, mean = position, sd = 8))
  names(pos.hits) <- sample(seq_len(n), n.hits)
  for (i in seq_len(n.hits)) {
    seq.idx <- as.integer(names(pos.hits)[i])
    # the replaced span is exactly motif.width characters:
    # start .. start + motif.width - 1
    substr(seqs[seq.idx],
           start = pos.hits[i],
           stop = pos.hits[i] + motif.width - 1) <- hits[i]
  }
}
# save sample sequences in fasta file
tmp.file <- tempfile(pattern = "cobindr_sample_seq", tmpdir = tempdir(), fileext = ".fasta")
writeXStringSet(DNAStringSet(seqs), tmp.file)
# run cobindr
cfg <- cobindRConfiguration()
sequence_type(cfg) <- 'fasta'
sequence_source(cfg) <- tmp.file
sequence_origin(cfg) <- 'artificial sequences'
pfm_path(cfg) <- system.file('extdata/pfms', package = 'cobindR')
pairs(cfg) <- 'V$MYOD_01 V$MYOD_01'
fdrThreshold(cfg) <- 0
runObj <- cobindr(cfg, name = 'cobindr test using sampled sequences')
# perform tfbs prediction using rtfbs
runObj.bs <- rtfbs(runObj)
# show results
plot.positionprofile(runObj.bs)
# clean up the temporary FASTA file
unlink(tmp.file)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(cobindR)
Attaching package: 'cobindR'
The following object is masked from 'package:base':
sequence
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/cobindR/rtfbs.Rd_%03d_medium.png", width=480, height=480)
> ### Name: rtfbs
> ### Title: function performs TFBS prediction using the package rtfbs
> ### Aliases: rtfbs rtfbs-method rtfbs,cobindr-method
> ### Keywords: manip iteration methods
>
> ### ** Examples
>
>
> ############################################################
> # use simulated sequences
> library(Biostrings)
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from 'package:stats':
IQR, mad, xtabs
The following objects are masked from 'package:base':
Filter, Find, Map, Position, Reduce, anyDuplicated, append,
as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
rbind, rownames, sapply, setdiff, sort, table, tapply, union,
unique, unsplit
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: 'S4Vectors'
The following objects are masked from 'package:base':
colMeans, colSums, expand.grid, rowMeans, rowSums
Loading required package: IRanges
Loading required package: XVector
>
> n <- 400 # number of input sequences
> l <- 500 # length of sequences
> n.hits <- 250 # number of 'true' binding sites
> bases <- c("A","C","G","T") # alphabet
> # generate random input sequences with two groups with differing GC content
> seqs <- sapply(1:(3*n/4), function(x) paste(sample(bases, l, replace=TRUE,
+ prob=c(.3,.22,.2,.28)), collapse=""))
> seqs <- append(seqs, sapply(1:(n/4), function(x) paste(sample(bases, l,
+ replace=TRUE, prob=c(.25,.25,.25,.25)), collapse="")))
> path <- system.file('extdata/pfms/myod.tfpfm',package='cobindR')
> motif <- read.transfac.pfm(path)[[1]] # get PFM of binding site
[1] "ES_Sox2_1_c1058"
[1] "ES_Klf4_3_c1373"
> # add binding sites with distance specificity
> for(position in c(110, 150)) {
+ hits <- apply(apply(motif, 2, function(x) sample(x=bases, size=n.hits,
+ prob=x, replace=TRUE)), 1, paste, collapse='')
+ pos.hits <- round(rnorm(n.hits, mean=position, sd=8))
+ names(pos.hits) <- sample(1:n, n.hits)
+ for(i in 1:n.hits) substr(seqs[as.integer(names(pos.hits)[i])],
+ start=pos.hits[i], stop=pos.hits[i]+ncol(motif)) <- hits[i]
+ }
> #save sample sequences in fasta file
> tmp.file <- tempfile(pattern = "cobindr_sample_seq", tmpdir = tempdir(), fileext = ".fasta")
> writeXStringSet(DNAStringSet(seqs), tmp.file)
> #run cobindr
> cfg <- cobindRConfiguration()
Warning message:
In .local(.Object, ...) :
no config-file defined, generating configuration-object with default values
> sequence_type(cfg) <- 'fasta'
> sequence_source(cfg) <- tmp.file
> sequence_origin(cfg) <- 'artificial sequences'
> pfm_path(cfg) <- system.file('extdata/pfms',package='cobindR')
> pairs(cfg) <- 'V$MYOD_01 V$MYOD_01'
> fdrThreshold(cfg) <- 0
> runObj <- cobindr(cfg, name='cobindr test using sampled sequences')
[1] "Creating a new experiment!"
reading file /tmp/Rtmpi0d0kP/cobindr_sample_seq698673fdbd86.fasta ...
| | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | 
|================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | 
|==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | 
|================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%ready retrieving sequences!
reading pfm files: /home/ddbj/local/lib64/R/library/cobindR/extdata/pfms ...
[1] "ES_Sox2_1_c1058"
[1] "ES_Klf4_3_c1373"
[1] "ES_Oct4_1_c570"
[1] "ES_Sox2_1_c1058"
ignored files:
Using the parallel (multicore) version of cobindR - function cpg.gc.content with 4 cores
There were 50 or more warnings (use warnings() to see the first 50)
> # perform tfbs prediction using rtfbs
> runObj.bs <- rtfbs(runObj)
Loading required package: rtfbs
Starting binding site search with 4 PWMs.
Using the parallel (multicore) version of cobindR - function rtfbs.intern with 4 cores
Found 1449 binding sites.
Time difference of 4.122411 secs
> # show results
> plot.positionprofile(runObj.bs)
>
> #clean up
> unlink(tmp.file)
>
>
>
>
>
> dev.off()
null device
1
>
|
|