Last data update: 2014.03.03
R: function performs TFBS prediction denovo or based on transfac...
search.gadem R Documentation
Performs TFBS prediction, either de novo or based on TRANSFAC / JASPAR
position weight matrices (PWMs), using rGADEM.
Description
Performs TFBS prediction, either de novo or based on TRANSFAC / JASPAR
position weight matrices (PWMs), using rGADEM. If append=TRUE, predicted hits are appended to the hits in the input object.
Usage
## S4 method for signature 'cobindr'
search.gadem(x, deNovo = FALSE, append = F, background_scan = FALSE)
Arguments
x
an object of the class "cobindr", which will hold all necessary
information about the sequences and the hits.
deNovo
logical flag, if deNovo=TRUE a de novo search is started. Otherwise
the given PFMs are used as seed.
append
logical flag, if append=TRUE the binding sites will be appended to
already existing results
background_scan
logical flag, if background_scan=TRUE the function will search for
binding sites in the set of background sequences
Value
x
an object of the class "cobindr" including the predicted
transcription factor binding sites
Author(s)
Robert Lehmann <r.lehmann@biologie.hu-berlin.de>
References
uses package "rGADEM" (http://www.bioconductor.org/packages/release/bioc/html/rGADEM.html)
See Also
rtfbs, search.pwm
Examples
############################################################
# Example: build simulated sequences with planted binding sites, write them
# to a temporary FASTA file and set up a cobindR run on them.
library(Biostrings)
# Attached explicitly so the example also runs when copied out of the help
# page (read.transfac.pfm(), cobindRConfiguration() and cobindr() live here).
library(cobindR)

n <- 600 # number of input sequences
l <- 150 # length of sequences
n.hits <- 600 # number of 'true' binding sites (must not exceed n, see below)
bases <- c("A", "C", "G", "T") # alphabet

# Draw one random sequence of length l with the given base probabilities.
random_seq <- function(prob) {
  paste(sample(bases, l, replace = TRUE, prob = prob), collapse = "")
}

# generate random input sequences with two groups with differing GC content:
# the first 3/4 are AT-rich, the remaining 1/4 use uniform base frequencies
seqs <- c(
  vapply(seq_len(3 * n / 4), function(i) random_seq(c(.3, .22, .2, .28)),
         character(1)),
  vapply(seq_len(n / 4), function(i) random_seq(c(.25, .25, .25, .25)),
         character(1))
)

path <- system.file("extdata/pfms/myod.tfpfm", package = "cobindR")
motif <- read.transfac.pfm(path)[[1]] # get PFM of binding site
# Each column of the PFM is used as the sampling weights over `bases`, so the
# number of columns is the length of a planted site.
motif_len <- ncol(motif)

# add binding sites with distance specificity
for (position in c(70, 90)) {
  # one sampled base per motif column for every hit -> n.hits x motif_len
  hit_bases <- apply(motif, 2, function(x) {
    sample(x = bases, size = n.hits, prob = x, replace = TRUE)
  })
  hits <- apply(hit_bases, 1, paste, collapse = "")
  # start positions scatter around `position`
  pos.hits <- round(rnorm(n.hits, mean = position, sd = 8))
  # sample() without replacement yields distinct sequence indices, so the
  # vectorised replacement below touches every chosen sequence exactly once
  target <- sample(seq_len(n), n.hits)
  # stop is start + motif_len - 1 so exactly motif_len characters are replaced
  substr(seqs[target], start = pos.hits,
         stop = pos.hits + motif_len - 1) <- hits
}

# save sample sequences in fasta file
tmp.file <- tempfile(pattern = "cobindr_sample_seq", tmpdir = tempdir(),
                     fileext = ".fasta")
writeXStringSet(DNAStringSet(seqs), tmp.file)

# run cobindr
cfg <- cobindRConfiguration()
sequence_type(cfg) <- "fasta"
sequence_source(cfg) <- tmp.file
sequence_origin(cfg) <- "artificial sequences"
pfm_path(cfg) <- system.file("extdata/pfms", package = "cobindR")
pairs(cfg) <- "V$MYOD_01 V$MYOD_01"
runObj <- cobindr(cfg, name = "cobindr test using sampled sequences")

# perform tfbs prediction using rGADEM - commented out due to long time required
# runObj.bs <- search.gadem(runObj)
# show results
# plot.positions(runObj.bs)

# clean up
unlink(tmp.file)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(cobindR)
Attaching package: 'cobindR'
The following object is masked from 'package:base':
sequence
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/cobindR/search.gadem.Rd_%03d_medium.png", width=480, height=480)
> ### Name: search.gadem
> ### Title: function performs TFBS prediction denovo or based on transfac /
> ### jaspar matrices pwms using rGADEM.
> ### Aliases: search.gadem search.gadem-method search.gadem,cobindr-method
> ### Keywords: manip iteration methods
>
> ### ** Examples
>
> ############################################################
> # use simulated sequences
> library(Biostrings)
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from 'package:stats':
IQR, mad, xtabs
The following objects are masked from 'package:base':
Filter, Find, Map, Position, Reduce, anyDuplicated, append,
as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
rbind, rownames, sapply, setdiff, sort, table, tapply, union,
unique, unsplit
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: 'S4Vectors'
The following objects are masked from 'package:base':
colMeans, colSums, expand.grid, rowMeans, rowSums
Loading required package: IRanges
Loading required package: XVector
>
> n <- 600 # number of input sequences
> l <- 150 # length of sequences
> n.hits <- 600 # number of 'true' binding sites
> bases <- c("A","C","G","T") # alphabet
> # generate random input sequences with two groups with differing GC content
> seqs <- sapply(1:(3*n/4), function(x) paste(sample(bases, l, replace=TRUE,
+ prob=c(.3,.22,.2,.28)), collapse=""))
> seqs <- append(seqs, sapply(1:(n/4), function(x) paste(sample(bases, l,
+ replace=TRUE, prob=c(.25,.25,.25,.25)), collapse="")))
> path <- system.file('extdata/pfms/myod.tfpfm',package='cobindR')
> motif <- read.transfac.pfm(path)[[1]] # get PFM of binding site
[1] "ES_Sox2_1_c1058"
[1] "ES_Klf4_3_c1373"
> # add binding sites with distance specificity
> for(position in c(70, 90)) {
+ hits <- apply(apply(motif, 2, function(x) sample(x=bases, size=n.hits,
+ prob=x, replace=TRUE)), 1, paste, collapse='')
+ pos.hits <- round(rnorm(n.hits, mean=position, sd=8))
+ names(pos.hits) <- sample(1:n, n.hits)
+ for(i in 1:n.hits) substr(seqs[as.integer(names(pos.hits)[i])], start=pos.hits[i],
+ stop=pos.hits[i]+ncol(motif)) <- hits[i]
+ }
> #save sample sequences in fasta file
> tmp.file <- tempfile(pattern = "cobindr_sample_seq", tmpdir = tempdir(), fileext = ".fasta")
> writeXStringSet(DNAStringSet(seqs), tmp.file)
> #run cobindr
> cfg <- cobindRConfiguration()
Warning message:
In .local(.Object, ...) :
no config-file defined, generating configuration-object with default values
> sequence_type(cfg) <- 'fasta'
> sequence_source(cfg) <- tmp.file
> sequence_origin(cfg) <- 'artificial sequences'
> pfm_path(cfg) <- system.file('extdata/pfms',package='cobindR')
> pairs(cfg) <- 'V$MYOD_01 V$MYOD_01'
> runObj <-cobindr(cfg, name='cobindr test using sampled sequences')
[1] "Creating a new experiment!"
reading file /tmp/RtmpJSzCUT/cobindr_sample_seq6c3d1a787383.fasta ...
| | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% 
| |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ 
| 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | 
|============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |==================================================================