Last data update: 2014.03.03

R: translate the GFF3 from a ciseqByCluster/processgff output...
cgff2dtR Documentation

translate the GFF3 from a ciseqByCluster/processgff output into a serialized data.table instance, compute genome-wide plug-in FDR, and update the GFF3 with this FDR

Description

translate the GFF3 from a ciseqByCluster/processgff output into a serialized data.table instance, compute genome-wide plug-in FDR, and update the GFF3 with this FDR

Usage

cgff2dt(gff3, tiling, addHitTest = TRUE, addcc878 = TRUE)

Arguments

gff3

character string naming a tabix-indexed, bgzipped output of gffprocess

tiling

output of simpleTiling

addHitTest

logical, telling whether to add a column on coincidence of SNP with the gwastagger ranges

addcc878

logical, telling whether to add a column on coincidence of SNP with the hmm878 ranges, using the inferred chromatin state as factor level

Note

assumes unix utilites zcat, paste and bgzip are available

Examples

##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (gff3, tiling) 
{
    require(foreach)
    require(Rsamtools)
    stopifnot(is(tiling, "GRanges"))
    basen = gsub(".gff3.gz", "", gff3)
    th = headerTabix(gff3)
    orderedChr = th$seqnames
    lgr = foreach(i = 1:length(tiling)) %dopar% {
        gc()
        cat(i)
        lk = try(import.gff3(gff3, which = tiling[i]))
        if (inherits(lk, "try-error")) 
            lk = NULL
        if (!is.null(lk)) 
            lk = as.data.table(as.data.frame(lk))
        lk
    }
    bad = sapply(lgr, is.null)
    if (any(bad)) 
        lgr = lgr[-which(bad)]
    ans = do.call(rbind, lgr)
    ans$snplocs = as.numeric(ans$snplocs)
    ans$ests = as.numeric(ans$ests)
    ans$se = as.numeric(ans$se)
    ans$oldfdr = as.numeric(ans$fdr)
    ans$MAF = as.numeric(ans$MAF)
    ans$dist.mid = as.numeric(ans$dist.mid)
    nperm = length(grep("permS", names(ans)))
    pnames = paste("permScore_", 1:nperm, sep = "")
    for (i in 1:nperm) ans[[pnames[i]]] = as.numeric(ans[[pnames[i]]])
    ans$mindist = as.numeric(ans$mindist)
    print(date())
    ans$fdr = pifdr(ans$score, c(ans$permScore_1, ans$permScore_2, 
        ans$permScore_3))
    print(date())
    obn = paste0(basen, "_dt")
    assign(obn, ans)
    save(list = obn, file = paste0(obn, ".rda"))
    gwfdr = ans$fdr
    gwch = ans$seqnames
    gwsnp = ans$snp
    gwprobeid = ans$probeid
    sgwfdr = split(gwfdr, gwch)
    sgwfdr = unlist(sgwfdr[orderedChr])
    nn = tempfile()
    fdrt = file(nn, "w")
    writeLines(c(c(" ", " ", " "), paste("; gwfdr=", round(sgwfdr, 
        6), sep = "")), con = fdrt)
    close(fdrt)
    chkb = system(paste0("zcat ", gff3, " | paste -d '' - ", 
        nn, " | bgzip > ", gsub(".gff3", "wmlf.gff3", gff3)))
    if (!(chkb == 0)) 
        warning(paste("final system zcat-paste-bgzip returned non-zero: chkb=", 
            chkb))
    invisible(chkb)
  }

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(GGtools)
Loading required package: GGBase
Loading required package: snpStats
Loading required package: survival
Loading required package: Matrix
Loading required package: data.table
Loading required package: parallel
Loading required package: Homo.sapiens
Loading required package: AnnotationDbi
Loading required package: stats4
Loading required package: BiocGenerics

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from 'package:stats':

    IQR, mad, xtabs

The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
    get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
    rbind, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit

Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: IRanges
Loading required package: S4Vectors

Attaching package: 'S4Vectors'

The following objects are masked from 'package:Matrix':

    colMeans, colSums, expand, rowMeans, rowSums

The following objects are masked from 'package:base':

    colMeans, colSums, expand.grid, rowMeans, rowSums


Attaching package: 'IRanges'

The following object is masked from 'package:data.table':

    shift

Loading required package: OrganismDbi
Loading required package: GenomicFeatures
Loading required package: GenomeInfoDb
Loading required package: GenomicRanges
Loading required package: GO.db

Loading required package: org.Hs.eg.db

Loading required package: TxDb.Hsapiens.UCSC.hg19.knownGene

Attaching package: 'GGtools'

The following object is masked from 'package:stats':

    getCall

> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/GGtools/cgff2dt.Rd_%03d_medium.png", width=480, height=480)
> ### Name: cgff2dt
> ### Title: translate the GFF3 from a ciseqByCluster/processgff output into
> ###   a serialized data.table instance, compute genome-wide plug-in FDR,
> ###   and update the GFF3 with this FDR
> ### Aliases: cgff2dt
> ### Keywords: models
> 
> ### ** Examples
> 
> ##---- Should be DIRECTLY executable !! ----
> ##-- ==>  Define data, use random,
> ##--	or do  help(data=index)  for the standard data sets.
> 
> ## The function is currently defined as
> function (gff3, tiling) 
+ {
+     require(foreach)
+     require(Rsamtools)
+     stopifnot(is(tiling, "GRanges"))
+     basen = gsub(".gff3.gz", "", gff3)
+     th = headerTabix(gff3)
+     orderedChr = th$seqnames
+     lgr = foreach(i = 1:length(tiling)) %dopar% {
+         gc()
+         cat(i)
+         lk = try(import.gff3(gff3, which = tiling[i]))
+         if (inherits(lk, "try-error")) 
+             lk = NULL
+         if (!is.null(lk)) 
+             lk = as.data.table(as.data.frame(lk))
+         lk
+     }
+     bad = sapply(lgr, is.null)
+     if (any(bad)) 
+         lgr = lgr[-which(bad)]
+     ans = do.call(rbind, lgr)
+     ans$snplocs = as.numeric(ans$snplocs)
+     ans$ests = as.numeric(ans$ests)
+     ans$se = as.numeric(ans$se)
+     ans$oldfdr = as.numeric(ans$fdr)
+     ans$MAF = as.numeric(ans$MAF)
+     ans$dist.mid = as.numeric(ans$dist.mid)
+     nperm = length(grep("permS", names(ans)))
+     pnames = paste("permScore_", 1:nperm, sep = "")
+     for (i in 1:nperm) ans[[pnames[i]]] = as.numeric(ans[[pnames[i]]])
+     ans$mindist = as.numeric(ans$mindist)
+     print(date())
+     ans$fdr = pifdr(ans$score, c(ans$permScore_1, ans$permScore_2, 
+         ans$permScore_3))
+     print(date())
+     obn = paste0(basen, "_dt")
+     assign(obn, ans)
+     save(list = obn, file = paste0(obn, ".rda"))
+     gwfdr = ans$fdr
+     gwch = ans$seqnames
+     gwsnp = ans$snp
+     gwprobeid = ans$probeid
+     sgwfdr = split(gwfdr, gwch)
+     sgwfdr = unlist(sgwfdr[orderedChr])
+     nn = tempfile()
+     fdrt = file(nn, "w")
+     writeLines(c(c(" ", " ", " "), paste("; gwfdr=", round(sgwfdr, 
+         6), sep = "")), con = fdrt)
+     close(fdrt)
+     chkb = system(paste0("zcat ", gff3, " | paste -d '' - ", 
+         nn, " | bgzip > ", gsub(".gff3", "wmlf.gff3", gff3)))
+     if (!(chkb == 0)) 
+         warning(paste("final system zcat-paste-bgzip returned non-zero: chkb=", 
+             chkb))
+     invisible(chkb)
+   }
function (gff3, tiling) 
{
    require(foreach)
    require(Rsamtools)
    stopifnot(is(tiling, "GRanges"))
    basen = gsub(".gff3.gz", "", gff3)
    th = headerTabix(gff3)
    orderedChr = th$seqnames
    lgr = foreach(i = 1:length(tiling)) %dopar% {
        gc()
        cat(i)
        lk = try(import.gff3(gff3, which = tiling[i]))
        if (inherits(lk, "try-error")) 
            lk = NULL
        if (!is.null(lk)) 
            lk = as.data.table(as.data.frame(lk))
        lk
    }
    bad = sapply(lgr, is.null)
    if (any(bad)) 
        lgr = lgr[-which(bad)]
    ans = do.call(rbind, lgr)
    ans$snplocs = as.numeric(ans$snplocs)
    ans$ests = as.numeric(ans$ests)
    ans$se = as.numeric(ans$se)
    ans$oldfdr = as.numeric(ans$fdr)
    ans$MAF = as.numeric(ans$MAF)
    ans$dist.mid = as.numeric(ans$dist.mid)
    nperm = length(grep("permS", names(ans)))
    pnames = paste("permScore_", 1:nperm, sep = "")
    for (i in 1:nperm) ans[[pnames[i]]] = as.numeric(ans[[pnames[i]]])
    ans$mindist = as.numeric(ans$mindist)
    print(date())
    ans$fdr = pifdr(ans$score, c(ans$permScore_1, ans$permScore_2, 
        ans$permScore_3))
    print(date())
    obn = paste0(basen, "_dt")
    assign(obn, ans)
    save(list = obn, file = paste0(obn, ".rda"))
    gwfdr = ans$fdr
    gwch = ans$seqnames
    gwsnp = ans$snp
    gwprobeid = ans$probeid
    sgwfdr = split(gwfdr, gwch)
    sgwfdr = unlist(sgwfdr[orderedChr])
    nn = tempfile()
    fdrt = file(nn, "w")
    writeLines(c(c(" ", " ", " "), paste("; gwfdr=", round(sgwfdr, 
        6), sep = "")), con = fdrt)
    close(fdrt)
    chkb = system(paste0("zcat ", gff3, " | paste -d '' - ", 
        nn, " | bgzip > ", gsub(".gff3", "wmlf.gff3", gff3)))
    if (!(chkb == 0)) 
        warning(paste("final system zcat-paste-bgzip returned non-zero: chkb=", 
            chkb))
    invisible(chkb)
}
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>