Last data update: 2014.03.03

R: generate FASTA file containing short INDEL
Outputaberrant    R Documentation

generate FASTA file containing short INDEL

Description

Short insertions/deletions (INDELs) may lead to aberrant proteins in cells. This function generates a FASTA file containing such proteins.

Usage

  Outputaberrant(positiontab, outfile, coding, proteinseq,
    ids, RPKM = NULL, ...)

Arguments

positiontab

a data frame which is the output of function Positionincoding() for INDELs.

outfile

output file name

coding

a data frame containing the coding sequence for each protein.

proteinseq

a data frame containing the amino acid sequence for each protein.

ids

a data frame containing gene/transcript/protein id mapping information.

RPKM

optional; if given, the RPKM value is included in the header of each sequence. Default is NULL.

...

Additional arguments.

Details

The function applies each INDEL to the coding sequence, then translates the result into a protein sequence, terminated at the stop codon. Sequences that are the same as the normal ones, or are part of the normal ones, are removed.

Value

FASTA file containing aberrant proteins.

Author(s)

Xiaojing Wang

Examples

vcffile <- system.file("extdata/vcfs", "test1.vcf", package="customProDB")
vcf <- InputVcf(vcffile)
table(values(vcf[[1]])[['INDEL']])
index <- which(values(vcf[[1]])[['INDEL']] == TRUE)
indelvcf <- vcf[[1]][index]

load(system.file("extdata/refseq", "exon_anno.RData", package="customProDB"))
load(system.file("extdata/refseq", "dbsnpinCoding.RData",
        package="customProDB"))
load(system.file("extdata/refseq", "procodingseq.RData",
        package="customProDB"))
load(system.file("extdata/refseq", "proseq.RData", package="customProDB"))
load(system.file("extdata/refseq", "ids.RData", package="customProDB"))
postable_indel <- Positionincoding(indelvcf, exon)
txlist_indel <- unique(postable_indel[, 'txid'])
codingseq_indel <- procodingseq[procodingseq[, 'tx_id'] %in% txlist_indel, ]
outfile <-  paste(tempdir(), '/test_indel.fasta', sep='')
Outputaberrant(postable_indel, coding=codingseq_indel,
proteinseq=proteinseq, outfile=outfile, ids=ids)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(customProDB)
Loading required package: IRanges
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from 'package:stats':

    IQR, mad, xtabs

The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
    get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
    rbind, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit

Loading required package: S4Vectors
Loading required package: stats4

Attaching package: 'S4Vectors'

The following objects are masked from 'package:base':

    colMeans, colSums, expand.grid, rowMeans, rowSums

Loading required package: AnnotationDbi
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: biomaRt
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/customProDB/Outputaberrant.Rd_%03d_medium.png", width=480, height=480)
> ### Name: Outputaberrant
> ### Title: generate FASTA file containing short INDEL
> ### Aliases: Outputaberrant
> 
> ### ** Examples
> 
> vcffile <- system.file("extdata/vcfs", "test1.vcf", package="customProDB")
> vcf <- InputVcf(vcffile)
> table(values(vcf[[1]])[['INDEL']])

FALSE  TRUE 
   54     7 
> index <- which(values(vcf[[1]])[['INDEL']] == TRUE)
> indelvcf <- vcf[[1]][index]
> 
> load(system.file("extdata/refseq", "exon_anno.RData", package="customProDB"))
> load(system.file("extdata/refseq", "dbsnpinCoding.RData",
+         package="customProDB"))
> load(system.file("extdata/refseq", "procodingseq.RData",
+         package="customProDB"))
> load(system.file("extdata/refseq", "proseq.RData", package="customProDB"))
> load(system.file("extdata/refseq", "ids.RData", package="customProDB"))
> postable_indel <- Positionincoding(indelvcf, exon)
> txlist_indel <- unique(postable_indel[, 'txid'])
> codingseq_indel <- procodingseq[procodingseq[, 'tx_id'] %in% txlist_indel, ]
> outfile <-  paste(tempdir(), '/test_indel.fasta', sep='')
> Outputaberrant(postable_indel, coding=codingseq_indel,
+ proteinseq=proteinseq, outfile=outfile, ids=ids)
Warning messages:
1: In .Call2("DNAStringSet_translate", x, skip_code, dna_codes[codon_alphabet],  :
  in 'x[[1]]': last 2 bases were ignored
2: In .Call2("DNAStringSet_translate", x, skip_code, dna_codes[codon_alphabet],  :
  in 'x[[2]]': last 2 bases were ignored
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>