Last data update: 2014.03.03

R: Unmasked Regions of Human Genomes
unmasked-datasets R Documentation

Unmasked Regions of Human Genomes

Description

Pre-built GRangesList objects with unmasked regions of different human genome builds

Usage

hg18Unmasked
hg19Unmasked
hg38Unmasked
b36Unmasked
b37Unmasked

Format

Each of these is a GRangesList object containing the unmasked regions of a human genome build, as provided by the packages BSgenome.Hsapiens.UCSC.hg18.masked, BSgenome.Hsapiens.UCSC.hg19.masked, and BSgenome.Hsapiens.UCSC.hg38.masked. The latter two, b36Unmasked and b37Unmasked, are variants that use the chromosome names of the genomes b36 and b37, which are frequently used with the Genome Analysis Toolkit (GATK).

All five data sets comprise all 22 autosomal chromosomes, the two sex chromosomes, mitochondrial DNA, and the six pseudoautosomal regions as defined in the data frames pseudoautosomal.hg18 (for hg18), pseudoautosomal.hg19 (for hg19), and pseudoautosomal.hg38 (for hg38), which are provided by the GWASTools package. If this separation is undesired, the user can re-unite the pseudoautosomal regions with their chromosomes as shown in the example section below, or run unmaskedRegions directly to extract the unmasked regions.

Author(s)

Ulrich Bodenhofer bodenhofer@bioinf.jku.at

References

http://www.bioinf.jku.at/software/podkat

See Also

unmaskedRegions, GRangesList, GRanges, pseudoautosomal

Examples

## Example: inspect two of the pre-built unmasked-region data sets and
## rebuild a variant of the hg19 object in which the pseudoautosomal
## regions are folded back into chrX and chrY.

## load data sets
data(hg19Unmasked)
data(b37Unmasked)

## show chromosome names
## (hg19Unmasked uses "chr"-prefixed names, b37Unmasked uses plain names)
seqlevels(hg19Unmasked)
seqlevels(b37Unmasked)

## show names of list components
## (besides the chromosomes, the list has separate components for the
## pseudoautosomal regions: X.PAR1, X.PAR2, X.XTR, Y.PAR1, Y.PAR2, Y.XTR)
names(hg19Unmasked)
names(b37Unmasked)

## determine numbers of regions for each component
sapply(hg19Unmasked, length)
sapply(b37Unmasked, length)

## re-unite pseudoautosomal regions with their chromosomes
## (that is the result of what unmaskedRegions() would have given
## without specifying pseudoautosomal regions)
## start from the 22 autosomes only; chrX and chrY are added below,
## while chrM is not copied into hg19basic
hg19basic <- hg19Unmasked[paste0("chr", 1:22)]
## unlist() flattens the selected components into a single GRanges object
## and reduce() merges its ranges, so the resulting component can hold
## fewer regions than the sum of its parts
hg19basic$chrX <- reduce(unlist(hg19Unmasked[c("chrX", "X.PAR1",
                                               "X.PAR2", "X.XTR")]))
hg19basic$chrY <- reduce(unlist(hg19Unmasked[c("chrY", "Y.PAR1",
                                               "Y.PAR2", "Y.XTR")]))

## show some information about the newly created object
## (component names and number of regions per component)
names(hg19basic)
sapply(hg19basic, length)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(podkat)
Loading required package: Rsamtools
Loading required package: GenomeInfoDb
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from 'package:stats':

    IQR, mad, xtabs

The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
    get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
    rbind, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit

Loading required package: S4Vectors

Attaching package: 'S4Vectors'

The following objects are masked from 'package:base':

    colMeans, colSums, expand.grid, rowMeans, rowSums

Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package: Biostrings
Loading required package: XVector
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/podkat/unmasked-datasets.Rd_%03d_medium.png", width=480, height=480)
> ### Name: unmasked-datasets
> ### Title: Unmasked Regions of Human Genomes
> ### Aliases: unmasked-datasets hg18Unmasked hg19Unmasked hg38Unmasked
> ###   b36Unmasked b37Unmasked
> ### Keywords: datasets
> 
> ### ** Examples
> 
> ## load data sets
> data(hg19Unmasked)
> data(b37Unmasked)
> 
> ## show chromosome names
> seqlevels(hg19Unmasked)
 [1] "chr1"  "chr2"  "chr3"  "chr4"  "chr5"  "chr6"  "chr7"  "chr8"  "chr9" 
[10] "chr10" "chr11" "chr12" "chr13" "chr14" "chr15" "chr16" "chr17" "chr18"
[19] "chr19" "chr20" "chr21" "chr22" "chrX"  "chrY"  "chrM" 
> seqlevels(b37Unmasked)
 [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
[16] "16" "17" "18" "19" "20" "21" "22" "X"  "Y"  "MT"
> 
> ## show names of list components
> names(hg19Unmasked)
 [1] "chr1"   "chr2"   "chr3"   "chr4"   "chr5"   "chr6"   "chr7"   "chr8"  
 [9] "chr9"   "chr10"  "chr11"  "chr12"  "chr13"  "chr14"  "chr15"  "chr16" 
[17] "chr17"  "chr18"  "chr19"  "chr20"  "chr21"  "chr22"  "chrX"   "chrY"  
[25] "chrM"   "X.PAR1" "X.PAR2" "X.XTR"  "Y.PAR1" "Y.PAR2" "Y.XTR" 
> names(b37Unmasked)
 [1] "1"      "2"      "3"      "4"      "5"      "6"      "7"      "8"     
 [9] "9"      "10"     "11"     "12"     "13"     "14"     "15"     "16"    
[17] "17"     "18"     "19"     "20"     "21"     "22"     "X"      "Y"     
[25] "M"      "X.PAR1" "X.PAR2" "X.XTR"  "Y.PAR1" "Y.PAR2" "Y.XTR" 
> 
> ## determine numbers of regions for each component
> sapply(hg19Unmasked, length)
  chr1   chr2   chr3   chr4   chr5   chr6   chr7   chr8   chr9  chr10  chr11 
    38     19      4     11      6      9     16      8     39     16      8 
 chr12  chr13  chr14  chr15  chr16  chr17  chr18  chr19  chr20  chr21  chr22 
     7      5      1      9      5      7      5      5      6     11      4 
  chrX   chrY   chrM X.PAR1 X.PAR2  X.XTR Y.PAR1 Y.PAR2  Y.XTR 
    17     12      1      7      1      1      7      1      1 
> sapply(b37Unmasked, length)
     1      2      3      4      5      6      7      8      9     10     11 
    38     19      4     11      6      9     16      8     39     16      8 
    12     13     14     15     16     17     18     19     20     21     22 
     7      5      1      9      5      7      5      5      6     11      4 
     X      Y      M X.PAR1 X.PAR2  X.XTR Y.PAR1 Y.PAR2  Y.XTR 
    17     12      1      7      1      1      7      1      1 
> 
> ## re-unite pseudoautosomal regions with their chromosomes
> ## (that is the result of what unmaskedRegions() would have given
> ## without specifying pseudoautosomal regions)
> hg19basic <- hg19Unmasked[paste0("chr", 1:22)]
> hg19basic$chrX <- reduce(unlist(hg19Unmasked[c("chrX", "X.PAR1",
+                                                "X.PAR2", "X.XTR")]))
> hg19basic$chrY <- reduce(unlist(hg19Unmasked[c("chrY", "Y.PAR1",
+                                                "Y.PAR2", "Y.XTR")]))
> 
> ## show some information about the newly created object
> names(hg19basic)
 [1] "chr1"  "chr2"  "chr3"  "chr4"  "chr5"  "chr6"  "chr7"  "chr8"  "chr9" 
[10] "chr10" "chr11" "chr12" "chr13" "chr14" "chr15" "chr16" "chr17" "chr18"
[19] "chr19" "chr20" "chr21" "chr22" "chrX"  "chrY" 
> sapply(hg19basic, length)
 chr1  chr2  chr3  chr4  chr5  chr6  chr7  chr8  chr9 chr10 chr11 chr12 chr13 
   38    19     4    11     6     9    16     8    39    16     8     7     5 
chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22  chrX  chrY 
    1     9     5     7     5     5     6    11     4    22    17 
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>