Each of these is a GRangesList object with
unmasked regions of different human genome builds, as provided by the packages
BSgenome.Hsapiens.UCSC.hg18.masked,
BSgenome.Hsapiens.UCSC.hg19.masked, and
BSgenome.Hsapiens.UCSC.hg38.masked.
The remaining two data sets,
b36Unmasked and b37Unmasked, are variants that use the
chromosome names of the genomes b36 and b37 (e.g. "1" instead of "chr1"),
which are frequently used by the Genome Analysis Toolkit (GATK).
All five data sets comprise all 22 autosomal chromosomes, the two sex
chromosomes, mitochondrial DNA, and, as separate list components, the six
pseudoautosomal regions as defined in the data frames
pseudoautosomal.hg18 (for
hg18), pseudoautosomal.hg19
(for hg19), and pseudoautosomal.hg38
(for hg38) as provided by the GWASTools package. If keeping the
pseudoautosomal regions as separate components is undesired, users can
either re-unite the pseudoautosomal regions with their chromosomes, as shown
in the example section below, or run unmaskedRegions themselves to extract
the unmasked regions.
## Example: inspect the unmasked-region data sets and rebuild a list in which
## the pseudoautosomal regions are merged back into chrX and chrY.
## The data sets are shipped with the podkat package, which must be attached.
## load data sets
data(hg19Unmasked)
data(b37Unmasked)
## show chromosome names
## (hg19Unmasked uses prefixed names such as "chr1",
## b37Unmasked uses plain names such as "1")
seqlevels(hg19Unmasked)
seqlevels(b37Unmasked)
## show names of list components
## (in addition to the chromosomes, both lists contain the six components
## X.PAR1, X.PAR2, X.XTR, Y.PAR1, Y.PAR2, and Y.XTR)
names(hg19Unmasked)
names(b37Unmasked)
## determine numbers of regions for each component
sapply(hg19Unmasked, length)
sapply(b37Unmasked, length)
## re-unite pseudoautosomal regions with their chromosomes
## (that is the result of what unmaskedRegions() would have given
## without specifying pseudoautosomal regions)
## start from the 22 autosomes only; chrX and chrY are added back below,
## whereas chrM is not carried over into hg19basic
hg19basic <- hg19Unmasked[paste0("chr", 1:22)]
## pool the chrX component with the three X-linked pseudoautosomal
## components into one set of ranges and pass it through reduce()
## (presumably to merge overlapping/adjacent ranges -- see the
## GenomicRanges documentation)
hg19basic$chrX <- reduce(unlist(hg19Unmasked[c("chrX", "X.PAR1",
"X.PAR2", "X.XTR")]))
## same procedure for chrY and its three pseudoautosomal components
hg19basic$chrY <- reduce(unlist(hg19Unmasked[c("chrY", "Y.PAR1",
"Y.PAR2", "Y.XTR")]))
## show some information about the newly created object
## (component names and number of regions per component)
names(hg19basic)
sapply(hg19basic, length)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(podkat)
Loading required package: Rsamtools
Loading required package: GenomeInfoDb
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from 'package:stats':
IQR, mad, xtabs
The following objects are masked from 'package:base':
Filter, Find, Map, Position, Reduce, anyDuplicated, append,
as.data.frame, cbind, colnames, do.call, duplicated, eval, evalq,
get, grep, grepl, intersect, is.unsorted, lapply, lengths, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank,
rbind, rownames, sapply, setdiff, sort, table, tapply, union,
unique, unsplit
Loading required package: S4Vectors
Attaching package: 'S4Vectors'
The following objects are masked from 'package:base':
colMeans, colSums, expand.grid, rowMeans, rowSums
Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package: Biostrings
Loading required package: XVector
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/podkat/unmasked-datasets.Rd_%03d_medium.png", width=480, height=480)
> ### Name: unmasked-datasets
> ### Title: Unmasked Regions of Human Genomes
> ### Aliases: unmasked-datasets hg18Unmasked hg19Unmasked hg38Unmasked
> ### b36Unmasked b37Unmasked
> ### Keywords: datasets
>
> ### ** Examples
>
> ## load data sets
> data(hg19Unmasked)
> data(b37Unmasked)
>
> ## show chromosome names
> seqlevels(hg19Unmasked)
[1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6" "chr7" "chr8" "chr9"
[10] "chr10" "chr11" "chr12" "chr13" "chr14" "chr15" "chr16" "chr17" "chr18"
[19] "chr19" "chr20" "chr21" "chr22" "chrX" "chrY" "chrM"
> seqlevels(b37Unmasked)
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15"
[16] "16" "17" "18" "19" "20" "21" "22" "X" "Y" "MT"
>
> ## show names of list components
> names(hg19Unmasked)
[1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6" "chr7" "chr8"
[9] "chr9" "chr10" "chr11" "chr12" "chr13" "chr14" "chr15" "chr16"
[17] "chr17" "chr18" "chr19" "chr20" "chr21" "chr22" "chrX" "chrY"
[25] "chrM" "X.PAR1" "X.PAR2" "X.XTR" "Y.PAR1" "Y.PAR2" "Y.XTR"
> names(b37Unmasked)
[1] "1" "2" "3" "4" "5" "6" "7" "8"
[9] "9" "10" "11" "12" "13" "14" "15" "16"
[17] "17" "18" "19" "20" "21" "22" "X" "Y"
[25] "M" "X.PAR1" "X.PAR2" "X.XTR" "Y.PAR1" "Y.PAR2" "Y.XTR"
>
> ## determine numbers of regions for each component
> sapply(hg19Unmasked, length)
chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11
38 19 4 11 6 9 16 8 39 16 8
chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22
7 5 1 9 5 7 5 5 6 11 4
chrX chrY chrM X.PAR1 X.PAR2 X.XTR Y.PAR1 Y.PAR2 Y.XTR
17 12 1 7 1 1 7 1 1
> sapply(b37Unmasked, length)
1 2 3 4 5 6 7 8 9 10 11
38 19 4 11 6 9 16 8 39 16 8
12 13 14 15 16 17 18 19 20 21 22
7 5 1 9 5 7 5 5 6 11 4
X Y M X.PAR1 X.PAR2 X.XTR Y.PAR1 Y.PAR2 Y.XTR
17 12 1 7 1 1 7 1 1
>
> ## re-unite pseudoautosomal regions with their chromosomes
> ## (that is the result of what unmaskedRegions() would have given
> ## without specifying pseudoautosomal regions)
> hg19basic <- hg19Unmasked[paste0("chr", 1:22)]
> hg19basic$chrX <- reduce(unlist(hg19Unmasked[c("chrX", "X.PAR1",
+ "X.PAR2", "X.XTR")]))
> hg19basic$chrY <- reduce(unlist(hg19Unmasked[c("chrY", "Y.PAR1",
+ "Y.PAR2", "Y.XTR")]))
>
> ## show some information about the newly created object
> names(hg19basic)
[1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6" "chr7" "chr8" "chr9"
[10] "chr10" "chr11" "chr12" "chr13" "chr14" "chr15" "chr16" "chr17" "chr18"
[19] "chr19" "chr20" "chr21" "chr22" "chrX" "chrY"
> sapply(hg19basic, length)
chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13
38 19 4 11 6 9 16 8 39 16 8 7 5
chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY
1 9 5 7 5 5 6 11 4 22 17
>
>
>
>
>
> dev.off()
null device
1
>