# open an example dataset (HapMap)
genofile <- snpgdsOpen(snpgdsExampleFileName())
pop.group <- as.factor(read.gdsn(index.gdsn(
genofile, "sample.annot/pop.group")))
pop.level <- levels(pop.group)
diss <- snpgdsDiss(genofile)
hc <- snpgdsHCluster(diss)
# close the genotype file
snpgdsClose(genofile)
###################################################################
# cluster individuals
#
set.seed(100)
rv <- snpgdsCutTree(hc, label.H=TRUE, label.Z=TRUE)
# the distribution of Z scores
snpgdsDrawTree(rv, type="z-score", main="HapMap Phase II")
# draw dendrogram
snpgdsDrawTree(rv, main="HapMap Phase II",
edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"))
###################################################################
# or cluster individuals by ethnic information
#
rv2 <- snpgdsCutTree(hc, samp.group=pop.group)
# cluster individuals by Z score, specifying 'clust.count'
snpgdsDrawTree(rv2, rv$clust.count, main="HapMap Phase II",
edgePar = list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
labels = c("YRI", "CHB/JPT", "CEU"), y.label=0.1)
legend("bottomleft", legend=levels(pop.group), col=1:nlevels(pop.group),
pch=19, ncol=4, bg="white")
###################################################################
# zoom in ...
#
snpgdsDrawTree(rv2, rv$clust.count, dend.idx = c(1),
main="HapMap Phase II -- YRI",
edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
y.label.kinship=TRUE)
snpgdsDrawTree(rv2, rv$clust.count, dend.idx = c(2,2),
main="HapMap Phase II -- CEU",
edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
y.label.kinship=TRUE)
snpgdsDrawTree(rv2, rv$clust.count, dend.idx = c(2,1),
main="HapMap Phase II -- CHB/JPT",
edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
y.label.kinship=TRUE)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(SNPRelate)
Loading required package: gdsfmt
SNPRelate -- supported by Streaming SIMD Extensions 2 (SSE2)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/SNPRelate/snpgdsCutTree.Rd_%03d_medium.png", width=480, height=480)
> ### Name: snpgdsCutTree
> ### Title: Determine clusters of individuals
> ### Aliases: snpgdsCutTree
> ### Keywords: GDS GWAS
>
> ### ** Examples
>
> # open an example dataset (HapMap)
> genofile <- snpgdsOpen(snpgdsExampleFileName())
>
> pop.group <- as.factor(read.gdsn(index.gdsn(
+ genofile, "sample.annot/pop.group")))
> pop.level <- levels(pop.group)
>
> diss <- snpgdsDiss(genofile)
Individual dissimilarity analysis on SNP genotypes:
Excluding 365 SNPs on non-autosomes
Excluding 1 SNP (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 279 samples, 8722 SNPs
using 1 (CPU) core
Dissimilarity: the sum of all selected genotypes (0, 1 and 2) = 2446510
Dissimilarity: Wed Jul 6 05:34:32 2016 0%
Dissimilarity: Wed Jul 6 05:34:33 2016 100%
> hc <- snpgdsHCluster(diss)
>
> # close the genotype file
> snpgdsClose(genofile)
>
>
>
> ###################################################################
> # cluster individuals
> #
>
> set.seed(100)
> rv <- snpgdsCutTree(hc, label.H=TRUE, label.Z=TRUE)
Determine groups by permutation (Z threshold: 15, outlier threshold: 5):
Create 3 groups.
>
> # the distribution of Z scores
> snpgdsDrawTree(rv, type="z-score", main="HapMap Phase II")
>
> # draw dendrogram
> snpgdsDrawTree(rv, main="HapMap Phase II",
+ edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"))
>
>
> ###################################################################
> # or cluster individuals by ethnic information
> #
>
> rv2 <- snpgdsCutTree(hc, samp.group=pop.group)
Create 4 groups.
>
> # cluster individuals by Z score, specifying 'clust.count'
> snpgdsDrawTree(rv2, rv$clust.count, main="HapMap Phase II",
+ edgePar = list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
+ labels = c("YRI", "CHB/JPT", "CEU"), y.label=0.1)
> legend("bottomleft", legend=levels(pop.group), col=1:nlevels(pop.group),
+ pch=19, ncol=4, bg="white")
>
>
>
> ###################################################################
> # zoom in ...
> #
>
> snpgdsDrawTree(rv2, rv$clust.count, dend.idx = c(1),
+ main="HapMap Phase II -- YRI",
+ edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
+ y.label.kinship=TRUE)
>
> snpgdsDrawTree(rv2, rv$clust.count, dend.idx = c(2,2),
+ main="HapMap Phase II -- CEU",
+ edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
+ y.label.kinship=TRUE)
>
> snpgdsDrawTree(rv2, rv$clust.count, dend.idx = c(2,1),
+ main="HapMap Phase II -- CHB/JPT",
+ edgePar=list(col=rgb(0.5,0.5,0.5, 0.75), t.col="black"),
+ y.label.kinship=TRUE)
>
>
>
>
>
> dev.off()
null device
1
>