R: Estimate ancestral proportions from the eigen-analysis
snpgdsAdmixProp
R Documentation
Estimate ancestral proportions from the eigen-analysis
Description
Estimate ancestral (admixture) proportions based on the eigen-analysis.
Usage
snpgdsAdmixProp(eigobj, groups, bound=FALSE)
Arguments
eigobj
an object of snpgdsEigMixClass from
snpgdsEIGMIX, or an object of snpgdsPCAClass
from snpgdsPCA
groups
a list of sample IDs, such like groups = list(
CEU = c("NA0101", "NA1022", ...), YRI = c("NAxxxx", ...),
Asia = c("NA1234", ...))
bound
if TRUE, the estimates are bounded so that no
component < 0 or > 1, and the sum of proportions is one
Details
The minor allele frequency and missing rate for each SNP passed in
snp.id are calculated over all the samples in sample.id.
Value
Return a snpgdsEigMixClass object, and it is a list:
sample.id
the sample ids used in the analysis
snp.id
the SNP ids used in the analysis
eigenval
eigenvalues
eigenvect
eigenvactors, "# of samples" x "eigen.cnt"
ibdmat
the IBD matrix
Author(s)
Xiuwen Zheng
References
Zheng X, Weir BS.
Eigenanalysis on SNP Data with an Interpretation of Identity by Descent.
Theoretical Population Biology. 2015 Oct 23. pii: S0040-5809(15)00089-1.
doi: 10.1016/j.tpb.2015.09.004. [Epub ahead of print]
See Also
snpgdsEIGMIX, snpgdsPCA
Examples
# open an example dataset (HapMap)
genofile <- snpgdsOpen(snpgdsExampleFileName())
# get population information
# or pop_code <- scan("pop.txt", what=character())
# if it is stored in a text file "pop.txt"
pop_code <- read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))
# get sample id
samp.id <- read.gdsn(index.gdsn(genofile, "sample.id"))
# run eigen-analysis
RV <- snpgdsEIGMIX(genofile)
# eigenvalues
RV$eigenval
# make a data.frame
tab <- data.frame(sample.id = samp.id, pop = factor(pop_code),
EV1 = RV$eigenvect[,1], # the first eigenvector
EV2 = RV$eigenvect[,2], # the second eigenvector
stringsAsFactors = FALSE)
head(tab)
# draw
plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
xlab="eigenvector 2", ylab="eigenvector 1")
legend("topleft", legend=levels(tab$pop), pch="o", col=1:4)
# define groups
groups <- list(CEU = samp.id[pop_code == "CEU"],
YRI = samp.id[pop_code == "YRI"],
CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])
prop <- snpgdsAdmixProp(RV, groups=groups)
# draw
plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),
xlab = "Admixture Proportion from YRI",
ylab = "Admixture Proportion from CEU")
abline(v=0, col="gray25", lty=2)
abline(h=0, col="gray25", lty=2)
abline(a=1, b=-1, col="gray25", lty=2)
legend("topright", legend=levels(tab$pop), pch="o", col=1:4)
# run eigen-analysis
RV <- snpgdsEIGMIX(genofile, sample.id=samp.id[pop_code=="JPT"])
z <- RV$ibdmat
mean(c(z))
mean(diag(z))
# close the genotype file
snpgdsClose(genofile)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(SNPRelate)
Loading required package: gdsfmt
SNPRelate -- supported by Streaming SIMD Extensions 2 (SSE2)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/SNPRelate/snpgdsAdmixProp.Rd_%03d_medium.png", width=480, height=480)
> ### Name: snpgdsAdmixProp
> ### Title: Estimate ancestral proportions from the eigen-analysis
> ### Aliases: snpgdsAdmixProp
> ### Keywords: GDS GWAS
>
> ### ** Examples
>
> # open an example dataset (HapMap)
> genofile <- snpgdsOpen(snpgdsExampleFileName())
>
> # get population information
> # or pop_code <- scan("pop.txt", what=character())
> # if it is stored in a text file "pop.txt"
> pop_code <- read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))
>
> # get sample id
> samp.id <- read.gdsn(index.gdsn(genofile, "sample.id"))
>
> # run eigen-analysis
> RV <- snpgdsEIGMIX(genofile)
Eigen-analysis on SNP genotypes:
Excluding 365 SNPs on non-autosomes
Excluding 1 SNP (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 279 samples, 8722 SNPs
using 1 (CPU) core
Eigen-analysis: the sum of all selected genotypes (0, 1 and 2) = 2446510
Eigen-analysis: Wed Jul 6 05:34:25 2016 0%
Eigen-analysis: Wed Jul 6 05:34:26 2016 100%
Eigen-analysis: Wed Jul 6 05:34:26 2016 Begin (eigenvalues and eigenvectors)
Eigen-analysis: Wed Jul 6 05:34:26 2016 End (eigenvalues and eigenvectors)
>
> # eigenvalues
> RV$eigenval
[1] 2.017305e+01 1.000461e+01 8.806819e-01 7.429476e-01 6.968208e-01
[6] 6.874427e-01 6.525443e-01 6.054765e-01 5.295101e-01 5.076562e-01
[11] 4.967220e-01 4.906583e-01 -4.830992e-01 4.781425e-01 -4.775912e-01
[16] 4.763934e-01 -4.744488e-01 -4.617784e-01 4.612989e-01 4.592947e-01
[21] -4.547933e-01 -4.491199e-01 4.474031e-01 -4.423255e-01 4.418133e-01
[26] 4.385189e-01 4.334209e-01 4.283672e-01 4.226959e-01 -4.197632e-01
[31] 4.194047e-01 -4.174362e-01 4.108246e-01 4.002398e-01 -4.002322e-01
[36] -3.996196e-01 3.969773e-01 3.919273e-01 3.856560e-01 3.823628e-01
[41] 3.789550e-01 3.786969e-01 3.748523e-01 3.693451e-01 3.613405e-01
[46] -3.605896e-01 -3.597417e-01 3.570231e-01 -3.557642e-01 -3.546755e-01
[51] 3.538600e-01 -3.536642e-01 -3.521673e-01 -3.521059e-01 -3.506419e-01
[56] -3.501412e-01 -3.494344e-01 3.491916e-01 -3.476336e-01 -3.468939e-01
[61] 3.463630e-01 -3.459487e-01 -3.451560e-01 -3.436474e-01 -3.425484e-01
[66] -3.413802e-01 -3.405524e-01 -3.400964e-01 3.396521e-01 -3.395577e-01
[71] -3.380061e-01 -3.372883e-01 3.370050e-01 -3.366799e-01 -3.362678e-01
[76] -3.354331e-01 3.346632e-01 -3.344115e-01 -3.339245e-01 -3.331720e-01
[81] -3.323250e-01 3.313393e-01 -3.308113e-01 -3.302457e-01 -3.291947e-01
[86] -3.282409e-01 -3.272011e-01 -3.261149e-01 -3.253198e-01 3.246075e-01
[91] -3.236667e-01 -3.231019e-01 -3.221063e-01 -3.215450e-01 -3.206107e-01
[96] 3.196880e-01 -3.186403e-01 -3.185642e-01 -3.175814e-01 -3.165515e-01
[101] -3.156931e-01 -3.150634e-01 3.149148e-01 -3.137639e-01 -3.134268e-01
[106] -3.121927e-01 3.100852e-01 -3.098287e-01 -3.086269e-01 -3.073546e-01
[111] 3.069019e-01 -3.063779e-01 -3.049949e-01 -3.025869e-01 3.006065e-01
[116] -3.000676e-01 -2.976370e-01 2.968544e-01 2.953681e-01 -2.937433e-01
[121] 2.895660e-01 2.848741e-01 2.828685e-01 2.749171e-01 2.712420e-01
[126] 2.610499e-01 2.575881e-01 2.533426e-01 2.516826e-01 2.448792e-01
[131] 2.376573e-01 -2.296114e-01 2.262690e-01 2.213780e-01 2.174558e-01
[136] 1.891750e-01 1.718771e-01 1.687518e-01 -1.587265e-01 1.481277e-01
[141] 1.441447e-01 1.358029e-01 1.347552e-01 1.246670e-01 -1.222215e-01
[146] 1.214723e-01 -1.204209e-01 1.176114e-01 -1.161671e-01 1.144757e-01
[151] -1.130370e-01 -1.117666e-01 -1.083331e-01 1.082669e-01 -1.066236e-01
[156] 1.064987e-01 1.054807e-01 -1.048083e-01 -1.028137e-01 -1.016878e-01
[161] -1.001440e-01 9.823154e-02 -9.782904e-02 9.598864e-02 -9.403538e-02
[166] 9.400229e-02 -9.251277e-02 9.213832e-02 -9.057700e-02 -8.891962e-02
[171] -8.717063e-02 8.708374e-02 -8.575643e-02 -8.446614e-02 -8.359204e-02
[176] 8.339958e-02 -8.256884e-02 -8.150659e-02 8.092553e-02 7.994854e-02
[181] -7.982835e-02 7.859337e-02 7.674796e-02 -7.563713e-02 -7.549151e-02
[186] 7.493029e-02 -7.397753e-02 7.350292e-02 -7.108372e-02 -7.049592e-02
[191] 6.998468e-02 -6.934603e-02 -6.834589e-02 6.794749e-02 -6.711958e-02
[196] 6.669015e-02 -6.645167e-02 -6.409361e-02 -6.348831e-02 6.340662e-02
[201] 6.170368e-02 -6.141468e-02 -5.987693e-02 5.942937e-02 -5.816994e-02
[206] 5.642555e-02 -5.571108e-02 -5.530567e-02 5.489383e-02 5.362471e-02
[211] -5.312767e-02 -5.282764e-02 5.177067e-02 -5.119307e-02 4.973407e-02
[216] -4.906657e-02 4.850493e-02 -4.824112e-02 -4.719527e-02 -4.676985e-02
[221] 4.589082e-02 -4.450242e-02 4.366607e-02 -4.276357e-02 4.195426e-02
[226] -4.133996e-02 4.108224e-02 3.964960e-02 -3.881050e-02 -3.785205e-02
[231] 3.779321e-02 -3.669045e-02 3.646436e-02 -3.578796e-02 3.560953e-02
[236] -3.374997e-02 -3.344734e-02 3.343481e-02 3.188181e-02 -3.045846e-02
[241] 3.028727e-02 -2.938766e-02 2.809168e-02 2.793242e-02 -2.734460e-02
[246] -2.627365e-02 2.531118e-02 -2.407794e-02 2.391718e-02 2.220627e-02
[251] -2.210489e-02 -2.156214e-02 -2.111477e-02 2.006498e-02 -1.951349e-02
[256] 1.868707e-02 -1.794299e-02 1.682982e-02 -1.665946e-02 1.524378e-02
[261] -1.436045e-02 1.325848e-02 -1.307953e-02 1.200215e-02 -1.196959e-02
[266] -1.138463e-02 9.757884e-03 -9.367784e-03 -8.970240e-03 7.548373e-03
[271] 6.673750e-03 6.093169e-03 -5.547774e-03 3.174412e-03 -2.753696e-03
[276] -2.079812e-03 1.665100e-03 -1.591070e-03 -8.314919e-05
>
> # make a data.frame
> tab <- data.frame(sample.id = samp.id, pop = factor(pop_code),
+ EV1 = RV$eigenvect[,1], # the first eigenvector
+ EV2 = RV$eigenvect[,2], # the second eigenvector
+ stringsAsFactors = FALSE)
> head(tab)
sample.id pop EV1 EV2
1 NA19152 YRI 0.08134977 0.01800612
2 NA19139 YRI 0.08332236 0.01552648
3 NA18912 YRI 0.07999713 0.01544285
4 NA19160 YRI 0.08619874 0.02043080
5 NA07034 CEU -0.02638517 -0.07853139
6 NA07055 CEU -0.02730423 -0.08396931
>
> # draw
> plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
+ xlab="eigenvector 2", ylab="eigenvector 1")
> legend("topleft", legend=levels(tab$pop), pch="o", col=1:4)
>
>
> # define groups
> groups <- list(CEU = samp.id[pop_code == "CEU"],
+ YRI = samp.id[pop_code == "YRI"],
+ CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])
>
> prop <- snpgdsAdmixProp(RV, groups=groups)
>
> # draw
> plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),
+ xlab = "Admixture Proportion from YRI",
+ ylab = "Admixture Proportion from CEU")
> abline(v=0, col="gray25", lty=2)
> abline(h=0, col="gray25", lty=2)
> abline(a=1, b=-1, col="gray25", lty=2)
> legend("topright", legend=levels(tab$pop), pch="o", col=1:4)
>
>
>
> # run eigen-analysis
> RV <- snpgdsEIGMIX(genofile, sample.id=samp.id[pop_code=="JPT"])
Eigen-analysis on SNP genotypes:
Excluding 365 SNPs on non-autosomes
Excluding 1985 SNPs (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 47 samples, 6738 SNPs
using 1 (CPU) core
Eigen-analysis: the sum of all selected genotypes (0, 1 and 2) = 317025
Eigen-analysis: Wed Jul 6 05:34:29 2016 0%
Eigen-analysis: Wed Jul 6 05:34:29 2016 100%
Eigen-analysis: Wed Jul 6 05:34:29 2016 Begin (eigenvalues and eigenvectors)
Eigen-analysis: Wed Jul 6 05:34:29 2016 End (eigenvalues and eigenvectors)
> z <- RV$ibdmat
>
> mean(c(z))
[1] NA
Warning message:
In mean.default(c(z)) : argument is not numeric or logical: returning NA
> mean(diag(z))
[1] NaN
>
>
> # close the genotype file
> snpgdsClose(genofile)
>
>
>
>
>
> dev.off()
null device
1
>