Last data update: 2014.03.03

R: Eigen-analysis on SNP genotype data
snpgdsEIGMIXR Documentation

Eigen-analysis on SNP genotype data

Description

Eigen-analysis on IBD matrix based SNP genotypes.

Usage

snpgdsEIGMIX(gdsobj, sample.id=NULL, snp.id=NULL, autosome.only=TRUE,
    remove.monosnp=TRUE, maf=NaN, missing.rate=NaN, num.thread=1L,
    eigen.cnt=32L, need.ibdmat=FALSE, ibdmat.only=FALSE, verbose=TRUE)

Arguments

gdsobj

an object of class SNPGDSFileClass, a SNP GDS file

sample.id

a vector of sample id specifying selected samples; if NULL, all samples are used

snp.id

a vector of snp id specifying selected SNPs; if NULL, all SNPs are used

autosome.only

if TRUE, use autosomal SNPs only; if it is a numeric or character value, keep SNPs according to the specified chromosome

remove.monosnp

if TRUE, remove monomorphic SNPs

maf

to use the SNPs with ">= maf" only; if NaN, no MAF threshold

missing.rate

to use the SNPs with "<= missing.rate" only; if NaN, no missing threshold

num.thread

the number of (CPU) cores used; if NA, detect the number of cores automatically

eigen.cnt

output the number of eigenvectors; if eigen.cnt <= 0, then return all eigenvectors

need.ibdmat

if TRUE, return the IBD matrix

ibdmat.only

return the IBD matrix only, do not compute the eigenvalues and eigenvectors

verbose

if TRUE, show information

Value

Return a snpgdsEigMixClass object, and it is a list:

sample.id

the sample ids used in the analysis

snp.id

the SNP ids used in the analysis

eigenval

eigenvalues

eigenvect

eigenvactors, "# of samples" x "eigen.cnt"

ibdmat

the IBD matrix

Author(s)

Xiuwen Zheng

References

Zheng X, Weir BS. Eigenanalysis on SNP Data with an Interpretation of Identity by Descent. Theoretical Population Biology. 2015 Oct 23. pii: S0040-5809(15)00089-1. doi: 10.1016/j.tpb.2015.09.004. [Epub ahead of print]

See Also

snpgdsEIGMIX, snpgdsPCA

Examples

# open an example dataset (HapMap)
genofile <- snpgdsOpen(snpgdsExampleFileName())

# get population information
#   or pop_code <- scan("pop.txt", what=character())
#   if it is stored in a text file "pop.txt"
pop_code <- read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))

# get sample id
samp.id <- read.gdsn(index.gdsn(genofile, "sample.id"))

# run eigen-analysis
RV <- snpgdsEIGMIX(genofile)

# eigenvalues
RV$eigenval

# make a data.frame
tab <- data.frame(sample.id = samp.id, pop = factor(pop_code),
    EV1 = RV$eigenvect[,1],    # the first eigenvector
    EV2 = RV$eigenvect[,2],    # the second eigenvector
    stringsAsFactors = FALSE)
head(tab)

# draw
plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
    xlab="eigenvector 2", ylab="eigenvector 1")
legend("topleft", legend=levels(tab$pop), pch="o", col=1:4)


# define groups
groups <- list(CEU = samp.id[pop_code == "CEU"],
    YRI = samp.id[pop_code == "YRI"],
    CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])

prop <- snpgdsAdmixProp(RV, groups=groups)

# draw
plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),
    xlab = "Admixture Proportion from YRI",
    ylab = "Admixture Proportion from CEU")
abline(v=0, col="gray25", lty=2)
abline(h=0, col="gray25", lty=2)
abline(a=1, b=-1, col="gray25", lty=2)
legend("topright", legend=levels(tab$pop), pch="o", col=1:4)



# run eigen-analysis
RV <- snpgdsEIGMIX(genofile, sample.id=samp.id[pop_code=="JPT"],
    need.ibdmat=TRUE)
z <- RV$ibdmat

mean(c(z))
mean(diag(z))


# close the genotype file
snpgdsClose(genofile)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(SNPRelate)
Loading required package: gdsfmt
SNPRelate -- supported by Streaming SIMD Extensions 2 (SSE2)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/SNPRelate/snpgdsEIGMIX.Rd_%03d_medium.png", width=480, height=480)
> ### Name: snpgdsEIGMIX
> ### Title: Eigen-analysis on SNP genotype data
> ### Aliases: snpgdsEIGMIX
> ### Keywords: GDS GWAS
> 
> ### ** Examples
> 
> # open an example dataset (HapMap)
> genofile <- snpgdsOpen(snpgdsExampleFileName())
> 
> # get population information
> #   or pop_code <- scan("pop.txt", what=character())
> #   if it is stored in a text file "pop.txt"
> pop_code <- read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))
> 
> # get sample id
> samp.id <- read.gdsn(index.gdsn(genofile, "sample.id"))
> 
> # run eigen-analysis
> RV <- snpgdsEIGMIX(genofile)
Eigen-analysis on SNP genotypes:
Excluding 365 SNPs on non-autosomes
Excluding 1 SNP (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 279 samples, 8722 SNPs
	using 1 (CPU) core
Eigen-analysis:	the sum of all selected genotypes (0, 1 and 2) = 2446510
Eigen-analysis:	Wed Jul  6 05:34:36 2016	0%
Eigen-analysis:	Wed Jul  6 05:34:37 2016	100%
Eigen-analysis:	Wed Jul  6 05:34:37 2016	Begin (eigenvalues and eigenvectors)
Eigen-analysis:	Wed Jul  6 05:34:37 2016	End (eigenvalues and eigenvectors)
> 
> # eigenvalues
> RV$eigenval
  [1]  2.017305e+01  1.000461e+01  8.806819e-01  7.429476e-01  6.968208e-01
  [6]  6.874427e-01  6.525443e-01  6.054765e-01  5.295101e-01  5.076562e-01
 [11]  4.967220e-01  4.906583e-01 -4.830992e-01  4.781425e-01 -4.775912e-01
 [16]  4.763934e-01 -4.744488e-01 -4.617784e-01  4.612989e-01  4.592947e-01
 [21] -4.547933e-01 -4.491199e-01  4.474031e-01 -4.423255e-01  4.418133e-01
 [26]  4.385189e-01  4.334209e-01  4.283672e-01  4.226959e-01 -4.197632e-01
 [31]  4.194047e-01 -4.174362e-01  4.108246e-01  4.002398e-01 -4.002322e-01
 [36] -3.996196e-01  3.969773e-01  3.919273e-01  3.856560e-01  3.823628e-01
 [41]  3.789550e-01  3.786969e-01  3.748523e-01  3.693451e-01  3.613405e-01
 [46] -3.605896e-01 -3.597417e-01  3.570231e-01 -3.557642e-01 -3.546755e-01
 [51]  3.538600e-01 -3.536642e-01 -3.521673e-01 -3.521059e-01 -3.506419e-01
 [56] -3.501412e-01 -3.494344e-01  3.491916e-01 -3.476336e-01 -3.468939e-01
 [61]  3.463630e-01 -3.459487e-01 -3.451560e-01 -3.436474e-01 -3.425484e-01
 [66] -3.413802e-01 -3.405524e-01 -3.400964e-01  3.396521e-01 -3.395577e-01
 [71] -3.380061e-01 -3.372883e-01  3.370050e-01 -3.366799e-01 -3.362678e-01
 [76] -3.354331e-01  3.346632e-01 -3.344115e-01 -3.339245e-01 -3.331720e-01
 [81] -3.323250e-01  3.313393e-01 -3.308113e-01 -3.302457e-01 -3.291947e-01
 [86] -3.282409e-01 -3.272011e-01 -3.261149e-01 -3.253198e-01  3.246075e-01
 [91] -3.236667e-01 -3.231019e-01 -3.221063e-01 -3.215450e-01 -3.206107e-01
 [96]  3.196880e-01 -3.186403e-01 -3.185642e-01 -3.175814e-01 -3.165515e-01
[101] -3.156931e-01 -3.150634e-01  3.149148e-01 -3.137639e-01 -3.134268e-01
[106] -3.121927e-01  3.100852e-01 -3.098287e-01 -3.086269e-01 -3.073546e-01
[111]  3.069019e-01 -3.063779e-01 -3.049949e-01 -3.025869e-01  3.006065e-01
[116] -3.000676e-01 -2.976370e-01  2.968544e-01  2.953681e-01 -2.937433e-01
[121]  2.895660e-01  2.848741e-01  2.828685e-01  2.749171e-01  2.712420e-01
[126]  2.610499e-01  2.575881e-01  2.533426e-01  2.516826e-01  2.448792e-01
[131]  2.376573e-01 -2.296114e-01  2.262690e-01  2.213780e-01  2.174558e-01
[136]  1.891750e-01  1.718771e-01  1.687518e-01 -1.587265e-01  1.481277e-01
[141]  1.441447e-01  1.358029e-01  1.347552e-01  1.246670e-01 -1.222215e-01
[146]  1.214723e-01 -1.204209e-01  1.176114e-01 -1.161671e-01  1.144757e-01
[151] -1.130370e-01 -1.117666e-01 -1.083331e-01  1.082669e-01 -1.066236e-01
[156]  1.064987e-01  1.054807e-01 -1.048083e-01 -1.028137e-01 -1.016878e-01
[161] -1.001440e-01  9.823154e-02 -9.782904e-02  9.598864e-02 -9.403538e-02
[166]  9.400229e-02 -9.251277e-02  9.213832e-02 -9.057700e-02 -8.891962e-02
[171] -8.717063e-02  8.708374e-02 -8.575643e-02 -8.446614e-02 -8.359204e-02
[176]  8.339958e-02 -8.256884e-02 -8.150659e-02  8.092553e-02  7.994854e-02
[181] -7.982835e-02  7.859337e-02  7.674796e-02 -7.563713e-02 -7.549151e-02
[186]  7.493029e-02 -7.397753e-02  7.350292e-02 -7.108372e-02 -7.049592e-02
[191]  6.998468e-02 -6.934603e-02 -6.834589e-02  6.794749e-02 -6.711958e-02
[196]  6.669015e-02 -6.645167e-02 -6.409361e-02 -6.348831e-02  6.340662e-02
[201]  6.170368e-02 -6.141468e-02 -5.987693e-02  5.942937e-02 -5.816994e-02
[206]  5.642555e-02 -5.571108e-02 -5.530567e-02  5.489383e-02  5.362471e-02
[211] -5.312767e-02 -5.282764e-02  5.177067e-02 -5.119307e-02  4.973407e-02
[216] -4.906657e-02  4.850493e-02 -4.824112e-02 -4.719527e-02 -4.676985e-02
[221]  4.589082e-02 -4.450242e-02  4.366607e-02 -4.276357e-02  4.195426e-02
[226] -4.133996e-02  4.108224e-02  3.964960e-02 -3.881050e-02 -3.785205e-02
[231]  3.779321e-02 -3.669045e-02  3.646436e-02 -3.578796e-02  3.560953e-02
[236] -3.374997e-02 -3.344734e-02  3.343481e-02  3.188181e-02 -3.045846e-02
[241]  3.028727e-02 -2.938766e-02  2.809168e-02  2.793242e-02 -2.734460e-02
[246] -2.627365e-02  2.531118e-02 -2.407794e-02  2.391718e-02  2.220627e-02
[251] -2.210489e-02 -2.156214e-02 -2.111477e-02  2.006498e-02 -1.951349e-02
[256]  1.868707e-02 -1.794299e-02  1.682982e-02 -1.665946e-02  1.524378e-02
[261] -1.436045e-02  1.325848e-02 -1.307953e-02  1.200215e-02 -1.196959e-02
[266] -1.138463e-02  9.757884e-03 -9.367784e-03 -8.970240e-03  7.548373e-03
[271]  6.673750e-03  6.093169e-03 -5.547774e-03  3.174412e-03 -2.753696e-03
[276] -2.079812e-03  1.665100e-03 -1.591070e-03 -8.314919e-05
> 
> # make a data.frame
> tab <- data.frame(sample.id = samp.id, pop = factor(pop_code),
+     EV1 = RV$eigenvect[,1],    # the first eigenvector
+     EV2 = RV$eigenvect[,2],    # the second eigenvector
+     stringsAsFactors = FALSE)
> head(tab)
  sample.id pop         EV1         EV2
1   NA19152 YRI  0.08134977  0.01800612
2   NA19139 YRI  0.08332236  0.01552648
3   NA18912 YRI  0.07999713  0.01544285
4   NA19160 YRI  0.08619874  0.02043080
5   NA07034 CEU -0.02638517 -0.07853139
6   NA07055 CEU -0.02730423 -0.08396931
> 
> # draw
> plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
+     xlab="eigenvector 2", ylab="eigenvector 1")
> legend("topleft", legend=levels(tab$pop), pch="o", col=1:4)
> 
> 
> # define groups
> groups <- list(CEU = samp.id[pop_code == "CEU"],
+     YRI = samp.id[pop_code == "YRI"],
+     CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])
> 
> prop <- snpgdsAdmixProp(RV, groups=groups)
> 
> # draw
> plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),
+     xlab = "Admixture Proportion from YRI",
+     ylab = "Admixture Proportion from CEU")
> abline(v=0, col="gray25", lty=2)
> abline(h=0, col="gray25", lty=2)
> abline(a=1, b=-1, col="gray25", lty=2)
> legend("topright", legend=levels(tab$pop), pch="o", col=1:4)
> 
> 
> 
> # run eigen-analysis
> RV <- snpgdsEIGMIX(genofile, sample.id=samp.id[pop_code=="JPT"],
+     need.ibdmat=TRUE)
Eigen-analysis on SNP genotypes:
Excluding 365 SNPs on non-autosomes
Excluding 1985 SNPs (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 47 samples, 6738 SNPs
	using 1 (CPU) core
Eigen-analysis:	the sum of all selected genotypes (0, 1 and 2) = 317025
Eigen-analysis:	Wed Jul  6 05:34:38 2016	0%
Eigen-analysis:	Wed Jul  6 05:34:38 2016	100%
Eigen-analysis:	Wed Jul  6 05:34:38 2016	Begin (eigenvalues and eigenvectors)
Eigen-analysis:	Wed Jul  6 05:34:38 2016	End (eigenvalues and eigenvectors)
> z <- RV$ibdmat
> 
> mean(c(z))
[1] -0.01073275
> mean(diag(z))
[1] -0.006161977
> 
> 
> # close the genotype file
> snpgdsClose(genofile)
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>