Last data update: 2014.03.03
|
R: Log likelihood for MLE method in the Identity-By-Descent...
snpgdsPairIBDMLELogLik | R Documentation |
Log likelihood for MLE method in the Identity-By-Descent (IBD) Analysis
Description
Calculate the log likelihood values from maximum likelihood estimation.
Usage
snpgdsPairIBDMLELogLik(geno1, geno2, allele.freq, k0=NaN, k1=NaN,
relatedness=c("", "self", "fullsib", "offspring", "halfsib",
"cousin", "unrelated"), verbose=TRUE)
Arguments
geno1 |
the SNP genotypes for the first individual,
0 – BB, 1 – AB, 2 – AA, other values – missing
|
geno2 |
the SNP genotypes for the second individual,
0 – BB, 1 – AB, 2 – AA, other values – missing
|
allele.freq |
the allele frequencies
|
k0 |
specified IBD coefficient
|
k1 |
specified IBD coefficient
|
relatedness |
specify a relatedness, otherwise use the values of
k0 and k1
|
verbose |
if TRUE, show information
|
Details
If (relatedness == "") and (k0 == NaN or k1 == NaN), then return
the log likelihood values for each (k0, k1) stored in ibdobj.
If (relatedness == "") and (k0 != NaN) and (k1 != NaN), then return
the log likelihood values for a specific IBD coefficient (k0, k1).
If relatedness is: "self", then k0 = 0, k1 = 0; "fullsib", then
k0 = 0.25, k1 = 0.5; "offspring", then k0 = 0, k1 = 1; "halfsib", then
k0 = 0.5, k1 = 0.5; "cousin", then k0 = 0.75, k1 = 0.25; "unrelated", then
k0 = 1, k1 = 0.
Value
The value of log likelihood.
Author(s)
Xiuwen Zheng
References
Milligan BG. 2003. Maximum-likelihood estimation of relatedness.
Genetics 163:1153-1167.
Weir BS, Anderson AD, Hepler AB. 2006.
Genetic relatedness analysis: modern data and new challenges.
Nat Rev Genet. 7(10):771-80.
Choi Y, Wijsman EM, Weir BS. 2009.
Case-control association testing in the presence of unknown relationships.
Genet Epidemiol 33(8):668-78.
See Also
snpgdsPairIBD , snpgdsIBDMLE ,
snpgdsIBDMLELogLik , snpgdsIBDMoM
Examples
# open an example dataset (HapMap)
genofile <- snpgdsOpen(snpgdsExampleFileName())
YRI.id <- read.gdsn(index.gdsn(genofile, "sample.id"))[
read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))=="YRI"]
# SNP pruning
set.seed(10)
snpset <- snpgdsLDpruning(genofile, sample.id=YRI.id, maf=0.05,
missing.rate=0.05)
snpset <- unname(sample(unlist(snpset), 250))
# the number of samples
n <- 25
# specify allele frequencies
RF <- snpgdsSNPRateFreq(genofile, sample.id=YRI.id, snp.id=snpset,
with.id=TRUE)
summary(RF$AlleleFreq)
subMLE <- snpgdsIBDMLE(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
allele.freq=RF$AlleleFreq)
subMoM <- snpgdsIBDMoM(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
allele.freq=RF$AlleleFreq)
# genotype matrix
mat <- snpgdsGetGeno(genofile, sample.id=YRI.id[1:n], snp.id=snpset,
snpfirstdim=TRUE)
########################
rv <- NULL
for (i in 2:n)
{
rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "EM"))
print(snpgdsPairIBDMLELogLik(mat[,1], mat[,i], RF$AlleleFreq,
relatedness="unrelated", verbose=TRUE))
}
rv
summary(rv$k0 - subMLE$k0[1, 2:n])
summary(rv$k1 - subMLE$k1[1, 2:n])
# ZERO
rv <- NULL
for (i in 2:n)
rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "MoM"))
rv
summary(rv$k0 - subMoM$k0[1, 2:n])
summary(rv$k1 - subMoM$k1[1, 2:n])
# ZERO
# close the genotype file
snpgdsClose(genofile)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(SNPRelate)
Loading required package: gdsfmt
SNPRelate -- supported by Streaming SIMD Extensions 2 (SSE2)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/SNPRelate/snpgdsPairIBDMLELogLik.Rd_%03d_medium.png", width=480, height=480)
> ### Name: snpgdsPairIBDMLELogLik
> ### Title: Log likelihood for MLE method in the Identity-By-Descent (IBD)
> ### Analysis
> ### Aliases: snpgdsPairIBDMLELogLik
> ### Keywords: GDS GWAS
>
> ### ** Examples
>
> # open an example dataset (HapMap)
> genofile <- snpgdsOpen(snpgdsExampleFileName())
>
> YRI.id <- read.gdsn(index.gdsn(genofile, "sample.id"))[
+ read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))=="YRI"]
>
> # SNP pruning
> set.seed(10)
> snpset <- snpgdsLDpruning(genofile, sample.id=YRI.id, maf=0.05,
+ missing.rate=0.05)
SNP pruning based on LD:
Excluding 365 SNPs on non-autosomes
Excluding 1646 SNPs (monomorphic: TRUE, < MAF: 0.05, or > missing rate: 0.05)
Working space: 93 samples, 7077 SNPs
using 1 (CPU) core
Sliding window: 500000 basepairs, Inf SNPs
|LD| threshold: 0.2
Chromosome 1: 62.29%, 446/716
Chromosome 2: 62.40%, 463/742
Chromosome 3: 60.76%, 370/609
Chromosome 4: 64.59%, 363/562
Chromosome 5: 62.54%, 354/566
Chromosome 6: 60.00%, 339/565
Chromosome 7: 62.71%, 296/472
Chromosome 8: 58.61%, 286/488
Chromosome 9: 62.98%, 262/416
Chromosome 10: 60.66%, 293/483
Chromosome 11: 62.86%, 281/447
Chromosome 12: 63.00%, 269/427
Chromosome 13: 63.08%, 217/344
Chromosome 14: 63.48%, 179/282
Chromosome 15: 63.36%, 166/262
Chromosome 16: 61.87%, 172/278
Chromosome 17: 65.70%, 136/207
Chromosome 18: 59.02%, 157/266
Chromosome 19: 68.33%, 82/120
Chromosome 20: 67.25%, 154/229
Chromosome 21: 61.11%, 77/126
Chromosome 22: 57.76%, 67/116
5429 SNPs are selected in total.
> snpset <- unname(sample(unlist(snpset), 250))
>
> # the number of samples
> n <- 25
>
> # specify allele frequencies
> RF <- snpgdsSNPRateFreq(genofile, sample.id=YRI.id, snp.id=snpset,
+ with.id=TRUE)
> summary(RF$AlleleFreq)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.05376 0.20430 0.55110 0.50640 0.78900 0.94620
>
> subMLE <- snpgdsIBDMLE(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
+ allele.freq=RF$AlleleFreq)
Identity-By-Descent analysis (MLE) on SNP genotypes:
Excluding 8838 SNPs (non-autosomes or non-selection)
Excluding 0 SNP (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 25 samples, 250 SNPs
using 1 (CPU) core
Specifying allele frequencies, mean: 0.506, sd: 0.300
MLE IBD: the sum of all selected genotypes (0, 1 and 2) = 6339
MLE IBD: Wed Jul 6 05:34:50 2016 0%
MLE IBD: Wed Jul 6 05:34:50 2016 100%
> subMoM <- snpgdsIBDMoM(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
+ allele.freq=RF$AlleleFreq)
IBD analysis (PLINK method of moment) on SNP genotypes:
Excluding 8838 SNPs (non-autosomes or non-selection)
Excluding 0 SNP (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 25 samples, 250 SNPs
using 1 (CPU) core
Specifying allele frequencies, mean: 0.506, sd: 0.300
*** A correction factor based on allele count is not used, since the allele frequencies are specified.
PLINK IBD: the sum of all selected genotypes (0, 1 and 2) = 6339
Wed Jul 6 05:34:50 2016 (internal increment: 65536)
[>.................................................] 0%, ETC: NA [==================================================] 100%, completed
Wed Jul 6 05:34:50 2016 Done.
>
>
> # genotype matrix
> mat <- snpgdsGetGeno(genofile, sample.id=YRI.id[1:n], snp.id=snpset,
+ snpfirstdim=TRUE)
Genotype matrix: 250 SNPs X 25 samples
>
>
> ########################
>
> rv <- NULL
> for (i in 2:n)
+ {
+ rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "EM"))
+ print(snpgdsPairIBDMLELogLik(mat[,1], mat[,i], RF$AlleleFreq,
+ relatedness="unrelated", verbose=TRUE))
+ }
[1] -364.4667
[1] -377.9771
[1] -390.3299
[1] -380.5342
[1] -378.2092
[1] -383.641
[1] -364.5406
[1] -375.196
[1] -383.5752
[1] -385.7771
[1] -368.6237
[1] -390.7766
[1] -367.5082
[1] -383.595
[1] -385.7602
[1] -378.5134
[1] -388.3292
[1] -376.4419
[1] -378.3509
[1] -386.8318
[1] -385.3803
[1] -382.988
[1] -369.9328
[1] -383.3912
> rv
k0 k1 loglik niter
1 1.00000000 0.000000e+00 -364.4667 747
2 1.00000000 0.000000e+00 -377.9771 173
3 0.82468804 1.167875e-01 -388.2101 332
4 1.00000000 0.000000e+00 -380.5342 389
5 0.98984740 3.887511e-05 -378.1907 239
6 0.90539842 1.644146e-03 -382.3265 770
7 0.97381760 8.728809e-06 -364.4401 184
8 1.00000000 0.000000e+00 -375.1960 507
9 1.00000000 0.000000e+00 -383.5752 89
10 0.83360197 1.662557e-01 -384.8404 136
11 0.99837814 2.517451e-05 -368.6221 494
12 0.08549853 8.110497e-01 -358.6954 107
13 0.82269816 1.772903e-01 -366.7767 162
14 0.93057265 7.620839e-05 -382.7161 298
15 0.97530202 2.469798e-02 -385.7473 360
16 0.82754985 1.724271e-01 -377.7132 157
17 1.00000000 0.000000e+00 -388.3292 528
18 0.83463317 1.653668e-01 -375.7027 164
19 1.00000000 0.000000e+00 -378.3509 239
20 0.94551004 5.448186e-02 -386.7572 263
21 1.00000000 0.000000e+00 -385.3803 162
22 1.00000000 0.000000e+00 -382.9880 148
23 1.00000000 0.000000e+00 -369.9328 189
24 1.00000000 0.000000e+00 -383.3912 199
> summary(rv$k0 - subMLE$k0[1, 2:n])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0 0 0
> summary(rv$k1 - subMLE$k1[1, 2:n])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0 0 0
> # ZERO
>
> rv <- NULL
> for (i in 2:n)
+ rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "MoM"))
> rv
k0 k1 loglik niter
1 0.37690989 0.62309011 NaN 0
2 1.00000000 0.00000000 NaN 0
3 1.00000000 0.00000000 NaN 0
4 0.60711190 0.39288810 NaN 0
5 1.00000000 0.00000000 NaN 0
6 0.80206954 0.13002840 NaN 0
7 0.87239826 0.08401839 NaN 0
8 0.82564551 0.17435449 NaN 0
9 1.00000000 0.00000000 NaN 0
10 0.58598310 0.41401690 NaN 0
11 0.75006441 0.24993559 NaN 0
12 0.06683913 0.87687977 NaN 0
13 0.48080417 0.51919583 NaN 0
14 0.93574779 0.01238419 NaN 0
15 0.77081797 0.22918203 NaN 0
16 0.46514972 0.53485028 NaN 0
17 0.73814017 0.26185983 NaN 0
18 0.30082134 0.69917866 NaN 0
19 0.77081797 0.22918203 NaN 0
20 0.64711113 0.35288887 NaN 0
21 1.00000000 0.00000000 NaN 0
22 1.00000000 0.00000000 NaN 0
23 0.71662087 0.28337913 NaN 0
24 1.00000000 0.00000000 NaN 0
> summary(rv$k0 - subMoM$k0[1, 2:n])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0 0 0
> summary(rv$k1 - subMoM$k1[1, 2:n])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0 0 0
> # ZERO
>
> # close the genotype file
> snpgdsClose(genofile)
>
>
>
>
>
> dev.off()
null device
1
>
|