Last data update: 2014.03.03

R: Log likelihood for MLE method in the Identity-By-Descent...
snpgdsPairIBDMLELogLikR Documentation

Log likelihood for MLE method in the Identity-By-Descent (IBD) Analysis

Description

Calculate the log likelihood values from maximum likelihood estimation.

Usage

snpgdsPairIBDMLELogLik(geno1, geno2, allele.freq, k0=NaN, k1=NaN,
    relatedness=c("", "self", "fullsib", "offspring", "halfsib",
    "cousin", "unrelated"), verbose=TRUE)

Arguments

geno1

the SNP genotypes for the first individual, 0 – BB, 1 – AB, 2 – AA, other values – missing

geno2

the SNP genotypes for the second individual, 0 – BB, 1 – AB, 2 – AA, other values – missing

allele.freq

the allele frequencies

k0

specified IBD coefficient

k1

specified IBD coefficient

relatedness

specify a relatedness, otherwise use the values of k0 and k1

verbose

if TRUE, show information

Details

If (relatedness == "") and (k0 == NaN or k1 == NaN), then return the log likelihood values for each (k0, k1) stored in ibdobj.

If (relatedness == "") and (k0 != NaN) and (k1 != NaN), then return the log likelihood values for a specific IBD coefficient (k0, k1).

If relatedness is: "self", then k0 = 0, k1 = 0; "fullsib", then k0 = 0.25, k1 = 0.5; "offspring", then k0 = 0, k1 = 1; "halfsib", then k0 = 0.5, k1 = 0.5; "cousin", then k0 = 0.75, k1 = 0.25; "unrelated", then k0 = 1, k1 = 0.

Value

The value of log likelihood.

Author(s)

Xiuwen Zheng

References

Milligan BG. 2003. Maximum-likelihood estimation of relatedness. Genetics 163:1153-1167.

Weir BS, Anderson AD, Hepler AB. 2006. Genetic relatedness analysis: modern data and new challenges. Nat Rev Genet. 7(10):771-80.

Choi Y, Wijsman EM, Weir BS. 2009. Case-control association testing in the presence of unknown relationships. Genet Epidemiol 33(8):668-78.

See Also

snpgdsPairIBD, snpgdsIBDMLE, snpgdsIBDMLELogLik, snpgdsIBDMoM

Examples

# open an example dataset (HapMap)
genofile <- snpgdsOpen(snpgdsExampleFileName())

YRI.id <- read.gdsn(index.gdsn(genofile, "sample.id"))[
    read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))=="YRI"]

# SNP pruning
set.seed(10)
snpset <- snpgdsLDpruning(genofile, sample.id=YRI.id, maf=0.05,
    missing.rate=0.05)
snpset <- unname(sample(unlist(snpset), 250))

# the number of samples
n <- 25

# specify allele frequencies
RF <- snpgdsSNPRateFreq(genofile, sample.id=YRI.id, snp.id=snpset,
    with.id=TRUE)
summary(RF$AlleleFreq)

subMLE <- snpgdsIBDMLE(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
    allele.freq=RF$AlleleFreq)
subMoM <- snpgdsIBDMoM(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
    allele.freq=RF$AlleleFreq)


# genotype matrix
mat <- snpgdsGetGeno(genofile, sample.id=YRI.id[1:n], snp.id=snpset,
    snpfirstdim=TRUE)


########################

rv <- NULL
for (i in 2:n)
{
    rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "EM"))
    print(snpgdsPairIBDMLELogLik(mat[,1], mat[,i], RF$AlleleFreq,
        relatedness="unrelated", verbose=TRUE))
}
rv
summary(rv$k0 - subMLE$k0[1, 2:n])
summary(rv$k1 - subMLE$k1[1, 2:n])
# ZERO

rv <- NULL
for (i in 2:n)
    rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "MoM"))
rv
summary(rv$k0 - subMoM$k0[1, 2:n])
summary(rv$k1 - subMoM$k1[1, 2:n])
# ZERO

# close the genotype file
snpgdsClose(genofile)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(SNPRelate)
Loading required package: gdsfmt
SNPRelate -- supported by Streaming SIMD Extensions 2 (SSE2)
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/SNPRelate/snpgdsPairIBDMLELogLik.Rd_%03d_medium.png", width=480, height=480)
> ### Name: snpgdsPairIBDMLELogLik
> ### Title: Log likelihood for MLE method in the Identity-By-Descent (IBD)
> ###   Analysis
> ### Aliases: snpgdsPairIBDMLELogLik
> ### Keywords: GDS GWAS
> 
> ### ** Examples
> 
> # open an example dataset (HapMap)
> genofile <- snpgdsOpen(snpgdsExampleFileName())
> 
> YRI.id <- read.gdsn(index.gdsn(genofile, "sample.id"))[
+     read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))=="YRI"]
> 
> # SNP pruning
> set.seed(10)
> snpset <- snpgdsLDpruning(genofile, sample.id=YRI.id, maf=0.05,
+     missing.rate=0.05)
SNP pruning based on LD:
Excluding 365 SNPs on non-autosomes
Excluding 1646 SNPs (monomorphic: TRUE, < MAF: 0.05, or > missing rate: 0.05)
Working space: 93 samples, 7077 SNPs
	using 1 (CPU) core
	Sliding window: 500000 basepairs, Inf SNPs
	|LD| threshold: 0.2
Chromosome 1: 62.29%, 446/716
Chromosome 2: 62.40%, 463/742
Chromosome 3: 60.76%, 370/609
Chromosome 4: 64.59%, 363/562
Chromosome 5: 62.54%, 354/566
Chromosome 6: 60.00%, 339/565
Chromosome 7: 62.71%, 296/472
Chromosome 8: 58.61%, 286/488
Chromosome 9: 62.98%, 262/416
Chromosome 10: 60.66%, 293/483
Chromosome 11: 62.86%, 281/447
Chromosome 12: 63.00%, 269/427
Chromosome 13: 63.08%, 217/344
Chromosome 14: 63.48%, 179/282
Chromosome 15: 63.36%, 166/262
Chromosome 16: 61.87%, 172/278
Chromosome 17: 65.70%, 136/207
Chromosome 18: 59.02%, 157/266
Chromosome 19: 68.33%, 82/120
Chromosome 20: 67.25%, 154/229
Chromosome 21: 61.11%, 77/126
Chromosome 22: 57.76%, 67/116
5429 SNPs are selected in total.
> snpset <- unname(sample(unlist(snpset), 250))
> 
> # the number of samples
> n <- 25
> 
> # specify allele frequencies
> RF <- snpgdsSNPRateFreq(genofile, sample.id=YRI.id, snp.id=snpset,
+     with.id=TRUE)
> summary(RF$AlleleFreq)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.05376 0.20430 0.55110 0.50640 0.78900 0.94620 
> 
> subMLE <- snpgdsIBDMLE(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
+     allele.freq=RF$AlleleFreq)
Identity-By-Descent analysis (MLE) on SNP genotypes:
Excluding 8838 SNPs (non-autosomes or non-selection)
Excluding 0 SNP (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 25 samples, 250 SNPs
	using 1 (CPU) core
Specifying allele frequencies, mean: 0.506, sd: 0.300
MLE IBD:	the sum of all selected genotypes (0, 1 and 2) = 6339
MLE IBD:	Wed Jul  6 05:34:50 2016	0%
MLE IBD:	Wed Jul  6 05:34:50 2016	100%
> subMoM <- snpgdsIBDMoM(genofile, sample.id=YRI.id[1:n], snp.id=RF$snp.id,
+     allele.freq=RF$AlleleFreq)
IBD analysis (PLINK method of moment) on SNP genotypes:
Excluding 8838 SNPs (non-autosomes or non-selection)
Excluding 0 SNP (monomorphic: TRUE, < MAF: NaN, or > missing rate: NaN)
Working space: 25 samples, 250 SNPs
	using 1 (CPU) core
Specifying allele frequencies, mean: 0.506, sd: 0.300
*** A correction factor based on allele count is not used, since the allele frequencies are specified.
PLINK IBD:	the sum of all selected genotypes (0, 1 and 2) = 6339
Wed Jul  6 05:34:50 2016    (internal increment: 65536)
 [>.................................................]  0%, ETC: NA     [==================================================] 100%, completed  
Wed Jul  6 05:34:50 2016    Done.
> 
> 
> # genotype matrix
> mat <- snpgdsGetGeno(genofile, sample.id=YRI.id[1:n], snp.id=snpset,
+     snpfirstdim=TRUE)
Genotype matrix: 250 SNPs X 25 samples
> 
> 
> ########################
> 
> rv <- NULL
> for (i in 2:n)
+ {
+     rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "EM"))
+     print(snpgdsPairIBDMLELogLik(mat[,1], mat[,i], RF$AlleleFreq,
+         relatedness="unrelated", verbose=TRUE))
+ }
[1] -364.4667
[1] -377.9771
[1] -390.3299
[1] -380.5342
[1] -378.2092
[1] -383.641
[1] -364.5406
[1] -375.196
[1] -383.5752
[1] -385.7771
[1] -368.6237
[1] -390.7766
[1] -367.5082
[1] -383.595
[1] -385.7602
[1] -378.5134
[1] -388.3292
[1] -376.4419
[1] -378.3509
[1] -386.8318
[1] -385.3803
[1] -382.988
[1] -369.9328
[1] -383.3912
> rv
           k0           k1    loglik niter
1  1.00000000 0.000000e+00 -364.4667   747
2  1.00000000 0.000000e+00 -377.9771   173
3  0.82468804 1.167875e-01 -388.2101   332
4  1.00000000 0.000000e+00 -380.5342   389
5  0.98984740 3.887511e-05 -378.1907   239
6  0.90539842 1.644146e-03 -382.3265   770
7  0.97381760 8.728809e-06 -364.4401   184
8  1.00000000 0.000000e+00 -375.1960   507
9  1.00000000 0.000000e+00 -383.5752    89
10 0.83360197 1.662557e-01 -384.8404   136
11 0.99837814 2.517451e-05 -368.6221   494
12 0.08549853 8.110497e-01 -358.6954   107
13 0.82269816 1.772903e-01 -366.7767   162
14 0.93057265 7.620839e-05 -382.7161   298
15 0.97530202 2.469798e-02 -385.7473   360
16 0.82754985 1.724271e-01 -377.7132   157
17 1.00000000 0.000000e+00 -388.3292   528
18 0.83463317 1.653668e-01 -375.7027   164
19 1.00000000 0.000000e+00 -378.3509   239
20 0.94551004 5.448186e-02 -386.7572   263
21 1.00000000 0.000000e+00 -385.3803   162
22 1.00000000 0.000000e+00 -382.9880   148
23 1.00000000 0.000000e+00 -369.9328   189
24 1.00000000 0.000000e+00 -383.3912   199
> summary(rv$k0 - subMLE$k0[1, 2:n])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0       0       0       0       0       0 
> summary(rv$k1 - subMLE$k1[1, 2:n])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0       0       0       0       0       0 
> # ZERO
> 
> rv <- NULL
> for (i in 2:n)
+     rv <- rbind(rv, snpgdsPairIBD(mat[,1], mat[,i], RF$AlleleFreq, "MoM"))
> rv
           k0         k1 loglik niter
1  0.37690989 0.62309011    NaN     0
2  1.00000000 0.00000000    NaN     0
3  1.00000000 0.00000000    NaN     0
4  0.60711190 0.39288810    NaN     0
5  1.00000000 0.00000000    NaN     0
6  0.80206954 0.13002840    NaN     0
7  0.87239826 0.08401839    NaN     0
8  0.82564551 0.17435449    NaN     0
9  1.00000000 0.00000000    NaN     0
10 0.58598310 0.41401690    NaN     0
11 0.75006441 0.24993559    NaN     0
12 0.06683913 0.87687977    NaN     0
13 0.48080417 0.51919583    NaN     0
14 0.93574779 0.01238419    NaN     0
15 0.77081797 0.22918203    NaN     0
16 0.46514972 0.53485028    NaN     0
17 0.73814017 0.26185983    NaN     0
18 0.30082134 0.69917866    NaN     0
19 0.77081797 0.22918203    NaN     0
20 0.64711113 0.35288887    NaN     0
21 1.00000000 0.00000000    NaN     0
22 1.00000000 0.00000000    NaN     0
23 0.71662087 0.28337913    NaN     0
24 1.00000000 0.00000000    NaN     0
> summary(rv$k0 - subMoM$k0[1, 2:n])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0       0       0       0       0       0 
> summary(rv$k1 - subMoM$k1[1, 2:n])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0       0       0       0       0       0 
> # ZERO
> 
> # close the genotype file
> snpgdsClose(genofile)
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>