Last data update: 2014.03.03
R: Calls genotypes in a normal sample
callNaiveGenotypes R Documentation
Calls genotypes in a normal sample
Description
Calls genotypes in a normal sample.
Usage
## S3 method for class 'numeric'
callNaiveGenotypes(y, cn=rep(2L, times = length(y)), ..., modelFit=NULL, verbose=FALSE)
Arguments
y
A numeric
vector
of length J containing allele B fractions
for a normal sample.
cn
An optional numeric
vector
of length J specifying the true
total copy number in {0,1,2,NA} at each locus. This can be
used to specify which loci are diploid and which are not, e.g.
autosomal and sex chromosome copy numbers.
...
Additional arguments passed to fitNaiveGenotypes
().
modelFit
A optional model fit as returned
by fitNaiveGenotypes
().
verbose
A logical
or a Verbose
object.
Value
Returns a numeric
vector
of length J containing the genotype calls
in allele B fraction space, that is, in [0,1] where 1/2 corresponds
to a heterozygous call, and 0 and 1 corresponds to homozygous A
and B, respectively.
Non called genotypes have value NA
.
Missing and non-finite values
A missing value always gives a missing (NA
) genotype call.
Negative infinity (-Inf
) always gives genotype call 0.
Positive infinity (+Inf
) always gives genotype call 1.
Author(s)
Henrik Bengtsson
See Also
Internally fitNaiveGenotypes
() is used to identify the thresholds.
Examples
layout(matrix(1:3, ncol=1))
par(mar=c(2,4,4,1)+0.1)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A bimodal distribution
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xAA <- rnorm(n=10000, mean=0, sd=0.1)
xBB <- rnorm(n=10000, mean=1, sd=0.1)
x <- c(xAA,xBB)
fit <- findPeaksAndValleys(x)
print(fit)
calls <- callNaiveGenotypes(x, cn=rep(1,length(x)), verbose=-20)
xc <- split(x, calls)
print(table(calls))
xx <- c(list(x),xc)
plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,BB)")
abline(v=fit$x)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A trimodal distribution with missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xAB <- rnorm(n=10000, mean=1/2, sd=0.1)
x <- c(xAA,xAB,xBB)
x[sample(length(x), size=0.05*length(x))] <- NA;
x[sample(length(x), size=0.01*length(x))] <- -Inf;
x[sample(length(x), size=0.01*length(x))] <- +Inf;
fit <- findPeaksAndValleys(x)
print(fit)
calls <- callNaiveGenotypes(x)
xc <- split(x, calls)
print(table(calls))
xx <- c(list(x),xc)
plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,AB,BB)")
abline(v=fit$x)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A trimodal distribution with clear separation
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xAA <- rnorm(n=10000, mean=0, sd=0.02)
xAB <- rnorm(n=10000, mean=1/2, sd=0.02)
xBB <- rnorm(n=10000, mean=1, sd=0.02)
x <- c(xAA,xAB,xBB)
fit <- findPeaksAndValleys(x)
print(fit)
calls <- callNaiveGenotypes(x)
xc <- split(x, calls)
print(table(calls))
xx <- c(list(x),xc)
plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA',AB',BB')")
abline(v=fit$x)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(aroma.light)
aroma.light v3.2.0 (2016-01-06) successfully loaded. See ?aroma.light for help.
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/aroma.light/callNaiveGenotypes.Rd_%03d_medium.png", width=480, height=480)
> ### Name: callNaiveGenotypes
> ### Title: Calls genotypes in a normal sample
> ### Aliases: callNaiveGenotypes callNaiveGenotypes.numeric
> ### Keywords: methods
>
> ### ** Examples
>
> layout(matrix(1:3, ncol=1))
> par(mar=c(2,4,4,1)+0.1)
>
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # A bimodal distribution
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> xAA <- rnorm(n=10000, mean=0, sd=0.1)
> xBB <- rnorm(n=10000, mean=1, sd=0.1)
> x <- c(xAA,xBB)
> fit <- findPeaksAndValleys(x)
> print(fit)
type x density
1 peak -0.005000074 1.6688245868
2 valley 0.508658760 0.0003689443
3 peak 0.993780992 1.6767188615
> calls <- callNaiveGenotypes(x, cn=rep(1,length(x)), verbose=-20)
Calling genotypes from allele B fractions (BAFs)...
Fitting naive genotype model...
Fitting naive genotype model from normal allele B fractions (BAFs)...
Flavor: density
Censoring BAFs...
Before:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.377300 -0.001031 0.538400 0.498700 0.999300 1.325000
[1] 20000
After:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-Inf -0.001031 0.538400 0.999300 Inf
[1] 16747
Censoring BAFs...done
Copy number level #1 (C=1) of 1...
Identified extreme points in density of BAF:
type x density
1 peak 0.0143900 1.632765447
2 valley 0.4982467 0.003750737
3 peak 0.9786717 1.638429596
Local minimas ("valleys") in BAF:
type x density
2 valley 0.4982467 0.003750737
Copy number level #1 (C=1) of 1...done
Fitting naive genotype model from normal allele B fractions (BAFs)...done
[[1]]
[[1]]$flavor
[1] "density"
[[1]]$cn
[1] 1
[[1]]$nbrOfGenotypeGroups
[1] 2
[[1]]$tau
[1] 0.4982467
[[1]]$n
[1] 16747
[[1]]$fit
type x density
1 peak 0.0143900 1.632765447
2 valley 0.4982467 0.003750737
3 peak 0.9786717 1.638429596
[[1]]$fitValleys
type x density
2 valley 0.4982467 0.003750737
attr(,"class")
[1] "NaiveGenotypeModelFit" "list"
Fitting naive genotype model...done
Copy number level #1 (C=1) of 1...
Model fit:
$flavor
[1] "density"
$cn
[1] 1
$nbrOfGenotypeGroups
[1] 2
$tau
[1] 0.4982467
$n
[1] 16747
$fit
type x density
1 peak 0.0143900 1.632765447
2 valley 0.4982467 0.003750737
3 peak 0.9786717 1.638429596
$fitValleys
type x density
2 valley 0.4982467 0.003750737
Genotype threshholds [1]: 0.498246677583216
TCN=1 => BAF in {0,1}.
Call regions: A = (-Inf,0.498], B = (0.498,+Inf)
Copy number level #1 (C=1) of 1...done
Calling genotypes from allele B fractions (BAFs)...done
> xc <- split(x, calls)
> print(table(calls))
calls
0 1
10000 10000
> xx <- c(list(x),xc)
> plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,BB)")
> abline(v=fit$x)
>
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # A trimodal distribution with missing values
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> xAB <- rnorm(n=10000, mean=1/2, sd=0.1)
> x <- c(xAA,xAB,xBB)
> x[sample(length(x), size=0.05*length(x))] <- NA;
> x[sample(length(x), size=0.01*length(x))] <- -Inf;
> x[sample(length(x), size=0.01*length(x))] <- +Inf;
> fit <- findPeaksAndValleys(x)
> print(fit)
type x density
1 peak -0.004499636 1.1535866
2 valley 0.245495758 0.1846697
3 peak 0.491584973 1.1685745
4 valley 0.745486545 0.1800684
5 peak 0.995481939 1.1577693
> calls <- callNaiveGenotypes(x)
> xc <- split(x, calls)
> print(table(calls))
calls
0 0.5 1
9614 9284 9634
> xx <- c(list(x),xc)
> plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,AB,BB)")
> abline(v=fit$x)
>
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # A trimodal distribution with clear separation
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> xAA <- rnorm(n=10000, mean=0, sd=0.02)
> xAB <- rnorm(n=10000, mean=1/2, sd=0.02)
> xBB <- rnorm(n=10000, mean=1, sd=0.02)
> x <- c(xAA,xAB,xBB)
> fit <- findPeaksAndValleys(x)
> print(fit)
type x density
1 peak -0.003704154 2.605738e+00
2 valley 0.247171043 3.113963e-05
3 peak 0.498046240 2.615278e+00
4 valley 0.746133935 3.346365e-05
5 peak 0.997009132 2.604815e+00
> calls <- callNaiveGenotypes(x)
> xc <- split(x, calls)
> print(table(calls))
calls
0 0.5 1
10000 10000 10000
> xx <- c(list(x),xc)
> plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA',AB',BB')")
> abline(v=fit$x)
>
>
>
>
>
> dev.off()
null device
1
>