Last data update: 2014.03.03

R: Calls genotypes in a normal sample
callNaiveGenotypesR Documentation

Calls genotypes in a normal sample

Description

Calls genotypes in a normal sample.

Usage

## S3 method for class 'numeric'
callNaiveGenotypes(y, cn=rep(2L, times = length(y)), ..., modelFit=NULL, verbose=FALSE)

Arguments

y

A numeric vector of length J containing allele B fractions for a normal sample.

cn

An optional numeric vector of length J specifying the true total copy number in {0,1,2,NA} at each locus. This can be used to specify which loci are diploid and which are not, e.g. autosomal and sex chromosome copy numbers.

...

Additional arguments passed to fitNaiveGenotypes().

modelFit

A optional model fit as returned by fitNaiveGenotypes().

verbose

A logical or a Verbose object.

Value

Returns a numeric vector of length J containing the genotype calls in allele B fraction space, that is, in [0,1] where 1/2 corresponds to a heterozygous call, and 0 and 1 corresponds to homozygous A and B, respectively. Non called genotypes have value NA.

Missing and non-finite values

A missing value always gives a missing (NA) genotype call. Negative infinity (-Inf) always gives genotype call 0. Positive infinity (+Inf) always gives genotype call 1.

Author(s)

Henrik Bengtsson

See Also

Internally fitNaiveGenotypes() is used to identify the thresholds.

Examples

layout(matrix(1:3, ncol=1))
par(mar=c(2,4,4,1)+0.1)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A bimodal distribution
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xAA <- rnorm(n=10000, mean=0, sd=0.1)
xBB <- rnorm(n=10000, mean=1, sd=0.1)
x <- c(xAA,xBB)
fit <- findPeaksAndValleys(x)
print(fit)
calls <- callNaiveGenotypes(x, cn=rep(1,length(x)), verbose=-20)
xc <- split(x, calls)
print(table(calls))
xx <- c(list(x),xc)
plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,BB)")
abline(v=fit$x)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A trimodal distribution with missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xAB <- rnorm(n=10000, mean=1/2, sd=0.1)
x <- c(xAA,xAB,xBB)
x[sample(length(x), size=0.05*length(x))] <- NA;
x[sample(length(x), size=0.01*length(x))] <- -Inf;
x[sample(length(x), size=0.01*length(x))] <- +Inf;
fit <- findPeaksAndValleys(x)
print(fit)
calls <- callNaiveGenotypes(x)
xc <- split(x, calls)
print(table(calls))
xx <- c(list(x),xc)
plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,AB,BB)")
abline(v=fit$x)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A trimodal distribution with clear separation
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xAA <- rnorm(n=10000, mean=0, sd=0.02)
xAB <- rnorm(n=10000, mean=1/2, sd=0.02)
xBB <- rnorm(n=10000, mean=1, sd=0.02)
x <- c(xAA,xAB,xBB)
fit <- findPeaksAndValleys(x)
print(fit)
calls <- callNaiveGenotypes(x)
xc <- split(x, calls)
print(table(calls))
xx <- c(list(x),xc)
plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA',AB',BB')")
abline(v=fit$x)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(aroma.light)
aroma.light v3.2.0 (2016-01-06) successfully loaded. See ?aroma.light for help.
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/aroma.light/callNaiveGenotypes.Rd_%03d_medium.png", width=480, height=480)
> ### Name: callNaiveGenotypes
> ### Title: Calls genotypes in a normal sample
> ### Aliases: callNaiveGenotypes callNaiveGenotypes.numeric
> ### Keywords: methods
> 
> ### ** Examples
> 
> layout(matrix(1:3, ncol=1))
> par(mar=c(2,4,4,1)+0.1)
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # A bimodal distribution
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> xAA <- rnorm(n=10000, mean=0, sd=0.1)
> xBB <- rnorm(n=10000, mean=1, sd=0.1)
> x <- c(xAA,xBB)
> fit <- findPeaksAndValleys(x)
> print(fit)
    type            x      density
1   peak -0.005000074 1.6688245868
2 valley  0.508658760 0.0003689443
3   peak  0.993780992 1.6767188615
> calls <- callNaiveGenotypes(x, cn=rep(1,length(x)), verbose=-20)
Calling genotypes from allele B fractions (BAFs)...
 Fitting naive genotype model...
  Fitting naive genotype model from normal allele B fractions (BAFs)...
   Flavor: density
   Censoring BAFs...
    Before:
         Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
    -0.377300 -0.001031  0.538400  0.498700  0.999300  1.325000 
    [1] 20000
    After:
         Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
         -Inf -0.001031  0.538400            0.999300       Inf 
    [1] 16747
   Censoring BAFs...done
   Copy number level #1 (C=1) of 1...
    Identified extreme points in density of BAF:
        type         x     density
    1   peak 0.0143900 1.632765447
    2 valley 0.4982467 0.003750737
    3   peak 0.9786717 1.638429596
    Local minimas ("valleys") in BAF:
        type         x     density
    2 valley 0.4982467 0.003750737
   Copy number level #1 (C=1) of 1...done
  Fitting naive genotype model from normal allele B fractions (BAFs)...done
  [[1]]
  [[1]]$flavor
  [1] "density"
  
  [[1]]$cn
  [1] 1
  
  [[1]]$nbrOfGenotypeGroups
  [1] 2
  
  [[1]]$tau
  [1] 0.4982467
  
  [[1]]$n
  [1] 16747
  
  [[1]]$fit
      type         x     density
  1   peak 0.0143900 1.632765447
  2 valley 0.4982467 0.003750737
  3   peak 0.9786717 1.638429596
  
  [[1]]$fitValleys
      type         x     density
  2 valley 0.4982467 0.003750737
  
  
  attr(,"class")
  [1] "NaiveGenotypeModelFit" "list"                 
 Fitting naive genotype model...done
 Copy number level #1 (C=1) of 1...
  Model fit:
  $flavor
  [1] "density"
  
  $cn
  [1] 1
  
  $nbrOfGenotypeGroups
  [1] 2
  
  $tau
  [1] 0.4982467
  
  $n
  [1] 16747
  
  $fit
      type         x     density
  1   peak 0.0143900 1.632765447
  2 valley 0.4982467 0.003750737
  3   peak 0.9786717 1.638429596
  
  $fitValleys
      type         x     density
  2 valley 0.4982467 0.003750737
  
  Genotype threshholds [1]: 0.498246677583216
  TCN=1 => BAF in {0,1}.
  Call regions: A = (-Inf,0.498], B = (0.498,+Inf)
 Copy number level #1 (C=1) of 1...done
Calling genotypes from allele B fractions (BAFs)...done
> xc <- split(x, calls)
> print(table(calls))
calls
    0     1 
10000 10000 
> xx <- c(list(x),xc)
> plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,BB)")
> abline(v=fit$x)
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # A trimodal distribution with missing values
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> xAB <- rnorm(n=10000, mean=1/2, sd=0.1)
> x <- c(xAA,xAB,xBB)
> x[sample(length(x), size=0.05*length(x))] <- NA;
> x[sample(length(x), size=0.01*length(x))] <- -Inf;
> x[sample(length(x), size=0.01*length(x))] <- +Inf;
> fit <- findPeaksAndValleys(x)
> print(fit)
    type            x   density
1   peak -0.004499636 1.1535866
2 valley  0.245495758 0.1846697
3   peak  0.491584973 1.1685745
4 valley  0.745486545 0.1800684
5   peak  0.995481939 1.1577693
> calls <- callNaiveGenotypes(x)
> xc <- split(x, calls)
> print(table(calls))
calls
   0  0.5    1 
9614 9284 9634 
> xx <- c(list(x),xc)
> plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA,AB,BB)")
> abline(v=fit$x)
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # A trimodal distribution with clear separation
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> xAA <- rnorm(n=10000, mean=0, sd=0.02)
> xAB <- rnorm(n=10000, mean=1/2, sd=0.02)
> xBB <- rnorm(n=10000, mean=1, sd=0.02)
> x <- c(xAA,xAB,xBB)
> fit <- findPeaksAndValleys(x)
> print(fit)
    type            x      density
1   peak -0.003704154 2.605738e+00
2 valley  0.247171043 3.113963e-05
3   peak  0.498046240 2.615278e+00
4 valley  0.746133935 3.346365e-05
5   peak  0.997009132 2.604815e+00
> calls <- callNaiveGenotypes(x)
> xc <- split(x, calls)
> print(table(calls))
calls
    0   0.5     1 
10000 10000 10000 
> xx <- c(list(x),xc)
> plotDensity(xx, adjust=1.5, lwd=2, col=seq(along=xx), main="(AA',AB',BB')")
> abline(v=fit$x)
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>