starting indices; if missing, starts with the object with the
maximum median distance to all other objects.
N
total number of selections, counting any starting indices given in idx (so N - length(idx) additional elements are chosen).
exclude
boolean vector indicating elements to exclude from the
calculation.
include.center
if TRUE, includes the "most central" element (i.e., the one with
the smallest median of pairwise distances to all other elements).
Value
A vector of indices corresponding to the margin of mat.
Note
It is important to check whether the candidate sequences contain
outliers (for example, mislabeled sequences), because these will
assuredly be included in a maximally diverse set of elements!
Author(s)
Noah Hoffman
See Also
findOutliers
Examples
library(ape)
library(clstutils)
data(seqs)
data(seqdat)
efaecium <- seqdat$tax_name == 'Enterococcus faecium'
seqdat <- subset(seqdat, efaecium)
seqs <- seqs[efaecium,]
dmat <- ape::dist.dna(seqs, pairwise.deletion=TRUE, as.matrix=TRUE, model='raw')
## find a maximally diverse set without first identifying outliers
picked <- maxDists(dmat, N=10)
picked
prettyTree(nj(dmat), groups=ifelse(1:nrow(dmat) %in% picked,'picked','not picked'))
## restrict selected elements to non-outliers
outliers <- findOutliers(dmat, cutoff=0.015)
picked <- maxDists(dmat, N=10, exclude=outliers)
picked
prettyTree(nj(dmat), groups=ifelse(1:nrow(dmat) %in% picked,'picked','not picked'),
X = outliers)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(clstutils)
Loading required package: clst
Loading required package: rjson
Loading required package: ape
> png(filename="/home/ddbj/snapshot/RGM3/R_BC/result/clstutils/maxDists.Rd_%03d_medium.png", width=480, height=480)
> ### Name: maxDists
> ### Title: Select a maximally diverse set of items given a distance matrix.
> ### Aliases: maxDists
> ### Keywords: classif
>
> ### ** Examples
>
> library(ape)
> library(clstutils)
> data(seqs)
> data(seqdat)
> efaecium <- seqdat$tax_name == 'Enterococcus faecium'
> seqdat <- subset(seqdat, efaecium)
> seqs <- seqs[efaecium,]
> dmat <- ape::dist.dna(seqs, pairwise.deletion=TRUE, as.matrix=TRUE, model='raw')
>
> ## find a maximally diverse set without first identifying outliers
> picked <- maxDists(dmat, N=10)
> picked
S000651464 S000591191 S001794517 S001418165 S000427507 S000516063 S002166218
24 111 98 89 13 18 107
S001352453 S002166217 S001155771
85 106 48
> prettyTree(nj(dmat), groups=ifelse(1:nrow(dmat) %in% picked,'picked','not picked'))
>
> ## restrict selected elements to non-outliers
> outliers <- findOutliers(dmat, cutoff=0.015)
> picked <- maxDists(dmat, N=10, exclude=outliers)
> picked
S000651464 S000531199 S001045699 S001352453 S000591317 S001156205 S001155770
24 19 39 85 21 49 47
S001153624 S000871538 S001292637
45 28 83
> prettyTree(nj(dmat), groups=ifelse(1:nrow(dmat) %in% picked,'picked','not picked'),
+ X = outliers)
>
>
>
>
>
>
> dev.off()
null device
1
>