[character | Design | list of Design]
Either problem ids, a single problem design or a list of problem designs,
the latter two created by makeDesign.
If missing, all problems are selected (without associating a design),
and this is the default.
algo.designs
[character | Design | list of Design]
Either algorithm ids, a single algorithm design or a list of algorithm designs,
the latter two created by makeDesign.
If missing, all algorithms are selected (without associating a design),
and this is the default.
repls
[integer(1)]
Number of replications.
Default is 1.
skip.defined
[logical]
If set to TRUE, already defined experiments are skipped. Otherwise an error is thrown.
Default is FALSE.
Value
Invisibly returns the vector of ids of the added experiments.
See Also
Other add: Algorithm,
addAlgorithm; Problem,
addProblem
Examples
### EXAMPLE 1 ###
# Create a registry for this example in a temporary directory.
reg = makeExperimentRegistry(id = "example1", file.dir = tempfile())
# Problem definition: random subsampling from the iris data set.
# The dynamic function draws a train/test split; 'ratio' is the
# fraction of observations that go into the training set.
data(iris)
subsample = function(static, ratio) {
  n.obs = nrow(static)
  train.idx = sample(n.obs, floor(n.obs * ratio))
  list(test = setdiff(seq(n.obs), train.idx), train = train.idx)
}
addProblem(reg, id = "iris", static = iris,
  dynamic = subsample, seed = 123)
# Algorithm "tree": fit a classification tree (rpart) for Species on
# the training rows, then return the confusion matrix on the test rows.
tree.wrapper = function(static, dynamic, ...) {
  library(rpart)
  train.data = static[dynamic$train, ]
  test.data = static[dynamic$test, ]
  model = rpart(Species ~ ., data = train.data, ...)
  pred = predict(model, newdata = test.data, type = "class")
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id = "tree", fun = tree.wrapper)
# Algorithm "forest": fit a random forest for Species on the training
# subset, then return the confusion matrix on the test rows.
forest.wrapper = function(static, dynamic, ...) {
  library(randomForest)
  model = randomForest(Species ~ ., data = static,
    subset = dynamic$train, ...)
  pred = predict(model, newdata = static[dynamic$test, ])
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id = "forest", fun = forest.wrapper)
# Problem design: two subsampling ratios.
iris.design = makeDesign("iris",
  exhaustive = list(ratio = c(0.67, 0.9)))
# Algorithm design for the decision tree: vary minsplit and cp.
tree.design = makeDesign("tree",
  exhaustive = list(minsplit = c(10, 20), cp = c(0.01, 0.1)))
# Algorithm design for the random forest: vary the number of trees.
forest.design = makeDesign("forest",
  exhaustive = list(ntree = c(100, 500)))
# Cross the designs and add the resulting experiments to the registry.
addExperiments(reg, prob.designs = iris.design,
  algo.designs = list(tree.design, forest.design),
  repls = 2) # usually you would set repls to 100 or more.
# Optional: Short summary over problems and algorithms.
summarizeExperiments(reg)
# Optional: Test one decision tree job and one expensive (ntree = 500)
# random forest job. Use findExperiments to get the right job ids.
# NOTE: the forest design above only contains ntree = 100 and 500, so
# the original filter (ntree == 1000) matched no experiment; id2 would
# have been NA and testJob(reg, id2) would fail.
do.tests = FALSE
if (do.tests) {
  id1 = findExperiments(reg, algo.pattern = "tree")[1]
  id2 = findExperiments(reg, algo.pattern = "forest",
    algo.pars = (ntree == 500))[1]
  testJob(reg, id1)
  testJob(reg, id2)
}
# Submit the jobs to the batch system.
submitJobs(reg)
# Reduction function: compute the misclassification rate from a
# confusion matrix 'res' (rows = truth, columns = prediction).
reduce = function(job, res) {
  total = sum(res)
  correct = sum(diag(res))
  list(mcr = (total - correct) / total)
}
res = reduceResultsExperiments(reg, fun = reduce)
print(res)
# Aggregate results using 'ddply' from package 'plyr':
# calculate the mean over all replications of identical experiments
# (same problem, same algorithm and same parameters).
library(plyr)
# Also drop "id": it is unique per job, so grouping on it would put
# every job in its own group and no averaging over replications would
# take place (the output would simply repeat each individual mcr).
vars = setdiff(names(res), c("id", "repl", "mcr"))
aggr = ddply(res, vars, summarise, mean.mcr = mean(mcr))
print(aggr)
## Not run:
### EXAMPLE 2 ###
# Two simple test functions to optimize: the sphere function and a
# negated exponential of the L1 norm (minimum at the origin for both).
testfun1 = function(x) {
  sum(x^2)
}
testfun2 = function(x) {
  -exp(-sum(abs(x)))
}
# Set up the registry for the second example.
reg = makeExperimentRegistry("example02", seed = 123, file.dir = tempfile())
# Register both test functions as static problem data:
addProblem(reg, "testfun1", static = testfun1)
addProblem(reg, "testfun2", static = testfun2)
# Algorithm: minimize the static test function with simulated annealing,
# starting from a random corner of the [-10, 10]^2 box.
addAlgorithm(reg, "sann", fun = function(static, dynamic) {
  upper.bound = rep(10, 2)
  lower.bound = -upper.bound
  start = sample(c(-10, 10), 2)
  out = optim(start, fn = static, lower = lower.bound,
    upper = upper.bound, method = "SANN")
  out = out[c("par", "value", "counts", "convergence")]
  out$start = start
  out
})
# Add the experiments and submit them:
addExperiments(reg, repls = 10)
submitJobs(reg)
# Gather information from the experiments, in this case the function
# value and whether the algorithm converged:
reduceResultsExperiments(reg, fun = function(job, res) res[c("value", "convergence")])
## End(Not run)
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(BatchExperiments)
Loading required package: BatchJobs
Loading required package: BBmisc
Sourcing configuration file: '/home/ddbj/local/lib64/R/library/BatchJobs/etc/BatchJobs_global_config.R'
BatchJobs configuration:
cluster functions: Interactive
mail.from:
mail.to:
mail.start: none
mail.done: none
mail.error: none
default.resources:
debug: FALSE
raise.warnings: FALSE
staged.queries: TRUE
max.concurrent.jobs: Inf
fs.timeout: NA
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/BatchExperiments/addExperiments.Rd_%03d_medium.png", width=480, height=480)
> ### Name: addExperiments
> ### Title: Add experiemts to the registry.
> ### Aliases: Experiment addExperiments
>
> ### ** Examples
>
> ### EXAMPLE 1 ###
> reg = makeExperimentRegistry(id = "example1", file.dir = tempfile())
Creating dir: /tmp/RtmpxMJ7rZ/file3996117c07e4
Saving registry: /tmp/RtmpxMJ7rZ/file3996117c07e4/registry.RData
>
> # Define a problem:
> # Subsampling from the iris dataset.
> data(iris)
> subsample = function(static, ratio) {
+ n = nrow(static)
+ train = sample(n, floor(n * ratio))
+ test = setdiff(seq(n), train)
+ list(test = test, train = train)
+ }
> addProblem(reg, id = "iris", static = iris,
+ dynamic = subsample, seed = 123)
Writing problem files: /tmp/RtmpxMJ7rZ/file3996117c07e4/problems/iris_static.RData, /tmp/RtmpxMJ7rZ/file3996117c07e4/problems/iris_dynamic.RData
>
> # Define algorithm "tree":
> # Decision tree on the iris dataset, modeling Species.
> tree.wrapper = function(static, dynamic, ...) {
+ library(rpart)
+ mod = rpart(Species ~ ., data = static[dynamic$train, ], ...)
+ pred = predict(mod, newdata = static[dynamic$test, ], type = "class")
+ table(static$Species[dynamic$test], pred)
+ }
> addAlgorithm(reg, id = "tree", fun = tree.wrapper)
Writing algorithm file: /tmp/RtmpxMJ7rZ/file3996117c07e4/algorithms/tree.RData
>
> # Define algorithm "forest":
> # Random forest on the iris dataset, modeling Species.
> forest.wrapper = function(static, dynamic, ...) {
+ library(randomForest)
+ mod = randomForest(Species ~ ., data = static, subset = dynamic$train, ...)
+ pred = predict(mod, newdata = static[dynamic$test, ])
+ table(static$Species[dynamic$test], pred)
+ }
> addAlgorithm(reg, id = "forest", fun = forest.wrapper)
Writing algorithm file: /tmp/RtmpxMJ7rZ/file3996117c07e4/algorithms/forest.RData
>
> # Define problem parameters:
> pars = list(ratio = c(0.67, 0.9))
> iris.design = makeDesign("iris", exhaustive = pars)
>
> # Define decision tree parameters:
> pars = list(minsplit = c(10, 20), cp = c(0.01, 0.1))
> tree.design = makeDesign("tree", exhaustive = pars)
>
> # Define random forest parameters:
> pars = list(ntree = c(100, 500))
> forest.design = makeDesign("forest", exhaustive = pars)
>
> # Add experiments to the registry:
> # Use previously defined experimental designs.
> addExperiments(reg, prob.designs = iris.design,
+ algo.designs = list(tree.design, forest.design),
+ repls = 2) # usually you would set repls to 100 or more.
Adding 12 experiments / 24 jobs to DB.
>
> # Optional: Short summary over problems and algorithms.
> summarizeExperiments(reg)
prob algo .count
1 iris forest 8
2 iris tree 16
>
> # Optional: Test one decision tree job and one expensive (ntree = 1000)
> # random forest job. Use findExperiments to get the right job ids.
> do.tests = FALSE
> if (do.tests) {
+ id1 = findExperiments(reg, algo.pattern = "tree")[1]
+ id2 = findExperiments(reg, algo.pattern = "forest",
+ algo.pars = (ntree == 1000))[1]
+ testJob(reg, id1)
+ testJob(reg, id2)
+ }
>
> # Submit the jobs to the batch system
> submitJobs(reg)
Saving conf: /tmp/RtmpxMJ7rZ/file3996117c07e4/conf.RData
Submitting 24 chunks / 24 jobs.
Cluster functions: Interactive.
Auto-mailer settings: start=none, done=none, error=none.
Writing 24 R scripts...
SubmitJobs |+ | 0% (00:00:00) SubmitJobs |+ | 0% (00:00:00) SubmitJobs |++ | 4% (00:00:00) SubmitJobs |++++ | 8% (00:00:00) SubmitJobs |++++++ | 12% (00:00:07) SubmitJobs |++++++++ | 17% (00:00:05) SubmitJobs |++++++++++ | 21% (00:00:03) SubmitJobs |++++++++++++ | 25% (00:00:03) SubmitJobs |++++++++++++++ | 29% (00:00:02) SubmitJobs |++++++++++++++++ | 33% (00:00:02) SubmitJobs |++++++++++++++++++ | 38% (00:00:01) SubmitJobs |++++++++++++++++++++ | 42% (00:00:01) SubmitJobs |++++++++++++++++++++++ | 46% (00:00:02) SubmitJobs |++++++++++++++++++++++++ | 50% (00:00:02) SubmitJobs |+++++++++++++++++++++++++++ | 54% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++ | 58% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++ | 62% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++ | 67% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++++ | 71% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++++++ | 75% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++++++++ | 79% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++ | 83% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++++ | 88% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++++++ | 92% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++++++++ | 96% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++++++++++| 100% (00:00:00)
Sending 24 submit messages...
Might take some time, do not interrupt this!
>
> # Calculate the misclassification rate for all (already done) jobs.
> reduce = function(job, res) {
+ n = sum(res)
+ list(mcr = (n-sum(diag(res)))/n)
+ }
> res = reduceResultsExperiments(reg, fun = reduce)
Syncing registry ...
Reducing 24 results...
reduceResultsExperiments |+ | 0% (00:00:00) reduceResultsExperiments |+++++++++++++++++++++++++++++++++++| 100% (00:00:00)
> print(res)
id prob ratio algo cp minsplit repl mcr ntree
1 1 iris 0.67 tree 0.01 10 1 0.08000000 NA
2 2 iris 0.67 tree 0.01 10 2 0.06000000 NA
3 3 iris 0.67 tree 0.01 20 1 0.08000000 NA
4 4 iris 0.67 tree 0.01 20 2 0.06000000 NA
5 5 iris 0.67 tree 0.10 10 1 0.08000000 NA
6 6 iris 0.67 tree 0.10 10 2 0.06000000 NA
7 7 iris 0.67 tree 0.10 20 1 0.08000000 NA
8 8 iris 0.67 tree 0.10 20 2 0.06000000 NA
9 9 iris 0.67 forest NA NA 1 0.06000000 100
10 10 iris 0.67 forest NA NA 2 0.06000000 100
11 11 iris 0.67 forest NA NA 1 0.06000000 500
12 12 iris 0.67 forest NA NA 2 0.06000000 500
13 13 iris 0.90 tree 0.01 10 1 0.00000000 NA
14 14 iris 0.90 tree 0.01 10 2 0.06666667 NA
15 15 iris 0.90 tree 0.01 20 1 0.00000000 NA
16 16 iris 0.90 tree 0.01 20 2 0.06666667 NA
17 17 iris 0.90 tree 0.10 10 1 0.00000000 NA
18 18 iris 0.90 tree 0.10 10 2 0.00000000 NA
19 19 iris 0.90 tree 0.10 20 1 0.00000000 NA
20 20 iris 0.90 tree 0.10 20 2 0.00000000 NA
21 21 iris 0.90 forest NA NA 1 0.00000000 100
22 22 iris 0.90 forest NA NA 2 0.06666667 100
23 23 iris 0.90 forest NA NA 1 0.00000000 500
24 24 iris 0.90 forest NA NA 2 0.06666667 500
>
> # Aggregate results using 'ddply' from package 'plyr':
> # Calculate the mean over all replications of identical experiments
> # (same problem, same algorithm and same parameters)
> library(plyr)
> vars = setdiff(names(res), c("repl", "mcr"))
> aggr = ddply(res, vars, summarise, mean.mcr = mean(mcr))
> print(aggr)
id prob ratio algo cp minsplit ntree mean.mcr
1 1 iris 0.67 tree 0.01 10 NA 0.08000000
2 2 iris 0.67 tree 0.01 10 NA 0.06000000
3 3 iris 0.67 tree 0.01 20 NA 0.08000000
4 4 iris 0.67 tree 0.01 20 NA 0.06000000
5 5 iris 0.67 tree 0.10 10 NA 0.08000000
6 6 iris 0.67 tree 0.10 10 NA 0.06000000
7 7 iris 0.67 tree 0.10 20 NA 0.08000000
8 8 iris 0.67 tree 0.10 20 NA 0.06000000
9 9 iris 0.67 forest NA NA 100 0.06000000
10 10 iris 0.67 forest NA NA 100 0.06000000
11 11 iris 0.67 forest NA NA 500 0.06000000
12 12 iris 0.67 forest NA NA 500 0.06000000
13 13 iris 0.90 tree 0.01 10 NA 0.00000000
14 14 iris 0.90 tree 0.01 10 NA 0.06666667
15 15 iris 0.90 tree 0.01 20 NA 0.00000000
16 16 iris 0.90 tree 0.01 20 NA 0.06666667
17 17 iris 0.90 tree 0.10 10 NA 0.00000000
18 18 iris 0.90 tree 0.10 10 NA 0.00000000
19 19 iris 0.90 tree 0.10 20 NA 0.00000000
20 20 iris 0.90 tree 0.10 20 NA 0.00000000
21 21 iris 0.90 forest NA NA 100 0.00000000
22 22 iris 0.90 forest NA NA 100 0.06666667
23 23 iris 0.90 forest NA NA 500 0.00000000
24 24 iris 0.90 forest NA NA 500 0.06666667
>
> ## Not run:
> ##D ### EXAMPLE 2 ###
> ##D # define two simple test functions
> ##D testfun1 = function(x) sum(x^2)
> ##D testfun2 = function(x) -exp(-sum(abs(x)))
> ##D
> ##D # Define ExperimentRegistry:
> ##D reg = makeExperimentRegistry("example02", seed = 123, file.dir = tempfile())
> ##D
> ##D # Add the testfunctions to the registry:
> ##D addProblem(reg, "testfun1", static = testfun1)
> ##D addProblem(reg, "testfun2", static = testfun2)
> ##D
> ##D # Use SimulatedAnnealing on the test functions:
> ##D addAlgorithm(reg, "sann", fun = function(static, dynamic) {
> ##D upp = rep(10, 2)
> ##D low = -upp
> ##D start = sample(c(-10, 10), 2)
> ##D res = optim(start, fn = static, lower = low, upper = upp, method = "SANN")
> ##D res = res[c("par", "value", "counts", "convergence")]
> ##D res$start = start
> ##D return(res)
> ##D })
> ##D
> ##D # add experiments and submit
> ##D addExperiments(reg, repls = 10)
> ##D submitJobs(reg)
> ##D
> ##D # Gather informations from the experiments, in this case function value
> ##D # and whether the algorithm convergenced:
> ##D reduceResultsExperiments(reg, fun = function(job, res) res[c("value", "convergence")])
> ## End(Not run)
>
>
>
>
>
> dev.off()
null device
1
>