Last data update: 2014.03.03

R: Add experiments to the registry.
addExperiments — R Documentation

Add experiments to the registry.

Description

Add experiments for running algorithms on problems to the registry, so they can be executed later.

Usage

addExperiments(reg, prob.designs, algo.designs, repls = 1L,
  skip.defined = FALSE)

Arguments

reg

[ExperimentRegistry]
Registry.

prob.designs

[character | Design | list of Design]
Either problem ids, a single problem design or a list of problem designs, the latter two created by makeDesign. If missing, all problems are selected (without associating a design), and this is the default.

algo.designs

[character | Design | list of Design]
Either algorithm ids, a single algorithm design or a list of algorithm designs, the latter two created by makeDesign. If missing, all algorithms are selected (without associating a design), and this is the default.

repls

[integer(1)]
Number of replications.
Default is 1.

skip.defined

[logical]
If set to TRUE, already defined experiments get skipped. Otherwise an error is thrown.
Default is FALSE.

Value

Invisibly returns vector of ids of added experiments.

See Also

Other add: Algorithm, addAlgorithm; Problem, addProblem

Examples

### EXAMPLE 1 ###
# Create a fresh experiment registry in a temporary directory.
reg = makeExperimentRegistry(id = "example1", file.dir = tempfile())

# Define a problem:
# Subsampling from the iris dataset.
data(iris)
# Dynamic problem function: draw a random train/test split of the rows
# of `static`.
#
# Args:
#   static: data.frame to subsample (the problem's static part).
#   ratio:  numeric(1), fraction of rows to use for training.
#
# Returns: list with integer vectors `test` and `train` that partition
#   the row indices of `static`.
subsample = function(static, ratio) {
  n = nrow(static)
  train = sample(n, floor(n * ratio))
  # seq_len(n) is safe for n == 0 (returns integer(0)), unlike seq(n).
  test = setdiff(seq_len(n), train)
  list(test = test, train = train)
}
# Register the problem: the iris data is the static part, the dynamic
# part draws a train/test split; `seed` makes splits reproducible.
addProblem(reg, id = "iris", static = iris,
           dynamic = subsample, seed = 123)

# Define algorithm "tree":
# Decision tree on the iris dataset, modeling Species.
# Algorithm wrapper: fit a CART decision tree predicting Species on the
# training rows of `static` and return the confusion table (truth vs.
# prediction) on the test rows. Arguments in `...` are forwarded to
# rpart() (e.g. minsplit, cp).
tree.wrapper = function(static, dynamic, ...) {
  library(rpart)
  train.data = static[dynamic$train, ]
  fitted.tree = rpart(Species ~ ., data = train.data, ...)
  test.data = static[dynamic$test, ]
  pred = predict(fitted.tree, newdata = test.data, type = "class")
  table(static$Species[dynamic$test], pred)
}
# Register the decision-tree algorithm under id "tree".
addAlgorithm(reg, id = "tree", fun = tree.wrapper)

# Define algorithm "forest":
# Random forest on the iris dataset, modeling Species.
# Algorithm wrapper: fit a random forest predicting Species on the
# training rows (via `subset`) and return the confusion table (truth
# vs. prediction) on the test rows. Arguments in `...` are forwarded
# to randomForest() (e.g. ntree).
forest.wrapper = function(static, dynamic, ...) {
  library(randomForest)
  rf = randomForest(Species ~ ., data = static, subset = dynamic$train, ...)
  pred = predict(rf, newdata = static[dynamic$test, ])
  table(static$Species[dynamic$test], pred)
}
# Register the random-forest algorithm under id "forest".
addAlgorithm(reg, id = "forest", fun = forest.wrapper)

# Define problem parameters:
pars = list(ratio = c(0.67, 0.9))
iris.design = makeDesign("iris", exhaustive = pars)

# Define decision tree parameters:
pars = list(minsplit = c(10, 20), cp = c(0.01, 0.1))
tree.design = makeDesign("tree", exhaustive = pars)

# Define random forest parameters:
pars = list(ntree = c(100, 500))
forest.design = makeDesign("forest", exhaustive = pars)

# Add experiments to the registry:
# Use previously defined experimental designs. This crosses 2 problem
# settings with 4 tree + 2 forest settings, at 2 replications each
# (24 jobs in total).
addExperiments(reg, prob.designs = iris.design,
               algo.designs = list(tree.design, forest.design),
               repls = 2) # usually you would set repls to 100 or more.

# Optional: Short summary over problems and algorithms.
summarizeExperiments(reg)
# Optional: Test one decision tree job and one expensive (ntree = 500)
# random forest job. Use findExperiments to get the right job ids.
# The forest design above only defines ntree = 100 or 500, so filter on
# 500; filtering on a value outside the design (e.g. 1000) would match
# no experiment and testJob would receive an NA id.
do.tests = FALSE
if (do.tests) {
  id1 = findExperiments(reg, algo.pattern = "tree")[1]
  id2 = findExperiments(reg, algo.pattern = "forest",
                         algo.pars = (ntree == 500))[1]
  testJob(reg, id1)
  testJob(reg, id2)
}

# Submit the jobs to the configured batch system.
submitJobs(reg)

# Calculate the misclassification rate for all (already done) jobs.
# Reduce function: compute the misclassification rate from a confusion
# table `res` (rows = truth, cols = prediction); `job` is unused here.
reduce = function(job, res) {
  total = sum(res)
  correct = sum(diag(res))
  list(mcr = (total - correct) / total)
}
# Collect one data.frame row per job: parameters, replication and mcr.
res = reduceResultsExperiments(reg, fun = reduce)
print(res)

# Aggregate results using 'ddply' from package 'plyr':
# Calculate the mean over all replications of identical experiments
# (same problem, same algorithm and same parameters)
library(plyr)
# Group by every column except the per-job "id", the replication
# counter and the measured value, then average mcr over replications.
# "id" must be excluded: it is unique per job, so grouping on it leaves
# one row per job and no averaging across replications takes place.
vars = setdiff(names(res), c("id", "repl", "mcr"))
aggr = ddply(res, vars, summarise, mean.mcr = mean(mcr))
print(aggr)

## Not run: 
### EXAMPLE 2 ###
# Define two simple test functions to optimize:
# a sphere function and a negated exponential of the L1 norm.
testfun1 = function(x) sum(x^2)
testfun2 = function(x) -exp(-sum(abs(x)))

# Define ExperimentRegistry:
reg = makeExperimentRegistry("example02", seed = 123, file.dir = tempfile())

# Add the testfunctions to the registry:
# (each function becomes the static part of its problem)
addProblem(reg, "testfun1", static = testfun1)
addProblem(reg, "testfun2", static = testfun2)

# Use SimulatedAnnealing on the test functions:
# NOTE(review): optim() honours lower/upper only for methods
# "L-BFGS-B" and "Brent", so the bounds below have no effect with
# method = "SANN" -- confirm whether box constraints were intended.
addAlgorithm(reg, "sann", fun = function(static, dynamic) {
  upp = rep(10, 2)
  low = -upp
  # Random starting corner in {-10, 10}^2.
  start = sample(c(-10, 10), 2)
  res = optim(start, fn = static, lower = low, upper = upp, method = "SANN")
  # Keep only the interesting parts of the optim() result.
  res = res[c("par", "value", "counts", "convergence")]
  res$start = start
  return(res)
})

# add experiments and submit
addExperiments(reg, repls = 10)
submitJobs(reg)

# Gather information from the experiments, in this case function value
# and whether the algorithm converged:
reduceResultsExperiments(reg, fun = function(job, res) res[c("value", "convergence")])

## End(Not run)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(BatchExperiments)
Loading required package: BatchJobs
Loading required package: BBmisc
Sourcing configuration file: '/home/ddbj/local/lib64/R/library/BatchJobs/etc/BatchJobs_global_config.R'
BatchJobs configuration:
  cluster functions: Interactive
  mail.from: 
  mail.to: 
  mail.start: none
  mail.done: none
  mail.error: none
  default.resources: 
  debug: FALSE
  raise.warnings: FALSE
  staged.queries: TRUE
  max.concurrent.jobs: Inf
  fs.timeout: NA

> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/BatchExperiments/addExperiments.Rd_%03d_medium.png", width=480, height=480)
> ### Name: addExperiments
> ### Title: Add experiemts to the registry.
> ### Aliases: Experiment addExperiments
> 
> ### ** Examples
> 
> ### EXAMPLE 1 ###
> reg = makeExperimentRegistry(id = "example1", file.dir = tempfile())
Creating dir: /tmp/RtmpxMJ7rZ/file3996117c07e4
Saving registry: /tmp/RtmpxMJ7rZ/file3996117c07e4/registry.RData
> 
> # Define a problem:
> # Subsampling from the iris dataset.
> data(iris)
> subsample = function(static, ratio) {
+   n = nrow(static)
+   train = sample(n, floor(n * ratio))
+   test = setdiff(seq(n), train)
+   list(test = test, train = train)
+ }
> addProblem(reg, id = "iris", static = iris,
+            dynamic = subsample, seed = 123)
Writing problem files: /tmp/RtmpxMJ7rZ/file3996117c07e4/problems/iris_static.RData, /tmp/RtmpxMJ7rZ/file3996117c07e4/problems/iris_dynamic.RData
> 
> # Define algorithm "tree":
> # Decision tree on the iris dataset, modeling Species.
> tree.wrapper = function(static, dynamic, ...) {
+   library(rpart)
+   mod = rpart(Species ~ ., data = static[dynamic$train, ], ...)
+   pred = predict(mod, newdata = static[dynamic$test, ], type = "class")
+   table(static$Species[dynamic$test], pred)
+ }
> addAlgorithm(reg, id = "tree", fun = tree.wrapper)
Writing algorithm file: /tmp/RtmpxMJ7rZ/file3996117c07e4/algorithms/tree.RData
> 
> # Define algorithm "forest":
> # Random forest on the iris dataset, modeling Species.
> forest.wrapper = function(static, dynamic, ...) {
+   library(randomForest)
+   mod = randomForest(Species ~ ., data = static, subset = dynamic$train, ...)
+   pred = predict(mod, newdata = static[dynamic$test, ])
+   table(static$Species[dynamic$test], pred)
+ }
> addAlgorithm(reg, id = "forest", fun = forest.wrapper)
Writing algorithm file: /tmp/RtmpxMJ7rZ/file3996117c07e4/algorithms/forest.RData
> 
> # Define problem parameters:
> pars = list(ratio = c(0.67, 0.9))
> iris.design = makeDesign("iris", exhaustive = pars)
> 
> # Define decision tree parameters:
> pars = list(minsplit = c(10, 20), cp = c(0.01, 0.1))
> tree.design = makeDesign("tree", exhaustive = pars)
> 
> # Define random forest parameters:
> pars = list(ntree = c(100, 500))
> forest.design = makeDesign("forest", exhaustive = pars)
> 
> # Add experiments to the registry:
> # Use  previously defined experimental designs.
> addExperiments(reg, prob.designs = iris.design,
+                algo.designs = list(tree.design, forest.design),
+                repls = 2) # usually you would set repls to 100 or more.
Adding 12 experiments / 24 jobs to DB.
> 
> # Optional: Short summary over problems and algorithms.
> summarizeExperiments(reg)
  prob   algo .count
1 iris forest      8
2 iris   tree     16
> 
> # Optional: Test one decision tree job and one expensive (ntree = 1000)
> # random forest job. Use findExperiments to get the right job ids.
> do.tests = FALSE
> if (do.tests) {
+   id1 = findExperiments(reg, algo.pattern = "tree")[1]
+   id2 = findExperiments(reg, algo.pattern = "forest",
+                          algo.pars = (ntree == 1000))[1]
+   testJob(reg, id1)
+   testJob(reg, id2)
+ }
> 
> # Submit the jobs to the batch system
> submitJobs(reg)
Saving conf: /tmp/RtmpxMJ7rZ/file3996117c07e4/conf.RData
Submitting 24 chunks / 24 jobs.
Cluster functions: Interactive.
Auto-mailer settings: start=none, done=none, error=none.
Writing 24 R scripts...
 SubmitJobs |+                                                |   0% (00:00:00) SubmitJobs |+                                                |   0% (00:00:00) SubmitJobs |++                                               |   4% (00:00:00) SubmitJobs |++++                                             |   8% (00:00:00) SubmitJobs |++++++                                           |  12% (00:00:07) SubmitJobs |++++++++                                         |  17% (00:00:05) SubmitJobs |++++++++++                                       |  21% (00:00:03) SubmitJobs |++++++++++++                                     |  25% (00:00:03) SubmitJobs |++++++++++++++                                   |  29% (00:00:02) SubmitJobs |++++++++++++++++                                 |  33% (00:00:02) SubmitJobs |++++++++++++++++++                               |  38% (00:00:01) SubmitJobs |++++++++++++++++++++                             |  42% (00:00:01) SubmitJobs |++++++++++++++++++++++                           |  46% (00:00:02) SubmitJobs |++++++++++++++++++++++++                         |  50% (00:00:02) SubmitJobs |+++++++++++++++++++++++++++                      |  54% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++                    |  58% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++                  |  62% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++                |  67% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++++              |  71% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++++++            |  75% (00:00:01) SubmitJobs |+++++++++++++++++++++++++++++++++++++++          |  79% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++        |  83% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++++      |  88% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++++++    |  92% (00:00:00) SubmitJobs |+++++++++++++++++++++++++++++++++++++++++++++++  |  96% (00:00:00) SubmitJobs 
|+++++++++++++++++++++++++++++++++++++++++++++++++| 100% (00:00:00)
Sending 24 submit messages...
Might take some time, do not interrupt this!
> 
> # Calculate the misclassification rate for all (already done) jobs.
> reduce = function(job, res) {
+   n = sum(res)
+   list(mcr = (n-sum(diag(res)))/n)
+ }
> res = reduceResultsExperiments(reg, fun = reduce)
Syncing registry ...
Reducing 24 results...
 reduceResultsExperiments |+                                  |   0% (00:00:00) reduceResultsExperiments |+++++++++++++++++++++++++++++++++++| 100% (00:00:00)
> print(res)
   id prob ratio   algo   cp minsplit repl        mcr ntree
1   1 iris  0.67   tree 0.01       10    1 0.08000000    NA
2   2 iris  0.67   tree 0.01       10    2 0.06000000    NA
3   3 iris  0.67   tree 0.01       20    1 0.08000000    NA
4   4 iris  0.67   tree 0.01       20    2 0.06000000    NA
5   5 iris  0.67   tree 0.10       10    1 0.08000000    NA
6   6 iris  0.67   tree 0.10       10    2 0.06000000    NA
7   7 iris  0.67   tree 0.10       20    1 0.08000000    NA
8   8 iris  0.67   tree 0.10       20    2 0.06000000    NA
9   9 iris  0.67 forest   NA       NA    1 0.06000000   100
10 10 iris  0.67 forest   NA       NA    2 0.06000000   100
11 11 iris  0.67 forest   NA       NA    1 0.06000000   500
12 12 iris  0.67 forest   NA       NA    2 0.06000000   500
13 13 iris  0.90   tree 0.01       10    1 0.00000000    NA
14 14 iris  0.90   tree 0.01       10    2 0.06666667    NA
15 15 iris  0.90   tree 0.01       20    1 0.00000000    NA
16 16 iris  0.90   tree 0.01       20    2 0.06666667    NA
17 17 iris  0.90   tree 0.10       10    1 0.00000000    NA
18 18 iris  0.90   tree 0.10       10    2 0.00000000    NA
19 19 iris  0.90   tree 0.10       20    1 0.00000000    NA
20 20 iris  0.90   tree 0.10       20    2 0.00000000    NA
21 21 iris  0.90 forest   NA       NA    1 0.00000000   100
22 22 iris  0.90 forest   NA       NA    2 0.06666667   100
23 23 iris  0.90 forest   NA       NA    1 0.00000000   500
24 24 iris  0.90 forest   NA       NA    2 0.06666667   500
> 
> # Aggregate results using 'ddply' from package 'plyr':
> # Calculate the mean over all replications of identical experiments
> # (same problem, same algorithm and same parameters)
> library(plyr)
> vars = setdiff(names(res), c("repl", "mcr"))
> aggr = ddply(res, vars, summarise, mean.mcr = mean(mcr))
> print(aggr)
   id prob ratio   algo   cp minsplit ntree   mean.mcr
1   1 iris  0.67   tree 0.01       10    NA 0.08000000
2   2 iris  0.67   tree 0.01       10    NA 0.06000000
3   3 iris  0.67   tree 0.01       20    NA 0.08000000
4   4 iris  0.67   tree 0.01       20    NA 0.06000000
5   5 iris  0.67   tree 0.10       10    NA 0.08000000
6   6 iris  0.67   tree 0.10       10    NA 0.06000000
7   7 iris  0.67   tree 0.10       20    NA 0.08000000
8   8 iris  0.67   tree 0.10       20    NA 0.06000000
9   9 iris  0.67 forest   NA       NA   100 0.06000000
10 10 iris  0.67 forest   NA       NA   100 0.06000000
11 11 iris  0.67 forest   NA       NA   500 0.06000000
12 12 iris  0.67 forest   NA       NA   500 0.06000000
13 13 iris  0.90   tree 0.01       10    NA 0.00000000
14 14 iris  0.90   tree 0.01       10    NA 0.06666667
15 15 iris  0.90   tree 0.01       20    NA 0.00000000
16 16 iris  0.90   tree 0.01       20    NA 0.06666667
17 17 iris  0.90   tree 0.10       10    NA 0.00000000
18 18 iris  0.90   tree 0.10       10    NA 0.00000000
19 19 iris  0.90   tree 0.10       20    NA 0.00000000
20 20 iris  0.90   tree 0.10       20    NA 0.00000000
21 21 iris  0.90 forest   NA       NA   100 0.00000000
22 22 iris  0.90 forest   NA       NA   100 0.06666667
23 23 iris  0.90 forest   NA       NA   500 0.00000000
24 24 iris  0.90 forest   NA       NA   500 0.06666667
> 
> ## Not run: 
> ##D ### EXAMPLE 2 ###
> ##D # define two simple test functions
> ##D testfun1 = function(x) sum(x^2)
> ##D testfun2 = function(x) -exp(-sum(abs(x)))
> ##D 
> ##D # Define ExperimentRegistry:
> ##D reg = makeExperimentRegistry("example02", seed = 123, file.dir = tempfile())
> ##D 
> ##D # Add the testfunctions to the registry:
> ##D addProblem(reg, "testfun1", static = testfun1)
> ##D addProblem(reg, "testfun2", static = testfun2)
> ##D 
> ##D # Use SimulatedAnnealing on the test functions:
> ##D addAlgorithm(reg, "sann", fun = function(static, dynamic) {
> ##D   upp = rep(10, 2)
> ##D   low = -upp
> ##D   start = sample(c(-10, 10), 2)
> ##D   res = optim(start, fn = static, lower = low, upper = upp, method = "SANN")
> ##D   res = res[c("par", "value", "counts", "convergence")]
> ##D   res$start = start
> ##D   return(res)
> ##D })
> ##D 
> ##D # add experiments and submit
> ##D addExperiments(reg, repls = 10)
> ##D submitJobs(reg)
> ##D 
> ##D # Gather informations from the experiments, in this case function value
> ##D # and whether the algorithm convergenced:
> ##D reduceResultsExperiments(reg, fun = function(job, res) res[c("value", "convergence")])
> ## End(Not run)
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>