The goal of this vignette is to explain how to use
ResamplingSameOtherSizesCV
for various kinds of cross-validation.
We begin with a simple simulated data set.
# Simulate N feature values, drawn uniformly from the interval [-abs.x, abs.x].
abs.x <- 70
N <- 2100
set.seed(2)  # fixed seed, so the simulated values are reproducible
x.vec <- runif(n = N, min = -abs.x, max = abs.x)
str(x.vec)
#> num [1:2100] -44.1 28.3 10.3 -46.5 62.1 ...
library(data.table)
# Build the data table for the learning task: the output y is a sine wave of
# the input x, plus Gaussian noise with standard deviation 0.5.
task.dt <- data.table(
  x = x.vec,
  y = sin(x.vec) + rnorm(N, sd = 0.5))
task.dt
#> x y
#> <num> <num>
#> 1: -44.11648 -0.40781530
#> 2: 28.33237 -0.08520601
#> 3: 10.26569 -1.23266284
#> 4: -46.47273 -1.36225125
#> 5: 62.13751 -1.33779346
#> ---
#> 2096: 60.83765 -0.10678010
#> 2097: 55.71469 -0.92403513
#> 2098: 14.31045 1.04519820
#> 2099: 27.18008 1.67815828
#> 2100: 23.67202 -0.26881102
# Scatterplot of the simulated data, drawn only if ggplot2 can be loaded.
if (require(ggplot2)) {
  ggplot() +
    geom_point(
      mapping = aes(x = x, y = y),
      data = task.dt,
      shape = 1) +
    coord_equal()
}
#> Loading required package: ggplot2
Above we see a scatterplot of the simulated data. The goal of the learning algorithm will be to predict y from x.
The code below assigns three test groups to the randomly simulated data.
# Number of consecutive rows which share the same value of agroup.
atomic.group.size <- 2
# Add the agroup column (1, 1, 2, 2, ...) by reference, then print the table.
task.dt[
  , agroup := rep(
    seq(from = 1, to = N / atomic.group.size),
    each = atomic.group.size)
][]
#> x y agroup
#> <num> <num> <int>
#> 1: -44.11648 -0.40781530 1
#> 2: 28.33237 -0.08520601 1
#> 3: 10.26569 -1.23266284 2
#> 4: -46.47273 -1.36225125 2
#> 5: 62.13751 -1.33779346 3
#> ---
#> 2096: 60.83765 -0.10678010 1048
#> 2097: 55.71469 -0.92403513 1049
#> 2098: 14.31045 1.04519820 1049
#> 2099: 27.18008 1.67815828 1050
#> 2100: 23.67202 -0.26881102 1050
# Label each atomic group with a random_group, cycling through the pattern
# A,B,B,C,C,C,C (each label repeated atomic.group.size times), recycled to
# the number of rows of the table.
task.dt[, random_group := rep(
  rep(c("A","B","B","C","C","C","C"), each=atomic.group.size),
  # argument name written in full, rather than relying on partial matching of l=
  length.out=.N
)][]
#> x y agroup random_group
#> <num> <num> <int> <char>
#> 1: -44.11648 -0.40781530 1 A
#> 2: 28.33237 -0.08520601 1 A
#> 3: 10.26569 -1.23266284 2 B
#> 4: -46.47273 -1.36225125 2 B
#> 5: 62.13751 -1.33779346 3 B
#> ---
#> 2096: 60.83765 -0.10678010 1048 C
#> 2097: 55.71469 -0.92403513 1049 C
#> 2098: 14.31045 1.04519820 1049 C
#> 2099: 27.18008 1.67815828 1050 C
#> 2100: 23.67202 -0.26881102 1050 C
# Count the rows in each random group; group.tab stores the table of counts
# (previously it was assigned the raw column instead of the table).
(group.tab <- table(task.dt$random_group))
#>
#> A B C
#> 300 600 1200
The output above shows the number of rows in each random group. Below we define a task using that group,
# Regression task "sin" with target y; each column role is then set by name.
reg.task <- mlr3::TaskRegr$new(id = "sin", backend = task.dt, target = "y")
reg.task$col_roles[["subset"]] <- "random_group"
reg.task$col_roles[["group"]] <- "agroup"
reg.task$col_roles[["stratum"]] <- "random_group"
reg.task$col_roles[["feature"]] <- "x"
# Show all of the column roles of the task.
str(reg.task$col_roles)
#> List of 8
#> $ feature: chr "x"
#> $ target : chr "y"
#> $ name : chr(0)
#> $ order : chr(0)
#> $ stratum: chr "random_group"
#> $ group : chr "agroup"
#> $ weight : chr(0)
#> $ subset : chr "random_group"
Below we define the cross-validation object, and instantiate it, in
order to show details about how it works (but normally you should not
instantiate it yourself, as this will be done automatically inside the
call to mlr3::benchmark).
# Create the resampling object and instantiate it on the task by hand, only
# to inspect the table which has one row per train/test split.
same_other_sizes_cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
same_other_sizes_cv$instantiate(reg.task)
same_other_sizes_cv$instance[["iteration.dt"]]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 700 1 43,44,57,58,71,72,...
#> 2: B all 700 1 3, 4, 5, 6,17,18,...
#> 3: C all 700 1 23,24,25,26,37,38,...
#> 4: A all 700 2 1, 2,15,16,29,30,...
#> 5: B all 700 2 33,34,47,48,61,62,...
#> 6: C all 700 2 13,14,21,22,35,36,...
#> 7: A all 700 3 99,100,155,156,169,170,...
#> 8: B all 700 3 19,20,45,46,75,76,...
#> 9: C all 700 3 7, 8, 9,10,11,12,...
#> 10: A other 600 1 43,44,57,58,71,72,...
#> 11: B other 500 1 3, 4, 5, 6,17,18,...
#> 12: C other 300 1 23,24,25,26,37,38,...
#> 13: A other 600 2 1, 2,15,16,29,30,...
#> 14: B other 500 2 33,34,47,48,61,62,...
#> 15: C other 300 2 13,14,21,22,35,36,...
#> 16: A other 600 3 99,100,155,156,169,170,...
#> 17: B other 500 3 19,20,45,46,75,76,...
#> 18: C other 300 3 7, 8, 9,10,11,12,...
#> 19: A same 100 1 43,44,57,58,71,72,...
#> 20: B same 200 1 3, 4, 5, 6,17,18,...
#> 21: C same 400 1 23,24,25,26,37,38,...
#> 22: A same 100 2 1, 2,15,16,29,30,...
#> 23: B same 200 2 33,34,47,48,61,62,...
#> 24: C same 400 2 13,14,21,22,35,36,...
#> 25: A same 100 3 99,100,155,156,169,170,...
#> 26: B same 200 3 19,20,45,46,75,76,...
#> 27: C same 400 3 7, 8, 9,10,11,12,...
#> test.subset train.subsets groups test.fold test
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 1, 2, 7, 8, 9,10,... 1 700 1
#> 2: 1, 2, 7, 8, 9,10,... 1 700 2
#> 3: 1, 2, 7, 8, 9,10,... 1 700 3
#> 4: 3,4,5,6,7,8,... 1 700 4
#> 5: 3,4,5,6,7,8,... 1 700 5
#> 6: 3,4,5,6,7,8,... 1 700 6
#> 7: 1,2,3,4,5,6,... 1 700 7
#> 8: 1,2,3,4,5,6,... 1 700 8
#> 9: 1,2,3,4,5,6,... 1 700 9
#> 10: 7, 8, 9,10,11,12,... 1 600 10
#> 11: 1, 2, 7, 8, 9,10,... 1 500 11
#> 12: 1, 2,15,16,19,20,... 1 300 12
#> 13: 3,4,5,6,7,8,... 1 600 13
#> 14: 7, 8, 9,10,11,12,... 1 500 14
#> 15: 3, 4, 5, 6,17,18,... 1 300 15
#> 16: 3, 4, 5, 6,13,14,... 1 600 16
#> 17: 1, 2,13,14,15,16,... 1 500 17
#> 18: 1,2,3,4,5,6,... 1 300 18
#> 19: 1, 2,15,16,29,30,... 1 100 19
#> 20: 19,20,33,34,45,46,... 1 200 20
#> 21: 7, 8, 9,10,11,12,... 1 400 21
#> 22: 43,44,57,58,71,72,... 1 100 22
#> 23: 3, 4, 5, 6,17,18,... 1 200 23
#> 24: 7, 8, 9,10,11,12,... 1 400 24
#> 25: 1, 2,15,16,29,30,... 1 100 25
#> 26: 3, 4, 5, 6,17,18,... 1 200 26
#> 27: 13,14,21,22,23,24,... 1 400 27
#> train seed n.train.groups iteration
So using the K-fold cross-validation, we will do one train/test split for each row of the table above. There is one row for each combination of test subset (A/B/C), train subset (same/other/all), and test fold (1/2/3).
We compute and plot the results using the code below,
# Start the list of learners with the featureless baseline, and print it.
reg.learner.list <- list(mlr3::LearnerRegrFeatureless$new())
reg.learner.list
#> [[1]]
#> <LearnerRegrFeatureless:regr.featureless>: Featureless Regression Learner
#> * Model: -
#> * Parameters: robust=FALSE
#> * Packages: mlr3, stats
#> * Predict Types: [response], se
#> * Feature Types: logical, integer, numeric, character, factor, ordered,
#> POSIXct
#> * Properties: featureless, importance, missings, selected_features
# rpart is optional: add the regression tree learner only if it is installed.
if (requireNamespace("rpart")) {
  reg.learner.list[["rpart"]] <- mlr3::LearnerRegrRpart$new()
}
#> Loading required namespace: rpart
# Benchmark design: every learner on the task, using the resampling object.
same.other.grid <- mlr3::benchmark_grid(
  tasks = reg.task,
  learners = reg.learner.list,
  resamplings = same_other_sizes_cv)
same.other.grid
#> task learner resampling
#> <char> <char> <char>
#> 1: sin regr.featureless same_other_sizes_cv
#> 2: sin regr.rpart same_other_sizes_cv
## Optional, disabled by default:
##if(require(future))plan("multisession")
# Only show mlr3 log messages at level "warn" or above.
lgr::get_logger("mlr3")$set_threshold("warn")
# Run the benchmark (keeping the fitted models), then print the result.
same.other.result <- mlr3::benchmark(
  design = same.other.grid,
  store_models = TRUE)
same.other.result
#> <BenchmarkResult> of 54 rows with 2 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 27 0 0
#> 2 sin regr.rpart same_other_sizes_cv 27 0 0
# Compute a table of scores with one row per train/test split and learner.
same.other.score <- mlr3resampling::score(same.other.result)
# n.train = number of row indices in each train set. lengths() always returns
# an integer vector, whereas the return type of sapply() depends on its input.
same.other.score[, n.train := lengths(train)]
# Show the first row, to see which columns are available.
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 700 1 43,44,57,58,71,72,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 1, 2, 7, 8, 9,10,... 1 700 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: 5c305e33-df8a-4523-8ba7-11809f4a527b 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.819815
#> algorithm n.train
#> <char> <int>
#> 1: featureless 1400
# Plot the test error of every train/test split: one panel per test subset,
# train subsets on the y axis, one color per algorithm.
if(require(ggplot2)){
  ggplot()+
    geom_point(aes(
      regr.mse, train.subsets, color=algorithm),
      shape=1,
      data=same.other.score)+
    # Train set size labels, right-aligned at the right edge of each panel
    # (x=Inf). geom_text() has no shape parameter, so shape=1 is not passed
    # (it was ignored, with an "Ignoring unknown parameters" warning).
    geom_text(aes(
      Inf, train.subsets,
      label=sprintf("n.train=%d ", n.train)),
      hjust=1,
      vjust=1.5,
      # labels are taken from the featureless rows of test fold 1 only
      data=same.other.score[algorithm=="featureless" & test.fold==1])+
    facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
    scale_x_log10(
      "Mean squared prediction error (test set)")
}
# Aggregate the scores over test folds: mean and sd of regr.mse for each
# combination of algorithm, test subset, and train subsets.
same.other.wide <- dcast(
  data = same.other.score,
  formula = algorithm + test.subset + train.subsets ~ .,
  fun.aggregate = list(mean, sd),
  value.var = "regr.mse")
# Plot mean +/- sd of test error over folds: one panel per test subset,
# train subsets on the y axis, one color per algorithm.
if(require(ggplot2)){
  ggplot()+
    # Horizontal segment from mean-sd to mean+sd. geom_segment() has no shape
    # parameter, so shape=1 is not passed (it was ignored, with a warning).
    geom_segment(aes(
      regr.mse_mean+regr.mse_sd, train.subsets,
      xend=regr.mse_mean-regr.mse_sd, yend=train.subsets,
      color=algorithm),
      data=same.other.wide)+
    geom_point(aes(
      regr.mse_mean, train.subsets, color=algorithm),
      shape=1,
      data=same.other.wide)+
    # Train set size labels, right-aligned at the right edge of each panel
    # (x=Inf); geom_text() has no shape parameter either.
    geom_text(aes(
      Inf, train.subsets,
      label=sprintf("n.train=%d ", n.train)),
      hjust=1,
      vjust=1.5,
      # labels are taken from the featureless rows of test fold 1 only
      data=same.other.score[algorithm=="featureless" & test.fold==1])+
    facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
    scale_x_log10(
      "Mean squared prediction error (test set)")
}
The figures above show a test subset in each panel, the train subsets on the y axis, the test error on the x axis, and the two different algorithms in two different colors. We can clearly see that

- For train.subsets=same, test error is largest, sometimes almost as
  large as featureless, which is the error rate when no relationship
  has been learned between inputs and outputs (not enough data).
- For train.subsets=other, rpart test error is significantly smaller
  than featureless, indicating that some non-trivial relationship
  between inputs and outputs has been learned. Sometimes other has
  larger error than same, sometimes smaller (depending on sample
  size).
- For train.subsets=all, rpart test error tends to be minimal, which
  indicates that combining all of the subsets is beneficial in this
  case (when the pattern is exactly the same in the different
  subsets).

Overall in the plot above, all tends to have less prediction error than same, which suggests that the subsets are similar (and indeed they are iid in this simulation).
Below we visualize test error as a function of train size.
# Plot test error as a function of train set size, one panel per test subset.
if(require(ggplot2)){
  ggplot()+
    # One line per algorithm and test fold. The grouping aesthetic is named
    # group; the previous subset= is not a ggplot2 aesthetic, so it was
    # ignored (with a warning) and the folds were not drawn as separate lines.
    geom_line(aes(
      n.train, regr.mse,
      color=algorithm,
      group=paste(algorithm, test.fold)),
      data=same.other.score)+
    # Label each point with the train subsets used (same/other/all).
    geom_label(aes(
      n.train, regr.mse,
      color=algorithm,
      label=train.subsets),
      data=same.other.score)+
    facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
    scale_y_log10(
      "Mean squared prediction error (test set)")
}
In the previous section we defined a task using the subset role,
which means that the different values in that column will be used to
define different subsets for training/testing using same/other/all CV.
In contrast, below we define a task without the subset role, which
means that we will not have separate CV iterations for same/other/all
(full data is treated as one subset / train subset is same).
# Same regression task as before, except that no column gets the subset role.
task.no.subset <- mlr3::TaskRegr$new(id = "sin", backend = task.dt, target = "y")
task.no.subset$col_roles[["group"]] <- "agroup"
task.no.subset$col_roles[["stratum"]] <- "random_group"
task.no.subset$col_roles[["feature"]] <- "x"
# Show all of the column roles of the task.
str(task.no.subset$col_roles)
#> List of 7
#> $ feature: chr "x"
#> $ target : chr "y"
#> $ name : chr(0)
#> $ order : chr(0)
#> $ stratum: chr "random_group"
#> $ group : chr "agroup"
#> $ weight : chr(0)
Below we define cross-validation, and we set the sizes parameter to 5 so we
can see what happens when we have train sets that are 5 sizes
smaller than the full train set size.
# New resampling object with the sizes parameter set to 5, instantiated by
# hand on the task without subset, to inspect the table of train/test splits.
same_other_sizes_cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
same_other_sizes_cv$param_set$values[["sizes"]] <- 5
same_other_sizes_cv$instantiate(task.no.subset)
same_other_sizes_cv$instance[["iteration.dt"]]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: full same 700 1 3, 4,13,14,15,16,...
#> 2: full same 700 1 3, 4,13,14,15,16,...
#> 3: full same 700 1 3, 4,13,14,15,16,...
#> 4: full same 700 1 3, 4,13,14,15,16,...
#> 5: full same 700 1 3, 4,13,14,15,16,...
#> 6: full same 700 1 3, 4,13,14,15,16,...
#> 7: full same 700 2 1, 2,17,18,21,22,...
#> 8: full same 700 2 1, 2,17,18,21,22,...
#> 9: full same 700 2 1, 2,17,18,21,22,...
#> 10: full same 700 2 1, 2,17,18,21,22,...
#> 11: full same 700 2 1, 2,17,18,21,22,...
#> 12: full same 700 2 1, 2,17,18,21,22,...
#> 13: full same 700 3 5, 6, 7, 8, 9,10,...
#> 14: full same 700 3 5, 6, 7, 8, 9,10,...
#> 15: full same 700 3 5, 6, 7, 8, 9,10,...
#> 16: full same 700 3 5, 6, 7, 8, 9,10,...
#> 17: full same 700 3 5, 6, 7, 8, 9,10,...
#> 18: full same 700 3 5, 6, 7, 8, 9,10,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 565,566,583,584,743,744,... 1 21 1
#> 2: 133,134,171,172,305,306,... 1 43 2
#> 3: 77,78,93,94,95,96,... 1 87 3
#> 4: 7, 8,25,26,29,30,... 1 175 4
#> 5: 1, 2, 7, 8,17,18,... 1 350 5
#> 6: 1,2,5,6,7,8,... 1 700 6
#> 7: 39, 40,109,110,285,286,... 1 21 7
#> 8: 29,30,37,38,39,40,... 1 43 8
#> 9: 29,30,37,38,39,40,... 1 87 9
#> 10: 3, 4,13,14,23,24,... 1 175 10
#> 11: 3, 4,13,14,19,20,... 1 350 11
#> 12: 3,4,5,6,7,8,... 1 700 12
#> 13: 209,210,397,398,519,520,... 1 21 13
#> 14: 139,140,209,210,343,344,... 1 43 14
#> 15: 1, 2,17,18,43,44,... 1 87 15
#> 16: 1, 2,17,18,37,38,... 1 175 16
#> 17: 1, 2,13,14,17,18,... 1 350 17
#> 18: 1, 2, 3, 4,13,14,... 1 700 18
So using the K-fold cross-validation, we will do one train/test split
for each row of the table above. There is one row for each combination
of n.train.groups
(full train set size + 5 smaller sizes), and test
fold (1/2/3).
We compute and plot the results using the code below,
# Start the list of learners with the featureless baseline, and print it.
reg.learner.list <- list(mlr3::LearnerRegrFeatureless$new())
reg.learner.list
#> [[1]]
#> <LearnerRegrFeatureless:regr.featureless>: Featureless Regression Learner
#> * Model: -
#> * Parameters: robust=FALSE
#> * Packages: mlr3, stats
#> * Predict Types: [response], se
#> * Feature Types: logical, integer, numeric, character, factor, ordered,
#> POSIXct
#> * Properties: featureless, importance, missings, selected_features
# rpart is optional: add the regression tree learner only if it is installed.
if (requireNamespace("rpart")) {
  reg.learner.list[["rpart"]] <- mlr3::LearnerRegrRpart$new()
}
# Benchmark design: every learner on the task without subset, using the
# resampling object with down-sampled train sizes.
same.other.grid <- mlr3::benchmark_grid(
  tasks = task.no.subset,
  learners = reg.learner.list,
  resamplings = same_other_sizes_cv)
same.other.grid
#> task learner resampling
#> <char> <char> <char>
#> 1: sin regr.featureless same_other_sizes_cv
#> 2: sin regr.rpart same_other_sizes_cv
## Optional, disabled by default:
##if(require(future))plan("multisession")
# Only show mlr3 log messages at level "warn" or above.
lgr::get_logger("mlr3")$set_threshold("warn")
# Run the benchmark (keeping the fitted models), then print the result.
same.other.result <- mlr3::benchmark(
  design = same.other.grid,
  store_models = TRUE)
same.other.result
#> <BenchmarkResult> of 36 rows with 2 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 18 0 0
#> 2 sin regr.rpart same_other_sizes_cv 18 0 0
# Compute a table of scores with one row per train/test split and learner.
same.other.score <- mlr3resampling::score(same.other.result)
# n.train = number of row indices in each train set. lengths() always returns
# an integer vector, whereas the return type of sapply() depends on its input.
same.other.score[, n.train := lengths(train)]
# Show the first row, to see which columns are available.
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: full same 700 1 3, 4,13,14,15,16,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 565,566,583,584,743,744,... 1 21 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: 7ee4b7bb-f3a3-4f96-bf1b-5df4b7f35eba 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7650132
#> algorithm n.train
#> <char> <int>
#> 1: featureless 42
# Plot test error as a function of the number of train rows (log-log scale).
if(require(ggplot2)){
  ggplot()+
    # One line per algorithm and test fold. The grouping aesthetic is named
    # group; the previous subset= is not a ggplot2 aesthetic, so it was
    # ignored (with a warning) and the folds were not drawn as separate lines.
    geom_line(aes(
      n.train, regr.mse,
      color=algorithm,
      group=paste(algorithm, test.fold)),
      data=same.other.score)+
    geom_point(aes(
      n.train, regr.mse,
      color=algorithm),
      data=same.other.score)+
    facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
    # x axis breaks at exactly the train set sizes which were used
    scale_x_log10(
      "Number of train rows",
      breaks=unique(same.other.score$n.train))+
    scale_y_log10(
      "Mean squared prediction error (test set)")
}
From the plot above, it looks like about 700 rows is enough to get minimal test error, using the rpart learner.
# Simulate N feature values, drawn uniformly from [-abs.x, abs.x], and then
# sorted in increasing order.
abs.x <- 20
N <- 600
set.seed(1)  # fixed seed, so the simulated values are reproducible
x.vec <- sort(runif(n = N, min = -abs.x, max = abs.x))
str(x.vec)
#> num [1:600] -19.9 -19.9 -19.7 -19.6 -19.6 ...
library(data.table)
# Build the data table for the learning task: the output y is a sine wave of
# the input x, plus Gaussian noise with standard deviation 0.5.
task.dt <- data.table(
  x = x.vec,
  y = sin(x.vec) + rnorm(N, sd = 0.5))
task.dt
#> x y
#> <num> <num>
#> 1: -19.92653 -0.4336887
#> 2: -19.92269 -1.4023484
#> 3: -19.67486 0.2509134
#> 4: -19.55856 -0.8428921
#> 5: -19.55402 0.1794473
#> ---
#> 596: 19.70736 0.7497818
#> 597: 19.74997 0.3178435
#> 598: 19.75656 1.3950030
#> 599: 19.83862 -0.2086586
#> 600: 19.84309 0.5748863
# Scatterplot of the simulated data, drawn only if ggplot2 can be loaded.
if (require(ggplot2)) {
  ggplot() +
    geom_point(
      mapping = aes(x = x, y = y),
      data = task.dt,
      shape = 1) +
    coord_equal()
}
# Number of consecutive rows which share the same value of agroup.
atomic.subset.size <- 2
# Add the agroup column (1, 1, 2, 2, ...) by reference, then print the table.
task.dt[
  , agroup := rep(
    seq(from = 1, to = N / atomic.subset.size),
    each = atomic.subset.size)
][]
#> x y agroup
#> <num> <num> <int>
#> 1: -19.92653 -0.4336887 1
#> 2: -19.92269 -1.4023484 1
#> 3: -19.67486 0.2509134 2
#> 4: -19.55856 -0.8428921 2
#> 5: -19.55402 0.1794473 3
#> ---
#> 596: 19.70736 0.7497818 298
#> 597: 19.74997 0.3178435 299
#> 598: 19.75656 1.3950030 299
#> 599: 19.83862 -0.2086586 300
#> 600: 19.84309 0.5748863 300
# Label each atomic group with a random_subset, cycling through the pattern
# A,B,B,B (each label repeated atomic.subset.size times), recycled to the
# number of rows of the table.
task.dt[, random_subset := rep(
  rep(c("A","B","B","B"), each=atomic.subset.size),
  # argument name written in full, rather than relying on partial matching of l=
  length.out=.N
)][]
#> x y agroup random_subset
#> <num> <num> <int> <char>
#> 1: -19.92653 -0.4336887 1 A
#> 2: -19.92269 -1.4023484 1 A
#> 3: -19.67486 0.2509134 2 B
#> 4: -19.55856 -0.8428921 2 B
#> 5: -19.55402 0.1794473 3 B
#> ---
#> 596: 19.70736 0.7497818 298 B
#> 597: 19.74997 0.3178435 299 B
#> 598: 19.75656 1.3950030 299 B
#> 599: 19.83862 -0.2086586 300 B
#> 600: 19.84309 0.5748863 300 B
# Count the rows in each random subset; subset.tab stores the table of counts
# (previously it was assigned the raw column instead of the table).
(subset.tab <- table(task.dt$random_subset))
#>
#> A B
#> 150 450
# Regression task "sin" with target y; each column role is then set by name.
reg.task <- mlr3::TaskRegr$new(id = "sin", backend = task.dt, target = "y")
reg.task$col_roles[["subset"]] <- "random_subset"
reg.task$col_roles[["group"]] <- "agroup"
reg.task$col_roles[["stratum"]] <- "random_subset"
reg.task$col_roles[["feature"]] <- "x"
# New resampling object, configured below.
same_other_sizes_cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
In the previous section we analyzed prediction accuracy of
same/other/all, which corresponds to keeping sizes
parameter at
default of -1. The main difference in this section is that we change
sizes
to 0, which means to down-sample same/other/all, so we can see
if there is an effect for sample size (there should be for iid
problems with intermediate difficulty). We set sizes to 0 in the next
line:
# sizes=0 means to down-sample same/other/all.
same_other_sizes_cv$param_set$values$sizes <- 0
# Instantiate by hand, only to inspect the table of train/test splits.
same_other_sizes_cv$instantiate(reg.task)
# Element name written in full, rather than relying on partial matching of $it.
same_other_sizes_cv$instance$iteration.dt
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> 2: A all 200 1 1, 2,49,50,57,58,...
#> 3: A all 200 1 1, 2,49,50,57,58,...
#> 4: B all 200 1 19,20,31,32,37,38,...
#> 5: B all 200 1 19,20,31,32,37,38,...
#> 6: B all 200 1 19,20,31,32,37,38,...
#> 7: A all 200 2 17,18,41,42,89,90,...
#> 8: A all 200 2 17,18,41,42,89,90,...
#> 9: A all 200 2 17,18,41,42,89,90,...
#> 10: B all 200 2 3,4,5,6,7,8,...
#> 11: B all 200 2 3,4,5,6,7,8,...
#> 12: B all 200 2 3,4,5,6,7,8,...
#> 13: A all 200 3 9,10,25,26,33,34,...
#> 14: A all 200 3 9,10,25,26,33,34,...
#> 15: A all 200 3 9,10,25,26,33,34,...
#> 16: B all 200 3 15,16,21,22,23,24,...
#> 17: B all 200 3 15,16,21,22,23,24,...
#> 18: B all 200 3 15,16,21,22,23,24,...
#> 19: A other 150 1 1, 2,49,50,57,58,...
#> 20: A other 150 1 1, 2,49,50,57,58,...
#> 21: B other 50 1 19,20,31,32,37,38,...
#> 22: A other 150 2 17,18,41,42,89,90,...
#> 23: A other 150 2 17,18,41,42,89,90,...
#> 24: B other 50 2 3,4,5,6,7,8,...
#> 25: A other 150 3 9,10,25,26,33,34,...
#> 26: A other 150 3 9,10,25,26,33,34,...
#> 27: B other 50 3 15,16,21,22,23,24,...
#> 28: A same 50 1 1, 2,49,50,57,58,...
#> 29: B same 150 1 19,20,31,32,37,38,...
#> 30: B same 150 1 19,20,31,32,37,38,...
#> 31: A same 50 2 17,18,41,42,89,90,...
#> 32: B same 150 2 3,4,5,6,7,8,...
#> 33: B same 150 2 3,4,5,6,7,8,...
#> 34: A same 50 3 9,10,25,26,33,34,...
#> 35: B same 150 3 15,16,21,22,23,24,...
#> 36: B same 150 3 15,16,21,22,23,24,...
#> test.subset train.subsets groups test.fold test
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> 2: 3,4,5,6,7,8,... 1 150 2
#> 3: 3,4,5,6,7,8,... 1 200 3
#> 4: 3, 4, 7, 8,15,16,... 1 50 4
#> 5: 3,4,5,6,7,8,... 1 150 5
#> 6: 3,4,5,6,7,8,... 1 200 6
#> 7: 1, 2,35,36,39,40,... 1 50 7
#> 8: 1, 2, 9,10,19,20,... 1 150 8
#> 9: 1, 2, 9,10,15,16,... 1 200 9
#> 10: 19,20,63,64,73,74,... 1 50 10
#> 11: 1, 2, 9,10,15,16,... 1 150 11
#> 12: 1, 2, 9,10,15,16,... 1 200 12
#> 13: 29,30,37,38,49,50,... 1 50 13
#> 14: 5, 6,11,12,13,14,... 1 150 14
#> 15: 1,2,3,4,5,6,... 1 200 15
#> 16: 13,14,29,30,49,50,... 1 50 16
#> 17: 1,2,3,4,5,6,... 1 150 17
#> 18: 1,2,3,4,5,6,... 1 200 18
#> 19: 15,16,21,22,55,56,... 1 50 19
#> 20: 3,4,5,6,7,8,... 1 150 20
#> 21: 9,10,17,18,25,26,... 1 50 21
#> 22: 15,16,19,20,23,24,... 1 50 22
#> 23: 15,16,19,20,21,22,... 1 150 23
#> 24: 1, 2, 9,10,25,26,... 1 50 24
#> 25: 11,12,19,20,27,28,... 1 50 25
#> 26: 3,4,5,6,7,8,... 1 150 26
#> 27: 1, 2,17,18,41,42,... 1 50 27
#> 28: 9,10,17,18,25,26,... 1 50 28
#> 29: 59,60,63,64,75,76,... 1 50 29
#> 30: 3,4,5,6,7,8,... 1 150 30
#> 31: 1, 2, 9,10,25,26,... 1 50 31
#> 32: 23,24,37,38,51,52,... 1 50 32
#> 33: 15,16,19,20,21,22,... 1 150 33
#> 34: 1, 2,17,18,41,42,... 1 50 34
#> 35: 11,12,19,20,45,46,... 1 50 35
#> 36: 3,4,5,6,7,8,... 1 150 36
#> train seed n.train.groups iteration
# Start the list of learners with the featureless baseline, and print it.
reg.learner.list <- list(mlr3::LearnerRegrFeatureless$new())
reg.learner.list
#> [[1]]
#> <LearnerRegrFeatureless:regr.featureless>: Featureless Regression Learner
#> * Model: -
#> * Parameters: robust=FALSE
#> * Packages: mlr3, stats
#> * Predict Types: [response], se
#> * Feature Types: logical, integer, numeric, character, factor, ordered,
#> POSIXct
#> * Properties: featureless, importance, missings, selected_features
# rpart is optional: add the regression tree learner only if it is installed.
if (requireNamespace("rpart")) {
  reg.learner.list[["rpart"]] <- mlr3::LearnerRegrRpart$new()
}
# Benchmark design: every learner on the task, using the resampling object.
same.other.grid <- mlr3::benchmark_grid(
  tasks = reg.task,
  learners = reg.learner.list,
  resamplings = same_other_sizes_cv)
same.other.grid
#> task learner resampling
#> <char> <char> <char>
#> 1: sin regr.featureless same_other_sizes_cv
#> 2: sin regr.rpart same_other_sizes_cv
## Optional, disabled by default:
##if(require(future))plan("multisession")
# Only show mlr3 log messages at level "warn" or above.
lgr::get_logger("mlr3")$set_threshold("warn")
# Run the benchmark (keeping the fitted models), then print the result.
same.other.result <- mlr3::benchmark(
  design = same.other.grid,
  store_models = TRUE)
same.other.result
#> <BenchmarkResult> of 72 rows with 2 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 36 0 0
#> 2 sin regr.rpart same_other_sizes_cv 36 0 0
# Compute a table of scores with one row per train/test split and learner,
# then show the first row, to see which columns are available.
same.other.score <- mlr3resampling::score(same.other.result)
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5814085
#> algorithm
#> <char>
#> 1: featureless
The plot below shows the same results (no down-sampling) as if we did
sizes=-1 (like in the previous section).
# Plot test error for the splits which were not down-sampled, that is, the
# rows where n.train.groups is equal to groups.
if (require(ggplot2)) {
  ggplot() +
    geom_point(
      mapping = aes(x = regr.mse, y = train.subsets, color = algorithm),
      data = same.other.score[groups == n.train.groups],
      shape = 1) +
    facet_grid(. ~ test.subset, labeller = label_both)
}
The plots below compare all six train subsets (including three down-sampled), and it is clear there is an effect for sample size.
# Add the subset.N column, which combines the train subsets with the number
# of train groups (separated by a space), then print the table.
same.other.score[
  , subset.N := paste(train.subsets, n.train.groups, sep = " ")
][]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> 2: A all 200 1 1, 2,49,50,57,58,...
#> 3: A all 200 1 1, 2,49,50,57,58,...
#> 4: B all 200 1 19,20,31,32,37,38,...
#> 5: B all 200 1 19,20,31,32,37,38,...
#> 6: B all 200 1 19,20,31,32,37,38,...
#> 7: A all 200 2 17,18,41,42,89,90,...
#> 8: A all 200 2 17,18,41,42,89,90,...
#> 9: A all 200 2 17,18,41,42,89,90,...
#> 10: B all 200 2 3,4,5,6,7,8,...
#> 11: B all 200 2 3,4,5,6,7,8,...
#> 12: B all 200 2 3,4,5,6,7,8,...
#> 13: A all 200 3 9,10,25,26,33,34,...
#> 14: A all 200 3 9,10,25,26,33,34,...
#> 15: A all 200 3 9,10,25,26,33,34,...
#> 16: B all 200 3 15,16,21,22,23,24,...
#> 17: B all 200 3 15,16,21,22,23,24,...
#> 18: B all 200 3 15,16,21,22,23,24,...
#> 19: A other 150 1 1, 2,49,50,57,58,...
#> 20: A other 150 1 1, 2,49,50,57,58,...
#> 21: B other 50 1 19,20,31,32,37,38,...
#> 22: A other 150 2 17,18,41,42,89,90,...
#> 23: A other 150 2 17,18,41,42,89,90,...
#> 24: B other 50 2 3,4,5,6,7,8,...
#> 25: A other 150 3 9,10,25,26,33,34,...
#> 26: A other 150 3 9,10,25,26,33,34,...
#> 27: B other 50 3 15,16,21,22,23,24,...
#> 28: A same 50 1 1, 2,49,50,57,58,...
#> 29: B same 150 1 19,20,31,32,37,38,...
#> 30: B same 150 1 19,20,31,32,37,38,...
#> 31: A same 50 2 17,18,41,42,89,90,...
#> 32: B same 150 2 3,4,5,6,7,8,...
#> 33: B same 150 2 3,4,5,6,7,8,...
#> 34: A same 50 3 9,10,25,26,33,34,...
#> 35: B same 150 3 15,16,21,22,23,24,...
#> 36: B same 150 3 15,16,21,22,23,24,...
#> 37: A all 200 1 1, 2,49,50,57,58,...
#> 38: A all 200 1 1, 2,49,50,57,58,...
#> 39: A all 200 1 1, 2,49,50,57,58,...
#> 40: B all 200 1 19,20,31,32,37,38,...
#> 41: B all 200 1 19,20,31,32,37,38,...
#> 42: B all 200 1 19,20,31,32,37,38,...
#> 43: A all 200 2 17,18,41,42,89,90,...
#> 44: A all 200 2 17,18,41,42,89,90,...
#> 45: A all 200 2 17,18,41,42,89,90,...
#> 46: B all 200 2 3,4,5,6,7,8,...
#> 47: B all 200 2 3,4,5,6,7,8,...
#> 48: B all 200 2 3,4,5,6,7,8,...
#> 49: A all 200 3 9,10,25,26,33,34,...
#> 50: A all 200 3 9,10,25,26,33,34,...
#> 51: A all 200 3 9,10,25,26,33,34,...
#> 52: B all 200 3 15,16,21,22,23,24,...
#> 53: B all 200 3 15,16,21,22,23,24,...
#> 54: B all 200 3 15,16,21,22,23,24,...
#> 55: A other 150 1 1, 2,49,50,57,58,...
#> 56: A other 150 1 1, 2,49,50,57,58,...
#> 57: B other 50 1 19,20,31,32,37,38,...
#> 58: A other 150 2 17,18,41,42,89,90,...
#> 59: A other 150 2 17,18,41,42,89,90,...
#> 60: B other 50 2 3,4,5,6,7,8,...
#> 61: A other 150 3 9,10,25,26,33,34,...
#> 62: A other 150 3 9,10,25,26,33,34,...
#> 63: B other 50 3 15,16,21,22,23,24,...
#> 64: A same 50 1 1, 2,49,50,57,58,...
#> 65: B same 150 1 19,20,31,32,37,38,...
#> 66: B same 150 1 19,20,31,32,37,38,...
#> 67: A same 50 2 17,18,41,42,89,90,...
#> 68: B same 150 2 3,4,5,6,7,8,...
#> 69: B same 150 2 3,4,5,6,7,8,...
#> 70: A same 50 3 9,10,25,26,33,34,...
#> 71: B same 150 3 15,16,21,22,23,24,...
#> 72: B same 150 3 15,16,21,22,23,24,...
#> test.subset train.subsets groups test.fold test
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> 2: 3,4,5,6,7,8,... 1 150 2
#> 3: 3,4,5,6,7,8,... 1 200 3
#> 4: 3, 4, 7, 8,15,16,... 1 50 4
#> 5: 3,4,5,6,7,8,... 1 150 5
#> 6: 3,4,5,6,7,8,... 1 200 6
#> 7: 1, 2,35,36,39,40,... 1 50 7
#> 8: 1, 2, 9,10,19,20,... 1 150 8
#> 9: 1, 2, 9,10,15,16,... 1 200 9
#> 10: 19,20,63,64,73,74,... 1 50 10
#> 11: 1, 2, 9,10,15,16,... 1 150 11
#> 12: 1, 2, 9,10,15,16,... 1 200 12
#> 13: 29,30,37,38,49,50,... 1 50 13
#> 14: 5, 6,11,12,13,14,... 1 150 14
#> 15: 1,2,3,4,5,6,... 1 200 15
#> 16: 13,14,29,30,49,50,... 1 50 16
#> 17: 1,2,3,4,5,6,... 1 150 17
#> 18: 1,2,3,4,5,6,... 1 200 18
#> 19: 15,16,21,22,55,56,... 1 50 19
#> 20: 3,4,5,6,7,8,... 1 150 20
#> 21: 9,10,17,18,25,26,... 1 50 21
#> 22: 15,16,19,20,23,24,... 1 50 22
#> 23: 15,16,19,20,21,22,... 1 150 23
#> 24: 1, 2, 9,10,25,26,... 1 50 24
#> 25: 11,12,19,20,27,28,... 1 50 25
#> 26: 3,4,5,6,7,8,... 1 150 26
#> 27: 1, 2,17,18,41,42,... 1 50 27
#> 28: 9,10,17,18,25,26,... 1 50 28
#> 29: 59,60,63,64,75,76,... 1 50 29
#> 30: 3,4,5,6,7,8,... 1 150 30
#> 31: 1, 2, 9,10,25,26,... 1 50 31
#> 32: 23,24,37,38,51,52,... 1 50 32
#> 33: 15,16,19,20,21,22,... 1 150 33
#> 34: 1, 2,17,18,41,42,... 1 50 34
#> 35: 11,12,19,20,45,46,... 1 50 35
#> 36: 3,4,5,6,7,8,... 1 150 36
#> 37: 5, 6, 9,10,15,16,... 1 50 1
#> 38: 3,4,5,6,7,8,... 1 150 2
#> 39: 3,4,5,6,7,8,... 1 200 3
#> 40: 3, 4, 7, 8,15,16,... 1 50 4
#> 41: 3,4,5,6,7,8,... 1 150 5
#> 42: 3,4,5,6,7,8,... 1 200 6
#> 43: 1, 2,35,36,39,40,... 1 50 7
#> 44: 1, 2, 9,10,19,20,... 1 150 8
#> 45: 1, 2, 9,10,15,16,... 1 200 9
#> 46: 19,20,63,64,73,74,... 1 50 10
#> 47: 1, 2, 9,10,15,16,... 1 150 11
#> 48: 1, 2, 9,10,15,16,... 1 200 12
#> 49: 29,30,37,38,49,50,... 1 50 13
#> 50: 5, 6,11,12,13,14,... 1 150 14
#> 51: 1,2,3,4,5,6,... 1 200 15
#> 52: 13,14,29,30,49,50,... 1 50 16
#> 53: 1,2,3,4,5,6,... 1 150 17
#> 54: 1,2,3,4,5,6,... 1 200 18
#> 55: 15,16,21,22,55,56,... 1 50 19
#> 56: 3,4,5,6,7,8,... 1 150 20
#> 57: 9,10,17,18,25,26,... 1 50 21
#> 58: 15,16,19,20,23,24,... 1 50 22
#> 59: 15,16,19,20,21,22,... 1 150 23
#> 60: 1, 2, 9,10,25,26,... 1 50 24
#> 61: 11,12,19,20,27,28,... 1 50 25
#> 62: 3,4,5,6,7,8,... 1 150 26
#> 63: 1, 2,17,18,41,42,... 1 50 27
#> 64: 9,10,17,18,25,26,... 1 50 28
#> 65: 59,60,63,64,75,76,... 1 50 29
#> 66: 3,4,5,6,7,8,... 1 150 30
#> 67: 1, 2, 9,10,25,26,... 1 50 31
#> 68: 23,24,37,38,51,52,... 1 50 32
#> 69: 15,16,19,20,21,22,... 1 150 33
#> 70: 1, 2,17,18,41,42,... 1 50 34
#> 71: 11,12,19,20,45,46,... 1 50 35
#> 72: 3,4,5,6,7,8,... 1 150 36
#> train seed n.train.groups iteration
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 2: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 3: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 4: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 5: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 6: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 7: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 8: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 9: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 10: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 11: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 12: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 13: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 14: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 15: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 16: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 17: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 18: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 19: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 20: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 21: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 22: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 23: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 24: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 25: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 26: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 27: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 28: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 29: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 30: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 31: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 32: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 33: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 34: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 35: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 36: 3c3b1f0c-2a5a-4cce-85d5-e6d04fd97c2c 1 <TaskRegr:sin> sin
#> 37: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 38: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 39: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 40: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 41: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 42: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 43: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 44: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 45: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 46: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 47: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 48: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 49: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 50: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 51: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 52: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 53: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 54: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 55: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 56: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 57: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 58: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 59: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 60: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 61: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 62: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 63: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 64: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 65: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 66: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 67: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 68: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 69: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 70: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 71: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> 72: a537899f-f739-4305-aef8-0f0d40347787 2 <TaskRegr:sin> sin
#> uhash nr task task_id
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 2: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 3: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 4: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 5: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 6: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 7: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 8: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 9: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 10: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 11: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 12: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 13: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 14: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 15: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 16: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 17: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 18: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 19: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 20: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 21: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 22: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 23: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 24: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 25: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 26: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 27: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 28: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 29: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 30: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 31: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 32: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 33: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 34: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 35: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 36: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 37: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 38: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 39: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 40: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 41: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 42: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 43: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 44: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 45: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 46: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 47: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 48: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 49: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 50: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 51: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 52: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 53: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 54: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 55: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 56: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 57: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 58: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 59: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 60: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 61: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 62: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 63: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 64: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 65: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 66: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 67: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 68: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 69: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 70: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 71: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 72: <LearnerRegrRpart:regr.rpart> regr.rpart
#> learner learner_id
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5814085
#> 2: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5888123
#> 3: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6004137
#> 4: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7418338
#> 5: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7448785
#> 6: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7426736
#> 7: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7032864
#> 8: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6915582
#> 9: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6935124
#> 10: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6876009
#> 11: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6938962
#> 12: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6927970
#> 13: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8679940
#> 14: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8679637
#> 15: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8698560
#> 16: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8100344
#> 17: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8080412
#> 18: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8079158
#> 19: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6008614
#> 20: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5986958
#> 21: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7438359
#> 22: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7575599
#> 23: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6927668
#> 24: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6974565
#> 25: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8758799
#> 26: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8682078
#> 27: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8095930
#> 28: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6058833
#> 29: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7644929
#> 30: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7423914
#> 31: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6967243
#> 32: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7065626
#> 33: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6915689
#> 34: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8800099
#> 35: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8944441
#> 36: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8090933
#> 37: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.9579886
#> 38: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5572793
#> 39: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5762640
#> 40: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6754320
#> 41: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4605665
#> 42: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4170070
#> 43: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7317229
#> 44: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5239765
#> 45: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.3890640
#> 46: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7954012
#> 47: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.3448899
#> 48: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.3294351
#> 49: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5894307
#> 50: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5876353
#> 51: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5989192
#> 52: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6718522
#> 53: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4275034
#> 54: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5398954
#> 55: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7813902
#> 56: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6348613
#> 57: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5887354
#> 58: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6004308
#> 59: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4871282
#> 60: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5600961
#> 61: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7225103
#> 62: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4658216
#> 63: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7620797
#> 64: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8782663
#> 65: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7809353
#> 66: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4280411
#> 67: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6343650
#> 68: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6115182
#> 69: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4005002
#> 70: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7486917
#> 71: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.9276521
#> 72: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4816441
#> resampling resampling_id prediction regr.mse
#> algorithm subset.N
#> <char> <char>
#> 1: featureless all 50
#> 2: featureless all 150
#> 3: featureless all 200
#> 4: featureless all 50
#> 5: featureless all 150
#> 6: featureless all 200
#> 7: featureless all 50
#> 8: featureless all 150
#> 9: featureless all 200
#> 10: featureless all 50
#> 11: featureless all 150
#> 12: featureless all 200
#> 13: featureless all 50
#> 14: featureless all 150
#> 15: featureless all 200
#> 16: featureless all 50
#> 17: featureless all 150
#> 18: featureless all 200
#> 19: featureless other 50
#> 20: featureless other 150
#> 21: featureless other 50
#> 22: featureless other 50
#> 23: featureless other 150
#> 24: featureless other 50
#> 25: featureless other 50
#> 26: featureless other 150
#> 27: featureless other 50
#> 28: featureless same 50
#> 29: featureless same 50
#> 30: featureless same 150
#> 31: featureless same 50
#> 32: featureless same 50
#> 33: featureless same 150
#> 34: featureless same 50
#> 35: featureless same 50
#> 36: featureless same 150
#> 37: rpart all 50
#> 38: rpart all 150
#> 39: rpart all 200
#> 40: rpart all 50
#> 41: rpart all 150
#> 42: rpart all 200
#> 43: rpart all 50
#> 44: rpart all 150
#> 45: rpart all 200
#> 46: rpart all 50
#> 47: rpart all 150
#> 48: rpart all 200
#> 49: rpart all 50
#> 50: rpart all 150
#> 51: rpart all 200
#> 52: rpart all 50
#> 53: rpart all 150
#> 54: rpart all 200
#> 55: rpart other 50
#> 56: rpart other 150
#> 57: rpart other 50
#> 58: rpart other 50
#> 59: rpart other 150
#> 60: rpart other 50
#> 61: rpart other 50
#> 62: rpart other 150
#> 63: rpart other 50
#> 64: rpart same 50
#> 65: rpart same 50
#> 66: rpart same 150
#> 67: rpart same 50
#> 68: rpart same 50
#> 69: rpart same 150
#> 70: rpart same 50
#> 71: rpart same 50
#> 72: rpart same 150
#> algorithm subset.N
## Order the "subset N" labels by train subset first, then by train size.
(levs <- same.other.score[
  order(train.subsets, n.train.groups),
  unique(subset.N)])
#> [1] "all 50" "all 150" "all 200" "other 50" "other 150" "same 50"
#> [7] "same 150"
## Store the labels as a factor whose levels follow that order.
same.other.score[, subset.N.fac := factor(subset.N, levels = levs)]
if (require(ggplot2)) {
  ggplot() +
    geom_point(
      mapping = aes(x = regr.mse, y = subset.N.fac, color = algorithm),
      data = same.other.score,
      shape = 1) +
    facet_wrap(
      facets = "test.subset",
      labeller = label_both,
      scales = "free",
      nrow = 1)
}
## Order the "subset N" labels by train size first, then by train subset.
(levs <- same.other.score[
  order(n.train.groups, train.subsets),
  unique(subset.N)])
#> [1] "all 50" "other 50" "same 50" "all 150" "other 150" "same 150"
#> [7] "all 200"
## Store the labels as a factor whose levels follow that order.
same.other.score[, N.subset.fac := factor(subset.N, levels = levs)]
if (require(ggplot2)) {
  ggplot() +
    geom_point(
      mapping = aes(x = regr.mse, y = N.subset.fac, color = algorithm),
      data = same.other.score,
      shape = 1) +
    facet_wrap(
      facets = "test.subset",
      labeller = label_both,
      scales = "free",
      nrow = 1)
}
Another way to view the effect of sample size is to plot the test/prediction error, as a function of number of train data, as in the plots below.
## Test error (regr.mse) as a function of the number of train groups,
## one panel per test fold and test subset.
if (require(ggplot2)) {
  ggplot() +
    geom_point(aes(
      n.train.groups, regr.mse,
      color = train.subsets),
      shape = 1,
      data = same.other.score) +
    ## `group` is the ggplot2 aesthetic that decides which points are
    ## joined into one line. The previous `subset=` is not an aesthetic,
    ## so ggplot2 ignored it and emitted a warning.
    geom_line(aes(
      n.train.groups, regr.mse,
      group = paste(train.subsets, seed, algorithm),
      linetype = algorithm,
      color = train.subsets),
      data = same.other.score) +
    facet_grid(test.fold ~ test.subset, labeller = label_both) +
    scale_x_log10()
}
## Keep only the rpart results, dropping the "other" train subsets.
rpart.score <- same.other.score[algorithm=="rpart" & train.subsets != "other"]
if (require(ggplot2)) {
  ggplot() +
    geom_point(aes(
      n.train.groups, regr.mse,
      color = train.subsets),
      shape = 1,
      data = rpart.score) +
    ## `group` is the ggplot2 aesthetic that decides which points are
    ## joined into one line. The previous `subset=` is not an aesthetic,
    ## so ggplot2 ignored it and emitted a warning.
    geom_line(aes(
      n.train.groups, regr.mse,
      group = paste(train.subsets, seed, algorithm),
      color = train.subsets),
      data = rpart.score) +
    facet_grid(test.fold ~ test.subset, labeller = label_both) +
    scale_x_log10()
}
In this section we show how ResamplingSameOtherSizesCV
can be used on a task with stratification and grouping, for hyper-parameter learning. First we recall the previously defined task and evaluation CV.
## Show the column roles currently set on the regression task.
str(reg.task$col_roles)
#> List of 8
#> $ feature: chr "x"
#> $ target : chr "y"
#> $ name : chr(0)
#> $ order : chr(0)
#> $ stratum: chr "random_subset"
#> $ group : chr "agroup"
#> $ weight : chr(0)
#> $ subset : chr "random_subset"
We see in the output above that the task has column roles for both
stratum
and group
, which normally errors when used with
ResamplingCV
:
## Expected to fail: the task has both stratum and group roles (see the error below).
mlr3::ResamplingCV$new()$instantiate(reg.task)
#> Error: Cannot combine stratification with grouping
Below we show how ResamplingSameOtherSizesCV
can be used instead:
## Create the resampling object and set ignore_subset=TRUE before instantiating.
ignore.cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
ignore.cv$param_set$values$ignore_subset <- TRUE
ignore.cv$instantiate(reg.task)
## Show the table of train/test splits stored by instantiate().
ignore.cv$instance$iteration.dt
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: full same 200 1 5, 6, 7, 8, 9,10,...
#> 2: full same 200 2 3, 4,11,12,13,14,...
#> 3: full same 200 3 1, 2,25,26,31,32,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 1, 2, 3, 4,11,12,... 1 200 1
#> 2: 1,2,5,6,7,8,... 1 200 2
#> 3: 3,4,5,6,7,8,... 1 200 3
To use the above CV object with a learning algorithm in a benchmark
experiment, we need to use it as the resampling
argument to
auto_tuner
, as in the code below.
## Run a benchmark of regression learners on reg.task: featureless,
## rpart (when rpart is installed), and rpart with cp tuned by grid
## search (when mlr3tuning is also installed).
##
## subtrain.valid.cv: resampling object passed as the `resampling`
##   argument of mlr3tuning::auto_tuner.
##
## Reads `reg.task` and `same_other_sizes_cv` from the calling
## environment; they are not arguments of this function.
## Side effect: sets the threshold of the "bbotk" logger to "warn".
##
## Returns the value of mlr3::benchmark() (with store_models = TRUE).
## It is returned visibly; previously it was assigned to an unused
## local variable, which made the return value invisible.
do_benchmark <- function(subtrain.valid.cv){
  reg.learner.list <- list(
    mlr3::LearnerRegrFeatureless$new())
  if(requireNamespace("rpart")){
    reg.learner.list$rpart <- mlr3::LearnerRegrRpart$new()
    if(requireNamespace("mlr3tuning")){
      rpart.learner <- mlr3::LearnerRegrRpart$new()
      ##mlr3tuningspaces::lts(rpart.learner)$param_set$values
      rpart.learner$param_set$values$cp <- paradox::to_tune(1e-4, 0.1, log=TRUE)
      reg.learner.list$rpart.tuned <- mlr3tuning::auto_tuner(
        tuner = mlr3tuning::tnr("grid_search"), #mlr3tuning::TunerBatchGridSearch$new()
        learner = rpart.learner,
        resampling = subtrain.valid.cv,
        measure = mlr3::msr("regr.mse"))
    }
  }
  same.other.grid <- mlr3::benchmark_grid(
    reg.task,
    reg.learner.list,
    same_other_sizes_cv)
  lgr::get_logger("bbotk")$set_threshold("warn")
  mlr3::benchmark(same.other.grid, store_models = TRUE)
}
## Expected to fail: ResamplingCV is used here as the auto_tuner resampling (see the error below).
do_benchmark(mlr3::ResamplingCV$new())
#> Loading required namespace: mlr3tuning
#> Error: Cannot combine stratification with grouping
The error above is because ResamplingCV
does not support
stratification and grouping. To fix that, we can use the code below:
## Use ResamplingSameOtherSizesCV with ignore_subset=TRUE as the resampling given to do_benchmark.
ignore.cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
ignore.cv$param_set$values$ignore_subset <- TRUE
## The outer parentheses print the benchmark result after assigning it.
(same.other.result <- do_benchmark(ignore.cv))
#> <BenchmarkResult> of 108 rows with 3 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 36 0 0
#> 2 sin regr.rpart same_other_sizes_cv 36 0 0
#> 3 sin regr.rpart.tuned same_other_sizes_cv 36 0 0
The output above shows that the benchmark worked. The code below plots the results.
## Compute the score table from the benchmark result.
same.other.score <- mlr3resampling::score(same.other.result)
## Print only the first row.
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: bb68c564-6458-48e6-9d83-352d310188e6 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5814085
#> algorithm
#> <char>
#> 1: featureless
## Mean and SD of regr.mse for each combination of algorithm,
## test.subset and train.subsets (columns regr.mse_mean, regr.mse_sd).
same.other.wide <- dcast(
  same.other.score,
  algorithm + test.subset + train.subsets ~ .,
  list(mean, sd),
  value.var = "regr.mse")
if (require(ggplot2)) {
  ggplot() +
    ## Horizontal segment from mean+SD to mean-SD. geom_segment has no
    ## `shape` parameter, so the former shape=1 (which only produced an
    ## "unknown parameters" warning) is not passed here.
    geom_segment(aes(
      regr.mse_mean + regr.mse_sd, train.subsets,
      xend = regr.mse_mean - regr.mse_sd, yend = train.subsets),
      data = same.other.wide) +
    geom_point(aes(
      regr.mse_mean, train.subsets),
      shape = 1,
      data = same.other.wide) +
    facet_grid(algorithm ~ test.subset, labeller = label_both)
}
The plot above has different panels for rpart
(without tuning) and
tuned
(rpart with tuning of cp
).
mlr3resampling::ResamplingSameOtherSizesCV
can be used for model evaluation (train/test split):
subset
).sizes
).
It can also be used for model training (subtrain/validation split):
stratum
and group
roles (use it as resampling
argument of auto_tuner
).
The goal of this section is to explain the differences between various column roles:
group
is used to designate observations which should stay together
when splitting. In other words, two rows in the same group
should
never appear in different sets.
subset
designates a column whose values are each treated as a test
set (the train data come from Same/Other/All subsets).
Below we load the data set.
## Load the AZtrees data set from the mlr3resampling package.
data(AZtrees,package="mlr3resampling")
library(data.table)
## Convert to a data.table and print its first row.
AZdt <- data.table(AZtrees)
AZdt[1]
#> xcoord ycoord region3 region4 polygon y SAMPLE_1 SAMPLE_2
#> <num> <num> <char> <char> <fctr> <fctr> <int> <int>
#> 1: -111.6643 35.23736 NE NE 1 Not tree 3331 3919
#> SAMPLE_3 SAMPLE_4 SAMPLE_5 SAMPLE_6 SAMPLE_7 SAMPLE_8 SAMPLE_9 SAMPLE_10
#> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1: 3957 4514 4700 4607 4420 4494 4139 3906
#> SAMPLE_11 SAMPLE_12 SAMPLE_13 SAMPLE_14 SAMPLE_15 SAMPLE_16 SAMPLE_17
#> <int> <int> <int> <int> <int> <int> <int>
#> 1: 14 -40 -71 125 21 25 10
#> SAMPLE_18 SAMPLE_19 SAMPLE_20 SAMPLE_21
#> <int> <int> <int> <int>
#> 1: -263 -324 -362 370
Above we see one row of data. Below we see a scatterplot of the data:
## Center and half-width of the square region highlighted in red.
x.center <- -111.72
y.center <- 35.272
rect.size <- 0.01 / 2
## Lower and upper limits of the square along each axis.
x.min.max <- c(x.center - rect.size, x.center + rect.size)
y.min.max <- c(y.center - rect.size, y.center + rect.size)
rect.dt <- data.table(
  xmin = x.min.max[1],
  xmax = x.min.max[2],
  ymin = y.min.max[1],
  ymax = y.min.max[2])
if (require(ggplot2)) {
  ## Fill scale for the two labels, kept in a variable for re-use.
  tree.fill.scale <- scale_fill_manual(
    values = c(Tree = "black", "Not tree" = "white"))
  ggplot() +
    theme_bw() +
    tree.fill.scale +
    geom_rect(
      mapping = aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
      data = rect.dt,
      fill = "red",
      linewidth = 3,
      color = "red") +
    geom_point(
      mapping = aes(x = xcoord, y = ycoord, fill = y),
      data = AZdt,
      shape = 21) +
    coord_equal()
}
Note the red square in the plot above. Below we zoom into that square.
## Same scatterplot, with axis limits restricted to the square defined
## by x.min.max and y.min.max.
if (require(ggplot2)) {
  gg <- ggplot() +
    theme_bw() +
    tree.fill.scale +
    geom_point(
      mapping = aes(x = xcoord, y = ycoord, fill = y),
      data = AZdt,
      shape = 21) +
    coord_equal() +
    scale_x_continuous(limits = x.min.max) +
    scale_y_continuous(limits = y.min.max)
  ## Add polygon labels only when directlabels is available.
  if (require(directlabels)) {
    gg <- gg + geom_dl(
      aes(x = xcoord, y = ycoord, label = polygon),
      data = AZdt,
      method = "smart.grid")
  }
  gg
}
#> Loading required package: directlabels
#> Warning: Removed 5927 rows containing missing values or values outside the scale range
#> (`geom_point()`).
#> Warning: Removed 5927 rows containing missing values or values outside the scale range
#> (`geom_dl()`).
In the plot above, we see that there are several groups of points, each with a black number. Each group of points comes from a single polygon (label drawn in GIS software), and the black number is the polygon ID number. So each polygon represents one label, either tree or not, and there are one or more points/pixels with that label inside each polygon.
A polygon is an example of a group. Each polygon results in one or more rows of training data (pixels), but since pixels in a given group were all labeled together, we would like to keep them together when splitting the data.
Below we plot the same data, but this time colored by region.
##dput(RColorBrewer::brewer.pal(3,"Dark2"))
## One outline color per value of region3.
region.colors <- c(NW = "#1B9E77", NE = "#D95F02", S = "#7570B3")
if (require(ggplot2)) {
  ggplot() +
    theme_bw() +
    tree.fill.scale +
    scale_color_manual(values = region.colors) +
    geom_point(
      mapping = aes(x = xcoord, y = ycoord, color = region3, fill = y),
      data = AZdt,
      shape = 21) +
    coord_equal()
}
We can see in the plot above that there are three values in the
region3
column: NE, NW, and S (different geographical regions on the
map which are well-separated). We would like to know if it is possible
to train on one region, and then accurately predict on another region.
First we create a task:
## Classification task on AZdt with target column y.
ctask <- mlr3::TaskClassif$new(
"AZtrees", AZdt, target="y")
## Column roles: region3 is the subset, polygon is the group, y is the stratum.
ctask$col_roles$subset <- "region3"
ctask$col_roles$group <- "polygon"
ctask$col_roles$stratum <- "y"
## Features are the columns whose names contain "SAMPLE".
ctask$col_roles$feature <- grep("SAMPLE",names(AZdt),value=TRUE)
str(ctask$col_roles)
#> List of 8
#> $ feature: chr [1:21] "SAMPLE_1" "SAMPLE_2" "SAMPLE_3" "SAMPLE_4" ...
#> $ target : chr "y"
#> $ name : chr(0)
#> $ order : chr(0)
#> $ stratum: chr "y"
#> $ group : chr "polygon"
#> $ weight : chr(0)
#> $ subset : chr "region3"
Then we can instantiate the CV to see how it works (but usually you do
not need to instantiate, if you are using benchmark
it does it for
you).
## Instantiate a 3-fold resampling on ctask to inspect its splits.
same.other.cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
same.other.cv$param_set$values$folds <- 3
same.other.cv$instantiate(ctask)
## One row per train/test split. train.rows counts the rows in each
## train set. lengths() always returns an integer vector, whereas
## sapply(train, length) returns list() when there are no rows.
same.other.cv$instance$iteration.dt[, .(
  train.subsets, test.fold, test.subset, n.train.groups,
  train.rows = lengths(train))]
#> train.subsets test.fold test.subset n.train.groups train.rows
#> <char> <int> <char> <int> <int>
#> 1: all 1 NE 125 4212
#> 2: all 1 NW 125 4212
#> 3: all 1 S 125 4212
#> 4: all 2 NE 125 2838
#> 5: all 2 NW 125 2838
#> 6: all 2 S 125 2838
#> 7: all 3 NE 125 4862
#> 8: all 3 NW 125 4862
#> 9: all 3 S 125 4862
#> 10: other 1 NE 55 3405
#> 11: other 1 NW 104 3541
#> 12: other 1 S 91 1478
#> 13: other 2 NE 55 1664
#> 14: other 2 NW 104 1702
#> 15: other 2 S 91 2310
#> 16: other 3 NE 55 3915
#> 17: other 3 NW 104 3543
#> 18: other 3 S 91 2266
#> 19: same 1 NE 70 807
#> 20: same 1 NW 21 671
#> 21: same 1 S 34 2734
#> 22: same 2 NE 70 1174
#> 23: same 2 NW 21 1136
#> 24: same 2 S 34 528
#> 25: same 3 NE 70 947
#> 26: same 3 NW 21 1319
#> 27: same 3 S 34 2596
#> train.subsets test.fold test.subset n.train.groups train.rows
The table above has one row per train/test split for which
error/accuracy metrics will be computed. The n.train.groups
column
is the number of polygons which are used in the train set, which is
defined as the intersection of the train subsets and the train folds.
To double check, below we compute the total number of groups/polygons per
subset/region, and the expected number of train groups/polygons.
## Count distinct polygons per region, then multiply by the fraction
## (folds-1)/folds taken from the resampling parameters.
AZdt[
  , .(polygons = uniqueN(polygon)),
  by = region3
][
  , train.polygons := polygons * with(
    same.other.cv$param_set$values,
    (folds - 1) / folds)
][]
#> region3 polygons train.polygons
#> <char> <int> <num>
#> 1: NE 105 70.00000
#> 2: NW 32 21.33333
#> 3: S 52 34.66667
It is clear that the counts in the train.polygons
column above match (after rounding down to a whole number of polygons)
the numbers in the previous table column n.train.groups
. To
determine the number of rows of train data, we can look at the
train.rows
column in the previous table.
Below we define the benchmark experiment.
## New resampling object, with no parameter values set here.
same.other.cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
## Start the learner list with the featureless classifier; the outer parentheses print it.
(learner.list <- list(
mlr3::LearnerClassifFeatureless$new()))
#> [[1]]
#> <LearnerClassifFeatureless:classif.featureless>: Featureless Classification Learner
#> * Model: -
#> * Parameters: method=mode
#> * Packages: mlr3
#> * Predict Types: [response], prob
#> * Feature Types: logical, integer, numeric, character, factor, ordered,
#> POSIXct
#> * Properties: featureless, importance, missings, multiclass,
#> selected_features, twoclass
## Add a decision tree learner only when rpart is installed.
if (requireNamespace("rpart")) {
  learner.list$rpart <- mlr3::LearnerClassifRpart$new()
}
## Request probability predictions from every learner.
for (learner.i in seq_along(learner.list)) {
  learner.list[[learner.i]]$predict_type <- "prob"
}
## Benchmark design over the task, the learners, and the resampling.
(bench.grid <- mlr3::benchmark_grid(
  tasks = ctask,
  learners = learner.list,
  resamplings = same.other.cv))
#> task learner resampling
#> <char> <char> <char>
#> 1: AZtrees classif.featureless same_other_sizes_cv
#> 2: AZtrees classif.rpart same_other_sizes_cv
Above we see one row per combination of task, learner, and resampling. Below we compute the benchmark result and test accuracy.
## Run the benchmark defined by bench.grid.
bench.result <- mlr3::benchmark(bench.grid)
## Score the result with two measures: accuracy and AUC.
measure.list <- mlr3::msrs(c("classif.acc","classif.auc"))
score.dt <- mlr3resampling::score(bench.result, measure.list)
## Print only the first row.
score.dt[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: NE all 125 1 1,2,3,4,5,6,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 9,10,11,12,13,32,... 1 125 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: 847a2040-f1eb-431a-a01b-a5e8daa8f5f1 1 <TaskClassif:AZtrees> AZtrees
#> learner learner_id
#> <list> <char>
#> 1: <LearnerClassifFeatureless:classif.featureless> classif.featureless
#> resampling resampling_id prediction
#> <list> <char> <list>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionClassif>
#> classif.acc classif.auc algorithm
#> <num> <num> <char>
#> 1: 0.8561737 0.5 featureless
Above we see one row of the result, for one train/test split. Below we plot the accuracy results.
## Reshape to long format: the classif.acc and classif.auc columns are
## melted, with the part matched by (acc|auc) stored in `variable`.
score.long <- melt(
  score.dt,
  measure.vars = measure(variable, pattern = "classif.(acc|auc)"))
if (require(ggplot2)) {
  ggplot() +
    geom_point(
      mapping = aes(x = value, y = train.subsets, color = algorithm),
      data = score.long) +
    facet_grid(
      test.subset ~ variable,
      labeller = label_both,
      scales = "free")
}
Above we show one dot per train/test split, and below we take the mean/SD over folds.
## Mean and SD of `value` for each combination of algorithm,
## test.subset, train.subsets and variable (value_mean, value_sd).
score.wide <- dcast(
  score.long,
  algorithm + test.subset + train.subsets + variable ~ .,
  list(mean, sd),
  value.var = "value")
if (require(ggplot2)) {
  ggplot() +
    geom_point(
      mapping = aes(x = value_mean, y = train.subsets, color = algorithm),
      data = score.wide,
      size = 3,
      fill = "white",
      shape = 21) +
    ## Horizontal segment from mean+SD to mean-SD, with a different
    ## line width per algorithm.
    geom_segment(
      mapping = aes(
        x = value_mean + value_sd,
        y = train.subsets,
        xend = value_mean - value_sd,
        yend = train.subsets,
        color = algorithm,
        linewidth = algorithm),
      data = score.wide) +
    scale_linewidth_manual(values = c(featureless = 2, rpart = 1)) +
    facet_grid(
      test.subset ~ variable,
      labeller = label_both,
      scales = "free") +
    scale_x_continuous(
      name = "Mean +/- SD of test accuracy/AUC over folds/splits")
}
The plot above shows an interesting pattern:
Column roles group
, stratum
, and subset
may be used together, in
the same task, in order to perform a cross-validation experiment which
captures the structure in the data.
## Record the R version and package versions used to build this document.
sessionInfo()
#> R version 4.3.2 (2023-10-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 18.04.6 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=C
#> [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/Toronto
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] directlabels_2024.1.21 mlr3_0.17.0 ggplot2_3.5.0
#> [4] data.table_1.15.0
#>
#> loaded via a namespace (and not attached):
#> [1] future.apply_1.11.0 gtable_0.3.4 crayon_1.5.2
#> [4] dplyr_1.1.4 compiler_4.3.2 highr_0.10
#> [7] rpart_4.1.23 tidyselect_1.2.0 parallel_4.3.2
#> [10] globals_0.16.2 scales_1.3.0 uuid_1.1-1
#> [13] R6_2.5.1 mlr3tuning_0.19.2 labeling_0.4.3
#> [16] generics_0.1.3 knitr_1.45 palmerpenguins_0.1.1
#> [19] backports_1.4.1 checkmate_2.3.1 future_1.33.0
#> [22] tibble_3.2.1 munsell_0.5.0 paradox_0.11.1
#> [25] pillar_1.9.0 mlr3measures_0.5.0 rlang_1.1.3
#> [28] utf8_1.2.4 lgr_0.4.4 xfun_0.42
#> [31] quadprog_1.5-8 mlr3misc_0.13.0 cli_3.6.2
#> [34] withr_3.0.0 magrittr_2.0.3 digest_0.6.34
#> [37] grid_4.3.2 bbotk_0.7.3 lifecycle_1.0.4
#> [40] vctrs_0.6.5 evaluate_0.23 glue_1.7.0
#> [43] farver_2.1.1 listenv_0.9.1 codetools_0.2-19
#> [46] parallelly_1.37.0 fansi_1.0.6 colorspace_2.1-0
#> [49] mlr3resampling_2024.7.7 tools_4.3.2 pkgconfig_2.0.3