-
Notifications
You must be signed in to change notification settings - Fork 45
Open
Labels
Description
When Lrnr_randomForest is a candidate learner in a super learner library, the fit computed through the delayed learner and the fit computed by calling train() directly do not yield the same predictions, even when the same seed is set before each.
Here is an example borrowed from test-delayed_sl3.R:
# Reproduction 1: train the same super learner (random forest + glmnet +
# fast GLM) once through a delayed Scheduler and once via sl$train(), calling
# set.seed(123) before each step, then compare the two sets of predictions.
# NOTE(review): this snippet does not attach the package(s) that provide
# sl3_Task, the Lrnr_* learners, Stack, delayed_learner_train, or
# expect_equal -- presumably sl3 and testthat; confirm they are loaded first.
library(delayed)
library(SuperLearner)
library(future)
# Select the sequential future plan.
plan(sequential)
data(cpp_imputed)
# Task: outcome "haz" with seven covariates from cpp_imputed.
task <- sl3_Task$new(
cpp_imputed,
covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"),
outcome = "haz")
# Candidate learners, all constructed with their default arguments.
lrnr_rf <- Lrnr_randomForest$new()
lrnr_glmnet <- Lrnr_glmnet$new()
lrnr_glm_fast <- Lrnr_glm_fast$new()
stack <- Stack$new(lrnr_rf, lrnr_glmnet, lrnr_glm_fast)
sl <- Lrnr_sl$new(learners = stack)
# Delayed path: seed, build the delayed training object, then seed again
# immediately before the scheduler computes it.
set.seed(123)
test_delayed <- delayed_learner_train(sl, task)
sched <- Scheduler$new(test_delayed, SequentialJob)
set.seed(123)
fit_delayed <- sched$compute()
preds_delayed <- fit_delayed$predict()
# Direct path: same seed, then train the super learner directly.
set.seed(123)
fit <- sl$train(task)
preds <- fit$predict()
# Both prediction sets are coerced with as.numeric() before comparison.
expect_equal(as.numeric(preds_delayed), as.numeric(preds))
When the code above is run, preds_delayed and preds do not match.
Note that if we remove Lrnr_randomForest from the stack, the result is reproducible:
# Reproduction 2: same comparison as the previous snippet, but the stack
# contains only glmnet and fast GLM (no Lrnr_randomForest).
# NOTE(review): no library() calls here -- presumably this runs in the same
# session as the previous snippet; confirm.
# Select the sequential future plan.
plan(sequential)
data(cpp_imputed)
# Task: outcome "haz" with seven covariates from cpp_imputed.
task <- sl3_Task$new(
cpp_imputed,
covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"),
outcome = "haz")
lrnr_glmnet <- Lrnr_glmnet$new()
lrnr_glm_fast <- Lrnr_glm_fast$new()
stack <- Stack$new(lrnr_glmnet, lrnr_glm_fast)
sl <- Lrnr_sl$new(learners = stack)
# Delayed path: seed before building the delayed training object and again
# before the scheduler computes it.
set.seed(123)
test_delayed <- delayed_learner_train(sl, task)
sched <- Scheduler$new(test_delayed, SequentialJob)
set.seed(123)
fit_delayed <- sched$compute()
# Direct path: same seed, then train the super learner directly.
set.seed(123)
fit <- sl$train(task)
preds_delayed <- fit_delayed$predict()
preds <- fit$predict()
# Unlike the previous snippet, predictions are compared without as.numeric().
expect_equal(preds_delayed, preds)
However, if the set.seed(123) call above the line test_delayed <- delayed_learner_train(sl, task) is removed, preds_delayed and preds no longer match.