changed tests to fit new param scheme

mlr-org · Aug 23, 2024 · 2867759 · 2867759
1 parent e1b113e
commit 2867759
Showing 1 changed file with 37 additions and 17 deletions.
diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
@@ -20,6 +20,7 @@ test_that("PipeOpUMAP - basic properties", {
 
 test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default Params, nn_method = annoy", {
   skip_if_not_installed("uwot")
+  skip_if_not_installed("RcppAnnoy")
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
@@ -33,7 +34,7 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names_wo_pointers = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names_wo_pointers = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names_wo_pointers], umap_out[state_names_wo_pointers])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
@@ -50,6 +51,8 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
+
+  # BUild list of param with same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
   pv = list(
     seed = 1234L,
     nn_method = "annoy",
@@ -64,24 +67,31 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
     local_connectivity = 1.1,
     bandwidth = 0.9,
     repulsion_strength = 1.1,
-    negative_sample_rate = 6,
-    y = task$data()[, 1]
+    negative_sample_rate = 6
   )
-  op$param_set$set_values(.values = pv)
+  # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
+  pv_po = insert_named(pv, list(use_supervised = TRUE,
+                                batch_transform = TRUE,
+                                init_transform = "average",
+                                search_k_transform = 1000L))
+  op$param_set$set_values(.values = pv_po)
+  args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1]))
+  args_umap_transform = list(init = "average", search_k = 1000L, batch = TRUE)
 
   train_out = train_pipeop(op, list(task))[[1L]]
-  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2)
 
   state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names], umap_out[state_names])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
   predict_out = predict_pipeop(op, list(task))[[1L]]
-  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform)
+
   expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })
@@ -93,6 +103,8 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
+
+  # BUild list of param with same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
   pv = list(
     seed = 1234L,
     nn_method = "hnsw",
@@ -108,24 +120,27 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
     bandwidth = 0.9,
     repulsion_strength = 1.1,
     negative_sample_rate = 6,
-    y = task$data()[, 1],
     nn_args = list(M = 10L, ef_construction = 100L, ef = 20L)
   )
-  op$param_set$set_values(.values = pv)
+  # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
+  pv_po = insert_named(pv, list(use_supervised = TRUE, init_transform = "average"))
+  op$param_set$set_values(.values = pv_po)
+  args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1]))
+  args_umap_transform = list(init = "average")
 
   train_out = train_pipeop(op, list(task))[[1L]]
-  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2)
 
   state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names], umap_out[state_names])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
   predict_out = predict_pipeop(op, list(task))[[1L]]
-  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform)
   expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })
@@ -137,6 +152,8 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
+
+  # BUild list of param with same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
   pv = list(
     seed = 1234L,
     nn_method = "nndescent",
@@ -152,25 +169,28 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
     bandwidth = 0.9,
     repulsion_strength = 1.1,
     negative_sample_rate = 6,
-    y = task$data()[, 1],
     nn_args = list(n_trees = 15L, max_candidates = 15L, pruning_degree_multiplier = 1.4, epsilon = 0.05)
   )
-  op$param_set$set_values(.values = pv)
+  # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
+  pv_po = insert_named(pv, list(use_supervised = TRUE, init_transform = "average"))
+  op$param_set$set_values(.values = pv_po)
+  args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1]))
+  args_umap_transform = list(init = "average")
 
   train_out = train_pipeop(op, list(task))[[1L]]
-  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2)
 
   state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
 
-  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names], umap_out[state_names])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
   predict_out = predict_pipeop(op, list(task))[[1L]]
-  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform)
   expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })