From 9faa0da149ddec0884c6c05600055f8bb2b51913 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 13:06:43 +0100 Subject: [PATCH 01/12] fix extract component --- src/utils/extract_uns_metadata/script.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/utils/extract_uns_metadata/script.py b/src/utils/extract_uns_metadata/script.py index 5d759b60a..9a85af3f9 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -8,8 +8,8 @@ ## VIASH START par = { - 'input': 'resources_test/common/pancreas/dataset.h5ad', - 'schema': 'src/datasets/api/file_raw.yaml', + 'input': 'foo/cellxgene_census/dkd/log_cp10k.combat.asw_batch.asw_batch.output.h5ad', + 'schema': '/home/rcannood/workspace/openproblems/task_batch_integration/src/api/file_score.yaml', 'output': 'output/meta.yaml', } ## VIASH END @@ -36,9 +36,15 @@ ## Helper functions for extracting the dataset metadata in uns ## #################################################################################################### def is_atomic(obj): - return isinstance(obj, str) or isinstance(obj, int) or isinstance(obj, bool) or isinstance(obj, float) + return isinstance(obj, str) or isinstance(obj, int) or isinstance(obj, bool) or isinstance(obj, float) or \ + isinstance(obj, np.int32) or isinstance(obj, np.float32) or isinstance(obj, np.int64) or \ + isinstance(obj, np.bool_) or isinstance(obj, np.str_) def to_atomic(obj): + if isinstance(obj, np.int32): + return int(obj) + elif isinstance(obj, np.float32): + return float(obj) if isinstance(obj, np.float64): return float(obj) elif isinstance(obj, np.int64): From d70a960181320e9bd6aaf0e3afd1933c2c5860d8 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 13:34:24 +0100 Subject: [PATCH 02/12] add to par --- src/utils/extract_uns_metadata/script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/extract_uns_metadata/script.py 
b/src/utils/extract_uns_metadata/script.py index 9a85af3f9..984bf46d1 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -11,6 +11,7 @@ 'input': 'foo/cellxgene_census/dkd/log_cp10k.combat.asw_batch.asw_batch.output.h5ad', 'schema': '/home/rcannood/workspace/openproblems/task_batch_integration/src/api/file_score.yaml', 'output': 'output/meta.yaml', + 'uns_length_cutoff': 100 } ## VIASH END From b10be3263a751d0033dab9dcf8df6b69f53cb78c Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 13:35:15 +0100 Subject: [PATCH 03/12] fix par --- src/utils/extract_uns_metadata/script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/extract_uns_metadata/script.py b/src/utils/extract_uns_metadata/script.py index 984bf46d1..d3d331400 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -8,8 +8,8 @@ ## VIASH START par = { - 'input': 'foo/cellxgene_census/dkd/log_cp10k.combat.asw_batch.asw_batch.output.h5ad', - 'schema': '/home/rcannood/workspace/openproblems/task_batch_integration/src/api/file_score.yaml', + 'input': 'resources_test/common/pancreas/dataset.h5ad', + 'schema': 'src/datasets/api/file_raw.yaml', 'output': 'output/meta.yaml', 'uns_length_cutoff': 100 } From 2bc613f81ef54e446d0e2619c652e47082c4d7ca Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 13:36:11 +0100 Subject: [PATCH 04/12] add more checks --- src/utils/extract_uns_metadata/script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/extract_uns_metadata/script.py b/src/utils/extract_uns_metadata/script.py index d3d331400..754a02a92 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -39,7 +39,7 @@ def is_atomic(obj): return isinstance(obj, str) or isinstance(obj, int) or isinstance(obj, bool) or isinstance(obj, float) or \ isinstance(obj, np.int32) or 
isinstance(obj, np.float32) or isinstance(obj, np.int64) or \ - isinstance(obj, np.bool_) or isinstance(obj, np.str_) + isinstance(obj, np.float64) or isinstance(obj, np.bool_) or isinstance(obj, np.str_) def to_atomic(obj): if isinstance(obj, np.int32): From 2920f327fd28676c61c16536b044e7a3511ef792 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 13:37:09 +0100 Subject: [PATCH 05/12] simplify --- src/utils/extract_uns_metadata/script.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils/extract_uns_metadata/script.py b/src/utils/extract_uns_metadata/script.py index 754a02a92..ae848748d 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -37,9 +37,9 @@ ## Helper functions for extracting the dataset metadata in uns ## #################################################################################################### def is_atomic(obj): - return isinstance(obj, str) or isinstance(obj, int) or isinstance(obj, bool) or isinstance(obj, float) or \ - isinstance(obj, np.int32) or isinstance(obj, np.float32) or isinstance(obj, np.int64) or \ - isinstance(obj, np.float64) or isinstance(obj, np.bool_) or isinstance(obj, np.str_) + return isinstance(obj, ( + str, int, bool, float, np.int32, np.float32, np.int64, np.float64, np.bool_, np.str_ + )) def to_atomic(obj): if isinstance(obj, np.int32): From be0b4c8235f8d5bcda318afe7037b87750b740e9 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 14:01:22 +0100 Subject: [PATCH 06/12] use pd function --- src/utils/extract_uns_metadata/script.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/utils/extract_uns_metadata/script.py b/src/utils/extract_uns_metadata/script.py index ae848748d..b62872af0 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -37,19 +37,13 @@ ## Helper functions for extracting the dataset metadata 
in uns ## #################################################################################################### def is_atomic(obj): - return isinstance(obj, ( - str, int, bool, float, np.int32, np.float32, np.int64, np.float64, np.bool_, np.str_ - )) + return pd.api.types.is_scalar(obj) def to_atomic(obj): - if isinstance(obj, np.int32): + if isinstance(obj, (np.int32,np.int64)): return int(obj) - elif isinstance(obj, np.float32): + elif isinstance(obj, np.float32,np.float64): return float(obj) - if isinstance(obj, np.float64): - return float(obj) - elif isinstance(obj, np.int64): - return int(obj) elif isinstance(obj, np.bool_): return bool(obj) elif isinstance(obj, np.str_): @@ -57,7 +51,7 @@ def to_atomic(obj): return obj def is_list_of_atomics(obj): - if not isinstance(obj, (list,pd.core.series.Series,np.ndarray)): + if not isinstance(obj, (list, pd.core.series.Series, np.ndarray)): return False return all(is_atomic(elem) for elem in obj) From 496a76759dcb865dd1ac3366a2269cca1c8f5385 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 17:34:14 +0100 Subject: [PATCH 07/12] fix function --- src/utils/extract_uns_metadata/script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/extract_uns_metadata/script.py b/src/utils/extract_uns_metadata/script.py index b62872af0..ac5e93f13 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -42,7 +42,7 @@ def is_atomic(obj): def to_atomic(obj): if isinstance(obj, (np.int32,np.int64)): return int(obj) - elif isinstance(obj, np.float32,np.float64): + elif isinstance(obj, (np.float32,np.float64)): return float(obj) elif isinstance(obj, np.bool_): return bool(obj) From f4da0daf942c35685cc8de194080390317913afd Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 17:34:15 +0100 Subject: [PATCH 08/12] fix function From d2db577cf0ff4cac4ef611c75e3fba89bb7c6fac Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 
8 Nov 2024 17:39:27 +0100 Subject: [PATCH 09/12] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 276eb63a8..3213323e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,10 @@ - Update `dataset_id` for `tenx_visium`, `zenodo_spatial`, `zenodo_spatial_slidetags` datasets and use `mouse_brain_coronal` as a test resource in the `spatially_variable_genes` task (PR #908). +## Bug fixes + +- Fix extracting metadata from anndata files in the `extract_uns_metadata` component (PR #914). + # openproblems v2.0.0 A major update to the OpenProblems framework, switching from a Python-based framework to a Viash + Nextflow-based framework. This update features the same concepts as the previous version, but with a new implementation that is more flexible, scalable, and maintainable. From 7a4e60d71ed425ced6f847b76b48a29f79d388dd Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 11 Nov 2024 23:42:46 +0100 Subject: [PATCH 10/12] wip improvements --- src/reporting/get_metric_info/script.R | 3 +- src/reporting/get_results/script.R | 185 ++++++++++++------ .../process_task_results/run_test.sh | 4 +- 3 files changed, 129 insertions(+), 63 deletions(-) diff --git a/src/reporting/get_metric_info/script.R b/src/reporting/get_metric_info/script.R index 0b3aff308..a13e55de7 100644 --- a/src/reporting/get_metric_info/script.R +++ b/src/reporting/get_metric_info/script.R @@ -32,7 +32,7 @@ outputs <- map(configs, function(config) { info$task_id <- gsub("/.*", "", config$namespace) info$id <- info$name info$name <- NULL - info$component_id <- config$name + info$component_name <- config$name info$namespace <- config$namespace info$commit_sha <- build_info$git_commit %||% "missing-sha" info$code_version <- "missing-version" @@ -47,6 +47,7 @@ outputs <- map(configs, function(config) { # construct v1 format out <- list( task_id = info$task_id, + component_name = info$component_name, metric_id = info$id, metric_name = 
info$label, metric_summary = info$summary, diff --git a/src/reporting/get_results/script.R b/src/reporting/get_results/script.R index 9f8459cff..ee06b77da 100644 --- a/src/reporting/get_results/script.R +++ b/src/reporting/get_results/script.R @@ -9,16 +9,24 @@ library(purrr, warn.conflicts = FALSE) library(rlang, warn.conflicts = FALSE) ## VIASH START +# raw_dir <- "resources_test/openproblems/task_results_v3/raw" +# processed_dir <- "resources_test/openproblems/task_results_v3/processed" +# raw_dir <- "/home/rcannood/workspace/openproblems-bio/task_perturbation_prediction/resources/results/run_2024-10-31_06-14-14" +# processed_dir <- "/home/rcannood/workspace/openproblems-bio/website/results/perturbation_prediction/data" +raw_dir <- "/home/rcannood/workspace/openproblems-bio/task_batch_integration/resources/results/run_2024-11-08_19-43-15" +processed_dir <- "/home/rcannood/workspace/openproblems-bio/website/results/batch_integration/data" + par <- list( # inputs - input_scores = "resources_test/openproblems/task_results_v3/raw/score_uns.yaml", - input_execution = "resources_test/openproblems/task_results_v3/raw/trace.txt", - input_dataset_info = "resources_test/openproblems/task_results_v3/processed/dataset_info.json", - input_method_info = "resources_test/openproblems/task_results_v3/processed/method_info.json", - input_metric_info = "resources_test/openproblems/task_results_v3/processed/metric_info.json", + input_scores = paste0(raw_dir, "/score_uns.yaml"), + input_execution = paste0(raw_dir, "/trace.txt"), + input_dataset_info = paste0(processed_dir, "/dataset_info.json"), + input_method_info = paste0(processed_dir, "/method_info.json"), + input_method_configs = paste0(raw_dir, "/method_configs.yaml"), + input_metric_info = paste0(processed_dir, "/metric_info.json"), # outputs - output_results = "resources_test/openproblems/task_results_v3/processed/results.json", - output_metric_execution_info = 
"resources_test/openproblems/task_results_v3/processed/metric_execution_info.json" + output_results = paste0(processed_dir, "/results.json"), + output_metric_execution_info = paste0(processed_dir, "/metric_execution_info.json") ) ## VIASH END @@ -133,47 +141,61 @@ scores <- raw_scores %>% norm_methods <- "/log_cp10k|/log_cpm|/sqrt_cp10k|/sqrt_cpm|/l1_sqrt|/log_scran_pooling" id_regex <- paste0("^.*:(.*)_process \\(([^\\.]*)(", norm_methods, ")?(.[^\\.]*)?\\.(.*)\\)$") -trace <- readr::read_tsv(par$input_execution) %>% +# # reverse engineer metric component names +# metric_info <- metric_info %>% +# mutate(comp_id = gsub(".*/([^/]*)/config\\.vsh\\.yaml", "\\1", implementation_url)) + +# read execution info +input_execution <- readr::read_tsv(par$input_execution) + +method_lookup <- map_dfr(method_info$method_id, function(method_id) { + regex <- paste0("(.*:", method_id, ":[^ ]*)") + name <- + input_execution$name[grepl(regex, input_execution$name)] |> + unique() + name_ <- name[!grepl(":publishStatesProc", name)] + tibble(method_id = method_id, name = name_) +}) +dataset_lookup <- map_dfr(dataset_info$dataset_id, function(dataset_id) { + regex <- paste0(".*[(.](", dataset_id, ")[).].*") + name <- + input_execution$name[grepl(regex, input_execution$name)] |> + unique() + tibble(dataset_id = dataset_id, name = name) +}) + +# parse values +execution_info_ind <- input_execution |> + left_join(method_lookup, by = "name") |> + left_join(dataset_lookup, by = "name") |> + filter(!is.na(method_id)) %>% + rowwise() |> mutate( - id = name, - process_id = stringr::str_extract(id, id_regex, 1L), - dataset_id = stringr::str_extract(id, id_regex, 2L), - normalization_id = gsub("^/", "", stringr::str_extract(id, id_regex, 3L)), - grp4 = gsub("^\\.", "", stringr::str_extract(id, id_regex, 4L)), - grp5 = stringr::str_extract(id, id_regex, 5L), + process_id = gsub(" .*", "", name), submit = strptime(submit, "%Y-%m-%d %H:%M:%S"), - ) %>% - # detect whether entry is a metric or a method 
- mutate( - method_id = ifelse(is.na(grp4), grp5, grp4), - metric_id = ifelse(is.na(grp4), grp4, grp5) - ) %>% - select(-grp4, -grp5) %>% - filter(!is.na(method_id)) %>% - # take last entry for each run - arrange(desc(submit)) %>% - group_by(name) %>% - slice(1) %>% + exit_code = parse_exit(exit), + duration_sec = parse_duration(realtime), + cpu_pct = parse_cpu(`%cpu`), + peak_memory_mb = parse_size(peak_vmem), + disk_read_mb = parse_size(rchar), + disk_write_mb = parse_size(wchar) + ) |> ungroup() -# parse values -execution_info <- trace %>% - filter(process_id == method_id) %>% # only keep method entries - rowwise() %>% - transmute( - dataset_id, - normalization_id, - method_id, +execution_info <- execution_info_ind |> + group_by(dataset_id, method_id) |> + summarise( resources = list(list( - exit_code = parse_exit(exit), - duration_sec = parse_duration(realtime), - cpu_pct = parse_cpu(`%cpu`), - peak_memory_mb = parse_size(peak_vmem), - disk_read_mb = parse_size(rchar), - disk_write_mb = parse_size(wchar) - )) - ) %>% - ungroup() + submit = min(submit), + exit_code = max(exit_code), + duration_sec = sum(duration_sec), + cpu_pct = sum(cpu_pct * duration_sec) / sum(duration_sec), + peak_memory_mb = max(peak_memory_mb), + disk_read_mb = sum(disk_read_mb), + disk_write_mb = sum(disk_write_mb) + )), + .groups = "drop" + ) # combine scores with execution info # fill up missing entries with NAs and 0s @@ -201,25 +223,68 @@ out <- full_join( # --- process metric execution info -------------------------------------------- cat("Processing metric execution info\n") -metric_execution_info <- trace %>% - filter(process_id == metric_id) %>% # only keep metric entries - rowwise() %>% - transmute( - dataset_id, - normalization_id, - method_id, - metric_id, - resources = list(list( - exit_code = parse_exit(exit), - duration_sec = parse_duration(realtime), - cpu_pct = parse_cpu(`%cpu`), - peak_memory_mb = parse_size(peak_vmem), - disk_read_mb = parse_size(rchar), - disk_write_mb 
= parse_size(wchar) - )) - ) %>% + +# manually add component id to metric info +metric_info$component_name <- metric_info$component_name %||% rep(NA_character_, nrow(metric_info)) %|% + gsub(".*/([^/]*)/config\\.vsh\\.yaml", "\\1", metric_info$implementation_url) + +metric_lookup2 <- pmap_dfr(metric_info, function(metric_id, component_name, ...) { + regex <- paste0("(.*:", component_name, ":[^ ]*)") + name <- + input_execution$name[grepl(regex, input_execution$name)] |> + unique() + name_ <- name[!grepl(":publishStatesProc", name)] + tibble(metric_id = metric_id, component_name = component_name, name = name_) +}) +dataset_lookup2 <- map_dfr(dataset_info$dataset_id, function(dataset_id) { + regex <- paste0(".*[(.](", dataset_id, ")[).].*") + name <- + input_execution$name[grepl(regex, input_execution$name)] |> + unique() + tibble(dataset_id = dataset_id, name = name) +}) +method_lookup2 <- map_dfr(method_info$method_id, function(method_id) { + regex <- paste0(".*[(.](", method_id, ")[).].*") + name <- + input_execution$name[grepl(regex, input_execution$name)] |> + unique() + tibble(method_id = method_id, name = name) +}) + +metric_execution_info_ind <- input_execution |> + left_join(metric_lookup2, by = "name") |> + left_join(dataset_lookup2, by = "name") |> + left_join(method_lookup2, by = "name") |> + filter(!is.na(metric_id)) %>% + rowwise() |> + mutate( + process_id = gsub(" .*", "", name), + submit = strptime(submit, "%Y-%m-%d %H:%M:%S"), + exit_code = parse_exit(exit), + duration_sec = parse_duration(realtime), + cpu_pct = parse_cpu(`%cpu`), + peak_memory_mb = parse_size(peak_vmem), + disk_read_mb = parse_size(rchar), + disk_write_mb = parse_size(wchar) + ) |> ungroup() +metric_execution_info <- metric_execution_info_ind |> + group_by(dataset_id, method_id, metric_component_name = component_name) |> + summarise( + resources = list(list( + submit = min(submit), + exit_code = max(exit_code), + duration_sec = sum(duration_sec), + cpu_pct = sum(cpu_pct * 
duration_sec) / sum(duration_sec), + peak_memory_mb = max(peak_memory_mb), + disk_read_mb = sum(disk_read_mb), + disk_write_mb = sum(disk_write_mb) + )), + .groups = "drop" + ) + + # --- write output files ------------------------------------------------------- cat("Writing output files\n") # write output files diff --git a/src/reporting/process_task_results/run_test.sh b/src/reporting/process_task_results/run_test.sh index c46b3627b..63ea27027 100755 --- a/src/reporting/process_task_results/run_test.sh +++ b/src/reporting/process_task_results/run_test.sh @@ -21,7 +21,7 @@ for TASK in "task_perturbation_prediction"; do # # temp sync # aws s3 sync $INPUT_DIR output/temp - echo "Processing $TASK - $DATE" + echo "Processing $TASK - $DATE -> $OUTPUT_DIR" # start the run NXF_VER=23.10.0 nextflow run . \ @@ -40,5 +40,5 @@ for TASK in "task_perturbation_prediction"; do --publish_dir "$OUTPUT_DIR" # cause quarto rerender to index page when in preview mode - touch ../website/results/$TASK/index.qmd + touch ../website/results/$TASK_STRIP_PREFIX/index.qmd done \ No newline at end of file From e0080723a37739a3c8f1e926a844c2223149b0c4 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 22 Nov 2024 14:21:17 +0100 Subject: [PATCH 11/12] fixes --- src/reporting/get_results/script.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/reporting/get_results/script.R b/src/reporting/get_results/script.R index ee06b77da..ac868991a 100644 --- a/src/reporting/get_results/script.R +++ b/src/reporting/get_results/script.R @@ -13,7 +13,7 @@ library(rlang, warn.conflicts = FALSE) # processed_dir <- "resources_test/openproblems/task_results_v3/processed" # raw_dir <- "/home/rcannood/workspace/openproblems-bio/task_perturbation_prediction/resources/results/run_2024-10-31_06-14-14" # processed_dir <- "/home/rcannood/workspace/openproblems-bio/website/results/perturbation_prediction/data" -raw_dir <- 
"/home/rcannood/workspace/openproblems-bio/task_batch_integration/resources/results/run_2024-11-08_19-43-15" +raw_dir <- "/home/rcannood/workspace/openproblems-bio/task_batch_integration/resources/results/run_2024-11-20_12-47-03" processed_dir <- "/home/rcannood/workspace/openproblems-bio/website/results/batch_integration/data" par <- list( @@ -57,6 +57,8 @@ parse_size <- function(x) { out <- if (is.na(x) || x == "-") { NA_integer_ + } else if (grepl("TB", x)) { + as.numeric(gsub(" *TB", "", x)) * 1024 } else if (grepl("GB", x)) { as.numeric(gsub(" *GB", "", x)) * 1024 } else if (grepl("MB", x)) { @@ -137,16 +139,14 @@ scores <- raw_scores %>% .groups = "drop" ) -# read nxf log and process the task id -norm_methods <- "/log_cp10k|/log_cpm|/sqrt_cp10k|/sqrt_cpm|/l1_sqrt|/log_scran_pooling" -id_regex <- paste0("^.*:(.*)_process \\(([^\\.]*)(", norm_methods, ")?(.[^\\.]*)?\\.(.*)\\)$") - -# # reverse engineer metric component names -# metric_info <- metric_info %>% -# mutate(comp_id = gsub(".*/([^/]*)/config\\.vsh\\.yaml", "\\1", implementation_url)) # read execution info -input_execution <- readr::read_tsv(par$input_execution) +# -> only keep the last execution of each process +input_execution <- readr::read_tsv(par$input_execution) |> + group_by(name) |> + mutate(num_runs = n()) |> + slice(which.max(submit)) |> + ungroup() method_lookup <- map_dfr(method_info$method_id, function(method_id) { regex <- paste0("(.*:", method_id, ":[^ ]*)") @@ -157,7 +157,7 @@ method_lookup <- map_dfr(method_info$method_id, function(method_id) { tibble(method_id = method_id, name = name_) }) dataset_lookup <- map_dfr(dataset_info$dataset_id, function(dataset_id) { - regex <- paste0(".*[(.](", dataset_id, ")[).].*") + regex <- paste0(".*[(.](", dataset_id, ")[)./].*") name <- input_execution$name[grepl(regex, input_execution$name)] |> unique() @@ -237,14 +237,14 @@ metric_lookup2 <- pmap_dfr(metric_info, function(metric_id, component_name, ...) 
tibble(metric_id = metric_id, component_name = component_name, name = name_) }) dataset_lookup2 <- map_dfr(dataset_info$dataset_id, function(dataset_id) { - regex <- paste0(".*[(.](", dataset_id, ")[).].*") + regex <- paste0(".*[(.](", dataset_id, ")[)./].*") name <- input_execution$name[grepl(regex, input_execution$name)] |> unique() tibble(dataset_id = dataset_id, name = name) }) method_lookup2 <- map_dfr(method_info$method_id, function(method_id) { - regex <- paste0(".*[(.](", method_id, ")[).].*") + regex <- paste0(".*[(.](", method_id, ")[)./].*") name <- input_execution$name[grepl(regex, input_execution$name)] |> unique() From 51f959f1534d38429c909f6bdddafccbef10832f Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 22 Nov 2024 14:37:48 +0100 Subject: [PATCH 12/12] derp --- src/reporting/get_results/script.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reporting/get_results/script.R b/src/reporting/get_results/script.R index ac868991a..6b4555665 100644 --- a/src/reporting/get_results/script.R +++ b/src/reporting/get_results/script.R @@ -58,7 +58,7 @@ parse_size <- function(x) { if (is.na(x) || x == "-") { NA_integer_ } else if (grepl("TB", x)) { - as.numeric(gsub(" *TB", "", x)) * 1024 + as.numeric(gsub(" *TB", "", x)) * 1024 * 1024 } else if (grepl("GB", x)) { as.numeric(gsub(" *GB", "", x)) * 1024 } else if (grepl("MB", x)) {