diff --git a/.gitignore b/.gitignore index f45322da9..9828c9a59 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ /.quarto/ /_site /_freeze +# local created libraries +/results/*/library.bib # Temporary files generated by quarto preview /site_libs diff --git a/results/_generate_task_pages.py b/results/_generate_task_pages.py index 17b8d8786..5d480a1cf 100755 --- a/results/_generate_task_pages.py +++ b/results/_generate_task_pages.py @@ -10,7 +10,7 @@ for task_info_file in task_info_files: print(f"Reading {task_info_file}", flush=True) task_info = json.loads(task_info_file.read_text()) - task_id = task_info.get("task_id", "task_id_missing") + task_id = task_info.get("task_id", "task_id_missing").replace("task_", "") task_name = task_info.get("task_name", "") task_summary = task_info.get("task_summary", "") @@ -24,6 +24,9 @@ engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/_include/_baseline_descriptions.qmd b/results/_include/_baseline_descriptions.qmd index e568e7789..dc2c68394 100644 --- a/results/_include/_baseline_descriptions.qmd +++ b/results/_include/_baseline_descriptions.qmd @@ -1,13 +1,58 @@ ```{r baseline_descriptions} #| echo: false baselines <- method_info %>% filter(is_baseline) -lines <- pmap_chr(baselines, function(method_name, method_summary, method_description, reference, code_url, ...) { +lines <- pmap_chr(baselines, function(method_name, method_summary, method_description, reference, code_url, implementation_url, ...) { + rest <- list(...) 
+ image <- pluck(rest, "image", .default = NULL) + documentation_url <- pluck(rest, "documentation_url", .default = NULL) + code_version <- pluck(rest, "code_version", .default = NULL) + references_doi <- pluck(rest, "references_doi", .default = NULL) + references_bibtex <- pluck(rest, "references_bibtex", .default = NULL) + + if ("paper_reference" %in% names(rest)) { + ref <- split_cite_fun(rest$paper_reference) + } else { + bibs <- c() + if (!is.null(references_doi) && !is.na(references_doi)) { + bibs <- get_bibtex_from_doi(references_doi) + } + if (!is.null(references_bibtex) && !is.na(references_bibtex)) { + bibs <- c(bibs, references_bibtex) + } + # Write new entries to library.bib + write_library(bibs) + # Get bibtex references + if (!is.null(bibs)) { + ref <- get_bibtex_entries(bibs) + } else { + ref <- "" + } + } + + if (ref != "") ref <- paste0(" ", ref) + summ <- (method_summary %|% "Missing 'method_summary'") %>% str_replace_all("\\n", " ") %>% str_replace_all("\\. *$", "") + method_meta <- tribble( + ~icon, ~value, + "bi bi-book", if (!is.null(documentation_url)) paste0("[Documentation](", documentation_url, ")") else NULL, + "bi bi-globe", if (!is.null(code_url)) paste0("[Repository](", code_url, ")") else NULL, + "bi bi-file-earmark-code", if (!is.null(implementation_url)) paste0("[Source Code](", implementation_url, ")") else NULL, + "bi bi-box-seam", if (!is.null(image)) paste0("[Container](", image, ")") else NULL, + "bi bi-tag", if (!is.null(code_version) && !is.na(code_version)) code_version else NULL, + ) %>% filter(!sapply(value, is.null)) + + meta_list <- paste( + paste0(" ", method_meta$value), + collapse = " · " + ) + strip_margin(glue::glue(" |### {method_name} | - |{summ} + |{meta_list} + | + |{summ} {ref} | |{method_description %|% ''} |" diff --git a/results/_include/_dataset_descriptions.qmd b/results/_include/_dataset_descriptions.qmd index 6c7fe8fca..6e02168ee 100644 --- a/results/_include/_dataset_descriptions.qmd +++ 
b/results/_include/_dataset_descriptions.qmd @@ -1,23 +1,61 @@ ```{r dataset_description} #| echo: false -lines <- pmap_chr(dataset_info, function(dataset_name, dataset_summary, dataset_description, data_reference, ...) { - ref <- split_cite_fun(data_reference) +lines <- pmap_chr(dataset_info, function(dataset_id, dataset_name, dataset_summary, dataset_description, data_reference, ...) { + rest <- list(...) - source_dataset_id <- if ("source_dataset_id" %in% names(rest)) rest$source_dataset_id else NULL - dataset_link <- if (!is.null(source_dataset_id) && dir.exists(paste0("../../datasets/", source_dataset_id))) { - glue::glue("[{dataset_name}](/datasets/{source_dataset_id})") - } else if ("scPerturb" %in% dataset_name) { - dataset_name - } else if (dir.exists(paste0("../../datasets/", rest$dataset_id))) { - glue::glue("[{dataset_name}](/datasets/{rest$dataset_id})") + source_dataset_id <- pluck(rest, "source_dataset_id", .default = NULL) + data_url <- pluck(rest, "data_url", .default = NULL) + date_created <- pluck(rest, "date_created", .default = NULL) + file_size <- pluck(rest, "file_size", .default = NULL) + + if (grepl("^[a-zA-Z0-9]+[0-9]{4}[a-z]+$",data_reference) && !grepl("^@",data_reference)) { + ref <- split_cite_fun(data_reference) } else { - dataset_name + bibs <- convert_to_bibtex(data_reference) + # Write new entries to library.bib + write_library(bibs) + ref <- get_bibtex_entries(bibs) } + if (ref != "") ref <- paste0(" ", ref) + + dataset_link <- if (!is.null(source_dataset_id) && dir.exists(paste0("../../datasets/", source_dataset_id))) { + glue::glue("../../datasets/{source_dataset_id}") + } else if (dir.exists(paste0("../../datasets/", dataset_id))) { + glue::glue("../../datasets/{dataset_id}") + } else { + NULL + } + + # Make file_size human readable + if (!is.null(file_size)) { + file_size_str <- format( + structure(file_size, class = "object_size"), + units = "auto", + standard = "IEC", + digits = 2 + ) + } + + dataset_meta <- tribble( + ~icon, 
~value, + "bi bi-link-45deg", if(!is.null(dataset_link)) paste0("[Source dataset](", dataset_link, ")") else NULL, + "bi bi-box-arrow-up-right", if(!is.null(data_url)) paste0("[Data source](", data_url, ")") else NULL, + "bi bi-calendar", if(!is.null(date_created)) paste0(date_created) else NULL, + "bi bi-file-earmark-text", if(!is.null(file_size)) file_size_str else NULL, + ) %>% filter(!sapply(value, is.null)) + + meta_list <- paste( + paste0(" ", dataset_meta$value), + collapse = " · " + ) + summ <- (dataset_summary %|% "Missing 'dataset_summary'") %>% str_replace_all("\\n", " ") %>% str_replace_all("\\. *$", "") strip_margin(glue::glue(" - |### {dataset_link} + |### {dataset_name} + | + |{meta_list} | |{summ}{ref}. | diff --git a/results/_include/_load_data.qmd b/results/_include/_load_data.qmd index 7349688b4..93cd1d180 100644 --- a/results/_include/_load_data.qmd +++ b/results/_include/_load_data.qmd @@ -8,6 +8,11 @@ library(tidyverse) library(funkyheatmap) library(kableExtra) +# touch library.bib in data dir +if (!file.exists("library.bib")) { + file.create("library.bib") +} + # read task info task_info <- jsonlite::read_json(paste0(params$data_dir, "/task_info.json")) @@ -52,6 +57,89 @@ split_cite_fun <- function(keys) { paste0("[@", paste(refs, collapse = "; @"), "]") } + +convert_to_bibtex <- function(refs) { + + bibtexhandle <- curl::new_handle() + curl::handle_setheaders(bibtexhandle, "accept" = "application/x-bibtex") + + bibs <- map(refs, function(ref) { + if (grepl("^@", ref)) { + # text is already a bibtex, update citation key + ref + } else { + url <- paste0("https://doi.org/", ref) + res <- curl::curl_fetch_memory(url, handle = bibtexhandle) + if (res$status_code != 200) { + cli::cli_alert_warning(paste0("Error processing doi '", ref, "'")) + "" + } else { + rawToChar(res$content) + } + } + }) + return(unlist(bibs)) +} + + +get_bibtex_from_doi <- function(dois) { + if (is.null(dois)) return("") + + dois <- dois[!is.na(dois)] + if (length(dois) == 
0) return("") + + bibtexhandle <- curl::new_handle() + curl::handle_setheaders(bibtexhandle, "accept" = "application/x-bibtex") + + refs <- unlist(dois) + bibs <- map (refs,function(ref) { + url <- paste0("https://doi.org/", ref) + res <- curl::curl_fetch_memory(url, handle = bibtexhandle) + if (res$status_code != 200) { + cli::cli_alert_warning(paste0("Error processing doi '", ref, "'")) + "" + } else { + rawToChar(res$content) + } + }) + + return(unlist(bibs)) +} + +write_library <- function(library) { + # Read existing entries from library.bib + existing_bibs <- if (file.exists("library.bib")) { + readLines("library.bib") + } else { + c() + } + + # Filter out bibs that already exist in library.bib + new_bibs <- library[!library %in% existing_bibs] + + # Write new entries to library.bib + if (length(new_bibs) > 0) { + write(new_bibs, "library.bib", append=TRUE) + } +} + +get_bibtex_entries <- function(bibs) { + ref <-sapply(bibs, function(bib) { + matches <- regmatches(bib, regexpr("@.*?\\{(.*?),", bib)) + if (length(matches) > 0) { + sub("@.*?\\{(.*?),", "\\1", matches) + } else { + NA + } + }) + ref <- na.omit(ref) + ref_string <- paste0("[@", ref, "]", collapse = " ") + return(ref_string) +} + + + + aggregate_scores <- function(scaled_score) { mean(pmin(1, pmax(0, scaled_score)) %|% 0) } diff --git a/results/_include/_method_descriptions.qmd b/results/_include/_method_descriptions.qmd index 1dbeec39f..03f2ace2e 100644 --- a/results/_include/_method_descriptions.qmd +++ b/results/_include/_method_descriptions.qmd @@ -1,31 +1,57 @@ ```{r method_description} #| echo: false # show each method just once -lines <- pmap_chr(method_info %>% filter(!is_baseline), function(method_name, method_summary, method_description, paper_reference, code_url, code_version, ...) 
{ - - links <- c() - if (!is.na(code_url)) { - links <- c(links, glue::glue(" [Docs]({code_url})")) - } - version_label <- - if (!is.na(code_version) && code_version != "missing-version") { - glue::glue(" Software version {code_version}.") - } else { - "" +lines <- pmap_chr(method_info %>% filter(!is_baseline), function(method_name, method_summary, method_description, code_url, implementation_url, ...) { + rest <- list(...) + image <- pluck(rest, "image", .default = NULL) + documentation_url <- pluck(rest, "documentation_url", .default = NULL) + code_version <- pluck(rest, "code_version", .default = NULL) + references_doi <- pluck(rest, "references_doi", .default = NULL) + references_bibtex <- pluck(rest, "references_bibtex", .default = NULL) + if ("paper_reference" %in% names(rest)) { + ref <- split_cite_fun(rest$paper_reference) + } else { + bibs <- c() + if (!is.null(references_doi) && !is.na(references_doi)) { + bibs <- get_bibtex_from_doi(references_doi) } - ref <- split_cite_fun(paper_reference) - if (ref != "") ref <- paste0(" ", ref) - links_label <- - if (length(links) > 0) { - glue::glue(" Links: {paste(links, collapse = ', ')}.") + if (!is.null(references_bibtex) && !is.na(references_bibtex)) { + bibs <- c(bibs, references_bibtex) + } + # Write new entries to library.bib + write_library(bibs) + # Get bibtex references + if (!is.null(bibs)) { + ref <- get_bibtex_entries(bibs) } else { - "" + ref <- "" } + } + + if (ref != "") ref <- paste0(" ", ref) + summ <- (method_summary %|% "Missing 'method_summary'") %>% str_replace_all("\\n", " ") %>% str_replace_all("\\. 
*$", "") + + method_meta <- tribble( + ~icon, ~value, + "bi bi-book", if (!is.null(documentation_url)) paste0("[Documentation](", documentation_url, ")") else NULL, + "bi bi-globe", if (!is.null(code_url)) paste0("[Repository](", code_url, ")") else NULL, + "bi bi-file-earmark-code", if (!is.null(implementation_url)) paste0("[Source Code](", implementation_url, ")") else NULL, + "bi bi-box-seam", if (!is.null(image)) paste0("[Container](", image, ")") else NULL, + "bi bi-tag", if (!is.null(code_version) && !is.na(code_version)) code_version else NULL, + ) %>% filter(!sapply(value, is.null)) + + meta_list <- paste( + paste0(" ", method_meta$value), + collapse = " · " + ) + strip_margin(glue::glue(" |### {method_name} | - |{summ}{ref}.{version_label}{links_label} + |{meta_list} + | + |{summ}{ref} | |{method_description %|% ''} |" diff --git a/results/_include/_metric_descriptions.qmd b/results/_include/_metric_descriptions.qmd index 9b9ac9d37..d36a2dbd6 100644 --- a/results/_include/_metric_descriptions.qmd +++ b/results/_include/_metric_descriptions.qmd @@ -1,12 +1,53 @@ -```{r} +```{r metric_descriptions} #| echo: false -lines <- pmap_chr(metric_info, function(metric_name, metric_summary, metric_description, paper_reference, ...) { - ref <- split_cite_fun(paper_reference) +lines <- pmap_chr(metric_info, function(metric_name, metric_summary, metric_description, implementation_url, ...) { + rest <- list(...) 
+ image <- pluck(rest, "image", .default = NULL) + code_version <- pluck(rest, "code_version", .default = NULL) + references_doi <- pluck(rest, "references_doi", .default = NULL) + references_bibtex <- pluck(rest, "references_bibtex", .default = NULL) + if ("paper_reference" %in% names(rest)) { + ref <- split_cite_fun(rest$paper_reference) + } else { + bibs <- c() + if (!is.null(references_doi) && !is.na(references_doi)) { + bibs <- get_bibtex_from_doi(references_doi) + } + if (!is.null(references_bibtex) && !is.na(references_bibtex)) { + bibs <- c(bibs, references_bibtex) + } + # Write new entries to library.bib + write_library(bibs) + + # Get bibtex references + if (!is.null(bibs)) { + ref <- get_bibtex_entries(bibs) + } else { + ref <- "" + } + } + if (ref != "") ref <- paste0(" ", ref) + summ <- (metric_summary %|% "Missing 'metric_summary'") %>% str_replace_all("\\n", " ") %>% str_replace_all("\\. *$", "") + + metric_meta <- tribble( + ~icon, ~value, + "bi bi-file-earmark-code", if (!is.null(implementation_url)) paste0("[Source code](", implementation_url, ")") else NULL, + "bi bi-box-seam", if (!is.null(image)) paste0("[Container](", image, ")") else NULL, + "bi bi-tag", if (!is.null(code_version) && !is.na(code_version)) code_version else NULL, + ) %>% filter(!sapply(value, is.null)) + + meta_list <- paste( + paste0(" ", metric_meta$value), + collapse = " · " + ) + strip_margin(glue::glue(" |### {metric_name} | + |{meta_list} + | |{summ}{ref}. 
| |{metric_description %|% ''} diff --git a/results/_include/_results_table.qmd b/results/_include/_results_table.qmd index 1ed2936dc..c9bba87c5 100644 --- a/results/_include/_results_table.qmd +++ b/results/_include/_results_table.qmd @@ -15,7 +15,7 @@ mean_na_zero <- function(x) { } res_tib1 <- res_tib0 %>% - group_by(method_id, method_name, paper_reference, code_url) %>% + group_by(method_id, method_name, code_url) %>% summarise_if(is.numeric, mean_na_zero) %>% ungroup() %>% mutate( diff --git a/results/_include/_task_template.qmd b/results/_include/_task_template.qmd index 29664b4f2..975d6bb10 100644 --- a/results/_include/_task_template.qmd +++ b/results/_include/_task_template.qmd @@ -18,6 +18,22 @@ cat( ) ``` +```{r details, results="asis", echo=FALSE} +#| column: margin + +cat("#### Info\n\n") + +details <- tribble( + ~icon, ~value, + "bi bi-github", if (!is.null(task_info$repo)) paste0("[Repository](",task_info$repo,")") else NULL, + "octicon--issue-opened-17", if (!is.null(task_info$issue_tracker)) paste0("[Issues](",task_info$issue_tracker,")") else NULL, + "bi bi-tag", if (!is.null(task_info$version)) paste0(task_info$version) else NULL, + "fa-solid fa-scale-balanced", if (!is.null(task_info$license)) paste0(task_info$license) else NULL + ) %>% filter(!sapply(value, is.null)) +cat(paste(paste0(" ", details$value), " \n")) + +``` + [Task info](data/task_info.json){class="btn btn-secondary"} [Method info](data/method_info.json){class="btn btn-secondary"} diff --git a/results/_include/task_template.css b/results/_include/task_template.css index 4f5163ba1..107420bac 100644 --- a/results/_include/task_template.css +++ b/results/_include/task_template.css @@ -58,3 +58,21 @@ div.dtsp-panesContainer div.dataTables_wrapper div.dataTables_scrollBody { section#details details img.img-fluid { filter: invert(var(--invert-colors)); } + + +/* custom icons */ + +.octicon--issue-opened-17 { + display: inline-block; + width: 1em; + height: 1em; + --svg: 
url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 17 17'%3E%3Cpath fill='%23000' d='M8 9.5a1.5 1.5 0 1 0 0-3a1.5 1.5 0 0 0 0 3'/%3E%3Cpath fill='%23000' d='M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0M1.5 8a6.5 6.5 0 1 0 13 0a6.5 6.5 0 0 0-13 0'/%3E%3C/svg%3E"); + background-color: currentColor; + -webkit-mask-image: var(--svg); + mask-image: var(--svg); + -webkit-mask-repeat: no-repeat; + mask-repeat: no-repeat; + -webkit-mask-size: 100% 100%; + mask-size: 100% 100%; + vertical-align: -.125em; + } \ No newline at end of file diff --git a/results/_transform_results.R b/results/_transform_results.R new file mode 100644 index 000000000..2243b8a4f --- /dev/null +++ b/results/_transform_results.R @@ -0,0 +1,112 @@ +repositories <- c( + "openproblems-bio/task_denoising", + "openproblems-bio/task_dimensionality_reduction", + "openproblems-bio/task_batch_integration", + "openproblems-bio/task_cell_cell_communication", + "openproblems-bio/task_label_projection", + "openproblems-bio/task_spatial_decomposition" +) + +cache_repository <- function(repo) { + # get the repository name + repo_dir <- file.path(Sys.getenv("HOME"), ".cache", "openproblems", "repositories", repo) + + # clone the repository + if (!dir.exists(paste0(repo_dir, "/.git"))) { + zzz <- processx::run("git", c("clone", paste0("https://github.com/", repo), repo_dir)) + } + + # fetch the latest changes + zzz <- processx::run("git", c("fetch", "--all"), wd = repo_dir) + + # reset the repository to the latest commit + if (repo == "openproblems-bio/task_spatial_decomposition") { + zzz <- processx::run("git", c("pull", "origin", "add-missing-authors"), wd = repo_dir) + } else if (repo == "openproblems-bio/task_dimensionality_reduction") { + zzz <- processx::run("git", c("pull", "origin", "add_author"), wd = repo_dir) + } else { + zzz <- processx::run("git", c("pull", "origin", "main"), wd = repo_dir) + } + + # return the path to the repository + return(repo_dir) +} + +find_task_info <- 
function(repo_dir) { + # find all authors in the repository + # task_info <- list.files(repo_dir, pattern = "task_info.yaml", full.names = TRUE, recursive = TRUE) + # task_info <- task_info[grep("/api/task_info.yaml", task_info)] + + # if (length(task_info) > 0) { + # return(task_info) + # } + + viash_yaml <- list.files(repo_dir, pattern = "_viash.yaml", full.names = TRUE, recursive = TRUE) + + return(viash_yaml) +} + + +write_json_file <- function(json, file) { + jsonlite::write_json( + json, + file, + auto_unbox = TRUE, + pretty = TRUE + ) +} + +for (repo in repositories) { + repo_dir <- cache_repository(repo) + task_info_files <- find_task_info(repo_dir) + + # Update task_info.json + for (task_info_file in task_info_files) { + task_info <- suppressWarnings(yaml::read_yaml(task_info_file)) + task_name <- gsub("task_", "", task_info$name) + + # Read task_info.json + if (task_name == "batch_integration") { + dirs <- list.dirs("results") + dirs <- grep("batch_integration", dirs, value = TRUE) + dirs <- grep("data", dirs, value = TRUE) + for (dir in dirs) { + task_info_json <- jsonlite::read_json(file.path( dir, "task_info.json")) + task_info_json[["authors"]] <- task_info$authors + task_info_json$repo <- paste0("https://github.com/",task_info_json["repo"],"/tree/v1.0.0/openproblems/tasks/_batch_integration/",gsub("results/","",dirname(dir))) + task_info_json[["version"]] <- "v1.0.0" + task_info_json[["license"]] <- "MIT" + write_json_file( + task_info_json, + file.path( dir, "task_info.json") + ) + } + } else if (task_name == "cell_cell_communication") { + dirs <- list.dirs("results") + dirs <- grep("cell_cell_communication", dirs, value = TRUE) + dirs <- grep("data", dirs, value = TRUE) + for (dir in dirs) { + task_info_json <- jsonlite::read_json(file.path( dir, "task_info.json")) + task_info_json[["authors"]] <- task_info$authors + task_info_json$repo <- 
paste0("https://github.com/",task_info_json["repo"],"/tree/v1.0.0/openproblems/tasks/_cell_cell_communication/",gsub("results/","",dirname(dir))) + task_info_json[["version"]] <- "v1.0.0" + task_info_json[["license"]] <- "MIT" + write_json_file( + task_info_json, + file.path( dir, "task_info.json") + ) + } + } else { + task_info_json <- jsonlite::read_json(file.path("results", task_name, "data", "task_info.json")) + task_info_json[["authors"]] <- task_info$authors + task_info_json$repo <- paste0("https://github.com/",task_info_json["repo"],"/tree/v1.0.0/openproblems/tasks/",task_name) + task_info_json[["version"]] <- "v1.0.0" + task_info_json[["license"]] <- "MIT" + write_json_file( + task_info_json, + file.path("results", task_name, "data", "task_info.json") + ) + } + } + +} \ No newline at end of file diff --git a/results/_transform_results.py b/results/_transform_results.py new file mode 100644 index 000000000..f4df3fcc7 --- /dev/null +++ b/results/_transform_results.py @@ -0,0 +1,67 @@ +import json +from pathlib import Path + +def read_json_file(file_path): + """ + Reads a JSON file and returns the data. + + Parameters: + file_path (str or Path): The path to the JSON file. + + Returns: + dict: The data from the JSON file. 
+ """ + path = Path(file_path) + if not path.is_file(): + raise FileNotFoundError(f"No such file: '{file_path}'") + + with open(path, 'r', encoding='utf-8') as file: + data = json.load(file) + + return data + +def write_json_file(json_obj, file): + + path = Path(file) + + with open(path, 'w', encoding='utf-8') as f: + json.dump(json_obj, f, indent=2) + +dirs = [dir for dir in Path("results").iterdir() if dir.is_dir()] + +exclude_dirs = ["_include", "perturbation_prediction", "spatially_variable_genes", "foundation_models"] + +for dir in dirs: + if dir.name in exclude_dirs: + continue + + print(f"Processing directory: {dir}") + + data_path = dir.joinpath("data") + + # Transform the method_info.json file + method_info_path = data_path.joinpath("method_info.json") + print("Reading method json", flush=True) + method_info = read_json_file(method_info_path) + print("processing method json", flush=True) + for info in method_info: + info["code_url"] = info["code_url"] + "/tree/v1.0.0/openproblems/tasks" + info["implementation_url"] = info["implementation_url"].replace("main", "v1.0.0") + info["code_version"] = "v1.0.0" + info["image"] = "https://github.com/openproblems-bio/openproblems/pkgs/container/" + info["image"] + + print("Writing method json", flush=True) + write_json_file(method_info, method_info_path) + + print("Reading metric json", flush=True) + metric_info_path = data_path.joinpath("metric_info.json") + metric_info = read_json_file(metric_info_path) + print("processing metric json", flush=True) + for info in metric_info: + info["implementation_url"] = info["implementation_url"].replace("main", "v1.0.0") + info["code_version"] = "v1.0.0" + info["image"] = "https://github.com/openproblems-bio/openproblems/pkgs/container/" + info["image"] + + print("Writing metric json", flush=True) + write_json_file(metric_info, metric_info_path) + diff --git a/results/batch_integration_embed/data/method_info.json b/results/batch_integration_embed/data/method_info.json index 
ac2307464..344519a08 100644 --- a/results/batch_integration_embed/data/method_info.json +++ b/results/batch_integration_embed/data/method_info.json @@ -1,602 +1,602 @@ [ - { - "method_name": "Random Integration by Batch", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each batch label", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "batch_random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "Random Embedding by Celltype", - "method_summary": "Cells are embedded as a one-hot encoding of celltype labels", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_embedding", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" - }, - { - "method_name": "Random Embedding by Celltype (with jitter)", - "method_summary": "Cells are embedded as a one-hot encoding of celltype labels, with a small amount of random noise added to the embedding", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": 
"https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_embedding_jitter", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" - }, - { - "method_name": "Random Graph by Celltype", - "method_summary": "Cells are embedded as a one-hot encoding of celltype labels. A graph is then built on this embedding", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_graph", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py" - }, - { - "method_name": "Random Integration by Celltype", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each celltype label", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "Combat (full/scaled)", - 
"method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (full/unscaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (hvg/scaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (hvg/unscaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "FastMNN embed (full/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN embed (full/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN embed (hvg/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. 
The corrected target batch is then merged with the reference. The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN embed (hvg/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "Harmony (full/scaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. 
Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Harmony (full/unscaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Harmony (hvg/scaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Harmony (hvg/unscaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. 
Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Liger (full/unscaled)", - "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. Joint clustering is performed and factor loadings are normalised.", - "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", - "paper_reference": "welch2019single", - "paper_year": 2019, - "code_url": "https://github.com/welch-lab/liger", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "liger_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" - }, - { - "method_name": "Liger (hvg/unscaled)", - "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. 
Joint clustering is performed and factor loadings are normalised.", - "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", - "paper_reference": "welch2019single", - "paper_year": 2019, - "code_url": "https://github.com/welch-lab/liger", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "liger_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" - }, - { - "method_name": "MNN (full/scaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (full/unscaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (hvg/scaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (hvg/unscaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "No Integration", - "method_summary": "Cells are embedded by PCA on the unintegrated data. A graph is built on this PCA embedding.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "no_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "No Integration by Batch", - "method_summary": "Cells are embedded by computing PCA independently on each batch", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "no_integration_batch", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" - }, - { - "method_name": "Random Integration", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "SCALEX (full)", - "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", - "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", - "paper_reference": "xiong2021online", - "paper_year": 2022, - "code_url": "https://github.com/jsxlei/SCALEX", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", - "method_id": "scalex_full", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/methods/scalex.py" - }, - { - "method_name": "SCALEX (hvg)", - "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", - "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", - "paper_reference": "xiong2021online", - "paper_year": 2022, - "code_url": "https://github.com/jsxlei/SCALEX", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", - "method_id": "scalex_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/methods/scalex.py" - }, - { - "method_name": "Scanorama (full/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama (full/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama (hvg/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama (hvg/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (full/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (full/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (hvg/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (hvg/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "scANVI (full/unscaled)", - "method_summary": "ScanVI is an extension of scVI but instead using a Bayesian semi-supervised approach for more principled cell annotation.", - "paper_name": "Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", - "paper_reference": "xu2021probabilistic", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanvi_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" - }, - { - "method_name": "scANVI (hvg/unscaled)", - "method_summary": "ScanVI is an extension of scVI but instead using a Bayesian semi-supervised approach for more principled cell annotation.", - "paper_name": "Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", - "paper_reference": "xu2021probabilistic", - "paper_year": 2021, - "code_url": 
"https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanvi_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" - }, - { - "method_name": "scVI (full/unscaled)", - "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", - "paper_name": "Deep generative modeling for single-cell transcriptomics", - "paper_reference": "lopez2018deep", - "paper_year": 2018, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scvi_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" - }, - { - "method_name": "scVI (hvg/unscaled)", - "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", - "paper_name": "Deep generative modeling for single-cell transcriptomics", - "paper_reference": "lopez2018deep", - "paper_year": 2018, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scvi_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" - } + { + "method_name": "Random Integration by Batch", + "method_summary": "Feature values, embedding coordinates, and 
graph connectivity are all randomly permuted within each batch label", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "batch_random_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "Random Embedding by Celltype", + "method_summary": "Cells are embedded as a one-hot encoding of celltype labels", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_embedding", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" + }, + { + "method_name": "Random Embedding by Celltype (with jitter)", + "method_summary": "Cells are embedded as a one-hot encoding of celltype labels, with a small amount of random noise added to the embedding", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": 
"https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_embedding_jitter", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" + }, + { + "method_name": "Random Graph by Celltype", + "method_summary": "Cells are embedded as a one-hot encoding of celltype labels. A graph is then built on this embedding", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_graph", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py" + }, + { + "method_name": "Random Integration by Celltype", + "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each celltype label", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_integration", + 
"implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "Combat (full/scaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (full/unscaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (hvg/scaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (hvg/unscaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "FastMNN embed (full/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN embed (full/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN embed (hvg/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN embed (hvg/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "Harmony (full/scaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Harmony (full/unscaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Harmony (hvg/scaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Harmony (hvg/unscaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Liger (full/unscaled)", + "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. 
Joint clustering is performed and factor loadings are normalised.", + "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", + "paper_reference": "welch2019single", + "paper_year": 2019, + "code_url": "https://github.com/welch-lab/liger/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "liger_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" + }, + { + "method_name": "Liger (hvg/unscaled)", + "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. Joint clustering is performed and factor loadings are normalised.", + "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", + "paper_reference": "welch2019single", + "paper_year": 2019, + "code_url": "https://github.com/welch-lab/liger/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "liger_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" + }, + { + "method_name": "MNN (full/scaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (full/unscaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (hvg/scaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (hvg/unscaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "No Integration", + "method_summary": "Cells are embedded by PCA on the unintegrated data. 
A graph is built on this PCA embedding.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "no_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "No Integration by Batch", + "method_summary": "Cells are embedded by computing PCA independently on each batch", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "no_integration_batch", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" + }, + { + "method_name": "Random Integration", + "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": 
"v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "SCALEX (full)", + "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", + "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", + "paper_reference": "xiong2021online", + "paper_year": 2022, + "code_url": "https://github.com/jsxlei/SCALEX/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", + "method_id": "scalex_full", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/methods/scalex.py" + }, + { + "method_name": "SCALEX (hvg)", + "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", + "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", + "paper_reference": "xiong2021online", + "paper_year": 2022, + "code_url": "https://github.com/jsxlei/SCALEX/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", + "method_id": "scalex_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/methods/scalex.py" + }, + { + "method_name": "Scanorama (full/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama (full/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama (hvg/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama (hvg/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (full/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (full/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (hvg/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (hvg/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "scANVI (full/unscaled)", + "method_summary": "ScanVI is an extension of scVI but instead using a Bayesian semi-supervised approach for more principled cell annotation.", + "paper_name": "Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", + "paper_reference": "xu2021probabilistic", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanvi_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" + }, + { + "method_name": "scANVI (hvg/unscaled)", + "method_summary": "ScanVI is an extension of scVI but instead using a Bayesian semi-supervised approach for more principled cell annotation.", + "paper_name": 
"Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", + "paper_reference": "xu2021probabilistic", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanvi_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" + }, + { + "method_name": "scVI (full/unscaled)", + "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", + "paper_name": "Deep generative modeling for single-cell transcriptomics", + "paper_reference": "lopez2018deep", + "paper_year": 2018, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scvi_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" + }, + { + "method_name": "scVI (hvg/unscaled)", + "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", + "paper_name": "Deep generative modeling for single-cell transcriptomics", + "paper_reference": "lopez2018deep", + "paper_year": 2018, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": 
"https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scvi_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" + } ] \ No newline at end of file diff --git a/results/batch_integration_embed/data/metric_info.json b/results/batch_integration_embed/data/metric_info.json index 69ffcb057..ce8874aea 100644 --- a/results/batch_integration_embed/data/metric_info.json +++ b/results/batch_integration_embed/data/metric_info.json @@ -1,112 +1,122 @@ [ - { - "metric_name": "ARI", - "metric_summary": "ARI (Adjusted Rand Index) compares the overlap of two clusterings. It considers both correct clustering overlaps while also counting correct disagreements between two clustering.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "ari", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/ari.py" - }, - { - "metric_name": "Cell Cycle Score", - "metric_summary": "The cell-cycle conservation score evaluates how well the cell-cycle effect can be captured before and after integration.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "cc_score", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py" - }, - { - 
"metric_name": "Graph connectivity", - "metric_summary": "The graph connectivity metric assesses whether the kNN graph representation, G, of the integrated data connects all cells with the same cell identity label.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "graph_connectivity", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/graph_connectivity.py" - }, - { - "metric_name": "Isolated label F1", - "metric_summary": "Isolated cell labels are identified as the labels present in the least number of batches in the integration task. The score evaluates how well these isolated labels separate from other cell identities based on clustering.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "isolated_labels_f1", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_f1.py" - }, - { - "metric_name": "Isolated label Silhouette", - "metric_summary": "This score evaluates the compactness for the label(s) that is(are) shared by fewest batches. 
It indicates how well rare cell types can be preserved after integration.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "isolated_labels_sil", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py" - }, - { - "metric_name": "kBET", - "metric_summary": "kBET determines whether the label composition of a k nearest neighborhood of a cell is similar to the expected (global) label composition. The test is repeated for a random subset of cells, and the results are summarized as a rejection rate over all tested neighborhoods.", - "paper_reference": "bttner2018test", - "maximize": true, - "image": "openproblems-r-extras", - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "kBET", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py" - }, - { - "metric_name": "NMI", - "metric_summary": "NMI compares the overlap of two clusterings. We used NMI to compare the cell-type labels with Louvain clusters computed on the integrated dataset.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "nmi", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/nmi.py" - }, - { - "metric_name": "PC Regression", - "metric_summary": "This compares the explained variance by batch before and after integration. 
It returns a score between 0 and 1 (scaled=True) with 0 if the variance contribution hasn\u2019t changed. The larger the score, the more different the variance contributions are before and after integration.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "pcr", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py" - }, - { - "metric_name": "Silhouette", - "metric_summary": "The absolute silhouette with is computed on cell identity labels, measuring their compactness.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "silhouette", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py" - }, - { - "metric_name": "Batch ASW", - "metric_summary": "The absolute silhouette width is computed over batch labels per cell. As 0 then indicates that batches are well mixed and any deviation from 0 indicates a batch effect, we use the 1-abs(ASW) to map the score to the scale [0;1].", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_embed", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "silhouette_batch", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py" - } + { + "metric_name": "ARI", + "metric_summary": "ARI (Adjusted Rand Index) compares the overlap of two clusterings. 
It considers both correct clustering overlaps while also counting correct disagreements between two clusterings.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "ari", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/ari.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Cell Cycle Score", + "metric_summary": "The cell-cycle conservation score evaluates how well the cell-cycle effect can be captured before and after integration.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "cc_score", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Graph connectivity", + "metric_summary": "The graph connectivity metric assesses whether the kNN graph representation, G, of the integrated data connects all cells with the same cell identity label.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "graph_connectivity", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/graph_connectivity.py", + 
"code_version": "v1.0.0" + }, + { + "metric_name": "Isolated label F1", + "metric_summary": "Isolated cell labels are identified as the labels present in the least number of batches in the integration task. The score evaluates how well these isolated labels separate from other cell identities based on clustering.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "isolated_labels_f1", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_f1.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Isolated label Silhouette", + "metric_summary": "This score evaluates the compactness for the label(s) that is(are) shared by fewest batches. It indicates how well rare cell types can be preserved after integration.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "isolated_labels_sil", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "kBET", + "metric_summary": "kBET determines whether the label composition of a k nearest neighborhood of a cell is similar to the expected (global) label composition. 
The test is repeated for a random subset of cells, and the results are summarized as a rejection rate over all tested neighborhoods.", + "paper_reference": "bttner2018test", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "kBET", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "NMI", + "metric_summary": "NMI compares the overlap of two clusterings. We used NMI to compare the cell-type labels with Louvain clusters computed on the integrated dataset.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "nmi", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/nmi.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "PC Regression", + "metric_summary": "This compares the explained variance by batch before and after integration. It returns a score between 0 and 1 (scaled=True) with 0 if the variance contribution hasn\u2019t changed. 
The larger the score, the more different the variance contributions are before and after integration.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "pcr", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Silhouette", + "metric_summary": "The absolute silhouette width is computed on cell identity labels, measuring their compactness.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "silhouette", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Batch ASW", + "metric_summary": "The absolute silhouette width is computed over batch labels per cell. 
As 0 then indicates that batches are well mixed and any deviation from 0 indicates a batch effect, we use the 1-abs(ASW) to map the score to the scale [0;1].", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_embed", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "silhouette_batch", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/batch_integration_embed/data/task_info.json b/results/batch_integration_embed/data/task_info.json index 34176f5c6..05b16889f 100644 --- a/results/batch_integration_embed/data/task_info.json +++ b/results/batch_integration_embed/data/task_info.json @@ -1,8 +1,68 @@ { - "task_id": "batch_integration_embed", - "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", - "task_name": "Batch integration embed", - "task_summary": "Removing batch effects while preserving biological variation (embedding output)", - "task_description": "\nThis is a sub-task of the overall batch integration task. Batch (or data) integration\nintegrates datasets across batches that arise from various biological and technical\nsources. Methods that integrate batches typically have three different types of output:\na corrected feature matrix, a joint embedding across batches, and/or an integrated\ncell-cell similarity graph (e.g., a kNN graph). This sub-task focuses on all methods\nthat can output joint embeddings, and includes methods that canonically output corrected\nfeature matrices with subsequent postprocessing to generate a joint embedding. 
Other\nsub-tasks for batch integration can be found for:\n\n* [graphs](../batch_integration_graph/), and\n* [corrected features](../batch_integration_feature/)\n\nThis sub-task was taken from a\n[benchmarking study of data integration\nmethods](https://openproblems.bio/bibliography#luecken2022benchmarking).\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "batch_integration_embed", + "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", + "task_name": "Batch integration embed", + "task_summary": "Removing batch effects while preserving biological variation (embedding output)", + "task_description": "\nThis is a sub-task of the overall batch integration task. Batch (or data) integration\nintegrates datasets across batches that arise from various biological and technical\nsources. Methods that integrate batches typically have three different types of output:\na corrected feature matrix, a joint embedding across batches, and/or an integrated\ncell-cell similarity graph (e.g., a kNN graph). This sub-task focuses on all methods\nthat can output joint embeddings, and includes methods that canonically output corrected\nfeature matrices with subsequent postprocessing to generate a joint embedding. 
Other\nsub-tasks for batch integration can be found for:\n\n* [graphs](../batch_integration_graph/), and\n* [corrected features](../batch_integration_feature/)\n\nThis sub-task was taken from a\n[benchmarking study of data integration\nmethods](https://openproblems.bio/bibliography#luecken2022benchmarking).\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed", + "authors": [ + { + "name": "Michaela Mueller", + "roles": ["maintainer", "author"], + "info": { + "github": "mumichae", + "orcid": "0000-0002-1401-1785" + } + }, + { + "name": "Malte Luecken", + "roles": "author", + "info": { + "github": "LuckyMD", + "orcid": "0000-0001-7464-7921" + } + }, + { + "name": "Daniel Strobl", + "roles": "author", + "info": { + "github": "danielStrobl", + "orcid": "0000-0002-5516-7057" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "contributor", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + }, + { + "name": "Scott Gigante", + "roles": "contributor", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Kai Waldrant", + "roles": "contributor", + "info": { + "github": "KaiWaldrant", + "orcid": "0009-0003-8555-1361" + } + }, + { + "name": "Martin Kim", + "roles": "contributor", + "info": { + "github": "martinkim0", + "orcid": "0009-0003-8555-1361" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/batch_integration_embed/index.qmd b/results/batch_integration_embed/index.qmd index 3dbb3d4fa..cde052cd6 100644 --- a/results/batch_integration_embed/index.qmd +++ b/results/batch_integration_embed/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/batch_integration_feature/data/method_info.json 
b/results/batch_integration_feature/data/method_info.json index 809850a86..90b037cbf 100644 --- a/results/batch_integration_feature/data/method_info.json +++ b/results/batch_integration_feature/data/method_info.json @@ -1,377 +1,377 @@ [ - { - "method_name": "Random Integration by Batch", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each batch label", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "batch_random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "Random Embedding by Celltype", - "method_summary": "Cells are embedded as a one-hot encoding of celltype labels", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_embedding", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" - }, - { - "method_name": "Random Graph by Celltype", - "method_summary": "Cells are embedded as a one-hot encoding of celltype labels. 
A graph is then built on this embedding", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_graph", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py" - }, - { - "method_name": "Random Integration by Celltype", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each celltype label", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "Combat (full/scaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (full/unscaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (hvg/scaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (hvg/unscaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "FastMNN feature (full/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN feature (full/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN feature (hvg/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN feature (hvg/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "MNN (full/scaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (full/unscaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (hvg/scaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (hvg/unscaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "No Integration", - "method_summary": "Cells are embedded by PCA on the unintegrated data. 
A graph is built on this PCA embedding.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "no_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "No Integration by Batch", - "method_summary": "Cells are embedded by computing PCA independently on each batch", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "no_integration_batch", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" - }, - { - "method_name": "Random Integration", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - 
}, - { - "method_name": "SCALEX (full)", - "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", - "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", - "paper_reference": "xiong2021online", - "paper_year": 2022, - "code_url": "https://github.com/jsxlei/SCALEX", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", - "method_id": "scalex_full", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/methods/scalex.py" - }, - { - "method_name": "SCALEX (hvg)", - "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", - "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", - "paper_reference": "xiong2021online", - "paper_year": 2022, - "code_url": "https://github.com/jsxlei/SCALEX", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", - "method_id": "scalex_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/methods/scalex.py" - }, - { - "method_name": "Scanorama gene output (full/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (full/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (hvg/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (hvg/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - } + { + "method_name": "Random Integration by Batch", + "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each batch label", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "batch_random_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "Random Embedding by Celltype", + "method_summary": "Cells are embedded as a one-hot encoding of celltype labels", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": 
"https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_embedding", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" + }, + { + "method_name": "Random Graph by Celltype", + "method_summary": "Cells are embedded as a one-hot encoding of celltype labels. A graph is then built on this embedding", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_graph", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py" + }, + { + "method_name": "Random Integration by Celltype", + "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each celltype label", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_integration", + 
"implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "Combat (full/scaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (full/unscaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (hvg/scaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (hvg/unscaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "FastMNN feature (full/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN feature (full/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN feature (hvg/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN feature (hvg/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "MNN (full/scaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (full/unscaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (hvg/scaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (hvg/unscaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "No Integration", + "method_summary": "Cells are embedded by PCA on the unintegrated data. A graph is built on this PCA embedding.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "no_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "No Integration by Batch", + "method_summary": "Cells are embedded by computing PCA independently on each batch", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": 
"https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "no_integration_batch", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py" + }, + { + "method_name": "Random Integration", + "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "SCALEX (full)", + "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", + "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", + "paper_reference": "xiong2021online", + "paper_year": 2022, + "code_url": "https://github.com/jsxlei/SCALEX/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", + "method_id": "scalex_full", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/methods/scalex.py" + }, + { + "method_name": "SCALEX (hvg)", + "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", + "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", + "paper_reference": "xiong2021online", + "paper_year": 2022, + "code_url": "https://github.com/jsxlei/SCALEX/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "7455e35cbee06267e6a5f977e020a816f98168f5", + "method_id": "scalex_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/methods/scalex.py" + }, + { + "method_name": "Scanorama gene output (full/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (full/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (hvg/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (hvg/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + } ] \ No newline at end of file diff --git a/results/batch_integration_feature/data/metric_info.json b/results/batch_integration_feature/data/metric_info.json index 
b4edfbbad..353e0183c 100644 --- a/results/batch_integration_feature/data/metric_info.json +++ b/results/batch_integration_feature/data/metric_info.json @@ -1,123 +1,134 @@ [ - { - "metric_name": "ARI", - "metric_summary": "ARI (Adjusted Rand Index) compares the overlap of two clusterings. It considers both correct clustering overlaps while also counting correct disagreements between two clustering.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "ari", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/ari.py" - }, - { - "metric_name": "Cell Cycle Score", - "metric_summary": "The cell-cycle conservation score evaluates how well the cell-cycle effect can be captured before and after integration.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "cc_score", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/cc_score.py" - }, - { - "metric_name": "Graph connectivity", - "metric_summary": "The graph connectivity metric assesses whether the kNN graph representation, G, of the integrated data connects all cells with the same cell identity label.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "graph_connectivity", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/graph_connectivity.py" - }, - { - "metric_name": "HVG conservation", - "metric_summary": "This metric computes the average percentage of overlapping highly variable genes per batch before and after integration.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "hvg_conservation", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/hvg_conservation.py" - }, - { - "metric_name": "Isolated label F1", - "metric_summary": "Isolated cell labels are identified as the labels present in the least number of batches in the integration task. The score evaluates how well these isolated labels separate from other cell identities based on clustering.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "isolated_labels_f1", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_f1.py" - }, - { - "metric_name": "Isolated label Silhouette", - "metric_summary": "This score evaluates the compactness for the label(s) that is(are) shared by fewest batches. 
It indicates how well rare cell types can be preserved after integration.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "isolated_labels_sil", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_sil.py" - }, - { - "metric_name": "kBET", - "metric_summary": "kBET determines whether the label composition of a k nearest neighborhood of a cell is similar to the expected (global) label composition. The test is repeated for a random subset of cells, and the results are summarized as a rejection rate over all tested neighborhoods.", - "paper_reference": "bttner2018test", - "maximize": true, - "image": "openproblems-r-extras", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "kBET", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/kBET.py" - }, - { - "metric_name": "NMI", - "metric_summary": "NMI compares the overlap of two clusterings. We used NMI to compare the cell-type labels with Louvain clusters computed on the integrated dataset.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "nmi", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/nmi.py" - }, - { - "metric_name": "PC Regression", - "metric_summary": "This compares the explained variance by batch before and after integration. 
It returns a score between 0 and 1 (scaled=True) with 0 if the variance contribution hasn\u2019t changed. The larger the score, the more different the variance contributions are before and after integration.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "pcr", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/pcr.py" - }, - { - "metric_name": "Silhouette", - "metric_summary": "The absolute silhouette with is computed on cell identity labels, measuring their compactness.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "silhouette", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/silhouette.py" - }, - { - "metric_name": "Batch ASW", - "metric_summary": "The absolute silhouette width is computed over batch labels per cell. As 0 then indicates that batches are well mixed and any deviation from 0 indicates a batch effect, we use the 1-abs(ASW) to map the score to the scale [0;1].", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_feature", - "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", - "metric_id": "silhouette_batch", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/sil_batch.py" - } + { + "metric_name": "ARI", + "metric_summary": "ARI (Adjusted Rand Index) compares the overlap of two clusterings. 
It considers both correct clustering overlaps while also counting correct disagreements between two clusterings.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "ari", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/ari.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Cell Cycle Score", + "metric_summary": "The cell-cycle conservation score evaluates how well the cell-cycle effect can be captured before and after integration.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "cc_score", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/cc_score.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Graph connectivity", + "metric_summary": "The graph connectivity metric assesses whether the kNN graph representation, G, of the integrated data connects all cells with the same cell identity label.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "graph_connectivity", + "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/graph_connectivity.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "HVG conservation", + "metric_summary": "This metric computes the average percentage of overlapping highly variable genes per batch before and after integration.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "hvg_conservation", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/hvg_conservation.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Isolated label F1", + "metric_summary": "Isolated cell labels are identified as the labels present in the least number of batches in the integration task. The score evaluates how well these isolated labels separate from other cell identities based on clustering.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "isolated_labels_f1", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_f1.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Isolated label Silhouette", + "metric_summary": "This score evaluates the compactness for the label(s) that is(are) shared by fewest batches. 
It indicates how well rare cell types can be preserved after integration.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "isolated_labels_sil", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_sil.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "kBET", + "metric_summary": "kBET determines whether the label composition of a k nearest neighborhood of a cell is similar to the expected (global) label composition. The test is repeated for a random subset of cells, and the results are summarized as a rejection rate over all tested neighborhoods.", + "paper_reference": "bttner2018test", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "kBET", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/kBET.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "NMI", + "metric_summary": "NMI compares the overlap of two clusterings. 
We used NMI to compare the cell-type labels with Louvain clusters computed on the integrated dataset.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "nmi", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/nmi.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "PC Regression", + "metric_summary": "This compares the explained variance by batch before and after integration. It returns a score between 0 and 1 (scaled=True) with 0 if the variance contribution hasn\u2019t changed. The larger the score, the more different the variance contributions are before and after integration.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "pcr", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/pcr.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Silhouette", + "metric_summary": "The absolute silhouette width is computed on cell identity labels, measuring their compactness.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "silhouette", + "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/silhouette.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Batch ASW", + "metric_summary": "The absolute silhouette width is computed over batch labels per cell. As 0 then indicates that batches are well mixed and any deviation from 0 indicates a batch effect, we use the 1-abs(ASW) to map the score to the scale [0;1].", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_feature", + "commit_sha": "ee7836251c4c6c371471e95eb7aa6a3e9f133b43", + "metric_id": "silhouette_batch", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/sil_batch.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/batch_integration_feature/data/task_info.json b/results/batch_integration_feature/data/task_info.json index 3bbad32c6..3cae70618 100644 --- a/results/batch_integration_feature/data/task_info.json +++ b/results/batch_integration_feature/data/task_info.json @@ -1,8 +1,68 @@ { - "task_id": "batch_integration_feature", - "commit_sha": "b578c4fb69d5d3d8d3fee7ca1b383f67820dbcca", - "task_name": "Batch integration feature", - "task_summary": "Removing batch effects while preserving biological variation (feature output)", - "task_description": "\nThis is a sub-task of the overall batch integration task. Batch (or data) integration\nintegrates datasets across batches that arise from various biological and technical\nsources. Methods that integrate batches typically have three different types of output:\na corrected feature matrix, a joint embedding across batches, and/or an integrated\ncell-cell similarity graph (e.g., a kNN graph). 
This sub-task focuses on all methods\nthat can output feature matrices. Other sub-tasks for batch integration can be found\nfor:\n\n* [graphs](../batch_integration_graph/), and\n* [embeddings](../batch_integration_embed/)\n\nThis sub-task was taken from a [benchmarking study of data integration\nmethods](https://openproblems.bio/bibliography#luecken2022benchmarking).\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "batch_integration_feature", + "commit_sha": "b578c4fb69d5d3d8d3fee7ca1b383f67820dbcca", + "task_name": "Batch integration feature", + "task_summary": "Removing batch effects while preserving biological variation (feature output)", + "task_description": "\nThis is a sub-task of the overall batch integration task. Batch (or data) integration\nintegrates datasets across batches that arise from various biological and technical\nsources. Methods that integrate batches typically have three different types of output:\na corrected feature matrix, a joint embedding across batches, and/or an integrated\ncell-cell similarity graph (e.g., a kNN graph). This sub-task focuses on all methods\nthat can output feature matrices. 
Other sub-tasks for batch integration can be found\nfor:\n\n* [graphs](../batch_integration_graph/), and\n* [embeddings](../batch_integration_embed/)\n\nThis sub-task was taken from a [benchmarking study of data integration\nmethods](https://openproblems.bio/bibliography#luecken2022benchmarking).\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_feature", + "authors": [ + { + "name": "Michaela Mueller", + "roles": ["maintainer", "author"], + "info": { + "github": "mumichae", + "orcid": "0000-0002-1401-1785" + } + }, + { + "name": "Malte Luecken", + "roles": "author", + "info": { + "github": "LuckyMD", + "orcid": "0000-0001-7464-7921" + } + }, + { + "name": "Daniel Strobl", + "roles": "author", + "info": { + "github": "danielStrobl", + "orcid": "0000-0002-5516-7057" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "contributor", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + }, + { + "name": "Scott Gigante", + "roles": "contributor", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Kai Waldrant", + "roles": "contributor", + "info": { + "github": "KaiWaldrant", + "orcid": "0009-0003-8555-1361" + } + }, + { + "name": "Martin Kim", + "roles": "contributor", + "info": { + "github": "martinkim0", + "orcid": "0009-0003-8555-1361" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/batch_integration_feature/index.qmd b/results/batch_integration_feature/index.qmd index d51c657ca..c35c70001 100644 --- a/results/batch_integration_feature/index.qmd +++ b/results/batch_integration_feature/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/batch_integration_graph/data/method_info.json 
b/results/batch_integration_graph/data/method_info.json index 3527ce08a..eab850fda 100644 --- a/results/batch_integration_graph/data/method_info.json +++ b/results/batch_integration_graph/data/method_info.json @@ -1,677 +1,677 @@ [ - { - "method_name": "Random Integration by Batch", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each batch label", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "batch_random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "BBKNN (full/scaled)", - "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. 
These sets are then combined and processed with the UMAP algorithm for visualisation.", - "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", - "paper_reference": "polanski2020bbknn", - "paper_year": 2020, - "code_url": "https://github.com/Teichlab/bbknn", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "bbknn_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" - }, - { - "method_name": "BBKNN (full/unscaled)", - "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. These sets are then combined and processed with the UMAP algorithm for visualisation.", - "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", - "paper_reference": "polanski2020bbknn", - "paper_year": 2020, - "code_url": "https://github.com/Teichlab/bbknn", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "bbknn_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" - }, - { - "method_name": "BBKNN (hvg/scaled)", - "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. 
These sets are then combined and processed with the UMAP algorithm for visualisation.", - "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", - "paper_reference": "polanski2020bbknn", - "paper_year": 2020, - "code_url": "https://github.com/Teichlab/bbknn", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "bbknn_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" - }, - { - "method_name": "BBKNN (hvg/unscaled)", - "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. These sets are then combined and processed with the UMAP algorithm for visualisation.", - "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", - "paper_reference": "polanski2020bbknn", - "paper_year": 2020, - "code_url": "https://github.com/Teichlab/bbknn", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "bbknn_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" - }, - { - "method_name": "Random Graph by Celltype", - "method_summary": "Cells are embedded as a one-hot encoding of celltype labels. 
A graph is then built on this embedding", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_graph", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py" - }, - { - "method_name": "Random Integration by Celltype", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each celltype label", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "celltype_random_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "Combat (full/scaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (full/unscaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (hvg/scaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "Combat (hvg/unscaled)", - "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", - "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", - "paper_reference": "hansen2012removing", - "paper_year": 2007, - "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "combat_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" - }, - { - "method_name": "FastMNN embed (full/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN embed (full/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN embed (hvg/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. 
The corrected target batch is then merged with the reference. The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN embed (hvg/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_embed_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN feature (full/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN feature (full/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN feature (hvg/scaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "FastMNN feature (hvg/unscaled)", - "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN paris in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. The process is repeated with the next target batch except for the PCA step.", - "paper_name": "A description of the theory behind the fastMNN algorithm", - "paper_reference": "lun2019fastmnn", - "paper_year": 2019, - "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "fastmnn_feature_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" - }, - { - "method_name": "Harmony (full/scaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. 
Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Harmony (full/unscaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Harmony (hvg/scaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Harmony (hvg/unscaled)", - "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. 
Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. The method keeps iterating these four steps until cell clusters are stable.", - "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", - "paper_reference": "korsunsky2019fast", - "paper_year": 2019, - "code_url": "https://github.com/lilab-bcb/harmony-pytorch", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmony_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" - }, - { - "method_name": "Liger (full/unscaled)", - "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. Joint clustering is performed and factor loadings are normalised.", - "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", - "paper_reference": "welch2019single", - "paper_year": 2019, - "code_url": "https://github.com/welch-lab/liger", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "liger_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" - }, - { - "method_name": "Liger (hvg/unscaled)", - "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. 
Joint clustering is performed and factor loadings are normalised.", - "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", - "paper_reference": "welch2019single", - "paper_year": 2019, - "code_url": "https://github.com/welch-lab/liger", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "liger_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" - }, - { - "method_name": "MNN (full/scaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (full/unscaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (hvg/scaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "MNN (hvg/unscaled)", - "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/chriscainx/mnnpy", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" - }, - { - "method_name": "No Integration", - "method_summary": "Cells are embedded by PCA on the unintegrated data. A graph is built on this PCA embedding.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "no_integration", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "Random Integration", - "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_integration", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/_common/methods/baseline.py" - }, - { - "method_name": "SCALEX (full)", - "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", - "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", - "paper_reference": "xiong2021online", - "paper_year": 2022, - "code_url": "https://github.com/jsxlei/SCALEX", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scalex_full", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scalex.py" - }, - { - "method_name": "SCALEX (hvg)", - "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", - "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", - "paper_reference": "xiong2021online", - "paper_year": 2022, - "code_url": "https://github.com/jsxlei/SCALEX", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scalex_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scalex.py" - }, - { - "method_name": "Scanorama (full/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama (full/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama (hvg/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama (hvg/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_embed_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (full/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_full_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (full/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (hvg/scaled)", - "method_summary": "Scanorama is an extension of the MNN method. Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_hvg_scaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "Scanorama gene output (hvg/unscaled)", - "method_summary": "Scanorama is an extension of the MNN method. 
Other then MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", - "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", - "paper_reference": "hie2019efficient", - "paper_year": 2019, - "code_url": "https://github.com/brianhie/scanorama", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanorama_feature_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" - }, - { - "method_name": "scANVI (full/unscaled)", - "method_summary": "ScanVI is an extension of scVI but instead using a Bayesian semi-supervised approach for more principled cell annotation.", - "paper_name": "Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", - "paper_reference": "xu2021probabilistic", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanvi_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" - }, - { - "method_name": "scANVI (hvg/unscaled)", - "method_summary": "ScanVI is an extension of scVI but instead using a Bayesian semi-supervised approach for more principled cell annotation.", - "paper_name": "Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", - "paper_reference": "xu2021probabilistic", - "paper_year": 2021, - "code_url": 
"https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scanvi_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" - }, - { - "method_name": "scVI (full/unscaled)", - "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", - "paper_name": "Deep generative modeling for single-cell transcriptomics", - "paper_reference": "lopez2018deep", - "paper_year": 2018, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scvi_full_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" - }, - { - "method_name": "scVI (hvg/unscaled)", - "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", - "paper_name": "Deep generative modeling for single-cell transcriptomics", - "paper_reference": "lopez2018deep", - "paper_year": 2018, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-r-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "scvi_hvg_unscaled", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" - } + { + "method_name": "Random Integration by Batch", + "method_summary": "Feature values, embedding coordinates, and 
graph connectivity are all randomly permuted within each batch label", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "batch_random_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "BBKNN (full/scaled)", + "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. 
These sets are then combined and processed with the UMAP algorithm for visualisation.", + "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", + "paper_reference": "polanski2020bbknn", + "paper_year": 2020, + "code_url": "https://github.com/Teichlab/bbknn/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "bbknn_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" + }, + { + "method_name": "BBKNN (full/unscaled)", + "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. 
These sets are then combined and processed with the UMAP algorithm for visualisation.", + "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", + "paper_reference": "polanski2020bbknn", + "paper_year": 2020, + "code_url": "https://github.com/Teichlab/bbknn/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "bbknn_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" + }, + { + "method_name": "BBKNN (hvg/scaled)", + "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. 
These sets are then combined and processed with the UMAP algorithm for visualisation.", + "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", + "paper_reference": "polanski2020bbknn", + "paper_year": 2020, + "code_url": "https://github.com/Teichlab/bbknn/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "bbknn_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" + }, + { + "method_name": "BBKNN (hvg/unscaled)", + "method_summary": "BBKNN or batch balanced k nearest neighbours graph is built for each cell by identifying its k nearest neighbours within each defined batch separately, creating independent neighbour sets for each cell in each batch. These sets are then combined and processed with the UMAP algorithm for visualisation.", + "paper_name": "BBKNN: fast batch alignment of single cell transcriptomes", + "paper_reference": "polanski2020bbknn", + "paper_year": 2020, + "code_url": "https://github.com/Teichlab/bbknn/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "bbknn_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py" + }, + { + "method_name": "Random Graph by Celltype", + "method_summary": "Cells are embedded as a one-hot encoding of celltype labels. 
A graph is then built on this embedding", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_graph", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py" + }, + { + "method_name": "Random Integration by Celltype", + "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted within each celltype label", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "celltype_random_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "Combat (full/scaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (full/unscaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (hvg/scaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "Combat (hvg/unscaled)", + "method_summary": "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It estimates batch-specific parameters by pooling information across genes in each batch and shrinks the estimates towards the overall mean of the batch effect estimates across all genes. 
These parameters are then used to adjust the data for batch effects, leading to more accurate and reproducible results.", + "paper_name": "Adjusting batch effects in microarray expression data using empirical Bayes methods", + "paper_reference": "hansen2012removing", + "paper_year": 2007, + "code_url": "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "combat_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py" + }, + { + "method_name": "FastMNN embed (full/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN embed (full/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN embed (hvg/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN embed (hvg/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_embed_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN feature (full/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN feature (full/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN feature (hvg/scaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "FastMNN feature (hvg/unscaled)", + "method_summary": "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN pairs in the low-dimensional space, and then correcting the target batch towards the reference using locally weighted correction vectors. The corrected target batch is then merged with the reference. 
The process is repeated with the next target batch except for the PCA step.", + "paper_name": "A description of the theory behind the fastMNN algorithm", + "paper_reference": "lun2019fastmnn", + "paper_year": 2019, + "code_url": "https://doi.org/doi:10.18129/B9.bioc.batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "fastmnn_feature_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py" + }, + { + "method_name": "Harmony (full/scaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Harmony (full/unscaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Harmony (hvg/scaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Harmony (hvg/unscaled)", + "method_summary": "Harmony is a method that uses PCA to group the cells into multi-dataset clusters, and then computes cluster-specific linear correction factors. Each cell is then corrected by its cell-specific linear factor using the cluster-weighted average. 
The method keeps iterating these four steps until cell clusters are stable.", + "paper_name": "Fast, sensitive and accurate integration of single-cell data with Harmony", + "paper_reference": "korsunsky2019fast", + "paper_year": 2019, + "code_url": "https://github.com/lilab-bcb/harmony-pytorch/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmony_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py" + }, + { + "method_name": "Liger (full/unscaled)", + "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. 
Joint clustering is performed and factor loadings are normalised.", + "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", + "paper_reference": "welch2019single", + "paper_year": 2019, + "code_url": "https://github.com/welch-lab/liger/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "liger_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" + }, + { + "method_name": "Liger (hvg/unscaled)", + "method_summary": "LIGER or linked inference of genomic experimental relationships uses iNMF deriving and implementing a novel coordinate descent algorithm to efficiently do the factorization. Joint clustering is performed and factor loadings are normalised.", + "paper_name": "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity", + "paper_reference": "welch2019single", + "paper_year": 2019, + "code_url": "https://github.com/welch-lab/liger/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "liger_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py" + }, + { + "method_name": "MNN (full/scaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (full/unscaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (hvg/scaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. 
After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "MNN (hvg/unscaled)", + "method_summary": "MNN first detects mutual nearest neighbours in two of the batches and infers a projection of the second onto the first batch. After that, additional batches are added iteratively.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/chriscainx/mnnpy/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py" + }, + { + "method_name": "No Integration", + "method_summary": "Cells are embedded by PCA on the unintegrated data. 
A graph is built on this PCA embedding.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "no_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "Random Integration", + "method_summary": "Feature values, embedding coordinates, and graph connectivity are all randomly permuted", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_integration", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/_common/methods/baseline.py" + }, + { + "method_name": "SCALEX (full)", + "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", + "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", + "paper_reference": "xiong2021online", + "paper_year": 2022, + "code_url": "https://github.com/jsxlei/SCALEX/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scalex_full", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scalex.py" + }, + { + "method_name": "SCALEX (hvg)", + "method_summary": "SCALEX is a method for integrating heterogeneous single-cell data online using a VAE framework. 
Its generalised encoder disentangles batch-related components from batch-invariant biological components, which are then projected into a common cell-embedding space.", + "paper_name": "Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space", + "paper_reference": "xiong2021online", + "paper_year": 2022, + "code_url": "https://github.com/jsxlei/SCALEX/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scalex_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scalex.py" + }, + { + "method_name": "Scanorama (full/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama (full/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama (hvg/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama (hvg/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_embed_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (full/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_full_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (full/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (hvg/scaled)", + "method_summary": "Scanorama is an extension of the MNN method. Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_hvg_scaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "Scanorama gene output (hvg/unscaled)", + "method_summary": "Scanorama is an extension of the MNN method. 
Unlike MNN, it finds mutual nearest neighbours over all batches and embeds observations into a joint hyperplane.", + "paper_name": "Efficient integration of heterogeneous single-cell transcriptomes using Scanorama", + "paper_reference": "hie2019efficient", + "paper_year": 2019, + "code_url": "https://github.com/brianhie/scanorama/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanorama_feature_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py" + }, + { + "method_name": "scANVI (full/unscaled)", + "method_summary": "scANVI is an extension of scVI that instead uses a Bayesian semi-supervised approach for more principled cell annotation.", + "paper_name": "Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", + "paper_reference": "xu2021probabilistic", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanvi_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" + }, + { + "method_name": "scANVI (hvg/unscaled)", + "method_summary": "scANVI is an extension of scVI that instead uses a Bayesian semi-supervised approach for more principled cell annotation.", + "paper_name": 
"Probabilistic harmonization and annotation of single\u2010cell transcriptomics data with deep generative models", + "paper_reference": "xu2021probabilistic", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scanvi_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py" + }, + { + "method_name": "scVI (full/unscaled)", + "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", + "paper_name": "Deep generative modeling for single-cell transcriptomics", + "paper_reference": "lopez2018deep", + "paper_year": 2018, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scvi_full_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" + }, + { + "method_name": "scVI (hvg/unscaled)", + "method_summary": "scVI combines a variational autoencoder with a hierarchical Bayesian model.", + "paper_name": "Deep generative modeling for single-cell transcriptomics", + "paper_reference": "lopez2018deep", + "paper_year": 2018, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": 
"https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "scvi_hvg_unscaled", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py" + } ] \ No newline at end of file diff --git a/results/batch_integration_graph/data/metric_info.json b/results/batch_integration_graph/data/metric_info.json index 04e193ab9..49a770488 100644 --- a/results/batch_integration_graph/data/metric_info.json +++ b/results/batch_integration_graph/data/metric_info.json @@ -1,46 +1,50 @@ [ - { - "metric_name": "ARI", - "metric_summary": "ARI (Adjusted Rand Index) compares the overlap of two clusterings. It considers both correct clustering overlaps while also counting correct disagreements between two clustering.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "ari", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/ari.py" - }, - { - "metric_name": "Graph connectivity", - "metric_summary": "The graph connectivity metric assesses whether the kNN graph representation, G, of the integrated data connects all cells with the same cell identity label.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "graph_connectivity", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/graph_connectivity.py" - }, - { - "metric_name": "Isolated label F1", - "metric_summary": "Isolated cell labels are identified as the labels present in the least number of batches in the integration task. The score evaluates how well these isolated labels separate from other cell identities based on clustering.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "isolated_labels_f1", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/iso_label_f1.py" - }, - { - "metric_name": "NMI", - "metric_summary": "NMI compares the overlap of two clusterings. We used NMI to compare the cell-type labels with Louvain clusters computed on the integrated dataset.", - "paper_reference": "luecken2022benchmarking", - "maximize": true, - "image": "openproblems-r-pytorch", - "task_id": "batch_integration_graph", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "nmi", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/nmi.py" - } + { + "metric_name": "ARI", + "metric_summary": "ARI (Adjusted Rand Index) compares the overlap of two clusterings. 
It considers both correct clustering overlaps while also counting correct disagreements between two clusterings.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "ari", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/ari.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Graph connectivity", + "metric_summary": "The graph connectivity metric assesses whether the kNN graph representation, G, of the integrated data connects all cells with the same cell identity label.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "graph_connectivity", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/graph_connectivity.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Isolated label F1", + "metric_summary": "Isolated cell labels are identified as the labels present in the least number of batches in the integration task. 
The score evaluates how well these isolated labels separate from other cell identities based on clustering.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "isolated_labels_f1", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/iso_label_f1.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "NMI", + "metric_summary": "NMI compares the overlap of two clusterings. We used NMI to compare the cell-type labels with Louvain clusters computed on the integrated dataset.", + "paper_reference": "luecken2022benchmarking", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-pytorch", + "task_id": "batch_integration_graph", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "nmi", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/nmi.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/batch_integration_graph/data/task_info.json b/results/batch_integration_graph/data/task_info.json index 46e07137c..925ca27d1 100644 --- a/results/batch_integration_graph/data/task_info.json +++ b/results/batch_integration_graph/data/task_info.json @@ -1,8 +1,68 @@ { - "task_id": "batch_integration_graph", - "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", - "task_name": "Batch integration graph", - "task_summary": "Removing batch effects while preserving biological variation (graph output)", - "task_description": "\nThis is a sub-task of the overall batch integration task. 
Batch (or data) integration\nmethods integrate datasets across batches that arise from various biological and\ntechnical sources. Methods that integrate batches typically have three different types\nof output: a corrected feature matrix, a joint embedding across batches, and/or an\nintegrated cell-cell similarity graph (e.g., a kNN graph). This sub-task focuses on all\nmethods that can output integrated graphs, and includes methods that canonically output\nthe other two data formats with subsequent postprocessing to generate a graph. Other\nsub-tasks for batch integration can be found for:\n\n* [embeddings](../batch_integration_embed/), and\n* [corrected features](../batch_integration_feature/)\n\nThis sub-task was taken from a [benchmarking study of data integration\nmethods](https://openproblems.bio/bibliography#luecken2022benchmarking).\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "batch_integration_graph", + "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", + "task_name": "Batch integration graph", + "task_summary": "Removing batch effects while preserving biological variation (graph output)", + "task_description": "\nThis is a sub-task of the overall batch integration task. Batch (or data) integration\nmethods integrate datasets across batches that arise from various biological and\ntechnical sources. Methods that integrate batches typically have three different types\nof output: a corrected feature matrix, a joint embedding across batches, and/or an\nintegrated cell-cell similarity graph (e.g., a kNN graph). This sub-task focuses on all\nmethods that can output integrated graphs, and includes methods that canonically output\nthe other two data formats with subsequent postprocessing to generate a graph. 
Other\nsub-tasks for batch integration can be found for:\n\n* [embeddings](../batch_integration_embed/), and\n* [corrected features](../batch_integration_feature/)\n\nThis sub-task was taken from a [benchmarking study of data integration\nmethods](https://openproblems.bio/bibliography#luecken2022benchmarking).\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/_batch_integration/batch_integration_graph", + "authors": [ + { + "name": "Michaela Mueller", + "roles": ["maintainer", "author"], + "info": { + "github": "mumichae", + "orcid": "0000-0002-1401-1785" + } + }, + { + "name": "Malte Luecken", + "roles": "author", + "info": { + "github": "LuckyMD", + "orcid": "0000-0001-7464-7921" + } + }, + { + "name": "Daniel Strobl", + "roles": "author", + "info": { + "github": "danielStrobl", + "orcid": "0000-0002-5516-7057" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "contributor", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + }, + { + "name": "Scott Gigante", + "roles": "contributor", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Kai Waldrant", + "roles": "contributor", + "info": { + "github": "KaiWaldrant", + "orcid": "0009-0003-8555-1361" + } + }, + { + "name": "Martin Kim", + "roles": "contributor", + "info": { + "github": "martinkim0", + "orcid": "0009-0003-8555-1361" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/batch_integration_graph/index.qmd b/results/batch_integration_graph/index.qmd index d6d56eba0..3d9e86e82 100644 --- a/results/batch_integration_graph/index.qmd +++ b/results/batch_integration_graph/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/cell_cell_communication_ligand_target/data/method_info.json 
b/results/cell_cell_communication_ligand_target/data/method_info.json index d226b65ef..776108e77 100644 --- a/results/cell_cell_communication_ligand_target/data/method_info.json +++ b/results/cell_cell_communication_ligand_target/data/method_info.json @@ -1,242 +1,242 @@ [ - { - "method_name": "CellPhoneDB (max)", - "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", - "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", - "paper_reference": "efremova2020cellphonedb", - "paper_year": 2020, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "cellphonedb_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "CellPhoneDB (sum)", - "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. 
Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", - "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", - "paper_reference": "efremova2020cellphonedb", - "paper_year": 2020, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "cellphonedb_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Connectome (max)", - "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", - "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in single-cell systems data using Connectome", - "paper_reference": "raredon2022computation", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "connectome_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Connectome (sum)", - "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", - "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in 
single-cell systems data using Connectome", - "paper_reference": "raredon2022computation", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "connectome_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Log2FC (max)", - "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "logfc_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Log2FC (sum)", - "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - 
"is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "logfc_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Magnitude Rank Aggregate (max)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magnitude_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Magnitude Rank Aggregate (sum)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magnitude_sum", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "NATMI (max)", - "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. We use its specificity measure, as recommended by the authors for single-context predictions.", - "paper_name": "Predicting cell-to-cell communication networks using NATMI", - "paper_reference": "hou2020predicting", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "natmi_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "NATMI (sum)", - "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. 
We use its specificity measure, as recommended by the authors for single-context predictions.", - "paper_name": "Predicting cell-to-cell communication networks using NATMI", - "paper_reference": "hou2020predicting", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "natmi_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Random Events", - "method_summary": "Random generation of cell-cell communication events by random selection of ligand, receptor, source, target, and score", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_events", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" - }, - { - "method_name": "SingleCellSignalR (max)", - "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", - "paper_name": "SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics", - "paper_reference": "cabello2020singlecellsignalr", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": 
false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "sca_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "SingleCellSignalR (sum)", - "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", - "paper_name": "SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics", - "paper_reference": "cabello2020singlecellsignalr", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "sca_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Specificity Rank Aggregate (max)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "specificity_max", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Specificity Rank Aggregate (sum)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "specificity_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "True Events", - "method_summary": "Perfect prediction of cell-cell communication events from target data", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "true_events", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" - } + { + "method_name": "CellPhoneDB (max)", + "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. 
Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", + "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", + "paper_reference": "efremova2020cellphonedb", + "paper_year": 2020, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "cellphonedb_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "CellPhoneDB (sum)", + "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. 
Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", + "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", + "paper_reference": "efremova2020cellphonedb", + "paper_year": 2020, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "cellphonedb_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Connectome (max)", + "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", + "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in single-cell systems data using Connectome", + "paper_reference": "raredon2022computation", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "connectome_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Connectome (sum)", + "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of 
magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", + "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in single-cell systems data using Connectome", + "paper_reference": "raredon2022computation", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "connectome_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Log2FC (max)", + "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "logfc_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Log2FC (sum)", + "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, 
and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "logfc_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Magnitude Rank Aggregate (max)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magnitude_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Magnitude Rank Aggregate (sum)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", 
+ "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magnitude_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "NATMI (max)", + "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. 
We use its specificity measure, as recommended by the authors for single-context predictions.", + "paper_name": "Predicting cell-to-cell communication networks using NATMI", + "paper_reference": "hou2020predicting", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "natmi_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "NATMI (sum)", + "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. 
We use its specificity measure, as recommended by the authors for single-context predictions.", + "paper_name": "Predicting cell-to-cell communication networks using NATMI", + "paper_reference": "hou2020predicting", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "natmi_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Random Events", + "method_summary": "Random generation of cell-cell communication events by random selection of ligand, receptor, source, target, and score", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_events", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" + }, + { + "method_name": "SingleCellSignalR (max)", + "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", + "paper_name": "SingleCellSignalR: inference of intercellular networks from 
single-cell transcriptomics", + "paper_reference": "cabello2020singlecellsignalr", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "sca_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "SingleCellSignalR (sum)", + "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", + "paper_name": "SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics", + "paper_reference": "cabello2020singlecellsignalr", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "sca_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Specificity Rank Aggregate (max)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + 
"paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "specificity_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Specificity Rank Aggregate (sum)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "specificity_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "True Events", + "method_summary": "Perfect prediction of cell-cell communication events from target data", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + 
"is_baseline": true, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "true_events", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" + } ] \ No newline at end of file diff --git a/results/cell_cell_communication_ligand_target/data/metric_info.json b/results/cell_cell_communication_ligand_target/data/metric_info.json index 4cc979fe0..f79fe9562 100644 --- a/results/cell_cell_communication_ligand_target/data/metric_info.json +++ b/results/cell_cell_communication_ligand_target/data/metric_info.json @@ -1,24 +1,26 @@ [ - { - "metric_name": "Precision-recall AUC", - "metric_summary": "Area under the precision-recall curve for the binary classification task predicting interactions.", - "paper_reference": "davis2006prauc", - "maximize": true, - "image": "openproblems", - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "metric_id": "auprc", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/metrics/auprc.py" - }, - { - "metric_name": "Odds Ratio", - "metric_summary": "The odds ratio represents the ratio of true and false positives within a set of prioritized interactions (top ranked hits) versus the same ratio for the remainder of the interactions. 
Thus, in this scenario odds ratios quantify the strength of association between the ability of methods to prioritize interactions and those interactions assigned to the positive class.", - "paper_reference": "bland2000odds", - "maximize": true, - "image": "openproblems", - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "odds_ratio", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py" - } + { + "metric_name": "Precision-recall AUC", + "metric_summary": "Area under the precision-recall curve for the binary classification task predicting interactions.", + "paper_reference": "davis2006prauc", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "metric_id": "auprc", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/metrics/auprc.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Odds Ratio", + "metric_summary": "The odds ratio represents the ratio of true and false positives within a set of prioritized interactions (top ranked hits) versus the same ratio for the remainder of the interactions. 
Thus, in this scenario odds ratios quantify the strength of association between the ability of methods to prioritize interactions and those interactions assigned to the positive class.", + "paper_reference": "bland2000odds", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "odds_ratio", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/cell_cell_communication_ligand_target/data/task_info.json b/results/cell_cell_communication_ligand_target/data/task_info.json index 6b5c9111d..d6763183b 100644 --- a/results/cell_cell_communication_ligand_target/data/task_info.json +++ b/results/cell_cell_communication_ligand_target/data/task_info.json @@ -1,8 +1,42 @@ { - "task_id": "cell_cell_communication_ligand_target", - "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", - "task_name": "Cell-Cell Communication Inference (Ligand-Target)", - "task_summary": "Detect interactions between ligands and target cell types", - "task_description": "\nThe growing availability of single-cell data has sparked an increased\ninterest in the inference of cell-cell communication (CCC),\nwith an ever-growing number of computational tools developed for this purpose.\n\nDifferent tools propose distinct preprocessing steps with diverse\nscoring functions, that are challenging to compare and evaluate.\nFurthermore, each tool typically comes with its own set of prior knowledge.\nTo harmonize these, [Dimitrov et\nal, 2022](https://openproblems.bio/bibliography#dimitrov2022comparison) recently\ndeveloped the [LIANA](https://github.com/saezlab/liana) framework, which was used\nas a foundation for this task.\n\nThe 
challenges in evaluating the tools are further exacerbated by the\nlack of a gold standard to benchmark the performance of CCC methods. In an\nattempt to address this, Dimitrov et al use alternative data modalities, including\nthe spatial proximity of cell types and\ndownstream cytokine activities, to generate an inferred ground truth. However,\nthese modalities are only approximations of biological reality and come\nwith their own assumptions and limitations. In time, the inclusion of more\ndatasets with known ground truth interactions will become available, from\nwhich the limitations and advantages of the different CCC methods will\nbe better understood.\n\n**This subtask evaluates the methods' ability to predict interactions,\nthe corresponding of cytokines of which, are inferred to be active in\nthe target cell types. This subtask focuses\non the prediction of interactions from steady-state, or single-context,\nsingle-cell data.**\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "cell_cell_communication_ligand_target", + "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", + "task_name": "Cell-Cell Communication Inference (Ligand-Target)", + "task_summary": "Detect interactions between ligands and target cell types", + "task_description": "\nThe growing availability of single-cell data has sparked an increased\ninterest in the inference of cell-cell communication (CCC),\nwith an ever-growing number of computational tools developed for this purpose.\n\nDifferent tools propose distinct preprocessing steps with diverse\nscoring functions, that are challenging to compare and evaluate.\nFurthermore, each tool typically comes with its own set of prior knowledge.\nTo harmonize these, [Dimitrov et\nal, 2022](https://openproblems.bio/bibliography#dimitrov2022comparison) recently\ndeveloped the [LIANA](https://github.com/saezlab/liana) framework, which was used\nas a foundation for this task.\n\nThe challenges in 
evaluating the tools are further exacerbated by the\nlack of a gold standard to benchmark the performance of CCC methods. In an\nattempt to address this, Dimitrov et al use alternative data modalities, including\nthe spatial proximity of cell types and\ndownstream cytokine activities, to generate an inferred ground truth. However,\nthese modalities are only approximations of biological reality and come\nwith their own assumptions and limitations. In time, the inclusion of more\ndatasets with known ground truth interactions will become available, from\nwhich the limitations and advantages of the different CCC methods will\nbe better understood.\n\n**This subtask evaluates the methods' ability to predict interactions,\nthe corresponding of cytokines of which, are inferred to be active in\nthe target cell types. This subtask focuses\non the prediction of interactions from steady-state, or single-context,\nsingle-cell data.**\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target", + "authors": [ + { + "name": "Daniel Dimitrov", + "roles": ["maintainer", "author"], + "info": { + "github": "dbdimitrov" + } + }, + { + "name": "Scott Gigante", + "roles": "contributor", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "contributor", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + }, + { + "name": "Vishnuvasan Raghuraman", + "roles": "contributor", + "info": { + "github": "vishnu-vasan" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/cell_cell_communication_ligand_target/index.qmd b/results/cell_cell_communication_ligand_target/index.qmd index 41505330b..0f6922436 100644 --- a/results/cell_cell_communication_ligand_target/index.qmd +++ b/results/cell_cell_communication_ligand_target/index.qmd @@ -7,6 +7,9 @@ css: 
../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/cell_cell_communication_source_target/data/method_info.json b/results/cell_cell_communication_source_target/data/method_info.json index bf1735bcd..635ba4276 100644 --- a/results/cell_cell_communication_source_target/data/method_info.json +++ b/results/cell_cell_communication_source_target/data/method_info.json @@ -1,242 +1,242 @@ [ - { - "method_name": "CellPhoneDB (max)", - "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", - "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", - "paper_reference": "efremova2020cellphonedb", - "paper_year": 2020, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "cellphonedb_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "CellPhoneDB (sum)", - "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. 
Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", - "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", - "paper_reference": "efremova2020cellphonedb", - "paper_year": 2020, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "cellphonedb_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Connectome (max)", - "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", - "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in single-cell systems data using Connectome", - "paper_reference": "raredon2022computation", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "connectome_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Connectome (sum)", - "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", - "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in 
single-cell systems data using Connectome", - "paper_reference": "raredon2022computation", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "connectome_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Log2FC (max)", - "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "logfc_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Log2FC (sum)", - "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - 
"is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "logfc_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Magnitude Rank Aggregate (max)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magnitude_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Magnitude Rank Aggregate (sum)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magnitude_sum", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "NATMI (max)", - "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. We use its specificity measure, as recommended by the authors for single-context predictions.", - "paper_name": "Predicting cell-to-cell communication networks using NATMI", - "paper_reference": "hou2020predicting", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "natmi_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "NATMI (sum)", - "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. 
We use its specificity measure, as recommended by the authors for single-context predictions.", - "paper_name": "Predicting cell-to-cell communication networks using NATMI", - "paper_reference": "hou2020predicting", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "natmi_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Random Events", - "method_summary": "Random generation of cell-cell communication events by random selection of ligand, receptor, source, target, and score", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_events", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" - }, - { - "method_name": "SingleCellSignalR (max)", - "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", - "paper_name": "SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics", - "paper_reference": "cabello2020singlecellsignalr", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": 
false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "sca_max", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "SingleCellSignalR (sum)", - "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", - "paper_name": "SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics", - "paper_reference": "cabello2020singlecellsignalr", - "paper_year": 2021, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "sca_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Specificity Rank Aggregate (max)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "specificity_max", - "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "Specificity Rank Aggregate (sum)", - "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", - "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", - "paper_reference": "dimitrov2022comparison", - "paper_year": 2022, - "code_url": "https://github.com/saezlab/liana", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "specificity_sum", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" - }, - { - "method_name": "True Events", - "method_summary": "Perfect prediction of cell-cell communication events from target data", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "true_events", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" - } + { + "method_name": "CellPhoneDB (max)", + "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. 
Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", + "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", + "paper_reference": "efremova2020cellphonedb", + "paper_year": 2020, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "cellphonedb_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "CellPhoneDB (sum)", + "method_summary": "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of interaction magnitude, along with a permutation-based p-value as a measure of specificity. 
Here, we use the former to prioritize interactions, subsequent to filtering according to p-value less than 0.05.", + "paper_name": "CellPhoneDB: inferring cell\u2013cell communication from combined expression of multi-subunit ligand\u2013receptor complexes", + "paper_reference": "efremova2020cellphonedb", + "paper_year": 2020, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "cellphonedb_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Connectome (max)", + "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", + "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in single-cell systems data using Connectome", + "paper_reference": "raredon2022computation", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "connectome_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Connectome (sum)", + "method_summary": "Connectome uses the product of ligand-receptor expression as a measure of 
magnitude, and the average of the z-transformed expression of ligand and receptor as a measure of specificity.", + "paper_name": "Computation and visualization of cell\u2013cell signaling topologies in single-cell systems data using Connectome", + "paper_reference": "raredon2022computation", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "connectome_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Log2FC (max)", + "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "logfc_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Log2FC (sum)", + "method_summary": "logFC (implemented in LIANA and inspired by iTALK) combines both expression and magnitude, 
and represents the average of one-versus-the-rest log2-fold change of ligand and receptor expression per cell type.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "logfc_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Magnitude Rank Aggregate (max)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magnitude_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Magnitude Rank Aggregate (sum)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", 
+ "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magnitude_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "NATMI (max)", + "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. 
We use its specificity measure, as recommended by the authors for single-context predictions.", + "paper_name": "Predicting cell-to-cell communication networks using NATMI", + "paper_reference": "hou2020predicting", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "natmi_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "NATMI (sum)", + "method_summary": "NATMI uses the product of ligand-receptor expression as a measure of magnitude. As a measure of specificity, NATMI proposes $specificity.edge = \\frac{l}{l_s} \\cdot \\frac{r}{r_s}$; where $l$ and $r$ represent the average expression of ligand and receptor per cell type, and $l_s$ and $r_s$ represent the sums of the average ligand and receptor expression across all cell types. 
We use its specificity measure, as recommended by the authors for single-context predictions.", + "paper_name": "Predicting cell-to-cell communication networks using NATMI", + "paper_reference": "hou2020predicting", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "natmi_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Random Events", + "method_summary": "Random generation of cell-cell communication events by random selection of ligand, receptor, source, target, and score", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_events", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" + }, + { + "method_name": "SingleCellSignalR (max)", + "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", + "paper_name": "SingleCellSignalR: inference of intercellular networks from 
single-cell transcriptomics", + "paper_reference": "cabello2020singlecellsignalr", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "sca_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "SingleCellSignalR (sum)", + "method_summary": "SingleCellSignalR provides a magnitude score as $LRscore = \\frac{\\sqrt{lr}}{\\mu+\\sqrt{lr}}$; where $l$ and $r$ are the average ligand and receptor expression per cell type, and $\\mu$ is the mean of the expression matrix.", + "paper_name": "SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics", + "paper_reference": "cabello2020singlecellsignalr", + "paper_year": 2021, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "sca_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Specificity Rank Aggregate (max)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + 
"paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "specificity_max", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "Specificity Rank Aggregate (sum)", + "method_summary": "RobustRankAggregate generates a consensus rank of all methods implemented in LIANA providing either specificity or magnitude scores.", + "paper_name": "Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data", + "paper_reference": "dimitrov2022comparison", + "paper_year": 2022, + "code_url": "https://github.com/saezlab/liana/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "specificity_sum", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py" + }, + { + "method_name": "True Events", + "method_summary": "Perfect prediction of cell-cell communication events from target data", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + 
"is_baseline": true, + "code_version": "v1.0.0", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "true_events", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py" + } ] \ No newline at end of file diff --git a/results/cell_cell_communication_source_target/data/metric_info.json b/results/cell_cell_communication_source_target/data/metric_info.json index 6cd204e96..652a72f59 100644 --- a/results/cell_cell_communication_source_target/data/metric_info.json +++ b/results/cell_cell_communication_source_target/data/metric_info.json @@ -1,24 +1,26 @@ [ - { - "metric_name": "Precision-recall AUC", - "metric_summary": "Area under the precision-recall curve for the binary classification task predicting interactions.", - "paper_reference": "davis2006prauc", - "maximize": true, - "image": "openproblems", - "task_id": "cell_cell_communication_source_target", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "metric_id": "auprc", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/metrics/auprc.py" - }, - { - "metric_name": "Odds Ratio", - "metric_summary": "The odds ratio represents the ratio of true and false positives within a set of prioritized interactions (top ranked hits) versus the same ratio for the remainder of the interactions. 
Thus, in this scenario odds ratios quantify the strength of association between the ability of methods to prioritize interactions and those interactions assigned to the positive class.", - "paper_reference": "bland2000odds", - "maximize": true, - "image": "openproblems", - "task_id": "cell_cell_communication_source_target", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "odds_ratio", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py" - } + { + "metric_name": "Precision-recall AUC", + "metric_summary": "Area under the precision-recall curve for the binary classification task predicting interactions.", + "paper_reference": "davis2006prauc", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "metric_id": "auprc", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/metrics/auprc.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Odds Ratio", + "metric_summary": "The odds ratio represents the ratio of true and false positives within a set of prioritized interactions (top ranked hits) versus the same ratio for the remainder of the interactions. 
Thus, in this scenario odds ratios quantify the strength of association between the ability of methods to prioritize interactions and those interactions assigned to the positive class.", + "paper_reference": "bland2000odds", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "cell_cell_communication_source_target", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "odds_ratio", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/cell_cell_communication_source_target/data/task_info.json b/results/cell_cell_communication_source_target/data/task_info.json index d8bb1ab5f..debfa6154 100644 --- a/results/cell_cell_communication_source_target/data/task_info.json +++ b/results/cell_cell_communication_source_target/data/task_info.json @@ -1,8 +1,42 @@ { - "task_id": "cell_cell_communication_source_target", - "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", - "task_name": "Cell-Cell Communication Inference (Source-Target)", - "task_summary": "Detect interactions between source and target cell types", - "task_description": "\nThe growing availability of single-cell data has sparked an increased\ninterest in the inference of cell-cell communication (CCC),\nwith an ever-growing number of computational tools developed for this purpose.\n\nDifferent tools propose distinct preprocessing steps with diverse\nscoring functions, that are challenging to compare and evaluate.\nFurthermore, each tool typically comes with its own set of prior knowledge.\nTo harmonize these, [Dimitrov et\nal, 2022](https://openproblems.bio/bibliography#dimitrov2022comparison) recently\ndeveloped the [LIANA](https://github.com/saezlab/liana) framework, which was used\nas a foundation for this task.\n\nThe 
challenges in evaluating the tools are further exacerbated by the\nlack of a gold standard to benchmark the performance of CCC methods. In an\nattempt to address this, Dimitrov et al use alternative data modalities, including\nthe spatial proximity of cell types and\ndownstream cytokine activities, to generate an inferred ground truth. However,\nthese modalities are only approximations of biological reality and come\nwith their own assumptions and limitations. In time, the inclusion of more\ndatasets with known ground truth interactions will become available, from\nwhich the limitations and advantages of the different CCC methods will\nbe better understood.\n\n**This subtask evaluates methods in their ability to predict interactions between\nspatially-adjacent source cell types and target cell types. This subtask focuses\non the prediction of interactions from steady-state, or single-context,\nsingle-cell data.**\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "cell_cell_communication_source_target", + "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", + "task_name": "Cell-Cell Communication Inference (Source-Target)", + "task_summary": "Detect interactions between source and target cell types", + "task_description": "\nThe growing availability of single-cell data has sparked an increased\ninterest in the inference of cell-cell communication (CCC),\nwith an ever-growing number of computational tools developed for this purpose.\n\nDifferent tools propose distinct preprocessing steps with diverse\nscoring functions, that are challenging to compare and evaluate.\nFurthermore, each tool typically comes with its own set of prior knowledge.\nTo harmonize these, [Dimitrov et\nal, 2022](https://openproblems.bio/bibliography#dimitrov2022comparison) recently\ndeveloped the [LIANA](https://github.com/saezlab/liana) framework, which was used\nas a foundation for this task.\n\nThe challenges in evaluating the tools are further 
exacerbated by the\nlack of a gold standard to benchmark the performance of CCC methods. In an\nattempt to address this, Dimitrov et al use alternative data modalities, including\nthe spatial proximity of cell types and\ndownstream cytokine activities, to generate an inferred ground truth. However,\nthese modalities are only approximations of biological reality and come\nwith their own assumptions and limitations. In time, the inclusion of more\ndatasets with known ground truth interactions will become available, from\nwhich the limitations and advantages of the different CCC methods will\nbe better understood.\n\n**This subtask evaluates methods in their ability to predict interactions between\nspatially-adjacent source cell types and target cell types. This subtask focuses\non the prediction of interactions from steady-state, or single-context,\nsingle-cell data.**\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target", + "authors": [ + { + "name": "Daniel Dimitrov", + "roles": ["maintainer", "author"], + "info": { + "github": "dbdimitrov" + } + }, + { + "name": "Scott Gigante", + "roles": "contributor", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "contributor", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + }, + { + "name": "Vishnuvasan Raghuraman", + "roles": "contributor", + "info": { + "github": "vishnu-vasan" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/cell_cell_communication_source_target/index.qmd b/results/cell_cell_communication_source_target/index.qmd index 4e2cf84b1..2a6f14971 100644 --- a/results/cell_cell_communication_source_target/index.qmd +++ b/results/cell_cell_communication_source_target/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom 
citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/denoising/data/method_info.json b/results/denoising/data/method_info.json index c6c5a5f84..c59611345 100644 --- a/results/denoising/data/method_info.json +++ b/results/denoising/data/method_info.json @@ -1,197 +1,197 @@ [ - { - "method_name": "ALRA (log norm)", - "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", - "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", - "paper_reference": "linderman2018zero", - "paper_year": 2018, - "code_url": "https://github.com/KlugerLab/ALRA", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "alra_log", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/alra.py" - }, - { - "method_name": "ALRA (log norm, reversed normalization)", - "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. 
The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", - "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", - "paper_reference": "linderman2018zero", - "paper_year": 2018, - "code_url": "https://github.com/KlugerLab/ALRA", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "alra_log_reversenorm", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/alra.py" - }, - { - "method_name": "ALRA (sqrt norm)", - "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. 
This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", - "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", - "paper_reference": "linderman2018zero", - "paper_year": 2018, - "code_url": "https://github.com/KlugerLab/ALRA", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "alra_sqrt", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/alra.py" - }, - { - "method_name": "ALRA (sqrt norm, reversed normalization)", - "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. 
This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", - "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", - "paper_reference": "linderman2018zero", - "paper_year": 2018, - "code_url": "https://github.com/KlugerLab/ALRA", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "alra_sqrt_reversenorm", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/alra.py" - }, - { - "method_name": "DCA", - "method_summary": "DCA (Deep Count Autoencoder) is a method to remove the effect of dropout in scRNA-seq data. DCA takes into account the count structure, overdispersed nature and sparsity of scRNA-seq datatypes using a deep autoencoder with a zero-inflated negative binomial (ZINB) loss. The autoencoder is then applied to the dataset, where the mean of the fitted negative binomial distributions is used to fill each entry of the imputed matrix.", - "paper_name": "Single-cell RNA-seq denoising using a deep count autoencoder", - "paper_reference": "eraslan2019single", - "paper_year": 2019, - "code_url": "https://github.com/theislab/dca", - "image": "openproblems-python-tensorflow", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "dca", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/dca.py" - }, - { - "method_name": "KNN smoothing", - "method_summary": "KNN-smoothing is a method for denoising data based on the k-nearest neighbours. Given a normalised scRNA-seq matrix, KNN-smoothing calculates a k-nearest neighbour matrix using Euclidean distances between cell pairs. 
Each cell\u2019s denoised expression is then defined as the average expression of each of its neighbours.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "knn_naive", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/magic.py" - }, - { - "method_name": "Iterative KNN smoothing", - "method_summary": "Iterative kNN-smoothing is a method to repair or denoise noisy scRNA-seq expression matrices. Given a scRNA-seq expression matrix, KNN-smoothing first applies initial normalisation and smoothing. Then, a chosen number of principal components is used to calculate Euclidean distances between cells. Minimally sized neighbourhoods are initially determined from these Euclidean distances, and expression profiles are shared between neighbouring cells. Then, the resultant smoothed matrix is used as input to the next step of smoothing, where the size (k) of the considered neighbourhoods is increased, leading to greater smoothing. 
This process continues until a chosen maximum k value has been reached, at which point the iteratively smoothed object is then optionally scaled to yield a final result.", - "paper_name": "K-nearest neighbor smoothing for high-throughput single-cell RNA-Seq data", - "paper_reference": "wagner2018knearest", - "paper_year": 2018, - "code_url": "https://github.com/yanailab/knn-smoothing", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "knn_smoothing", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/knn_smoothing.py" - }, - { - "method_name": "MAGIC", - "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. 
The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", - "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", - "paper_reference": "van2018recovering", - "paper_year": 2018, - "code_url": "https://github.com/KrishnaswamyLab/MAGIC", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magic", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/magic.py" - }, - { - "method_name": "MAGIC (approximate)", - "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. 
The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", - "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", - "paper_reference": "van2018recovering", - "paper_year": 2018, - "code_url": "https://github.com/KrishnaswamyLab/MAGIC", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magic_approx", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/magic.py" - }, - { - "method_name": "MAGIC (approximate, reversed normalization)", - "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. 
The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", - "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", - "paper_reference": "van2018recovering", - "paper_year": 2018, - "code_url": "https://github.com/KrishnaswamyLab/MAGIC", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magic_approx_reverse_norm", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/magic.py" - }, - { - "method_name": "MAGIC (reversed normalization)", - "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. 
The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", - "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", - "paper_reference": "van2018recovering", - "paper_year": 2018, - "code_url": "https://github.com/KrishnaswamyLab/MAGIC", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "magic_reverse_norm", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/magic.py" - }, - { - "method_name": "No denoising", - "method_summary": "Denoised outputs are defined from the unmodified input data.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "no_denoising", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/baseline.py" - }, - { - "method_name": "Perfect denoising", - "method_summary": "Denoised outputs are defined from the target data.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "perfect_denoising", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/methods/baseline.py" - } + { + "method_name": "ALRA (log 
norm)", + "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", + "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", + "paper_reference": "linderman2018zero", + "paper_year": 2018, + "code_url": "https://github.com/KlugerLab/ALRA/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "alra_log", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/alra.py" + }, + { + "method_name": "ALRA (log norm, reversed normalization)", + "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. 
This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", + "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", + "paper_reference": "linderman2018zero", + "paper_year": 2018, + "code_url": "https://github.com/KlugerLab/ALRA/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "alra_log_reversenorm", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/alra.py" + }, + { + "method_name": "ALRA (sqrt norm)", + "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. 
This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", + "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", + "paper_reference": "linderman2018zero", + "paper_year": 2018, + "code_url": "https://github.com/KlugerLab/ALRA/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "alra_sqrt", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/alra.py" + }, + { + "method_name": "ALRA (sqrt norm, reversed normalization)", + "method_summary": "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for imputation of missing values in single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first imputes values using rank-k approximation, using singular value decomposition. Next, a symmetric distribution is fitted to the near-zero imputed values for each gene (row) of the matrix. The right \u201ctail\u201d of this distribution is then used to threshold the accepted nonzero entries. 
This same threshold is then used to rescale the matrix, once the \u201cbiological zeros\u201d have been removed.", + "paper_name": "Zero-preserving imputation of scRNA-seq data using low-rank approximation", + "paper_reference": "linderman2018zero", + "paper_year": 2018, + "code_url": "https://github.com/KlugerLab/ALRA/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "alra_sqrt_reversenorm", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/alra.py" + }, + { + "method_name": "DCA", + "method_summary": "DCA (Deep Count Autoencoder) is a method to remove the effect of dropout in scRNA-seq data. DCA takes into account the count structure, overdispersed nature and sparsity of scRNA-seq datatypes using a deep autoencoder with a zero-inflated negative binomial (ZINB) loss. 
The autoencoder is then applied to the dataset, where the mean of the fitted negative binomial distributions is used to fill each entry of the imputed matrix.", + "paper_name": "Single-cell RNA-seq denoising using a deep count autoencoder", + "paper_reference": "eraslan2019single", + "paper_year": 2019, + "code_url": "https://github.com/theislab/dca/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-tensorflow", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "dca", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/dca.py" + }, + { + "method_name": "KNN smoothing", + "method_summary": "KNN-smoothing is a method for denoising data based on the k-nearest neighbours. Given a normalised scRNA-seq matrix, KNN-smoothing calculates a k-nearest neighbour matrix using Euclidean distances between cell pairs. Each cell\u2019s denoised expression is then defined as the average expression of each of its neighbours.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "knn_naive", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/magic.py" + }, + { + "method_name": "Iterative KNN smoothing", + "method_summary": "Iterative kNN-smoothing is a method to repair or denoise noisy scRNA-seq expression matrices. 
Given a scRNA-seq expression matrix, KNN-smoothing first applies initial normalisation and smoothing. Then, a chosen number of principal components is used to calculate Euclidean distances between cells. Minimally sized neighbourhoods are initially determined from these Euclidean distances, and expression profiles are shared between neighbouring cells. Then, the resultant smoothed matrix is used as input to the next step of smoothing, where the size (k) of the considered neighbourhoods is increased, leading to greater smoothing. This process continues until a chosen maximum k value has been reached, at which point the iteratively smoothed object is then optionally scaled to yield a final result.", + "paper_name": "K-nearest neighbor smoothing for high-throughput single-cell RNA-Seq data", + "paper_reference": "wagner2018knearest", + "paper_year": 2018, + "code_url": "https://github.com/yanailab/knn-smoothing/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "knn_smoothing", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/knn_smoothing.py" + }, + { + "method_name": "MAGIC", + "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. 
Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", + "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", + "paper_reference": "van2018recovering", + "paper_year": 2018, + "code_url": "https://github.com/KrishnaswamyLab/MAGIC/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magic", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/magic.py" + }, + { + "method_name": "MAGIC (approximate)", + "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. 
The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", + "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", + "paper_reference": "van2018recovering", + "paper_year": 2018, + "code_url": "https://github.com/KrishnaswamyLab/MAGIC/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magic_approx", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/magic.py" + }, + { + "method_name": "MAGIC (approximate, reversed normalization)", + "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. 
The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", + "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", + "paper_reference": "van2018recovering", + "paper_year": 2018, + "code_url": "https://github.com/KrishnaswamyLab/MAGIC/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magic_approx_reverse_norm", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/magic.py" + }, + { + "method_name": "MAGIC (reversed normalization)", + "method_summary": "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. Given a normalised scRNA-seq expression matrix, it first calculates Euclidean distances between each pair of cells in the dataset, which is then augmented using a Gaussian kernel (function) and row-normalised to give a normalised affinity matrix. A t-step markov process is then calculated, by powering this affinity matrix t times. Finally, the powered affinity matrix is right-multiplied by the normalised data, causing the final imputed values to take the value of a per-gene average weighted by the affinities of cells. 
The resultant imputed matrix is then rescaled, to more closely match the magnitude of measurements in the normalised (input) matrix.", + "paper_name": "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion", + "paper_reference": "van2018recovering", + "paper_year": 2018, + "code_url": "https://github.com/KrishnaswamyLab/MAGIC/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "magic_reverse_norm", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/magic.py" + }, + { + "method_name": "No denoising", + "method_summary": "Denoised outputs are defined from the unmodified input data.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "no_denoising", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/baseline.py" + }, + { + "method_name": "Perfect denoising", + "method_summary": "Denoised outputs are defined from the target data.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": 
"v1.0.0", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "perfect_denoising", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/methods/baseline.py" + } ] \ No newline at end of file diff --git a/results/denoising/data/metric_info.json b/results/denoising/data/metric_info.json index e7b648551..99cc11c13 100644 --- a/results/denoising/data/metric_info.json +++ b/results/denoising/data/metric_info.json @@ -1,24 +1,26 @@ [ - { - "metric_name": "Mean-squared error", - "metric_summary": "The mean squared error between the denoised counts of the training dataset and the true counts of the test dataset after reweighting by the train/test ratio.", - "paper_reference": "batson2019molecular", - "maximize": false, - "image": "openproblems", - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "mse", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/metrics/mse.py" - }, - { - "metric_name": "Poisson loss", - "metric_summary": "The Poisson log likelihood of observing the true counts of the test dataset given the distribution given in the denoised dataset.", - "paper_reference": "batson2019molecular", - "maximize": false, - "image": "openproblems-python-pytorch", - "task_id": "denoising", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "poisson", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/denoising/metrics/poisson.py" - } + { + "metric_name": "Mean-squared error", + "metric_summary": "The mean squared error between the denoised counts of the training dataset and the true counts of the test dataset after reweighting by the train/test ratio.", + "paper_reference": "batson2019molecular", + "maximize": false, + "image": 
"https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "mse", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/metrics/mse.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Poisson loss", + "metric_summary": "The Poisson log likelihood of observing the true counts of the test dataset given the distribution given in the denoised dataset.", + "paper_reference": "batson2019molecular", + "maximize": false, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "task_id": "denoising", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "poisson", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/denoising/metrics/poisson.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/denoising/data/task_info.json b/results/denoising/data/task_info.json index 0dcfa9ee6..115037066 100644 --- a/results/denoising/data/task_info.json +++ b/results/denoising/data/task_info.json @@ -1,8 +1,43 @@ { - "task_id": "denoising", - "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", - "task_name": "Denoising", - "task_summary": "Removing noise in sparse single-cell RNA-sequencing count data", - "task_description": "\nSingle-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present\nin each cell. As a result, the measurements (UMI counts) observed for each gene and each\ncell are associated with generally high levels of technical noise ([Gr\u00fcn et al.,\n2014](https://openproblems.bio/bibliography#grn2014validation)). Denoising describes the\ntask of estimating the true expression level of each gene in each cell. 
In the\nsingle-cell literature, this task is also referred to as *imputation*, a term which is\ntypically used for missing data problems in statistics. Similar to the use of the terms\n\"dropout\", \"missing data\", and \"technical zeros\", this terminology can create confusion\nabout the underlying measurement process ([Sarkar and Stephens,\n2021](https://openproblems.bio/bibliography#sarkar2021separating)).\n\nA key challenge in evaluating denoising methods is the general lack of a ground truth. A\nrecent benchmark study ([Hou et al.,\n2020](https://openproblems.bio/bibliography#hou2020systematic))\nrelied on flow-sorted datasets, mixture control experiments ([Tian et al.,\n2019](https://openproblems.bio/bibliography#tian2019benchmarking)), and comparisons with\nbulk RNA-Seq data. Since each of these approaches suffers from specific limitations, it\nis difficult to combine these different approaches into a single quantitative measure of\ndenoising accuracy. Here, we instead rely on an approach termed molecular\ncross-validation (MCV), which was specifically developed to quantify denoising accuracy\nin the absence of a ground truth ([Batson et al.,\n2019](https://openproblems.bio/bibliography#batson2019molecular)). In MCV, the observed\nmolecules in a given scRNA-Seq dataset are first partitioned between a *training* and a\n*test* dataset. Next, a denoising method is applied to the training dataset. Finally,\ndenoising accuracy is measured by comparing the result to the test dataset. 
The authors\nshow that both in theory and in practice, the measured denoising accuracy is\nrepresentative of the accuracy that would be obtained on a ground truth dataset.\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "denoising", + "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", + "task_name": "Denoising", + "task_summary": "Removing noise in sparse single-cell RNA-sequencing count data", + "task_description": "\nSingle-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present\nin each cell. As a result, the measurements (UMI counts) observed for each gene and each\ncell are associated with generally high levels of technical noise ([Grün et al.,\n2014](https://openproblems.bio/bibliography#grn2014validation)). Denoising describes the\ntask of estimating the true expression level of each gene in each cell. In the\nsingle-cell literature, this task is also referred to as *imputation*, a term which is\ntypically used for missing data problems in statistics. Similar to the use of the terms\n\"dropout\", \"missing data\", and \"technical zeros\", this terminology can create confusion\nabout the underlying measurement process ([Sarkar and Stephens,\n2021](https://openproblems.bio/bibliography#sarkar2021separating)).\n\nA key challenge in evaluating denoising methods is the general lack of a ground truth. A\nrecent benchmark study ([Hou et al.,\n2020](https://openproblems.bio/bibliography#hou2020systematic))\nrelied on flow-sorted datasets, mixture control experiments ([Tian et al.,\n2019](https://openproblems.bio/bibliography#tian2019benchmarking)), and comparisons with\nbulk RNA-Seq data. Since each of these approaches suffers from specific limitations, it\nis difficult to combine these different approaches into a single quantitative measure of\ndenoising accuracy. 
Here, we instead rely on an approach termed molecular\ncross-validation (MCV), which was specifically developed to quantify denoising accuracy\nin the absence of a ground truth ([Batson et al.,\n2019](https://openproblems.bio/bibliography#batson2019molecular)). In MCV, the observed\nmolecules in a given scRNA-Seq dataset are first partitioned between a *training* and a\n*test* dataset. Next, a denoising method is applied to the training dataset. Finally,\ndenoising accuracy is measured by comparing the result to the test dataset. The authors\nshow that both in theory and in practice, the measured denoising accuracy is\nrepresentative of the accuracy that would be obtained on a ground truth dataset.\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/denoising", + "authors": [ + { + "name": "Wesley Lewis", + "roles": ["author", "maintainer"], + "info": { + "github": "wes-lewis" + } + }, + { + "name": "Scott Gigante", + "roles": ["author", "maintainer"], + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "author", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + }, + { + "name": "Kai Waldrant", + "roles": "contributor", + "info": { + "github": "KaiWaldrant", + "orcid": "0009-0003-8555-1361" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/denoising/index.qmd b/results/denoising/index.qmd index 171853c88..5808551e0 100644 --- a/results/denoising/index.qmd +++ b/results/denoising/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/dimensionality_reduction/data/method_info.json b/results/dimensionality_reduction/data/method_info.json index 62f20ec7c..240bbcd71 100644 --- a/results/dimensionality_reduction/data/method_info.json +++ 
b/results/dimensionality_reduction/data/method_info.json @@ -1,392 +1,392 @@ [ - { - "method_name": "densMAP (logCP10k)", - "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. It is performed on the same inputs as UMAP.", - "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", - "paper_reference": "narayan2021assessing", - "paper_year": 2021, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "densmap_logCP10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - }, - { - "method_name": "densMAP (logCP10k, 1kHVG)", - "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. 
It is performed on the same inputs as UMAP.", - "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", - "paper_reference": "narayan2021assessing", - "paper_year": 2021, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "densmap_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - }, - { - "method_name": "densMAP PCA (logCP10k)", - "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. It is performed on the same inputs as UMAP.", - "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", - "paper_reference": "narayan2021assessing", - "paper_year": 2021, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "densmap_pca_logCP10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - }, - { - "method_name": "densMAP PCA (logCP10k, 1kHVG)", - "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. 
It is performed on the same inputs as UMAP.", - "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", - "paper_reference": "narayan2021assessing", - "paper_year": 2021, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "densmap_pca_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - }, - { - "method_name": "Diffusion maps", - "method_summary": "Diffusion maps uses an affinity matrix to describe the similarity between data points, which is then transformed into a graph Laplacian. The eigenvalue-weighted eigenvectors of the graph Laplacian are then used to create the embedding. Diffusion maps is calculated on the logCPM expression matrix.", - "paper_name": "Diffusion maps", - "paper_reference": "coifman2006diffusion", - "paper_year": 2006, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "diffusion_map", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/diffusion_map.py" - }, - { - "method_name": "NeuralEE (CPU) (Default)", - "method_summary": "NeuralEE is a neural network implementation of elastic embedding. It is a non-linear method that preserves pairwise distances between data points. NeuralEE uses a neural network to optimize an objective function that measures the difference between pairwise distances in the original high-dimensional space and the two-dimensional space. 
It is computed on both the recommended input from the package authors of 500 HVGs selected from a logged expression matrix (without sequencing depth scaling) and the default logCPM matrix with 1000 HVGs.", - "paper_name": "NeuralEE: A GPU-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale scRNA-Seq Data", - "paper_reference": "xiong2020neuralee", - "paper_year": 2020, - "code_url": "https://github.com/HiBearME/NeuralEE", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "neuralee_default", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/neuralee.py" - }, - { - "method_name": "NeuralEE (CPU) (logCP10k, 1kHVG)", - "method_summary": "NeuralEE is a neural network implementation of elastic embedding. It is a non-linear method that preserves pairwise distances between data points. NeuralEE uses a neural network to optimize an objective function that measures the difference between pairwise distances in the original high-dimensional space and the two-dimensional space. 
It is computed on both the recommended input from the package authors of 500 HVGs selected from a logged expression matrix (without sequencing depth scaling) and the default logCPM matrix with 1000 HVGs.", - "paper_name": "NeuralEE: A GPU-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale scRNA-Seq Data", - "paper_reference": "xiong2020neuralee", - "paper_year": 2020, - "code_url": "https://github.com/HiBearME/NeuralEE", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "neuralee_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/neuralee.py" - }, - { - "method_name": "PCA (logCP10k)", - "method_summary": "PCA or \"Principal Component Analysis\" is a linear method that finds orthogonal directions in the data that capture the most variance. The first two principal components are chosen as the two-dimensional embedding. We select only the first two principal components as the two-dimensional embedding. 
PCA is calculated on the logCPM expression matrix with and without selecting 1000 HVGs.", - "paper_name": "On lines and planes of closest fit to systems of points in space", - "paper_reference": "pearson1901pca", - "paper_year": 1901, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "pca_logCP10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/pca.py" - }, - { - "method_name": "PCA (logCP10k, 1kHVG)", - "method_summary": "PCA or \"Principal Component Analysis\" is a linear method that finds orthogonal directions in the data that capture the most variance. The first two principal components are chosen as the two-dimensional embedding. We select only the first two principal components as the two-dimensional embedding. 
PCA is calculated on the logCPM expression matrix with and without selecting 1000 HVGs.", - "paper_name": "On lines and planes of closest fit to systems of points in space", - "paper_reference": "pearson1901pca", - "paper_year": 1901, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "pca_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/pca.py" - }, - { - "method_name": "PHATE (default)", - "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", - "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", - "paper_reference": "moon2019visualizing", - "paper_year": 2019, - "code_url": "https://github.com/KrishnaswamyLab/PHATE/", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "phate_default", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/phate.py" - }, - { - "method_name": "PHATE (logCP10k, 1kHVG)", - "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", - "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", - "paper_reference": "moon2019visualizing", - "paper_year": 2019, - "code_url": "https://github.com/KrishnaswamyLab/PHATE/", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "phate_logCP10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/phate.py" - }, - { - "method_name": "PHATE (logCP10k)", - "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", - "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", - "paper_reference": "moon2019visualizing", - "paper_year": 2019, - "code_url": "https://github.com/KrishnaswamyLab/PHATE/", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "phate_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/phate.py" - }, - { - "method_name": "PHATE (gamma=0)", - "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", - "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", - "paper_reference": "moon2019visualizing", - "paper_year": 2019, - "code_url": "https://github.com/KrishnaswamyLab/PHATE/", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "phate_sqrt", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/phate.py" - }, - { - "method_name": "PyMDE Preserve Distances (logCP10k)", - "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", - "paper_name": "Minimum-Distortion Embedding", - "paper_reference": "agrawal2021mde", - "paper_year": 2021, - "code_url": "https://pymde.org/", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "pymde_distances_log_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/pymde.py" - }, - { - "method_name": "PyMDE Preserve Distances (logCP10k, 1kHVG)", - "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. 
It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", - "paper_name": "Minimum-Distortion Embedding", - "paper_reference": "agrawal2021mde", - "paper_year": 2021, - "code_url": "https://pymde.org/", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "pymde_distances_log_cp10k_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/pymde.py" - }, - { - "method_name": "PyMDE Preserve Neighbors (logCP10k)", - "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", - "paper_name": "Minimum-Distortion Embedding", - "paper_reference": "agrawal2021mde", - "paper_year": 2021, - "code_url": "https://pymde.org/", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "pymde_neighbors_log_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/pymde.py" - }, - { - "method_name": "PyMDE Preserve Neighbors (logCP10k, 1kHVG)", - "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. 
It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", - "paper_name": "Minimum-Distortion Embedding", - "paper_reference": "agrawal2021mde", - "paper_year": 2021, - "code_url": "https://pymde.org/", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "pymde_neighbors_log_cp10k_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/pymde.py" - }, - { - "method_name": "Random Features", - "method_summary": "Randomly generated two-dimensional coordinates from a normal distribution.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_features", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/baseline.py" - }, - { - "method_name": "Spectral Features", - "method_summary": "Use 1000-dimensional diffusions maps as an embedding", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "spectral_features", - 
"implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/baseline.py" - }, - { - "method_name": "True Features", - "method_summary": "Use of the original feature inputs as the 'embedding'.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "true_features", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/baseline.py" - }, - { - "method_name": "t-SNE (logCP10k)", - "method_summary": "t-SNE or t-distributed Stochastic Neighbor Embedding converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data. 
We use the implementation in the scanpy package with the result of PCA on the logCPM expression matrix (with and without HVG selection).", - "paper_name": "Visualizing Data using t-SNE", - "paper_reference": "vandermaaten2008visualizing", - "paper_year": 2008, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "tsne_logCP10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/tsne.py" - }, - { - "method_name": "t-SNE (logCP10k, 1kHVG)", - "method_summary": "t-SNE or t-distributed Stochastic Neighbor Embedding converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data. 
We use the implementation in the scanpy package with the result of PCA on the logCPM expression matrix (with and without HVG selection).", - "paper_name": "Visualizing Data using t-SNE", - "paper_reference": "vandermaaten2008visualizing", - "paper_year": 2008, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "tsne_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/tsne.py" - }, - { - "method_name": "UMAP (logCP10k)", - "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", - "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", - "paper_reference": "mcinnes2018umap", - "paper_year": 2018, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "umap_logCP10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - }, - { - "method_name": "UMAP (logCP10k, 1kHVG)", - "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. 
We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", - "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", - "paper_reference": "mcinnes2018umap", - "paper_year": 2018, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "umap_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - }, - { - "method_name": "UMAP PCA (logCP10k)", - "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", - "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", - "paper_reference": "mcinnes2018umap", - "paper_year": 2018, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "umap_pca_logCP10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - }, - { - "method_name": "UMAP PCA (logCP10k, 1kHVG)", - "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. 
We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", - "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", - "paper_reference": "mcinnes2018umap", - "paper_year": 2018, - "code_url": "https://github.com/lmcinnes/umap", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "umap_pca_logCP10k_1kHVG", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/methods/umap.py" - } + { + "method_name": "densMAP (logCP10k)", + "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. It is performed on the same inputs as UMAP.", + "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", + "paper_reference": "narayan2021assessing", + "paper_year": 2021, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "densmap_logCP10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + }, + { + "method_name": "densMAP (logCP10k, 1kHVG)", + "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. 
It is performed on the same inputs as UMAP.", + "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", + "paper_reference": "narayan2021assessing", + "paper_year": 2021, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "densmap_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + }, + { + "method_name": "densMAP PCA (logCP10k)", + "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. It is performed on the same inputs as UMAP.", + "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", + "paper_reference": "narayan2021assessing", + "paper_year": 2021, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "densmap_pca_logCP10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + }, + { + "method_name": "densMAP PCA (logCP10k, 1kHVG)", + "method_summary": "densMAP is a modification of UMAP that adds an extra cost term in order to preserve information about the relative local density of the data. 
It is performed on the same inputs as UMAP.", + "paper_name": "Assessing single-cell transcriptomic variability through density-preserving data visualization", + "paper_reference": "narayan2021assessing", + "paper_year": 2021, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "densmap_pca_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + }, + { + "method_name": "Diffusion maps", + "method_summary": "Diffusion maps uses an affinity matrix to describe the similarity between data points, which is then transformed into a graph Laplacian. The eigenvalue-weighted eigenvectors of the graph Laplacian are then used to create the embedding. Diffusion maps is calculated on the logCPM expression matrix.", + "paper_name": "Diffusion maps", + "paper_reference": "coifman2006diffusion", + "paper_year": 2006, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "diffusion_map", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/diffusion_map.py" + }, + { + "method_name": "NeuralEE (CPU) (Default)", + "method_summary": "NeuralEE is a neural network implementation of elastic embedding. It is a non-linear method that preserves pairwise distances between data points. 
NeuralEE uses a neural network to optimize an objective function that measures the difference between pairwise distances in the original high-dimensional space and the two-dimensional space. It is computed on both the recommended input from the package authors of 500 HVGs selected from a logged expression matrix (without sequencing depth scaling) and the default logCPM matrix with 1000 HVGs.", + "paper_name": "NeuralEE: A GPU-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale scRNA-Seq Data", + "paper_reference": "xiong2020neuralee", + "paper_year": 2020, + "code_url": "https://github.com/HiBearME/NeuralEE/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "neuralee_default", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/neuralee.py" + }, + { + "method_name": "NeuralEE (CPU) (logCP10k, 1kHVG)", + "method_summary": "NeuralEE is a neural network implementation of elastic embedding. It is a non-linear method that preserves pairwise distances between data points. NeuralEE uses a neural network to optimize an objective function that measures the difference between pairwise distances in the original high-dimensional space and the two-dimensional space. 
It is computed on both the recommended input from the package authors of 500 HVGs selected from a logged expression matrix (without sequencing depth scaling) and the default logCPM matrix with 1000 HVGs.", + "paper_name": "NeuralEE: A GPU-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale scRNA-Seq Data", + "paper_reference": "xiong2020neuralee", + "paper_year": 2020, + "code_url": "https://github.com/HiBearME/NeuralEE/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "neuralee_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/neuralee.py" + }, + { + "method_name": "PCA (logCP10k)", + "method_summary": "PCA or \"Principal Component Analysis\" is a linear method that finds orthogonal directions in the data that capture the most variance. The first two principal components are chosen as the two-dimensional embedding. We select only the first two principal components as the two-dimensional embedding. 
PCA is calculated on the logCPM expression matrix with and without selecting 1000 HVGs.", + "paper_name": "On lines and planes of closest fit to systems of points in space", + "paper_reference": "pearson1901pca", + "paper_year": 1901, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "pca_logCP10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/pca.py" + }, + { + "method_name": "PCA (logCP10k, 1kHVG)", + "method_summary": "PCA or \"Principal Component Analysis\" is a linear method that finds orthogonal directions in the data that capture the most variance. The first two principal components are chosen as the two-dimensional embedding. We select only the first two principal components as the two-dimensional embedding. 
PCA is calculated on the logCPM expression matrix with and without selecting 1000 HVGs.", + "paper_name": "On lines and planes of closest fit to systems of points in space", + "paper_reference": "pearson1901pca", + "paper_year": 1901, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "pca_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/pca.py" + }, + { + "method_name": "PHATE (default)", + "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", + "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", + "paper_reference": "moon2019visualizing", + "paper_year": 2019, + "code_url": "https://github.com/KrishnaswamyLab/PHATE//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "phate_default", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/phate.py" + }, + { + "method_name": "PHATE (logCP10k, 1kHVG)", + "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", + "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", + "paper_reference": "moon2019visualizing", + "paper_year": 2019, + "code_url": "https://github.com/KrishnaswamyLab/PHATE//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "phate_logCP10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/phate.py" + }, + { + "method_name": "PHATE (logCP10k)", + "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", + "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", + "paper_reference": "moon2019visualizing", + "paper_year": 2019, + "code_url": "https://github.com/KrishnaswamyLab/PHATE//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "phate_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/phate.py" + }, + { + "method_name": "PHATE (gamma=0)", + "method_summary": "PHATE or \u201cPotential of Heat - diffusion for Affinity - based Transition Embedding\u201d uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process. It is an affinity - based method that creates an embedding by finding the dominant eigenvalues of a Markov transition matrix. 
We evaluate several variants including using the recommended square - root transformed CPM matrix as input, this input with the gamma parameter set to zero and the normal logCPM transformed matrix with and without HVG selection.", + "paper_name": "Visualizing Structure and Transitions in High-Dimensional Biological Data", + "paper_reference": "moon2019visualizing", + "paper_year": 2019, + "code_url": "https://github.com/KrishnaswamyLab/PHATE//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "phate_sqrt", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/phate.py" + }, + { + "method_name": "PyMDE Preserve Distances (logCP10k)", + "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. 
It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", + "paper_name": "Minimum-Distortion Embedding", + "paper_reference": "agrawal2021mde", + "paper_year": 2021, + "code_url": "https://pymde.org//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "pymde_distances_log_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/pymde.py" + }, + { + "method_name": "PyMDE Preserve Distances (logCP10k, 1kHVG)", + "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", + "paper_name": "Minimum-Distortion Embedding", + "paper_reference": "agrawal2021mde", + "paper_year": 2021, + "code_url": "https://pymde.org//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "pymde_distances_log_cp10k_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/pymde.py" + }, + { + "method_name": "PyMDE Preserve Neighbors (logCP10k)", + "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. 
It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", + "paper_name": "Minimum-Distortion Embedding", + "paper_reference": "agrawal2021mde", + "paper_year": 2021, + "code_url": "https://pymde.org//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "pymde_neighbors_log_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/pymde.py" + }, + { + "method_name": "PyMDE Preserve Neighbors (logCP10k, 1kHVG)", + "method_summary": "PyMDE is a Python implementation of minimum-distortion embedding. It is a non-linear method that preserves distances between cells or neighborhoods in the high-dimensional space. 
It is computed with options to preserve distances between cells or neighbourhoods and with the logCPM matrix with and without HVG selection as input.", + "paper_name": "Minimum-Distortion Embedding", + "paper_reference": "agrawal2021mde", + "paper_year": 2021, + "code_url": "https://pymde.org//tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "pymde_neighbors_log_cp10k_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/pymde.py" + }, + { + "method_name": "Random Features", + "method_summary": "Randomly generated two-dimensional coordinates from a normal distribution.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_features", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/baseline.py" + }, + { + "method_name": "Spectral Features", + "method_summary": "Use 1000-dimensional diffusions maps as an embedding", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + 
"is_baseline": true, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "spectral_features", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/baseline.py" + }, + { + "method_name": "True Features", + "method_summary": "Use of the original feature inputs as the 'embedding'.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "true_features", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/baseline.py" + }, + { + "method_name": "t-SNE (logCP10k)", + "method_summary": "t-SNE or t-distributed Stochastic Neighbor Embedding converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data. 
We use the implementation in the scanpy package with the result of PCA on the logCPM expression matrix (with and without HVG selection).", + "paper_name": "Visualizing Data using t-SNE", + "paper_reference": "vandermaaten2008visualizing", + "paper_year": 2008, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "tsne_logCP10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/tsne.py" + }, + { + "method_name": "t-SNE (logCP10k, 1kHVG)", + "method_summary": "t-SNE or t-distributed Stochastic Neighbor Embedding converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data. 
We use the implementation in the scanpy package with the result of PCA on the logCPM expression matrix (with and without HVG selection).", + "paper_name": "Visualizing Data using t-SNE", + "paper_reference": "vandermaaten2008visualizing", + "paper_year": 2008, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "tsne_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/tsne.py" + }, + { + "method_name": "UMAP (logCP10k)", + "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. 
We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", + "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", + "paper_reference": "mcinnes2018umap", + "paper_year": 2018, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "umap_logCP10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + }, + { + "method_name": "UMAP (logCP10k, 1kHVG)", + "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. 
We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", + "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", + "paper_reference": "mcinnes2018umap", + "paper_year": 2018, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "umap_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + }, + { + "method_name": "UMAP PCA (logCP10k)", + "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. 
We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", + "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", + "paper_reference": "mcinnes2018umap", + "paper_year": 2018, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "umap_pca_logCP10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + }, + { + "method_name": "UMAP PCA (logCP10k, 1kHVG)", + "method_summary": "UMAP or Uniform Manifold Approximation and Projection is an algorithm for dimension reduction based on manifold learning techniques and ideas from topological data analysis. 
We perform UMAP on the logCPM expression matrix before and after HVG selection and with and without PCA as a pre-processing step.", + "paper_name": "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", + "paper_reference": "mcinnes2018umap", + "paper_year": 2018, + "code_url": "https://github.com/lmcinnes/umap/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "umap_pca_logCP10k_1kHVG", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/methods/umap.py" + } ] \ No newline at end of file diff --git a/results/dimensionality_reduction/data/metric_info.json b/results/dimensionality_reduction/data/metric_info.json index ccaa14faf..2298375db 100644 --- a/results/dimensionality_reduction/data/metric_info.json +++ b/results/dimensionality_reduction/data/metric_info.json @@ -1,112 +1,122 @@ [ - { - "metric_name": "continuity", - "metric_summary": "Continuity measures error of hard extrusions based on nearest neighbor coranking", - "paper_reference": "zhang2021pydrmetrics", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", - "metric_id": "continuity", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py" - }, - { - "metric_name": "Density preservation", - "metric_summary": "Similarity between local densities in the high-dimensional data and the reduced data.", - "paper_reference": "narayan2021assessing", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": 
"b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "density_preservation", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/density.py" - }, - { - "metric_name": "Distance correlation", - "metric_summary": "Spearman correlation between all pairwise Euclidean distances in the original and dimension-reduced data", - "paper_reference": "schober2018correlation", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "distance_correlation", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py" - }, - { - "metric_name": "Distance correlation (spectral)", - "metric_summary": "Spearman correlation between all pairwise diffusion distances in the original and dimension-reduced data", - "paper_reference": "coifman2006diffusion", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "distance_correlation_spectral", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py" - }, - { - "metric_name": "local continuity meta criterion", - "metric_summary": "The local continuity meta criterion is the co-KNN size with baseline removal which favors locality", - "paper_reference": "zhang2021pydrmetrics", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", - "metric_id": "lcmc", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py" - }, - { - "metric_name": "global property", - 
"metric_summary": "The global property metric is a summary of the global co-KNN", - "paper_reference": "zhang2021pydrmetrics", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", - "metric_id": "qglobal", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py" - }, - { - "metric_name": "local property", - "metric_summary": "The local property metric is a summary of the local co-KNN", - "paper_reference": "zhang2021pydrmetrics", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", - "metric_id": "qlocal", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py" - }, - { - "metric_name": "co-KNN size", - "metric_summary": "co-KNN size counts how many points are in both k-nearest neighbors before and after the dimensionality reduction", - "paper_reference": "zhang2021pydrmetrics", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", - "metric_id": "qnn", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py" - }, - { - "metric_name": "co-KNN AUC", - "metric_summary": "co-KNN AUC is area under the co-KNN curve", - "paper_reference": "zhang2021pydrmetrics", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", - "metric_id": "qnn_auc", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py" - }, - { - "metric_name": 
"trustworthiness", - "metric_summary": "a measurement of similarity between the rank of each point's nearest neighbors in the high-dimensional data and the reduced data.", - "paper_reference": "venna2001neighborhood", - "maximize": true, - "image": "openproblems", - "task_id": "dimensionality_reduction", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "trustworthiness", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/dimensionality_reduction/metrics/trustworthiness.py" - } + { + "metric_name": "continuity", + "metric_summary": "Continuity measures error of hard extrusions based on nearest neighbor coranking", + "paper_reference": "zhang2021pydrmetrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", + "metric_id": "continuity", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Density preservation", + "metric_summary": "Similarity between local densities in the high-dimensional data and the reduced data.", + "paper_reference": "narayan2021assessing", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "density_preservation", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/density.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Distance correlation", + "metric_summary": "Spearman correlation between all pairwise Euclidean distances in the original and dimension-reduced data", + "paper_reference": 
"schober2018correlation", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "distance_correlation", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Distance correlation (spectral)", + "metric_summary": "Spearman correlation between all pairwise diffusion distances in the original and dimension-reduced data", + "paper_reference": "coifman2006diffusion", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "distance_correlation_spectral", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "local continuity meta criterion", + "metric_summary": "The local continuity meta criterion is the co-KNN size with baseline removal which favors locality", + "paper_reference": "zhang2021pydrmetrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", + "metric_id": "lcmc", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "global property", + "metric_summary": "The global property metric is a summary of the global co-KNN", + "paper_reference": "zhang2021pydrmetrics", + "maximize": true, + 
"image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", + "metric_id": "qglobal", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "local property", + "metric_summary": "The local property metric is a summary of the local co-KNN", + "paper_reference": "zhang2021pydrmetrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", + "metric_id": "qlocal", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "co-KNN size", + "metric_summary": "co-KNN size counts how many points are in both k-nearest neighbors before and after the dimensionality reduction", + "paper_reference": "zhang2021pydrmetrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", + "metric_id": "qnn", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "co-KNN AUC", + "metric_summary": "co-KNN AUC is area under the co-KNN curve", + "paper_reference": "zhang2021pydrmetrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "ff1feaf0b741ec05b10084319a1175dfbf5e6faa", + "metric_id": 
"qnn_auc", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "trustworthiness", + "metric_summary": "a measurement of similarity between the rank of each point's nearest neighbors in the high-dimensional data and the reduced data.", + "paper_reference": "venna2001neighborhood", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "dimensionality_reduction", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "trustworthiness", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/dimensionality_reduction/metrics/trustworthiness.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/dimensionality_reduction/data/task_info.json b/results/dimensionality_reduction/data/task_info.json index 9a9a8de12..802fc7770 100644 --- a/results/dimensionality_reduction/data/task_info.json +++ b/results/dimensionality_reduction/data/task_info.json @@ -1,8 +1,81 @@ { - "task_id": "dimensionality_reduction", - "commit_sha": "0a0e902bd1482e35418f7816fc91e9bc31a33126", - "task_name": "Dimensionality reduction for visualisation", - "task_summary": "Reduction of high-dimensional datasets to 2D for visualization & interpretation", - "task_description": "\nDimensionality reduction is one of the key challenges in single-cell data\nrepresentation. Routine single-cell RNA sequencing (scRNA-seq) experiments measure cells\nin roughly 20,000-30,000 dimensions (i.e., features - mostly gene transcripts but also\nother functional elements encoded in mRNA such as lncRNAs). 
Since its inception,\nscRNA-seq experiments have been growing in terms of the number of cells measured.\nOriginally, cutting-edge SmartSeq experiments would yield a few hundred cells, at best.\nNow, it is not uncommon to see experiments that yield over [100,000\ncells](https://openproblems.bio/bibliography#tabula2018single) or even [> 1 million\ncells.](https://openproblems.bio/bibliography#cao2020human)\n\nEach *feature* in a dataset functions as a single dimension. While each of the ~30,000\ndimensions measured in each cell contribute to an underlying data structure, the overall\nstructure of the data is challenging to display in few dimensions due to data sparsity\nand the [*\"curse of\ndimensionality\"*](https://en.wikipedia.org/wiki/Curse_of_dimensionality) (distances in\nhigh dimensional data don\u2019t distinguish data points well). Thus, we need to find a way\nto [dimensionally reduce](https://en.wikipedia.org/wiki/Dimensionality_reduction) the\ndata for visualization and interpretation.\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "dimensionality_reduction", + "commit_sha": "0a0e902bd1482e35418f7816fc91e9bc31a33126", + "task_name": "Dimensionality reduction for visualisation", + "task_summary": "Reduction of high-dimensional datasets to 2D for visualization & interpretation", + "task_description": "\nDimensionality reduction is one of the key challenges in single-cell data\nrepresentation. Routine single-cell RNA sequencing (scRNA-seq) experiments measure cells\nin roughly 20,000-30,000 dimensions (i.e., features - mostly gene transcripts but also\nother functional elements encoded in mRNA such as lncRNAs). 
Since its inception,\nscRNA-seq experiments have been growing in terms of the number of cells measured.\nOriginally, cutting-edge SmartSeq experiments would yield a few hundred cells, at best.\nNow, it is not uncommon to see experiments that yield over [100,000\ncells](https://openproblems.bio/bibliography#tabula2018single) or even [> 1 million\ncells.](https://openproblems.bio/bibliography#cao2020human)\n\nEach *feature* in a dataset functions as a single dimension. While each of the ~30,000\ndimensions measured in each cell contributes to an underlying data structure, the overall\nstructure of the data is challenging to display in few dimensions due to data sparsity\nand the [*\"curse of\ndimensionality\"*](https://en.wikipedia.org/wiki/Curse_of_dimensionality) (distances in\nhigh-dimensional data don’t distinguish data points well). Thus, we need to find a way\nto [dimensionally reduce](https://en.wikipedia.org/wiki/Dimensionality_reduction) the\ndata for visualization and interpretation.\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/dimensionality_reduction", + "authors": [ + { + "name": "Luke Zappia", + "roles": ["maintainer", "author"], + "info": { + "github": "lazappi", + "orcid": "0000-0001-7744-8565" + } + }, + { + "name": "Michael Vinyard", + "roles": "author", + "info": { + "github": "mvinyard" + } + }, + { + "name": "Michal Klein", + "roles": "author", + "info": { + "github": "michalk8" + } + }, + { + "name": "Scott Gigante", + "roles": "author", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Ben DeMeo", + "roles": "author", + "info": { + "github": "bendemeo" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "author", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + }, + { + "name": "Kai Waldrant", + "roles": "contributor", + "info": { + "github": "KaiWaldrant", + "orcid": "0009-0003-8555-1361" + } + }, + { + "name": "Sai 
Nirmayi Yasa", + "roles": "contributor", + "info": { + "github": "sainirmayi", + "orcid": "0009-0003-6319-9803" + } + }, + { + "name": "Juan A. Cordero Varela", + "roles": "contributor", + "info": { + "github": "jacorvar", + "orcid": "0000-0002-7373-5433" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/dimensionality_reduction/index.qmd b/results/dimensionality_reduction/index.qmd index 9125c1432..0e3da35d4 100644 --- a/results/dimensionality_reduction/index.qmd +++ b/results/dimensionality_reduction/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/label_projection/data/method_info.json b/results/label_projection/data/method_info.json index 22c7ade44..e61c44a20 100644 --- a/results/label_projection/data/method_info.json +++ b/results/label_projection/data/method_info.json @@ -1,272 +1,272 @@ [ - { - "method_name": "K-neighbors classifier (log CP10k)", - "method_summary": "K-neighbors classifier uses the \"k-nearest neighbours\" approach, which is a popular machine learning algorithm for classification and regression tasks. The assumption underlying KNN in this context is that cells with similar gene expression profiles tend to belong to the same cell type. 
For each unlabelled cell, this method computes the $k$ labelled cells (in this case, 5) with the smallest distance in PCA space, and assigns that cell the most common cell type among its $k$ nearest neighbors.", - "paper_name": "Nearest neighbor pattern classification", - "paper_reference": "cover1967nearest", - "paper_year": 1967, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "knn_classifier_log_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/knn_classifier.py" - }, - { - "method_name": "K-neighbors classifier (log scran)", - "method_summary": "K-neighbors classifier uses the \"k-nearest neighbours\" approach, which is a popular machine learning algorithm for classification and regression tasks. The assumption underlying KNN in this context is that cells with similar gene expression profiles tend to belong to the same cell type. 
For each unlabelled cell, this method computes the $k$ labelled cells (in this case, 5) with the smallest distance in PCA space, and assigns that cell the most common cell type among its $k$ nearest neighbors.", - "paper_name": "Nearest neighbor pattern classification", - "paper_reference": "cover1967nearest", - "paper_year": 1967, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html", - "image": "openproblems-r-base", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "knn_classifier_scran", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/knn_classifier.py" - }, - { - "method_name": "Logistic regression (log CP10k)", - "method_summary": "Logistic Regression estimates parameters of a logistic function for multivariate classification tasks. Here, we use 100-dimensional whitened PCA coordinates as independent variables, and the model minimises the cross entropy loss over all cell type classes. ", - "paper_name": "Applied Logistic Regression", - "paper_reference": "hosmer2013applied", - "paper_year": 2013, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "logistic_regression_log_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/logistic_regression.py" - }, - { - "method_name": "Logistic regression (log scran)", - "method_summary": "Logistic Regression estimates parameters of a logistic function for multivariate classification tasks. 
Here, we use 100-dimensional whitened PCA coordinates as independent variables, and the model minimises the cross entropy loss over all cell type classes. ", - "paper_name": "Applied Logistic Regression", - "paper_reference": "hosmer2013applied", - "paper_year": 2013, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html", - "image": "openproblems-r-base", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "logistic_regression_scran", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/logistic_regression.py" - }, - { - "method_name": "Majority Vote", - "method_summary": "Assignment of all predicted labels as the most common label in the training data", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "majority_vote", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/baseline.py" - }, - { - "method_name": "Multilayer perceptron (log CP10k)", - "method_summary": "MLP or \"Multi-Layer Perceptron\" is a type of artificial neural network that consists of multiple layers of interconnected neurons. Each neuron computes a weighted sum of all neurons in the previous layer and transforms it with nonlinear activation function. The output layer provides the final prediction, and network weights are updated by gradient descent to minimize the cross entropy loss. 
Here, the input data is 100-dimensional whitened PCA coordinates for each cell, and we use two hidden layers of 100 neurons each.", - "paper_name": "Connectionist learning procedures", - "paper_reference": "hinton1989connectionist", - "paper_year": 1990, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mlp_log_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/mlp.py" - }, - { - "method_name": "Multilayer perceptron (log scran)", - "method_summary": "MLP or \"Multi-Layer Perceptron\" is a type of artificial neural network that consists of multiple layers of interconnected neurons. Each neuron computes a weighted sum of all neurons in the previous layer and transforms it with nonlinear activation function. The output layer provides the final prediction, and network weights are updated by gradient descent to minimize the cross entropy loss. 
Here, the input data is 100-dimensional whitened PCA coordinates for each cell, and we use two hidden layers of 100 neurons each.", - "paper_name": "Connectionist learning procedures", - "paper_reference": "hinton1989connectionist", - "paper_year": 1990, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html", - "image": "openproblems-r-base", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mlp_scran", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/mlp.py" - }, - { - "method_name": "Random Labels", - "method_summary": "Random assignment of predicted labels proportionate to label abundance in training data", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_labels", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/baseline.py" - }, - { - "method_name": "scANVI (All genes)", - "method_summary": "scANVI or \"single-cell ANnotation using Variational Inference\" is a semi-supervised variant of the scVI(Lopez et al. 2018) algorithm. Like scVI, scANVI uses deep neural networks and stochastic optimization to model uncertainty caused by technical noise and bias in single - cell transcriptomics measurements. However, scANVI also leverages cell type labels in the generative modelling. 
In this approach, scANVI is used to predict the cell type labels of the unlabelled test data.", - "paper_name": "Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models", - "paper_reference": "xu2021probabilistic", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "scanvi_all_genes", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/scvi_tools.py" - }, - { - "method_name": "scANVI (Seurat v3 2000 HVG)", - "method_summary": "scANVI or \"single-cell ANnotation using Variational Inference\" is a semi-supervised variant of the scVI(Lopez et al. 2018) algorithm. Like scVI, scANVI uses deep neural networks and stochastic optimization to model uncertainty caused by technical noise and bias in single - cell transcriptomics measurements. However, scANVI also leverages cell type labels in the generative modelling. 
In this approach, scANVI is used to predict the cell type labels of the unlabelled test data.", - "paper_name": "Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models", - "paper_reference": "xu2021probabilistic", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "scanvi_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/scvi_tools.py" - }, - { - "method_name": "scArches+scANVI (All genes)", - "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", - "paper_name": "Query to reference single-cell integration with transfer learning", - "paper_reference": "lotfollahi2020query", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "scarches_scanvi_all_genes", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/scvi_tools.py" - }, - { - "method_name": "scArches+scANVI (Seurat v3 2000 HVG)", - "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. 
It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", - "paper_name": "Query to reference single-cell integration with transfer learning", - "paper_reference": "lotfollahi2020query", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "scarches_scanvi_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/scvi_tools.py" - }, - { - "method_name": "scArches+scANVI+xgboost (All genes)", - "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. 
It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", - "paper_name": "Query to reference single-cell integration with transfer learning", - "paper_reference": "lotfollahi2020query", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "scarches_scanvi_xgb_all_genes", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/scvi_tools.py" - }, - { - "method_name": "scArches+scANVI+xgboost (Seurat v3 2000 HVG)", - "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", - "paper_name": "Query to reference single-cell integration with transfer learning", - "paper_reference": "lotfollahi2020query", - "paper_year": 2021, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "scarches_scanvi_xgb_hvg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/scvi_tools.py" - }, - { - "method_name": "Seurat reference mapping (SCTransform)", - "method_summary": "Seurat reference mapping is a cell type label transfer method provided by the Seurat package. 
Gene expression counts are first normalised by SCTransform before computing PCA. Then it finds mutual nearest neighbours, known as transfer anchors, between the labelled and unlabelled part of the data in PCA space, and computes each cell\u2019s distance to each of the anchor pairs. Finally, it uses the labelled anchors to predict cell types for unlabelled cells based on these distances.", - "paper_name": "Integrated analysis of multimodal single-cell data", - "paper_reference": "hao2021integrated", - "paper_year": 2021, - "code_url": "https://github.com/satijalab/seurat", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "seurat", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/seurat.py" - }, - { - "method_name": "True Labels", - "method_summary": "Perfect assignment of the predicted labels from the test labels", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "true_labels", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/baseline.py" - }, - { - "method_name": "XGBoost (log CP10k)", - "method_summary": "XGBoost is a gradient boosting decision tree model that learns multiple tree structures in the form of a series of input features and their values, leading to a prediction decision, and averages predictions from all its trees. 
Here, input features are normalised gene expression values.", - "paper_name": "XGBoost: A Scalable Tree Boosting System", - "paper_reference": "chen2016xgboost", - "paper_year": 2016, - "code_url": "https://xgboost.readthedocs.io/en/stable/index.html", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "xgboost_log_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/xgboost.py" - }, - { - "method_name": "XGBoost (log scran)", - "method_summary": "XGBoost is a gradient boosting decision tree model that learns multiple tree structures in the form of a series of input features and their values, leading to a prediction decision, and averages predictions from all its trees. Here, input features are normalised gene expression values.", - "paper_name": "XGBoost: A Scalable Tree Boosting System", - "paper_reference": "chen2016xgboost", - "paper_year": 2016, - "code_url": "https://xgboost.readthedocs.io/en/stable/index.html", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "label_projection", - "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", - "method_id": "xgboost_scran", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/methods/xgboost.py" - } + { + "method_name": "K-neighbors classifier (log CP10k)", + "method_summary": "K-neighbors classifier uses the \"k-nearest neighbours\" approach, which is a popular machine learning algorithm for classification and regression tasks. The assumption underlying KNN in this context is that cells with similar gene expression profiles tend to belong to the same cell type. 
For each unlabelled cell, this method computes the $k$ labelled cells (in this case, 5) with the smallest distance in PCA space, and assigns that cell the most common cell type among its $k$ nearest neighbors.", + "paper_name": "Nearest neighbor pattern classification", + "paper_reference": "cover1967nearest", + "paper_year": 1967, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "knn_classifier_log_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/knn_classifier.py" + }, + { + "method_name": "K-neighbors classifier (log scran)", + "method_summary": "K-neighbors classifier uses the \"k-nearest neighbours\" approach, which is a popular machine learning algorithm for classification and regression tasks. The assumption underlying KNN in this context is that cells with similar gene expression profiles tend to belong to the same cell type. 
For each unlabelled cell, this method computes the $k$ labelled cells (in this case, 5) with the smallest distance in PCA space, and assigns that cell the most common cell type among its $k$ nearest neighbors.", + "paper_name": "Nearest neighbor pattern classification", + "paper_reference": "cover1967nearest", + "paper_year": 1967, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-base", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "knn_classifier_scran", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/knn_classifier.py" + }, + { + "method_name": "Logistic regression (log CP10k)", + "method_summary": "Logistic Regression estimates parameters of a logistic function for multivariate classification tasks. Here, we use 100-dimensional whitened PCA coordinates as independent variables, and the model minimises the cross entropy loss over all cell type classes. 
", + "paper_name": "Applied Logistic Regression", + "paper_reference": "hosmer2013applied", + "paper_year": 2013, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "logistic_regression_log_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/logistic_regression.py" + }, + { + "method_name": "Logistic regression (log scran)", + "method_summary": "Logistic Regression estimates parameters of a logistic function for multivariate classification tasks. Here, we use 100-dimensional whitened PCA coordinates as independent variables, and the model minimises the cross entropy loss over all cell type classes. 
", + "paper_name": "Applied Logistic Regression", + "paper_reference": "hosmer2013applied", + "paper_year": 2013, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-base", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "logistic_regression_scran", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/logistic_regression.py" + }, + { + "method_name": "Majority Vote", + "method_summary": "Assignment of all predicted labels as the most common label in the training data", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "majority_vote", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/baseline.py" + }, + { + "method_name": "Multilayer perceptron (log CP10k)", + "method_summary": "MLP or \"Multi-Layer Perceptron\" is a type of artificial neural network that consists of multiple layers of interconnected neurons. Each neuron computes a weighted sum of all neurons in the previous layer and transforms it with nonlinear activation function. The output layer provides the final prediction, and network weights are updated by gradient descent to minimize the cross entropy loss. 
Here, the input data is 100-dimensional whitened PCA coordinates for each cell, and we use two hidden layers of 100 neurons each.", + "paper_name": "Connectionist learning procedures", + "paper_reference": "hinton1989connectionist", + "paper_year": 1990, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mlp_log_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/mlp.py" + }, + { + "method_name": "Multilayer perceptron (log scran)", + "method_summary": "MLP or \"Multi-Layer Perceptron\" is a type of artificial neural network that consists of multiple layers of interconnected neurons. Each neuron computes a weighted sum of all neurons in the previous layer and transforms it with nonlinear activation function. The output layer provides the final prediction, and network weights are updated by gradient descent to minimize the cross entropy loss. 
Here, the input data is 100-dimensional whitened PCA coordinates for each cell, and we use two hidden layers of 100 neurons each.", + "paper_name": "Connectionist learning procedures", + "paper_reference": "hinton1989connectionist", + "paper_year": 1990, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-base", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mlp_scran", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/mlp.py" + }, + { + "method_name": "Random Labels", + "method_summary": "Random assignment of predicted labels proportionate to label abundance in training data", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_labels", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/baseline.py" + }, + { + "method_name": "scANVI (All genes)", + "method_summary": "scANVI or \"single-cell ANnotation using Variational Inference\" is a semi-supervised variant of the scVI(Lopez et al. 2018) algorithm. Like scVI, scANVI uses deep neural networks and stochastic optimization to model uncertainty caused by technical noise and bias in single - cell transcriptomics measurements. 
However, scANVI also leverages cell type labels in the generative modelling. In this approach, scANVI is used to predict the cell type labels of the unlabelled test data.", + "paper_name": "Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models", + "paper_reference": "xu2021probabilistic", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "scanvi_all_genes", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/scvi_tools.py" + }, + { + "method_name": "scANVI (Seurat v3 2000 HVG)", + "method_summary": "scANVI or \"single-cell ANnotation using Variational Inference\" is a semi-supervised variant of the scVI(Lopez et al. 2018) algorithm. Like scVI, scANVI uses deep neural networks and stochastic optimization to model uncertainty caused by technical noise and bias in single - cell transcriptomics measurements. However, scANVI also leverages cell type labels in the generative modelling. 
In this approach, scANVI is used to predict the cell type labels of the unlabelled test data.", + "paper_name": "Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models", + "paper_reference": "xu2021probabilistic", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "scanvi_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/scvi_tools.py" + }, + { + "method_name": "scArches+scANVI (All genes)", + "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. 
It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", + "paper_name": "Query to reference single-cell integration with transfer learning", + "paper_reference": "lotfollahi2020query", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "scarches_scanvi_all_genes", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/scvi_tools.py" + }, + { + "method_name": "scArches+scANVI (Seurat v3 2000 HVG)", + "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. 
It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", + "paper_name": "Query to reference single-cell integration with transfer learning", + "paper_reference": "lotfollahi2020query", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "scarches_scanvi_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/scvi_tools.py" + }, + { + "method_name": "scArches+scANVI+xgboost (All genes)", + "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. 
It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", + "paper_name": "Query to reference single-cell integration with transfer learning", + "paper_reference": "lotfollahi2020query", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "scarches_scanvi_xgb_all_genes", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/scvi_tools.py" + }, + { + "method_name": "scArches+scANVI+xgboost (Seurat v3 2000 HVG)", + "method_summary": "scArches+scANVI or \"Single-cell architecture surgery\" is a deep learning method for mapping new datasets onto a pre-existing reference model, using transfer learning and parameter optimization. 
It first uses scANVI to build a reference model from the training data, and then apply scArches to map the test data onto the reference model and make predictions.", + "paper_name": "Query to reference single-cell integration with transfer learning", + "paper_reference": "lotfollahi2020query", + "paper_year": 2021, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "scarches_scanvi_xgb_hvg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/scvi_tools.py" + }, + { + "method_name": "Seurat reference mapping (SCTransform)", + "method_summary": "Seurat reference mapping is a cell type label transfer method provided by the Seurat package. Gene expression counts are first normalised by SCTransform before computing PCA. Then it finds mutual nearest neighbours, known as transfer anchors, between the labelled and unlabelled part of the data in PCA space, and computes each cell\u2019s distance to each of the anchor pairs. 
Finally, it uses the labelled anchors to predict cell types for unlabelled cells based on these distances.", + "paper_name": "Integrated analysis of multimodal single-cell data", + "paper_reference": "hao2021integrated", + "paper_year": 2021, + "code_url": "https://github.com/satijalab/seurat/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "seurat", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/seurat.py" + }, + { + "method_name": "True Labels", + "method_summary": "Perfect assignment of the predicted labels from the test labels", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "true_labels", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/baseline.py" + }, + { + "method_name": "XGBoost (log CP10k)", + "method_summary": "XGBoost is a gradient boosting decision tree model that learns multiple tree structures in the form of a series of input features and their values, leading to a prediction decision, and averages predictions from all its trees. 
Here, input features are normalised gene expression values.", + "paper_name": "XGBoost: A Scalable Tree Boosting System", + "paper_reference": "chen2016xgboost", + "paper_year": 2016, + "code_url": "https://xgboost.readthedocs.io/en/stable/index.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "xgboost_log_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/xgboost.py" + }, + { + "method_name": "XGBoost (log scran)", + "method_summary": "XGBoost is a gradient boosting decision tree model that learns multiple tree structures in the form of a series of input features and their values, leading to a prediction decision, and averages predictions from all its trees. 
Here, input features are normalised gene expression values.", + "paper_name": "XGBoost: A Scalable Tree Boosting System", + "paper_reference": "chen2016xgboost", + "paper_year": 2016, + "code_url": "https://xgboost.readthedocs.io/en/stable/index.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "label_projection", + "commit_sha": "cef4e5cac0b51d454d45e22e354988e77540c40d", + "method_id": "xgboost_scran", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/methods/xgboost.py" + } ] \ No newline at end of file diff --git a/results/label_projection/data/metric_info.json b/results/label_projection/data/metric_info.json index f76330411..07b08336e 100644 --- a/results/label_projection/data/metric_info.json +++ b/results/label_projection/data/metric_info.json @@ -1,35 +1,38 @@ [ - { - "metric_name": "Accuracy", - "metric_summary": "Average number of correctly applied labels.", - "paper_reference": "grandini2020metrics", - "maximize": true, - "image": "openproblems", - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "accuracy", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/metrics/accuracy.py" - }, - { - "metric_name": "F1 score", - "metric_summary": "The [F1 score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) is a weighted average of the precision and recall over all class labels, where an F1 score reaches its best value at 1 and worst score at 0, where each class contributes to the score relative to its frequency in the dataset.", - "paper_reference": "grandini2020metrics", - "maximize": true, - "image": "openproblems", - "task_id": "label_projection", - "commit_sha": 
"b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "f1", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/metrics/f1.py" - }, - { - "metric_name": "Macro F1 score", - "metric_summary": "The macro F1 score is an unweighted F1 score, where each class contributes equally, regardless of its frequency.", - "paper_reference": "grandini2020metrics", - "maximize": true, - "image": "openproblems", - "task_id": "label_projection", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "f1_macro", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/label_projection/metrics/f1.py" - } + { + "metric_name": "Accuracy", + "metric_summary": "Average number of correctly applied labels.", + "paper_reference": "grandini2020metrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "accuracy", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/metrics/accuracy.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "F1 score", + "metric_summary": "The [F1 score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) is a weighted average of the precision and recall over all class labels, where an F1 score reaches its best value at 1 and worst score at 0, where each class contributes to the score relative to its frequency in the dataset.", + "paper_reference": "grandini2020metrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "f1", + "implementation_url": 
"https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/metrics/f1.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Macro F1 score", + "metric_summary": "The macro F1 score is an unweighted F1 score, where each class contributes equally, regardless of its frequency.", + "paper_reference": "grandini2020metrics", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "label_projection", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "f1_macro", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/label_projection/metrics/f1.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/label_projection/data/task_info.json b/results/label_projection/data/task_info.json index 7e8e534a9..cd2fd192e 100644 --- a/results/label_projection/data/task_info.json +++ b/results/label_projection/data/task_info.json @@ -1,8 +1,35 @@ { - "task_id": "label_projection", - "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", - "task_name": "Label Projection", - "task_summary": "Automated cell type annotation from rich, labeled reference data", - "task_description": "\nA major challenge for integrating single cell datasets is creating matching cell type\nannotations for each cell. One of the most common strategies for annotating cell types\nis referred to as\n[\"cluster-then-annotate\"](https://openproblems.bio/bibliography#kiselev2019challenges) whereby\ncells are aggregated into clusters based on feature similarity and then manually\ncharacterized based on differential gene expression or previously identified marker\ngenes. Recently, methods have emerged to build on this strategy and annotate cells\nusing [known marker genes](https://openproblems.bio/bibliography#pliner2019supervised). 
However,\nthese strategies pose a difficulty for integrating atlas-scale datasets as the\nparticular annotations may not match.\n\nTo ensure that the cell type labels in newly generated datasets match existing reference\ndatasets, some methods align cells to a previously annotated [reference\ndataset](https://openproblems.bio/bibliography#hou2019scmatch) and then\n_project_ labels from the reference to the new dataset.\n\nHere, we compare methods for annotation based on a reference dataset. The datasets\nconsist of two or more samples of single cell profiles that have been manually annotated\nwith matching labels. These datasets are then split into training and test batches, and\nthe task of each method is to train a cell type classifer on the training set and\nproject those labels onto the test set.\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "label_projection", + "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", + "task_name": "Label Projection", + "task_summary": "Automated cell type annotation from rich, labeled reference data", + "task_description": "\nA major challenge for integrating single cell datasets is creating matching cell type\nannotations for each cell. One of the most common strategies for annotating cell types\nis referred to as\n[\"cluster-then-annotate\"](https://openproblems.bio/bibliography#kiselev2019challenges) whereby\ncells are aggregated into clusters based on feature similarity and then manually\ncharacterized based on differential gene expression or previously identified marker\ngenes. Recently, methods have emerged to build on this strategy and annotate cells\nusing [known marker genes](https://openproblems.bio/bibliography#pliner2019supervised). 
However,\nthese strategies pose a difficulty for integrating atlas-scale datasets as the\nparticular annotations may not match.\n\nTo ensure that the cell type labels in newly generated datasets match existing reference\ndatasets, some methods align cells to a previously annotated [reference\ndataset](https://openproblems.bio/bibliography#hou2019scmatch) and then\n_project_ labels from the reference to the new dataset.\n\nHere, we compare methods for annotation based on a reference dataset. The datasets\nconsist of two or more samples of single cell profiles that have been manually annotated\nwith matching labels. These datasets are then split into training and test batches, and\nthe task of each method is to train a cell type classifer on the training set and\nproject those labels onto the test set.\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/label_projection", + "authors": [ + { + "name": "Nikolay Markov", + "roles": ["author", "maintainer"], + "info": { + "github": "mxposed" + } + }, + { + "name": "Scott Gigante", + "roles": "author", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Robrecht Cannoodt", + "roles": "author", + "info": { + "github": "rcannood", + "orcid": "0000-0003-3641-729X" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/label_projection/index.qmd b/results/label_projection/index.qmd index 01ff7bdfe..622476929 100644 --- a/results/label_projection/index.qmd +++ b/results/label_projection/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/matching_modalities/data/method_info.json b/results/matching_modalities/data/method_info.json index a72e21ec1..9dfb0c8e1 100644 --- a/results/matching_modalities/data/method_info.json +++ 
b/results/matching_modalities/data/method_info.json @@ -1,107 +1,107 @@ [ - { - "method_name": "Harmonic Alignment (log scran)", - "method_summary": "Harmonic alignment embeds cellular data from each modality into a common space by computing a mapping between the 100-dimensional diffusion maps of each modality. This mapping is computed by computing an isometric transformation of the eigenmaps, and concatenating the resulting diffusion maps together into a joint 200-dimensional space. This joint diffusion map space is used as output for the task.", - "paper_name": "Harmonic Alignment", - "paper_reference": "stanley2020harmonic", - "paper_year": 2020, - "code_url": "https://github.com/KrishnaswamyLab/harmonic-alignment", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmonic_alignment_log_scran_pooling", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/methods/harmonic_alignment.py" - }, - { - "method_name": "Harmonic Alignment (sqrt CP10k)", - "method_summary": "Harmonic alignment embeds cellular data from each modality into a common space by computing a mapping between the 100-dimensional diffusion maps of each modality. This mapping is computed by computing an isometric transformation of the eigenmaps, and concatenating the resulting diffusion maps together into a joint 200-dimensional space. 
This joint diffusion map space is used as output for the task.", - "paper_name": "Harmonic Alignment", - "paper_reference": "stanley2020harmonic", - "paper_year": 2020, - "code_url": "https://github.com/KrishnaswamyLab/harmonic-alignment", - "image": "openproblems-python-extras", - "is_baseline": false, - "code_version": null, - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "harmonic_alignment_sqrt_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/methods/harmonic_alignment.py" - }, - { - "method_name": "Mutual Nearest Neighbors (log CP10k)", - "method_summary": "Mutual nearest neighbors (MNN) embeds cellular data from each modality into a common space by computing a mapping between modality-specific 100-dimensional SVD embeddings. The embeddings are integrated using the FastMNN version of the MNN algorithm, which generates an embedding of the second modality mapped to the SVD space of the first. This corrected joint SVD space is used as output for the task.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/LTLA/batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_log_cp10k", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/methods/mnn.py" - }, - { - "method_name": "Mutual Nearest Neighbors (log scran)", - "method_summary": "Mutual nearest neighbors (MNN) embeds cellular data from each modality into a common space by computing a mapping between modality-specific 100-dimensional SVD embeddings. 
The embeddings are integrated using the FastMNN version of the MNN algorithm, which generates an embedding of the second modality mapped to the SVD space of the first. This corrected joint SVD space is used as output for the task.", - "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", - "paper_reference": "haghverdi2018batch", - "paper_year": 2018, - "code_url": "https://github.com/LTLA/batchelor", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "mnn_log_scran_pooling", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/methods/mnn.py" - }, - { - "method_name": "Procrustes superimposition", - "method_summary": "Procrustes superimposition embeds cellular data from each modality into a common space by aligning the 100-dimensional SVD embeddings to one another by using an isomorphic transformation that minimizes the root mean squared distance between points. 
The unmodified SVD embedding and the transformed second modality are used as output for the task.", - "paper_name": "Generalized Procrustes analysis", - "paper_reference": "gower1975generalized", - "paper_year": 1975, - "code_url": "https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.procrustes.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "procrustes", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/methods/procrustes.py" - }, - { - "method_name": "Random Features", - "method_summary": "20-dimensional SVD is computed on the first modality, and is then randomly permuted twice, once for use as the output for each modality, producing random features with no correlation between modalities.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_features", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/methods/baseline.py" - }, - { - "method_name": "True Features", - "method_summary": "20-dimensional SVD is computed on the first modality, and this same embedding is used as output for both modalities, producing perfectly aligned features from each modality.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "matching_modalities", 
- "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "true_features", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/methods/baseline.py" - } + { + "method_name": "Harmonic Alignment (log scran)", + "method_summary": "Harmonic alignment embeds cellular data from each modality into a common space by computing a mapping between the 100-dimensional diffusion maps of each modality. This mapping is computed by computing an isometric transformation of the eigenmaps, and concatenating the resulting diffusion maps together into a joint 200-dimensional space. This joint diffusion map space is used as output for the task.", + "paper_name": "Harmonic Alignment", + "paper_reference": "stanley2020harmonic", + "paper_year": 2020, + "code_url": "https://github.com/KrishnaswamyLab/harmonic-alignment/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmonic_alignment_log_scran_pooling", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/methods/harmonic_alignment.py" + }, + { + "method_name": "Harmonic Alignment (sqrt CP10k)", + "method_summary": "Harmonic alignment embeds cellular data from each modality into a common space by computing a mapping between the 100-dimensional diffusion maps of each modality. This mapping is computed by computing an isometric transformation of the eigenmaps, and concatenating the resulting diffusion maps together into a joint 200-dimensional space. 
This joint diffusion map space is used as output for the task.", + "paper_name": "Harmonic Alignment", + "paper_reference": "stanley2020harmonic", + "paper_year": 2020, + "code_url": "https://github.com/KrishnaswamyLab/harmonic-alignment/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "harmonic_alignment_sqrt_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/methods/harmonic_alignment.py" + }, + { + "method_name": "Mutual Nearest Neighbors (log CP10k)", + "method_summary": "Mutual nearest neighbors (MNN) embeds cellular data from each modality into a common space by computing a mapping between modality-specific 100-dimensional SVD embeddings. The embeddings are integrated using the FastMNN version of the MNN algorithm, which generates an embedding of the second modality mapped to the SVD space of the first. 
This corrected joint SVD space is used as output for the task.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/LTLA/batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_log_cp10k", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/methods/mnn.py" + }, + { + "method_name": "Mutual Nearest Neighbors (log scran)", + "method_summary": "Mutual nearest neighbors (MNN) embeds cellular data from each modality into a common space by computing a mapping between modality-specific 100-dimensional SVD embeddings. The embeddings are integrated using the FastMNN version of the MNN algorithm, which generates an embedding of the second modality mapped to the SVD space of the first. 
This corrected joint SVD space is used as output for the task.", + "paper_name": "Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors", + "paper_reference": "haghverdi2018batch", + "paper_year": 2018, + "code_url": "https://github.com/LTLA/batchelor/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "mnn_log_scran_pooling", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/methods/mnn.py" + }, + { + "method_name": "Procrustes superimposition", + "method_summary": "Procrustes superimposition embeds cellular data from each modality into a common space by aligning the 100-dimensional SVD embeddings to one another by using an isomorphic transformation that minimizes the root mean squared distance between points. 
The unmodified SVD embedding and the transformed second modality are used as output for the task.", + "paper_name": "Generalized Procrustes analysis", + "paper_reference": "gower1975generalized", + "paper_year": 1975, + "code_url": "https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.procrustes.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "procrustes", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/methods/procrustes.py" + }, + { + "method_name": "Random Features", + "method_summary": "20-dimensional SVD is computed on the first modality, and is then randomly permuted twice, once for use as the output for each modality, producing random features with no correlation between modalities.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_features", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/methods/baseline.py" + }, + { + "method_name": "True Features", + "method_summary": "20-dimensional SVD is computed on the first modality, and this same embedding is used as output for both modalities, producing perfectly aligned features from each modality.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": 
"openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "true_features", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/methods/baseline.py" + } ] \ No newline at end of file diff --git a/results/matching_modalities/data/metric_info.json b/results/matching_modalities/data/metric_info.json index d85fad520..aecb73c76 100644 --- a/results/matching_modalities/data/metric_info.json +++ b/results/matching_modalities/data/metric_info.json @@ -1,24 +1,26 @@ [ - { - "metric_name": "kNN Area Under the Curve", - "metric_summary": "Let $f(i) \u2208 F$ be the scRNA-seq measurement of cell $i$, and $g(i) \u2208 G$ be the scATAC- seq measurement of cell $i$. kNN-AUC calculates the average percentage overlap of neighborhoods of $f(i)$ in $F$ with neighborhoods of $g(i)$ in $G$. Higher is better.", - "paper_reference": "stanley2020harmonic", - "maximize": true, - "image": "openproblems", - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "knn_auc", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/metrics/knn_auc.py" - }, - { - "metric_name": "Mean squared error", - "metric_summary": "Mean squared error (MSE) is the average distance between each pair of matched observations of the same cell in the learned latent space. 
Lower is better.", - "paper_reference": "lance2022multimodal", - "maximize": false, - "image": "openproblems", - "task_id": "matching_modalities", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "mse", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/matching_modalities/metrics/mse.py" - } + { + "metric_name": "kNN Area Under the Curve", + "metric_summary": "Let $f(i) \u2208 F$ be the scRNA-seq measurement of cell $i$, and $g(i) \u2208 G$ be the scATAC- seq measurement of cell $i$. kNN-AUC calculates the average percentage overlap of neighborhoods of $f(i)$ in $F$ with neighborhoods of $g(i)$ in $G$. Higher is better.", + "paper_reference": "stanley2020harmonic", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "knn_auc", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/metrics/knn_auc.py", + "code_version": "v1.0.0" + }, + { + "metric_name": "Mean squared error", + "metric_summary": "Mean squared error (MSE) is the average distance between each pair of matched observations of the same cell in the learned latent space. 
Lower is better.", + "paper_reference": "lance2022multimodal", + "maximize": false, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "matching_modalities", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "mse", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/matching_modalities/metrics/mse.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/matching_modalities/index.qmd b/results/matching_modalities/index.qmd index 3cc890971..f5d8aa982 100644 --- a/results/matching_modalities/index.qmd +++ b/results/matching_modalities/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/perturbation_prediction/data/method_info.json b/results/perturbation_prediction/data/method_info.json index 8ef7afd81..4ebe2a4cd 100644 --- a/results/perturbation_prediction/data/method_info.json +++ b/results/perturbation_prediction/data/method_info.json @@ -6,10 +6,13 @@ "method_summary": "Returns the ground truth predictions.", "method_description": "The identity function that returns the ground-truth information as the output.\n", "is_baseline": true, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/ground_truth/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/openproblems-bio/task_perturbation_prediction", + "documentation_url": null, + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/control_methods/ground_truth:build_main", + "implementation_url": 
"https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/ground_truth", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -19,10 +22,13 @@ "method_summary": "Baseline method that returns mean of gene's outcomes", "method_description": "Baseline method that predicts for a gene the mean of its outcomes of all samples.\n", "is_baseline": true, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/mean_outcome/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/openproblems-bio/task_perturbation_prediction", + "documentation_url": null, + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/control_methods/mean_outcome:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/mean_outcome", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -32,10 +38,13 @@ "method_summary": "Baseline method that returns mean of cell type's outcomes", "method_description": "Baseline method that predicts for a cell type the mean of its outcomes of all compounds.\n", "is_baseline": true, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/mean_across_celltypes/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/openproblems-bio/task_perturbation_prediction", + "documentation_url": null, + "image": 
"https://ghcr.io/openproblems-bio/task_perturbation_prediction/control_methods/mean_across_celltypes:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/mean_across_celltypes", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -45,10 +54,13 @@ "method_summary": "Baseline method that returns mean of compound's outcomes", "method_description": "Baseline method that predicts for a compound the mean of its outcomes of all samples.\n", "is_baseline": true, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/mean_across_compounds/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/openproblems-bio/task_perturbation_prediction", + "documentation_url": null, + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/control_methods/mean_across_compounds:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/mean_across_compounds", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -58,10 +70,13 @@ "method_summary": "Sample predictions from the training data", "method_description": "This method samples the training data to generate predictions.\n", "is_baseline": true, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/sample/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": 
"https://github.com/openproblems-bio/task_perturbation_prediction", + "documentation_url": null, + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/control_methods/sample:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/sample", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -71,10 +86,13 @@ "method_summary": "Baseline method that predicts all zeros", "method_description": "Baseline method that predicts all zeros.\n", "is_baseline": true, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/zeros/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/openproblems-bio/task_perturbation_prediction", + "documentation_url": null, + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/control_methods/zeros:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/control_methods/zeros", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -84,10 +102,13 @@ "method_summary": "An ensemble of LSTM, GRU, and 1D CNN models", "method_description": "An ensemble of LSTM, GRU, and 1D CNN models with a variety of input features derived from ChemBERTa embeddings,\none-hot encoding of cell type/small molecule pairs, and various statistical measures of target gene expression.\nThe models were trained with a combination of MSE, MAE, LogCosh, and BCE loss functions to improve their\nrobustness and predictive performance. 
The approach also included data augmentation techniques to ensure\ngeneralization and account for noise in the data.\n", "is_baseline": false, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/lgc_ensemble/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/Jean-KOUAGOU/1st-place-solution-single-cell-pbs/tree/main", + "documentation_url": "https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/459258", + "image": "https://github.com/orgs/openproblems-bio/packages?repo_name=task_perturbation_prediction&q=methods/lgc_ensemble", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/lgc_ensemble", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -97,10 +118,13 @@ "method_summary": "Neural networks with pseudolabeling and ensemble modelling", "method_description": "The prediction system is two staged, so I publish two versions of the notebook.\nThe first stage predicts pseudolabels. To be honest, if I stopped on this version, I would not be the third.\nThe predicted pseudolabels on all test data (255 rows) are added to training in the second stage.\n\n**Stage 1 preparing pseudolabels**: The main part of this system is a neural network. Every neural network and its environment was optimized by optuna. Hyperparameters that have been optimized:\na dropout value, a number of neurons in particular layers, an output dimension of an embedding layer, a number of epochs, a learning rate, a batch size, a number of dimension of truncated singular value decomposition.\nThe optimization was done on custom 4-folds cross validation. 
In order to avoid overfitting to cross validation by optuna I applied 2 repeats for every fold and took an average. Generally, the more, the better. The optuna's criterion was MRRMSE.\nFinally, 7 models were ensembled. Optuna was applied again to determine best weights of linear combination. The prediction of test set is the pseudolabels now and will be used in second stage.\n\n**Stage 2 retraining with pseudolabels**: The pseudolabels (255 rows) were added to the training dataset. I applied 20 models with optimized parameters in different experiments for a model diversity.\nOptuna selected optimal weights for the linear combination of the prediction again.\nModels had high variance, so every model was trained 10 times on all dataset and the median of prediction is taken as a final prediction. The prediction was additionally clipped to colwise min and max. \n", "is_baseline": false, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/nn_retraining_with_pseudolabels/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/okon2000/single_cell_perturbations", + "documentation_url": "https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458750", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/methods/nn_retraining_with_pseudolabels:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/nn_retraining_with_pseudolabels", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -110,10 +134,13 @@ "method_summary": "Deep learning architecture composed of 2 modules: a sample-centric MLP and a gene-centric MLP", "method_description": "We first encode each 
sample using leave-one-out encoder based on compound and cell type. This produces X with the dimension of n_samples, n_genes, n_encode,\nwhere n_encode is 2. Then, X is passed to a MLP1 sample-wise with input of n_samples, n_genes*n_encode, which outputs the same dimension data.\nThe purpose of this MLP is to learn inter-gene relationships. Then, we group the output of MLP1 with X (original encoded data) and feed it\nto MLP2 which receives n_smaples*n_genes, (n_encode + n_encode) and results n_samples*n_genes. This MLP2 trains on each (compound, cell_type, gene)\ncombination. This is to overcome the underdetermination problem due to lack of sufficient (compound, cell_type) samples.\n", "is_baseline": false, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/jn_ap_op2/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/AntoinePassemiers/Open-Challenges-Single-Cell-Perturbations", + "documentation_url": "https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/461159", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/methods/jn_ap_op2:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/jn_ap_op2", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -123,10 +150,13 @@ "method_summary": "Neural network model for drug effect prediction", "method_description": "ScAPE is utilises a neural network (NN) model to estimate drug effects on gene expression in\nperipheral blood mononuclear cells (PBMCs). 
The model took drug and cell features as input,\nwith these features primarily derived from the median of signed log-pvalues and log fold-changes\ngrouped by drug and cell type. The NN was trained using a leave-one-drug-out cross-validation\nstrategy, focusing on NK cells as a representative cell type due to their similarity to B cells\nand Myeloid cells in principal component analysis. Model performance was evaluated by comparing\nits predictions against two baselines: predicting zero effect and predicting the median\nlog-pvalue for each drug. The final submission combined predictions from models trained on\ndifferent gene and drug subsets, aiming to enhance overall prediction accuracy.\n", "is_baseline": false, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/scape/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/scapeML/scape", + "documentation_url": "https://docs.google.com/document/d/1w0GIJ8VoQx3HEJNmLXoU-Y_STB-h5-bXusL80_6EVuU/edit", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/methods/scape:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/scape", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -136,10 +166,13 @@ "method_summary": "An ensemble of four transformer models, trained on diverse feature sets, with a cluster-based sampling strategy and robust validation for optimal performance.", "method_description": "This method employs an ensemble of four transformer models,\neach with different weights and trained on slightly varying feature sets.\nThe feature engineering process involved one-hot encoding of categorical labels,\ntarget encoding using 
mean and standard deviation, and enriching the feature set\nwith the standard deviation of target variables. Additionally, the dataset was\ncarefully examined to ensure data cleanliness. A sophisticated sampling strategy\nbased on K-Means clustering was employed to partition the data into training and\nvalidation sets, ensuring a representative distribution. The model architecture\nleveraged sparse and dense feature encoding, along with a transformer for effective\nlearning.\n", "is_baseline": false, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/transformer_ensemble/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/Eliorkalfon/single_cell_pb", + "documentation_url": "https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458738", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/methods/transformer_ensemble:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/transformer_ensemble", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" }, { @@ -149,10 +182,13 @@ "method_summary": "Py-boost predicting t-scores", "method_description": "An ensemble of four models was considered: \n\n* Py-boost (a ridge regression-based recommender system)\n* ExtraTrees (a decision tree ensemble with target-encoded features)\n* a k-nearest neighbors recommender system\n* a ridge regression model\n\nEach model offered distinct strengths and weaknesses: ExtraTrees and\nknn were unable to extrapolate beyond the training data, while ridge\nregression provided extrapolation capability. 
To enhance model performance,\ndata augmentation techniques were used, including averaging differential\nexpressions for compound mixtures and adjusting cell counts to reduce biases.\n\nIn the end, only the py-boost model is used for generating predictions.\n", "is_baseline": false, - "paper_reference": null, - "code_url": null, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/pyboost/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": null, + "code_url": "https://github.com/Ambros-M/Single-Cell-Perturbations-2023", + "documentation_url": "https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/discussion/458661", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/methods/pyboost:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/methods/pyboost", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e" } ] diff --git a/results/perturbation_prediction/data/metric_info.json b/results/perturbation_prediction/data/metric_info.json index a50c2a496..6007d893c 100644 --- a/results/perturbation_prediction/data/metric_info.json +++ b/results/perturbation_prediction/data/metric_info.json @@ -5,11 +5,11 @@ "metric_name": "Mean Rowwise RMSE", "metric_summary": "The mean of the root mean squared error (RMSE) of each row in the matrix.", "metric_description": "We use the **Mean Rowwise Root Mean Squared Error** to score submissions, computed as follows:\n\n$$\n\\textrm{MRRMSE} = \\frac{1}{R}\\sum_{i=1}^R\\left(\\frac{1}{n} \\sum_{j=1}^{n} (y_{ij} - \\widehat{y}_{ij})^2\\right)^{1/2}\n$$\n\nwhere $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ bis 
the number of columns.\n", - "paper_reference": { - "bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n" - }, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_error/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. 
Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_error", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/metrics/mean_rowwise_error:build_main", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e", "maximize": false }, @@ -19,11 +19,11 @@ "metric_name": "Mean Rowwise MAE", "metric_summary": "The mean of the absolute error (MAE) of each row in the matrix.", "metric_description": "We use the **Mean Rowwise Absolute Error** to score submissions, computed as follows:\n\n$$\n\\textrm{MRMAE} = \\frac{1}{R}\\sum_{i=1}^R\\left(\\frac{1}{n} \\sum_{j=1}^{n} |y_{ij} - \\widehat{y}_{ij}|\\right)\n$$\n\nwhere $(R)$ is the number of scored rows, and $(y_{ij})$ and $(\\widehat{y}_{ij})$ are the actual and predicted values, respectively, for row $(i)$ and column $(j)$, and $(n)$ bis the number of columns.\n", - "paper_reference": { - "bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. 
Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n" - }, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_error/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_error", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/metrics/mean_rowwise_error:build_main", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e", "maximize": false }, @@ -33,11 +33,11 @@ "metric_name": "Mean Rowwise Pearson", "metric_summary": "The mean of Pearson correlations per row (perturbation).", "metric_description": "The **Mean Pearson Correlation** is computed as follows:\n\n$$\n\\textrm{Mean-Pearson} = 
\\frac{1}{R}\\sum_{i=1}^R\\frac{\\textrm{Cov}(\\mathbf{y}_i, \\mathbf{\\hat{y}}_i)}{\\textrm{Var}(\\mathbf{y}_i) \\cdot \\textrm{Var}(\\mathbf{\\hat{y}}_i)}\n$$\n\nwhere $(R)$ is the number of scored rows, and $(\\mathbf{y}_i)$ and $(\\mathbf{\\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$.\n", - "paper_reference": { - "bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n" - }, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_correlation/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. 
Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_correlation", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/metrics/mean_rowwise_correlation:build_main", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e", "maximize": true }, @@ -47,11 +47,11 @@ "metric_name": "Mean Rowwise Spearman", "metric_summary": "The mean of Spearman correlations per row (perturbation).", "metric_description": "The **Mean Spearman Correlation** is computed as follows:\n\n$$\n\\textrm{Mean-Pearson} = \\frac{1}{R}\\sum_{i=1}^R\\frac{\\textrm{Cov}(\\mathbf{r}_i, \\mathbf{\\hat{r}}_i)}{\\textrm{Var}(\\mathbf{r}_i) \\cdot \\textrm{Var}(\\mathbf{\\hat{r}}_i)}\n$$\n\nwhere $(R)$ is the number of scored rows, and $(\\mathbf{r}_i)$ and $(\\mathbf{\\hat{r}}_i)$ are the ranks of the actual and predicted values, respectively, for row $(i)$.\n", - "paper_reference": { - "bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. 
Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n" - }, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_correlation/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_correlation", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/metrics/mean_rowwise_correlation:build_main", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e", "maximize": true }, @@ -61,11 +61,11 @@ "metric_name": "Mean Rowwise Cosine", "metric_summary": "The mean of cosine similarities per row (perturbation).", "metric_description": "The **Mean Cosine Similarity** is computed as follows:\n\n$$\n\\textrm{Mean-Cosine} = 
\\frac{1}{R}\\sum_{i=1}^R\\frac{\\mathbf{y}_i\\cdot \\mathbf{\\hat{y}}_i}{\\|\\mathbf{y}_i\\| \\|\\mathbf{\\hat{y}}_i\\|}\n$$\n\nwhere $(R)$ is the number of scored rows, and $(\\mathbf{y}_i)$ and $(\\mathbf{\\hat{y}}_i)$ are the actual and predicted values, respectively, for row $(i)$.\n", - "paper_reference": { - "bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n" - }, - "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_correlation/config.vsh.yaml", - "code_version": null, + "references_doi": null, + "references_bibtex": "@article{slazata2024benchmark,\n title = {A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\n author = {Artur Szałata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. 
Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\n booktitle = {The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\n year = {2024},\n url = {https://openreview.net/forum?id=WTI4RJYSVm}\n}\n", + "implementation_url": "https://github.com/openproblems-bio/task_perturbation_prediction/blob/2fa44462b1e7d530bad703c4a20ed22b49d3705e/src/metrics/mean_rowwise_correlation", + "image": "https://ghcr.io/openproblems-bio/task_perturbation_prediction/metrics/mean_rowwise_correlation:build_main", + "code_version": "build_main", "commit_sha": "2fa44462b1e7d530bad703c4a20ed22b49d3705e", "maximize": true } diff --git a/results/perturbation_prediction/data/quality_control.json b/results/perturbation_prediction/data/quality_control.json index a90951744..405f7776d 100644 --- a/results/perturbation_prediction/data/quality_control.json +++ b/results/perturbation_prediction/data/quality_control.json @@ -163,9 +163,9 @@ "task_id": "task_perturbation_prediction", "category": "Metric info", "name": "Pct 'paper_reference' missing", - "value": 0.0, - "severity": 0, - "severity_value": 0.0, + "value": 1.0, + "severity": 2, + "severity_value": 3.0, "code": "percent_missing(metric_info, field)", "message": "Metric metadata field 'paper_reference' should be defined\n Task id: task_perturbation_prediction\n Field: paper_reference\n" }, diff --git a/results/perturbation_prediction/data/task_info.json b/results/perturbation_prediction/data/task_info.json index 343bd4ac7..77115c5f2 100644 --- a/results/perturbation_prediction/data/task_info.json +++ b/results/perturbation_prediction/data/task_info.json @@ -4,7 +4,8 @@ "task_name": "Perturbation Prediction", "task_summary": "Predicting how small molecules change gene expression in different cell types.", "task_description": "Human biology can be complex, in part due to the function and interplay of the 
body's\napproximately 37 trillion cells, which are organized into tissues, organs, and systems.\nHowever, recent advances in single-cell technologies have provided unparalleled insight\ninto the function of cells and tissues at the level of DNA, RNA, and proteins. Yet\nleveraging single-cell methods to develop medicines requires mapping causal links\nbetween chemical perturbations and the downstream impact on cell state. These experiments\nare costly and labor intensive, and not all cells and tissues are amenable to\nhigh-throughput transcriptomic screening. If data science could help accurately predict\nchemical perturbations in new cell types, it could accelerate and expand the development\nof new medicines.\n\nSeveral methods have been developed for drug perturbation prediction, most of which are\nvariations on the autoencoder architecture (Dr.VAE, scGEN, and ChemCPA). However, these\nmethods lack proper benchmarking datasets with diverse cell types to determine how well\nthey generalize. The largest available training dataset is the NIH-funded Connectivity\nMap (CMap), which comprises over 1.3M small molecule perturbation measurements. However,\nthe CMap includes observations of only 978 genes, less than 5% of all genes. Furthermore,\nthe CMap data is comprised almost entirely of measurements in cancer cell lines, which\nmay not accurately represent human biology.\n\nThis task aims to predict how small molecules change gene expression in different cell\ntypes. This task was a [Kaggle competition](https://www.kaggle.com/competitions/open-problems-single-cell-perturbations/overview)\nas part of the [NeurIPS 2023 competition track](https://neurips.cc/virtual/2023/competition/66586).\n\nThe task is to predict the gene expression profile of a cell after a small molecule\nperturbation. For this competition, we designed and generated a novel single-cell\nperturbational dataset in human peripheral blood mononuclear cells (PBMCs). 
We\nselected 144 compounds from the Library of Integrated Network-Based Cellular Signatures\n(LINCS) Connectivity Map dataset ([PMID: 29195078](https://pubmed.ncbi.nlm.nih.gov/29195078/))\nand measured single-cell gene\nexpression profiles after 24 hours of treatment. The experiment was repeated in three\nhealthy human donors, and the compounds were selected based on diverse transcriptional\nsignatures observed in CD34+ hematopoietic stem cells (data not released). We performed\nthis experiment in human PBMCs because the cells are commercially available with\npre-obtained consent for public release and PBMCs are a primary, disease-relevant tissue\nthat contains multiple mature cell types (including T-cells, B-cells, myeloid cells,\nand NK cells) with established markers for annotation of cell types. To supplement this\ndataset, we also measured cells from each donor at baseline with joint scRNA and\nsingle-cell chromatin accessibility measurements using the 10x Multiome assay. We hope\nthat the addition of rich multi-omic data for each donor and cell type at baseline will\nhelp establish biological priors that explain the susceptibility of particular genes to\nexhibit perturbation responses in difference biological contexts.\n", - "repo": "openproblems-bio/task_perturbation_prediction", + "repo": "https://github.com/openproblems-bio/task_perturbation_prediction", + "issue_tracker": "https://github.com/openproblems-bio/task_perturbation_prediction/issues", "authors": [ { "name": "Artur Szałata", @@ -86,5 +87,7 @@ "orcid": "0000-0003-4356-6058" } } - ] + ], + "version": "build_main", + "license": "MIT" } diff --git a/results/perturbation_prediction/index.qmd b/results/perturbation_prediction/index.qmd index 5f3a3caf4..199be3502 100644 --- a/results/perturbation_prediction/index.qmd +++ b/results/perturbation_prediction/index.qmd @@ -7,12 +7,15 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - 
library.bib + - ../../library.bib toc: false --- ```{r} #| include: false -params <- list(data_dir = "results/predict_modality/data") +params <- list(data_dir = "results/perturbation_prediction/data") params <- list(data_dir = "./data") ``` diff --git a/results/spatial_decomposition/data/method_info.json b/results/spatial_decomposition/data/method_info.json index a2c3f402f..0574929a2 100644 --- a/results/spatial_decomposition/data/method_info.json +++ b/results/spatial_decomposition/data/method_info.json @@ -1,227 +1,227 @@ [ - { - "method_name": "Cell2location (alpha=20, amortised, hard-coded)", - "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", - "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", - "paper_reference": "kleshchevnikov2022cell2location", - "paper_year": 2022, - "code_url": "https://github.com/BayraktarLab/cell2location", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "cell2location_amortised_detection_alpha_20", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/cell2location.py" - }, - { - "method_name": "Cell2location (alpha=1, reference hard-coded)", - "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", - "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", - "paper_reference": "kleshchevnikov2022cell2location", - "paper_year": 2022, - "code_url": "https://github.com/BayraktarLab/cell2location", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "cell2location_detection_alpha_1", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/cell2location.py" - }, - { - "method_name": "Cell2location (alpha=20, reference hard-coded)", - "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", - "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", - "paper_reference": "kleshchevnikov2022cell2location", - "paper_year": 2022, - "code_url": "https://github.com/BayraktarLab/cell2location", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "cell2location_detection_alpha_20", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/cell2location.py" - }, - { - "method_name": "Cell2location (alpha=200, reference hard-coded)", - "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", - "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", - "paper_reference": "kleshchevnikov2022cell2location", - "paper_year": 2022, - "code_url": "https://github.com/BayraktarLab/cell2location", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "cell2location_detection_alpha_200", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/cell2location.py" - }, - { - "method_name": "Cell2location (alpha=20, NB reference)", - "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", - "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", - "paper_reference": "kleshchevnikov2022cell2location", - "paper_year": 2022, - "code_url": "https://github.com/BayraktarLab/cell2location", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", - "method_id": "cell2location_detection_alpha_20_nb", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/cell2location.py" - }, - { - "method_name": "DestVI", - "method_summary": "destVI is a decomposition method that leverages a conditional generative model of spatial transcriptomics down to the sub-cell-type variation level, which is then used to decompose the cell-type proportions determining the spatial organization of a tissue.", - "paper_name": "DestVI identifies continuums of cell types in spatial transcriptomics data", - "paper_reference": "lopez2022destvi", - "paper_year": 2022, - "code_url": "https://github.com/YosefLab/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "destvi", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/destvi.py" - }, - { - "method_name": "Non-Negative Matrix Factorization (NMF)", - "method_summary": "NMF is a decomposition method based on Non-negative Matrix Factorization (NMF) that reconstructs expression of each spatial location as a weighted combination of cell-type signatures defined by scRNA-seq. 
It is a simpler baseline than NMFreg as it only performs the NMF step based on mean expression signatures of cell types, returning the weights loading of the NMF as (normalized) cell type proportions, without the regression step.", - "paper_name": "Fast local algorithms for large scale nonnegative matrix and tensor factorizations", - "paper_reference": "cichocki2009fast", - "paper_year": 2009, - "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "nmf", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/vanillanmf.py" - }, - { - "method_name": "NMF-reg", - "method_summary": "NMFreg is a decomposition method based on Non-negative Matrix Factorization Regression (NMFreg) that reconstructs expression of each spatial location as a weighted combination of cell-type signatures defined by scRNA-seq. It was originally developed for Slide-seq data.", - "paper_name": "Slide-seq: A scalable technology for measuring genome-wide expression at high spatial resolution", - "paper_reference": "rodriques2019slide", - "paper_year": 2019, - "code_url": "https://github.com/tudaga/NMFreg_tutorial", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "nmfreg", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/nmfreg.py" - }, - { - "method_name": "Non-Negative Least Squares", - "method_summary": "NNLS13 is a decomposition method based on Non-Negative Least Square Regression (NNLS). 
It was originally introduced by the method AutoGenes", - "paper_name": "AutoGeneS: Automatic gene selection using multi-objective optimization for RNA-seq deconvolution", - "paper_reference": "aliee2021autogenes", - "paper_year": 2021, - "code_url": "https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.nnls.html", - "image": "openproblems", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "nnls_scipy", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/nnls.py" - }, - { - "method_name": "Random Proportions", - "method_summary": "Random assignment of predicted celltype proportions from a Dirichlet distribution.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": "https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "random_proportions", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/baseline.py" - }, - { - "method_name": "RCTD", - "method_summary": "RCTD (Robust Cell Type Decomposition) is a decomposition method that uses signatures learnt from single-cell data to decompose spatial expression of tissues. 
It is able to platform effect normalization step, which normalizes the scRNA-seq cell type profiles to match the platform effects of the spatial transcriptomics dataset.", - "paper_name": "Robust decomposition of cell type mixtures in spatial transcriptomics", - "paper_reference": "cable2021robust", - "paper_year": 2020, - "code_url": "https://github.com/dmcable/spacexr", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "rctd", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/rctd.py" - }, - { - "method_name": "SeuratV3", - "method_summary": "SeuratV3 is a decomposition method that is based on Canonical Correlation Analysis (CCA).", - "paper_name": "Comprehensive Integration of Single-Cell Data", - "paper_reference": "stuart2019comprehensive", - "paper_year": 2019, - "code_url": "https://satijalab.org/seurat/archive/v3.2/spatial_vignette.html", - "image": "openproblems-r-extras", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "seuratv3", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/seuratv3.py" - }, - { - "method_name": "Stereoscope", - "method_summary": "Stereoscope is a decomposition method based on Negative Binomial regression. 
It is similar in scope and implementation to cell2location but less flexible to incorporate additional covariates such as batch effects and other type of experimental design annotations.", - "paper_name": "Single-cell and spatial transcriptomics enables probabilistic inference of cell type topography", - "paper_reference": "andersson2020single", - "paper_year": 2020, - "code_url": "https://github.com/scverse/scvi-tools", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "stereoscope", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/stereoscope.py" - }, - { - "method_name": "Tangram", - "method_summary": "Tangram is a method to map gene expression signatures from scRNA-seq data to spatial data. It performs the cell type mapping by learning a similarity matrix between single-cell and spatial locations based on gene expression profiles.", - "paper_name": "Deep learning and alignment of spatially resolved single-cell transcriptomes with Tangram", - "paper_reference": "biancalani2021deep", - "paper_year": 2021, - "code_url": "https://github.com/broadinstitute/Tangram", - "image": "openproblems-python-pytorch", - "is_baseline": false, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "tangram", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/tangram.py" - }, - { - "method_name": "True Proportions", - "method_summary": "Perfect assignment of predicted celltype proportions from the ground truth.", - "paper_name": "Open Problems for Single Cell Analysis", - "paper_reference": "openproblems", - "paper_year": 2022, - "code_url": 
"https://github.com/openproblems-bio/openproblems", - "image": "openproblems", - "is_baseline": true, - "code_version": null, - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "method_id": "true_proportions", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/methods/baseline.py" - } + { + "method_name": "Cell2location (alpha=20, amortised, hard-coded)", + "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. The parameter alpha refers to the detection efficiency prior.", + "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", + "paper_reference": "kleshchevnikov2022cell2location", + "paper_year": 2022, + "code_url": "https://github.com/BayraktarLab/cell2location/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "cell2location_amortised_detection_alpha_20", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/cell2location.py" + }, + { + "method_name": "Cell2location (alpha=1, reference hard-coded)", + "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. 
Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. The parameter alpha refers to the detection efficiency prior.", + "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", + "paper_reference": "kleshchevnikov2022cell2location", + "paper_year": 2022, + "code_url": "https://github.com/BayraktarLab/cell2location/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "cell2location_detection_alpha_1", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/cell2location.py" + }, + { + "method_name": "Cell2location (alpha=20, reference hard-coded)", + "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", + "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", + "paper_reference": "kleshchevnikov2022cell2location", + "paper_year": 2022, + "code_url": "https://github.com/BayraktarLab/cell2location/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "cell2location_detection_alpha_20", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/cell2location.py" + }, + { + "method_name": "Cell2location (alpha=200, reference hard-coded)", + "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", + "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", + "paper_reference": "kleshchevnikov2022cell2location", + "paper_year": 2022, + "code_url": "https://github.com/BayraktarLab/cell2location/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "cell2location_detection_alpha_200", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/cell2location.py" + }, + { + "method_name": "Cell2location (alpha=20, NB reference)", + "method_summary": "Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that since batch information is unavailable in this task, here we use either a hard-coded reference, or a negative-binomial learned reference without batch labels. 
The parameter alpha refers to the detection efficiency prior.", + "paper_name": "Cell2location maps fine-grained cell types in spatial transcriptomics", + "paper_reference": "kleshchevnikov2022cell2location", + "paper_year": 2022, + "code_url": "https://github.com/BayraktarLab/cell2location/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "154ccb9fd99113f3d28d9c3f139194539a0290f9", + "method_id": "cell2location_detection_alpha_20_nb", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/cell2location.py" + }, + { + "method_name": "DestVI", + "method_summary": "destVI is a decomposition method that leverages a conditional generative model of spatial transcriptomics down to the sub-cell-type variation level, which is then used to decompose the cell-type proportions determining the spatial organization of a tissue.", + "paper_name": "DestVI identifies continuums of cell types in spatial transcriptomics data", + "paper_reference": "lopez2022destvi", + "paper_year": 2022, + "code_url": "https://github.com/YosefLab/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "destvi", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/destvi.py" + }, + { + "method_name": "Non-Negative Matrix Factorization (NMF)", + "method_summary": "NMF is a decomposition method based on Non-negative Matrix Factorization (NMF) that reconstructs expression of each spatial location as 
a weighted combination of cell-type signatures defined by scRNA-seq. It is a simpler baseline than NMFreg as it only performs the NMF step based on mean expression signatures of cell types, returning the weights loading of the NMF as (normalized) cell type proportions, without the regression step.", + "paper_name": "Fast local algorithms for large scale nonnegative matrix and tensor factorizations", + "paper_reference": "cichocki2009fast", + "paper_year": 2009, + "code_url": "https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "nmf", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/vanillanmf.py" + }, + { + "method_name": "NMF-reg", + "method_summary": "NMFreg is a decomposition method based on Non-negative Matrix Factorization Regression (NMFreg) that reconstructs expression of each spatial location as a weighted combination of cell-type signatures defined by scRNA-seq. 
It was originally developed for Slide-seq data.", + "paper_name": "Slide-seq: A scalable technology for measuring genome-wide expression at high spatial resolution", + "paper_reference": "rodriques2019slide", + "paper_year": 2019, + "code_url": "https://github.com/tudaga/NMFreg_tutorial/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "nmfreg", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/nmfreg.py" + }, + { + "method_name": "Non-Negative Least Squares", + "method_summary": "NNLS13 is a decomposition method based on Non-Negative Least Square Regression (NNLS). It was originally introduced by the method AutoGenes", + "paper_name": "AutoGeneS: Automatic gene selection using multi-objective optimization for RNA-seq deconvolution", + "paper_reference": "aliee2021autogenes", + "paper_year": 2021, + "code_url": "https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.nnls.html/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "nnls_scipy", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/nnls.py" + }, + { + "method_name": "Random Proportions", + "method_summary": "Random assignment of predicted celltype proportions from a Dirichlet distribution.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": 
"https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "random_proportions", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/baseline.py" + }, + { + "method_name": "RCTD", + "method_summary": "RCTD (Robust Cell Type Decomposition) is a decomposition method that uses signatures learnt from single-cell data to decompose spatial expression of tissues. It is able to platform effect normalization step, which normalizes the scRNA-seq cell type profiles to match the platform effects of the spatial transcriptomics dataset.", + "paper_name": "Robust decomposition of cell type mixtures in spatial transcriptomics", + "paper_reference": "cable2021robust", + "paper_year": 2020, + "code_url": "https://github.com/dmcable/spacexr/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "rctd", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/rctd.py" + }, + { + "method_name": "SeuratV3", + "method_summary": "SeuratV3 is a decomposition method that is based on Canonical Correlation Analysis (CCA).", + "paper_name": "Comprehensive Integration of Single-Cell Data", + "paper_reference": "stuart2019comprehensive", + "paper_year": 2019, + "code_url": "https://satijalab.org/seurat/archive/v3.2/spatial_vignette.html/tree/v1.0.0/openproblems/tasks", + "image": 
"https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-r-extras", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "seuratv3", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/seuratv3.py" + }, + { + "method_name": "Stereoscope", + "method_summary": "Stereoscope is a decomposition method based on Negative Binomial regression. It is similar in scope and implementation to cell2location but less flexible to incorporate additional covariates such as batch effects and other type of experimental design annotations.", + "paper_name": "Single-cell and spatial transcriptomics enables probabilistic inference of cell type topography", + "paper_reference": "andersson2020single", + "paper_year": 2020, + "code_url": "https://github.com/scverse/scvi-tools/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "stereoscope", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/stereoscope.py" + }, + { + "method_name": "Tangram", + "method_summary": "Tangram is a method to map gene expression signatures from scRNA-seq data to spatial data. 
It performs the cell type mapping by learning a similarity matrix between single-cell and spatial locations based on gene expression profiles.", + "paper_name": "Deep learning and alignment of spatially resolved single-cell transcriptomes with Tangram", + "paper_reference": "biancalani2021deep", + "paper_year": 2021, + "code_url": "https://github.com/broadinstitute/Tangram/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems-python-pytorch", + "is_baseline": false, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "tangram", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/tangram.py" + }, + { + "method_name": "True Proportions", + "method_summary": "Perfect assignment of predicted celltype proportions from the ground truth.", + "paper_name": "Open Problems for Single Cell Analysis", + "paper_reference": "openproblems", + "paper_year": 2022, + "code_url": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks", + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "is_baseline": true, + "code_version": "v1.0.0", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "method_id": "true_proportions", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/methods/baseline.py" + } ] \ No newline at end of file diff --git a/results/spatial_decomposition/data/metric_info.json b/results/spatial_decomposition/data/metric_info.json index 4da11a831..b9433e37e 100644 --- a/results/spatial_decomposition/data/metric_info.json +++ b/results/spatial_decomposition/data/metric_info.json @@ -1,13 +1,14 @@ [ - { - "metric_name": "r2", - "metric_summary": "R2, or 
the \u201ccoefficient of determination\u201d, reports the fraction of the true proportion values\u2019 variance that can be explained by the predicted proportion values. The best score, and upper bound, is 1.0. There is no fixed lower bound for the metric. The uniform/non-weighted average across all cell types/states is used to summarise performance.", - "paper_reference": "miles2005rsquared", - "maximize": true, - "image": "openproblems", - "task_id": "spatial_decomposition", - "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", - "metric_id": "r2", - "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/main/openproblems/tasks/spatial_decomposition/metrics/r2.py" - } + { + "metric_name": "r2", + "metric_summary": "R2, or the \u201ccoefficient of determination\u201d, reports the fraction of the true proportion values\u2019 variance that can be explained by the predicted proportion values. The best score, and upper bound, is 1.0. There is no fixed lower bound for the metric. 
The uniform/non-weighted average across all cell types/states is used to summarise performance.", + "paper_reference": "miles2005rsquared", + "maximize": true, + "image": "https://github.com/openproblems-bio/openproblems/pkgs/container/openproblems", + "task_id": "spatial_decomposition", + "commit_sha": "b3456fd73c04c28516f6df34c57e6e3e8b0dab32", + "metric_id": "r2", + "implementation_url": "https://github.com/openproblems-bio/openproblems/blob/v1.0.0/openproblems/tasks/spatial_decomposition/metrics/r2.py", + "code_version": "v1.0.0" + } ] \ No newline at end of file diff --git a/results/spatial_decomposition/data/task_info.json b/results/spatial_decomposition/data/task_info.json index 8355f5e0c..f98cd87fd 100644 --- a/results/spatial_decomposition/data/task_info.json +++ b/results/spatial_decomposition/data/task_info.json @@ -1,8 +1,70 @@ { - "task_id": "spatial_decomposition", - "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", - "task_name": "Spatial Decomposition", - "task_summary": "Calling cell-type compositions for spot-based spatial transcriptomics data", - "task_description": "\nSpatial decomposition (also often referred to as Spatial deconvolution) is\napplicable to spatial transcriptomics data where the transcription profile of\neach capture location (spot, voxel, bead, etc.) do not share a bijective\nrelationship with the cells in the tissue, i.e., multiple cells may contribute\nto the same capture location. The task of spatial decomposition then refers to\nestimating the composition of cell types/states that are present at each capture\nlocation. 
The cell type/states estimates are presented as proportion values,\nrepresenting the proportion of the cells at each capture location that belong to\na given cell type.\n\nWe distinguish between _reference-based_ decomposition and _de novo_\ndecomposition, where the former leverage external data (e.g., scRNA-seq or\nscNuc-seq) to guide the inference process, while the latter only work with the\nspatial data. We require that all datasets have an associated reference single\ncell data set, but methods are free to ignore this information.\n\n", - "repo": "openproblems-bio/openproblems" -} \ No newline at end of file + "task_id": "spatial_decomposition", + "commit_sha": "c97decf07adb2e3050561d6fa9ae46132be07bef", + "task_name": "Spatial Decomposition", + "task_summary": "Calling cell-type compositions for spot-based spatial transcriptomics data", + "task_description": "\nSpatial decomposition (also often referred to as Spatial deconvolution) is\napplicable to spatial transcriptomics data where the transcription profile of\neach capture location (spot, voxel, bead, etc.) do not share a bijective\nrelationship with the cells in the tissue, i.e., multiple cells may contribute\nto the same capture location. The task of spatial decomposition then refers to\nestimating the composition of cell types/states that are present at each capture\nlocation. The cell type/states estimates are presented as proportion values,\nrepresenting the proportion of the cells at each capture location that belong to\na given cell type.\n\nWe distinguish between _reference-based_ decomposition and _de novo_\ndecomposition, where the former leverage external data (e.g., scRNA-seq or\nscNuc-seq) to guide the inference process, while the latter only work with the\nspatial data. 
We require that all datasets have an associated reference single\ncell data set, but methods are free to ignore this information.\n\n", + "repo": "https://github.com/openproblems-bio/openproblems/tree/v1.0.0/openproblems/tasks/spatial_decomposition", + "authors": [ + { + "name": "Alma Andersson", + "roles": ["author", "maintainer"], + "info": { + "github": "almaan" + } + }, + { + "name": "Giovanni Palla", + "roles": ["author", "maintainer"], + "info": { + "github": "giovp" + } + }, + { + "name": "Vitalii Kleshchevnikov", + "roles": "author", + "info": { + "github": "vitkl" + } + }, + { + "name": "Hirak Sarkar", + "roles": "author", + "info": { + "github": "hiraksarkar" + } + }, + { + "name": "Scott Gigante", + "roles": "author", + "info": { + "github": "scottgigante", + "orcid": "0000-0002-4544-2764" + } + }, + { + "name": "Daniel Burkhardt", + "roles": "contributor", + "info": { + "github": "dburkhardt" + } + }, + { + "name": "Can Ergen", + "roles": "contributor", + "info": { + "github": "canergen" + } + }, + { + "name": "Sai Nirmayi Yasa", + "roles": "contributor", + "info": { + "github": "sainirmayi", + "orcid": "0009-0003-6319-9803" + } + } + ], + "version": "v1.0.0", + "license": "MIT" +} diff --git a/results/spatial_decomposition/index.qmd b/results/spatial_decomposition/index.qmd index 5a1850acf..3d20fc30e 100644 --- a/results/spatial_decomposition/index.qmd +++ b/results/spatial_decomposition/index.qmd @@ -7,6 +7,9 @@ css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false --- diff --git a/results/spatially_variable_genes/data/dataset_info.json b/results/spatially_variable_genes/data/dataset_info.json index 1fad0c09b..e06cdb553 100644 --- a/results/spatially_variable_genes/data/dataset_info.json +++ b/results/spatially_variable_genes/data/dataset_info.json @@ -1,6 +1,5 @@ [ { - "task_id": "task_spatially_variable_genes", "dataset_id": 
"zenodo_spatial/slideseqv2/mouse_cortex", "dataset_name": "Slide-seqV2 - Mouse Cortex", "dataset_summary": "Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2.", @@ -11,7 +10,6 @@ "file_size": 1194194 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_breast_cancer_1", "dataset_name": "10X Visium - Human Breast Cancer 1", "dataset_summary": "Whole transcriptome analysis, Adult Human Breast Cancer (Visium)", @@ -22,7 +20,6 @@ "file_size": 506522 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", "dataset_name": "Stereo-seq - Drosophila embryo E5_6", "dataset_summary": "Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution.", @@ -33,7 +30,6 @@ "file_size": 287642 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_skin_melanoma", "dataset_name": "10X Visium - Human Skin Melanoma", "dataset_summary": "Gene expression library of Human Skin Melanoma (CytAssist FFPE) using the Human Whole Transcriptome Probe Set", @@ -44,7 +40,6 @@ "file_size": 460186 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", "dataset_name": "Stereo-seq - Drosophila embryo E6_3", "dataset_summary": "Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution.", @@ -55,7 +50,6 @@ "file_size": 287162 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", "dataset_name": "Stereo-seq - Drosophila embryo E9_1", "dataset_summary": "Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution.", @@ -66,7 +60,6 @@ "file_size": 298106 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", "dataset_name": "Slide-seqV2 - Mouse Hippocampus Puck", "dataset_summary": 
"Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2.", @@ -77,7 +70,6 @@ "file_size": 1931250 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_heart", "dataset_name": "10X Visium - Human Heart", "dataset_summary": "V1_Human_Heart", @@ -88,7 +80,6 @@ "file_size": 505306 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", "dataset_name": "Slide-seqV2 - Mouse Olfactory Bulb Puck", "dataset_summary": "Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2.", @@ -99,7 +90,6 @@ "file_size": 1447730 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_lymph_node", "dataset_name": "10X Visium - Human Lymph Node", "dataset_summary": "Whole transcriptome analysis, Human Lymph Node", @@ -110,7 +100,6 @@ "file_size": 491098 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/post_xenium/mouse_brain", "dataset_name": "10X Post Xenium - Mouse Brain", "dataset_summary": "Gene expression library of Post Xenium Mouse Brain (CytAssist Fresh Frozen) using the Mouse Whole Transcriptome Probe Set - Replicate 1", @@ -121,7 +110,6 @@ "file_size": 580730 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", "dataset_name": "10X Visium - Human Heart MI 1", "dataset_summary": "Gene expression library of human heart using 10x Visium.", @@ -132,7 +120,6 @@ "file_size": 506714 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", "dataset_name": "10X Visium - Human Heart MI 2", "dataset_summary": "Gene expression library of human heart using 10x Visium.", @@ -143,7 +130,6 @@ "file_size": 522874 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/merfish/human_cortex_2", "dataset_name": 
"MERFISH - Human Cortex 2", "dataset_summary": "Spatially resolved profiling of human cerebral cortex using multiplexed error-robust fluorescence in situ hybridization (MERFISH).", @@ -154,7 +140,6 @@ "file_size": 397914 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", "dataset_name": "DBiT-seq - Mouse Whole Body (E10)", "dataset_summary": "High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue.", @@ -165,7 +150,6 @@ "file_size": 228026 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_brain_cancer", "dataset_name": "10X Visium - Human Brain Cancer", "dataset_summary": "Gene expression library of Human Glioblastoma (CytAssist FFPE) using the Human Whole Transcriptome Probe Set", @@ -176,7 +160,6 @@ "file_size": 980026 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_lung_cancer", "dataset_name": "10X Visium - Human Lung Cancer", "dataset_summary": "Gene expression library of Human Lung Cancer (CytAssist FFPE) using the Human Whole Transcriptome Probe Set", @@ -187,7 +170,6 @@ "file_size": 646170 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", "dataset_name": "DBiT-seq - Mouse Lower Body (E11)", "dataset_summary": "High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue.", @@ -198,7 +180,6 @@ "file_size": 271738 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_cerebellum", "dataset_name": "10X Visium - Adult Human Cerebellum", "dataset_summary": "Human Cerebellum Whole Transcriptome Analysis", @@ -209,7 +190,6 @@ "file_size": 557562 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_kidney", "dataset_name": "10X Visium - Human Kidney", "dataset_summary": "Gene expression library of Human Kidney (CytAssist FFPE) using the 
Human Whole Transcriptome Probe Set", @@ -220,7 +200,6 @@ "file_size": 625562 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", "dataset_name": "10X Post Xenium - Human Colon", "dataset_summary": "Gene expression library of Post Xenium Human Colon Cancer (CytAssist FFPE) using the Human Whole Transcriptome Probe Set - Replicate 1", @@ -231,7 +210,6 @@ "file_size": 601306 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_cervical_cancer", "dataset_name": "10X Visium - Human Cervical Cancer", "dataset_summary": "Gene expression library of Human Cervical Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set", @@ -242,7 +220,6 @@ "file_size": 401754 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", "dataset_name": "DBiT-seq - Mouse Brain (E10)", "dataset_summary": "High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue.", @@ -253,7 +230,6 @@ "file_size": 279898 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/merfish/human_cortex_4", "dataset_name": "MERFISH - Human Cortex 4", "dataset_summary": "Spatially resolved profiling of human cerebral cortex using multiplexed error-robust fluorescence in situ hybridization (MERFISH).", @@ -264,7 +240,6 @@ "file_size": 472570 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", "dataset_name": "Slide-tags - Human Skin Melanoma", "dataset_summary": "Slide-tags enables single-nucleus barcoding for multimodal spatial genomics.", @@ -275,7 +250,6 @@ "file_size": 548410 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", "dataset_name": "Stereo-seq - Drosophila embryo E7", "dataset_summary": "Stereo-seq faithfully captures Drosophila spatial transcriptomes with high 
resolution.", @@ -286,7 +260,6 @@ "file_size": 264090 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_breast_cancer_2", "dataset_name": "10X Visium - Human Breast Cancer 2", "dataset_summary": "Gene expression library of Human Breast Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set", @@ -297,7 +270,6 @@ "file_size": 381050 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", "dataset_name": "DBiT-seq - Mouse Whole Body 2 (E11)", "dataset_summary": "High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue.", @@ -308,7 +280,6 @@ "file_size": 298330 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", "dataset_name": "DBiT-seq - Mouse Whole Body 1 (E11)", "dataset_summary": "High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue.", @@ -319,7 +290,6 @@ "file_size": 281530 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", "dataset_name": "10X Visium - Mouse Olfactory Bulb", "dataset_summary": "10X Genomics obtained fresh frozen mouse olfactory bulb tissue from BioIVT.", @@ -330,7 +300,6 @@ "file_size": 285114 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", "dataset_name": "Slide-tags - Mouse Embryo", "dataset_summary": "Slide-tags enables single-nucleus barcoding for multimodal spatial genomics.", @@ -341,7 +310,6 @@ "file_size": 534450 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", "dataset_name": "Slide-tags - Human Tonsil", "dataset_summary": "Slide-tags enables single-nucleus barcoding for multimodal spatial genomics.", @@ -352,7 +320,6 @@ "file_size": 616442 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": 
"tenx_visium/visium/human_intestinal_cancer", "dataset_name": "10X Visium - Human Intestine Cancer", "dataset_summary": "Gene expression library of Human Intestinal Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set", @@ -363,7 +330,6 @@ "file_size": 389690 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", "dataset_name": "Slide-tags - Human Cortex", "dataset_summary": "Slide-tags enables single-nucleus barcoding for multimodal spatial genomics.", @@ -374,7 +340,6 @@ "file_size": 491994 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", "dataset_name": "DBiT-seq - Mouse Eye (E10)", "dataset_summary": "High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue.", @@ -385,7 +350,6 @@ "file_size": 327226 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/mouse_brain_coronal", "dataset_name": "10X Visium - Mouse Brain Coronal", "dataset_summary": "Gene expression library of Mouse Brain (CytAssist FFPE) using the Mouse Whole Transcriptome Probe Set", @@ -396,7 +360,6 @@ "file_size": 366202 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/merfish/mouse_cortex", "dataset_name": "MERFISH - Mouse Cortex", "dataset_summary": "Spatially resolved profiling of mouse cerebral cortex using multiplexed error-robust fluorescence in situ hybridization (MERFISH).", @@ -407,7 +370,6 @@ "file_size": 1049573 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_prostate_cancer", "dataset_name": "10X Visium - Human Prostate Cancer", "dataset_summary": "Gene expression library of Human Prostate Cancer (Visium FFPE) with an IF image using the Human Whole Transcriptome Probe Set", @@ -418,7 +380,6 @@ "file_size": 448058 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": 
"zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", "dataset_name": "Slide-seqV2 - Mouse Somatosensory Cortex Puck", "dataset_summary": "Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2.", @@ -429,7 +390,6 @@ "file_size": 1457874 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", "dataset_name": "Stereo-seq - Drosophila embryo E10", "dataset_summary": "Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution.", @@ -440,7 +400,6 @@ "file_size": 282746 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", "dataset_name": "Slide-seqV2 - Mouse Cerebellum", "dataset_summary": "Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2.", @@ -451,7 +410,6 @@ "file_size": 1670514 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/mouse_kidney_v1", "dataset_name": "10X Visium - Mouse Kidney 1", "dataset_summary": "Mouse Kidney Whole Transcriptome Analysis", @@ -462,7 +420,6 @@ "file_size": 305466 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/mouse_embryo", "dataset_name": "10X Visium - Mouse Embryo", "dataset_summary": "Gene expression library of Mouse Embryo (CytAssist FFPE) using the Mouse Whole Transcriptome Probe Set", @@ -473,7 +430,6 @@ "file_size": 666106 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_colorectal_cancer", "dataset_name": "10X Visium - Human Colorectal Cancer", "dataset_summary": "Gene expression library of Human Colorectal Cancer (CytAssist FFPE) using the Human Whole Transcriptome Probe Set", @@ -484,7 +440,6 @@ "file_size": 853146 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/merfish/human_cortex_1", "dataset_name": "MERFISH - Human Cortex 1", "dataset_summary": "Spatially 
resolved profiling of human cerebral cortex using multiplexed error-robust fluorescence in situ hybridization (MERFISH).", @@ -495,7 +450,6 @@ "file_size": 763077 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/merfish/human_cortex_3", "dataset_name": "MERFISH - Human Cortex 3", "dataset_summary": "Spatially resolved profiling of human cerebral cortex using multiplexed error-robust fluorescence in situ hybridization (MERFISH).", @@ -506,7 +460,6 @@ "file_size": 474970 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", "dataset_name": "Seqfish - Mouse Organogenesis", "dataset_summary": "Single-cell spatial expression of mouse organogenesis.", @@ -517,7 +470,6 @@ "file_size": 1497221 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", "dataset_name": "STARmap - Mouse Brain 1", "dataset_summary": "Three-dimensional intact-tissue sequencing of single-cell transcriptional states.", @@ -528,7 +480,6 @@ "file_size": 268517 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", "dataset_name": "STARmap - Mouse Brain 2", "dataset_summary": "Three-dimensional intact-tissue sequencing of single-cell transcriptional states.", @@ -539,7 +490,6 @@ "file_size": 390789 }, { - "task_id": "task_spatially_variable_genes", "dataset_id": "tenx_visium/visium/human_normal_prostate", "dataset_name": "10X Visium - Human Normal Prostate", "dataset_summary": "Gene expression library of Human Normal Prostate (Visium FFPE) using the Human Whole Transcriptome Probe Set", diff --git a/results/spatially_variable_genes/data/method_info.json b/results/spatially_variable_genes/data/method_info.json index 2e9cc1148..d039c5177 100644 --- a/results/spatially_variable_genes/data/method_info.json +++ b/results/spatially_variable_genes/data/method_info.json @@ -1,210 +1,258 @@ [ { - 
"task_id": "task_spatially_variable_genes", + "task_id": "control_methods", "method_id": "random_ranking", "method_name": "Random Ranking", "method_summary": "Negative control method that randomly rank genes.", "method_description": "A negative control method with random ranking of genes.\n", "is_baseline": true, - "paper_reference": null, + "references_doi": null, + "references_bibtex": null, "code_url": "https://github.com/openproblems-bio/task_spatially_variable_genes", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/control_methods/random_ranking/config.vsh.yaml", - "code_version": null, + "documentation_url": null, + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/control_methods/random_ranking:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/control_methods/random_ranking", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "control_methods", "method_id": "true_ranking", "method_name": "True Ranking", "method_summary": "Positive control method that correctly rank genes.", "method_description": "A positive control method with correct ranking of genes.\n", "is_baseline": true, - "paper_reference": null, + "references_doi": null, + "references_bibtex": null, "code_url": "https://github.com/openproblems-bio/task_spatially_variable_genes", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/control_methods/true_ranking/config.vsh.yaml", - "code_version": null, + "documentation_url": null, + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/control_methods/true_ranking:build_main", + "implementation_url": 
"https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/control_methods/true_ranking", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "boostgp", "method_name": "BOOST-GP", "method_summary": "Bayesian modeling of spatial molecular profiling data via Gaussian process", "method_description": "BOOST-GP a novel Bayesian hierarchical model to analyze spatial transcriptomics data, \nwith several unique characteristics. It models the zero-inflated and over-dispersed \ncounts by deploying a zero-inflated negative binomial model that greatly increases \nmodel stability and robustness. Besides, the Bayesian inference framework allows us \nto borrow strength in parameter estimation in a de novo fashion. As a result, \nthe proposed model shows competitive performances in accuracy and robustness \nover existing methods in both simulation studies and two real data applications.\n", "is_baseline": false, - "paper_reference": "10.1093/bioinformatics/btab455", + "references_doi": "10.1093/bioinformatics/btab455", + "references_bibtex": null, "code_url": "https://github.com/Minzhe/BOOST-GP", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/boostgp/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://github.com/Minzhe/BOOST-GP", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/boostgp:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/boostgp", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "gpcounts", 
"method_name": "GPcounts", "method_summary": "GPcounts is non-parametric modelling of temporal and spatial counts data from RNA-seq experiments.", "method_description": "The GPcounts package implements GP regression methods for modelling counts data using a \nnegative binomial likelihood function. Computational efficiency is achieved through the use of \nvariational Bayesian inference. The GP function models changes in the mean of the negative binomial \nlikelihood through a logarithmic link function and the dispersion parameter is fitted by maximum \nlikelihood. We validate the method on simulated time course data, showing better performance to identify \nchanges in over-dispersed counts data than methods based on Gaussian or Poisson likelihoods. \n", "is_baseline": false, - "paper_reference": "10.1093/bioinformatics/btab486", + "references_doi": "10.1093/bioinformatics/btab486", + "references_bibtex": null, "code_url": "https://github.com/ManchesterBioinference/GPcounts", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/gpcounts/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://github.com/ManchesterBioinference/GPcounts/blob/master/demo_notebooks/GPcounts_spatial.ipynb", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/gpcounts:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/gpcounts", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "moran_i", "method_name": "Moran's I", "method_summary": "Moran's I is a measurement of spatial autocorrelation.", "method_description": "The MoranI global spatial auto-correlation statistics evaluates whether features (i.e. 
genes) \nshows a pattern that is clustered, dispersed or random in the tissue are under consideration.\n", "is_baseline": false, - "paper_reference": "10.1038/s41592-021-01358-2", + "references_doi": "10.1038/s41592-021-01358-2", + "references_bibtex": null, "code_url": "https://github.com/scverse/squidpy", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/moran_i/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://squidpy.readthedocs.io/en/stable/api/squidpy.gr.spatial_autocorr.html", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/moran_i:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/moran_i", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "nnsvg", "method_name": "nnSVG", "method_summary": "nnSVG is based on nearest-neighbor Gaussian process (NNGP) models to estimate parameters in GPs", "method_description": "nnSVG identifies genes that vary in expression continuously across the entire tissue or within a priori defined \nspatial domains. 
It uses gene-specific estimates of length scale parameters within the Gaussian process models, \nand scales linearly with the number of spatial locations.\n", "is_baseline": false, - "paper_reference": "10.1038/s41467-023-39748-z", + "references_doi": "10.1038/s41467-023-39748-z", + "references_bibtex": null, "code_url": "https://github.com/lmweber/nnSVG", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/nnsvg/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://bioconductor.org/packages/release/bioc/vignettes/nnSVG/inst/doc/nnSVG.html", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/nnsvg:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/nnsvg", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "scgco", "method_name": "scGCO", "method_summary": "Identification of spatially variable genes with graph cuts.", "method_description": "Single-cell gene expression data with positional information is critical to dissect \nmechanisms and architectures of multicellular organisms, but the potential is limited \nby the scalability of current data analysis strategies. Here, we present scGCO, \na method based on fast optimization of hidden Markov Random Fields with graph cuts \nto identify spatially variable genes. Comparing to existing methods, scGCO delivers \na superior performance with lower false positive rate and improved specificity, \nwhile demonstrates a more robust performance in the presence of noises. 
\nCritically, scGCO scales near linearly with inputs and demonstrates orders of \nmagnitude better running time and memory requirement than existing methods, \nand could represent a valuable solution when spatial transcriptomics data grows \ninto millions of data points and beyond..\n", "is_baseline": false, - "paper_reference": "10.1038/s41467-022-33182-3", + "references_doi": "10.1038/s41467-022-33182-3", + "references_bibtex": null, "code_url": "https://github.com/WangPeng-Lab/scGCO", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/scgco/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://github.com/WangPeng-Lab/scGCO/blob/master/code/Tutorial/scGCO_tutorial.ipynb", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/scgco:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/scgco", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "sepal", "method_name": "Sepal", "method_summary": "Sepal simulates diffusion of individual transcripts to extract genes with spatial patterns.", "method_description": "This method assesses the degree of randomness exhibited by each transcript profile and rank them accordingly.\n", "is_baseline": false, - "paper_reference": "10.1093/bioinformatics/btab164", + "references_doi": "10.1093/bioinformatics/btab164", + "references_bibtex": null, "code_url": "https://github.com/scverse/squidpy", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/sepal/config.vsh.yaml", - "code_version": null, + "documentation_url": 
"https://squidpy.readthedocs.io/en/stable/api/squidpy.gr.sepal.html", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/sepal:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/sepal", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "somde", "method_name": "SOMDE", "method_summary": "SOMDE is a scalable method for identifying spatially variable genes with self-organizing map.", "method_description": "SOMDE uses self-organizing map to cluster neighboring cells into nodes, and then uses a Gaussian process \nto fit the node-level spatial gene expression to identify SVgenes. Experiments show that SOMDE is about \n5 to 50 times faster than existing methods with comparable results. \nThe adjustable resolution of SOMDE makes it the only method that can give results in about \n5 min in large datasets of more than 20 000 sequencing sites.\n", "is_baseline": false, - "paper_reference": "10.1093/bioinformatics/btab471", + "references_doi": "10.1093/bioinformatics/btab471", + "references_bibtex": null, "code_url": "https://github.com/XuegongLab/somde", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/somde/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://github.com/WhirlFirst/somde/blob/master/slide_seq0819_11_SOM.ipynb", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/somde:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/somde", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": 
"task_spatially_variable_genes", + "task_id": "methods", "method_id": "spagcn", "method_name": "SpaGCN", "method_summary": "Integrating gene expression, spatial location and histology to identify spatial domains and spatially variable genes by graph convolutional network.", "method_description": "To elucidate spatial gene expression variation, we present SpaGCN, a graph convolutional \nnetwork approach that integrates gene expression, spatial location and histology in SRT data analysis. \nThrough graph convolution, SpaGCN aggregates gene expression of each spot from its neighboring spots, \nwhich enables the identification of spatial domains with coherent expression and histology. \nThe subsequent domain guided differential expression (DE) analysis then detects genes with \nenriched expression patterns in the identified domains. Analyzing seven SRT datasets using \nSpaGCN, we show it can detect genes with much more enriched spatial expression patterns than competing methods. Furthermore, genes detected by SpaGCN are transferrable and can be utilized to study spatial variation of gene expression in other datasets. 
SpaGCN is computationally \nfast, platform independent, making it a desirable tool for diverse SRT studies.\n", "is_baseline": false, - "paper_reference": "10.1038/s41592-021-01255-8", + "references_doi": "10.1038/s41592-021-01255-8", + "references_bibtex": null, "code_url": "https://github.com/jianhuupenn/SpaGCN", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spagcn/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://github.com/jianhuupenn/SpaGCN/blob/master/tutorial/tutorial.ipynb", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/spagcn:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spagcn", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "spagft", "method_name": "SpaGFT", "method_summary": "SpaGFT is a graph Fourier transform for tissue module identification from spatially resolved transcriptomics", "method_description": "The tissue module (TM) was defined as an architectural area containing recurrent cellular \ncommunities executing specific biological functions at different tissue sites. \nHowever, the computational identification of TMs poses challenges owing to their various \nlength scales, convoluted biological processes, not well-defined molecular features, and \nirregular spatial patterns. Here, we present a hypothesis-free graph Fourier transform model, \nSpaGFT, to characterize TMs. For the first time, SpaGFT transforms complex gene expression \npatterns into simple, but informative signals, leading to the accurate identification of \nspatially variable genes (SVGs) at a fast computational speed. 
Based on clustering the \ntransformed signals of the SVGs, SpaGFT provides a novel computational framework for TM \ncharacterization. Three case studies were used to illustrate TM identities, the biological \nprocesses of convoluted TMs in the lymph node, and conserved TMs across multiple samples constituting \nthe complex organ. The superior accuracy, scalability, and interpretability of SpaGFT indicate \nthat it is a novel and powerful tool for the investigation of TMs to gain new insights into a variety \nof biological questions.\n", "is_baseline": false, - "paper_reference": "10.1101/2022.12.10.519929", + "references_doi": "10.1101/2022.12.10.519929", + "references_bibtex": null, "code_url": "https://github.com/jxLiu-bio/SpaGFT", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spagft/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://spagft.readthedocs.io/en/latest/", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/spagft:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spagft", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "spanve", "method_name": "Spanve", "method_summary": "Spanve is a non-parametric statistical approach based on modeling space dependence as a distance of two distributions for detecting SV genes.", "method_description": "The depiction of in situ gene expression through spatial transcriptomics facilitates the inference of cell \nfunction mechanisms. To build spatial maps of transcriptomes, the first and crucial step is to \nidentify spatially variable (SV) genes. 
However, current methods fall short in dealing with \nlarge-scale spatial transcriptomics data and may result in a high false positive rate due to the \nmodeling of gene expression into parametric distributions. \nThis paper introduces Spanve (https://github.com/zjupgx/Spanve), a non-parametric statistical approach \nbased on modeling space dependence as a distance of two distributions for detecting SV genes. \nThe high computing efficiency and accuracy of Spanve is demonstrated through comprehensive benchmarking. \nAdditionally, Spanve can detect clustering-friendly SV genes and spatially variable co-expression, \nfacilitating the identification of spatial tissue domains by an imputation. \n", "is_baseline": false, - "paper_reference": "10.1101/2023.02.08.527623", + "references_doi": "10.1101/2023.02.08.527623", + "references_bibtex": null, "code_url": "https://github.com/zjupgx/Spanve", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spanve/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://github.com/zjupgx/Spanve/blob/main/tutorial.ipynb", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/spanve:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spanve", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "spark", "method_name": "SPARK", "method_summary": "Spatial PAttern Recognition via Kernels", "method_description": "SPARK builds upon a generalized linear spatial model (GLSM) with a variety of spatial kernels to accommodate count data.\nWith a newly developed penalized quasi-likelihood (PQL) algorithm, SPARK is scalable to analyzing tens of \nthousands of genes across tens 
of thousands spatial locations.\n", "is_baseline": false, - "paper_reference": "10.1038/s41592-019-0701-7", + "references_doi": "10.1038/s41592-019-0701-7", + "references_bibtex": null, "code_url": "https://github.com/xzhoulab/SPARK", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spark/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://xzhoulab.github.io/SPARK/02_SPARK_Example/", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/spark:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spark", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "spark_x", "method_name": "SPARK-X", "method_summary": "SPARK-X is a non-parametric method for rapid and effective detection of spatially expressed genes in large spatial transcriptomic studies.", "method_description": "Spatial transcriptomic studies are becoming increasingly common and large, posing important \nstatistical and computational challenges for many analytic tasks. Here, we present SPARK-X, \na non-parametric method for rapid and effective detection of spatially expressed genes in large \nspatial transcriptomic studies. SPARK-X not only produces effective type I error control and \nhigh power but also brings orders of magnitude computational savings. We apply SPARK-X to \nanalyze three large datasets, one of which is only analyzable by SPARK-X. 
In these data, \nSPARK-X identifies many spatially expressed genes including those that are spatially \nexpressed within the same cell type, revealing new biological insights.\n", "is_baseline": false, - "paper_reference": "10.1186/s13059-021-02404-0", + "references_doi": "10.1186/s13059-021-02404-0", + "references_bibtex": null, "code_url": "https://github.com/xzhoulab/SPARK", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spark_x/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://xzhoulab.github.io/SPARK/02_SPARK_Example/", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/spark_x:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spark_x", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "spatialde", "method_name": "SpatialDE", "method_summary": "SpatialDE is a method for identify spatially variable genes based on Gaussian Process model", "method_description": "SpatialDE decomposes expression variability into spatial and nonspatial components using two random effect terms: a spatial variance term that parametrizes gene expression covariance by pairwise distances of samples, and a noise term that models nonspatial variability.\n", "is_baseline": false, - "paper_reference": "10.1038/nmeth.4636", + "references_doi": "10.1038/nmeth.4636", + "references_bibtex": null, "code_url": "https://github.com/Teichlab/SpatialDE", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spatialde/config.vsh.yaml", - "code_version": null, + "documentation_url": 
"https://github.com/Teichlab/SpatialDE", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/spatialde:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spatialde", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" }, { - "task_id": "task_spatially_variable_genes", + "task_id": "methods", "method_id": "spatialde2", "method_name": "SpatialDE2", "method_summary": "SpatialDE2: Fast and localized variance component analysis of spatial transcriptomics", "method_description": "Spatial transcriptomics is now a mature technology, allowing to assay gene expression changes \nin the histological context of complex tissues. A canonical analysis workflow starts with the \nidentification of tissue zones that share similar expression profiles, followed by the detection \nof highly variable or spatially variable genes. Rapid increases in the scale and complexity of \nspatial transcriptomic datasets demand that these analysis steps are conducted in a consistent \nand integrated manner, a requirement that is not met by current methods. To address this, we \nhere present SpatialDE2, which unifies the mapping of tissue zones and spatial variable gene \ndetection as integrated software framework, while at the same time advancing current algorithms \nfor both of these steps. Formulated in a Bayesian framework, the model accounts for the Poisson \ncount noise, while simultaneously offering superior computational speed compared to previous methods. 
\nWe validate SpatialDE2 using simulated data and illustrate its utility in the context of two real-world \napplications to the spatial transcriptomics profiles of the mouse brain and human endometrium.\n", "is_baseline": false, - "paper_reference": "10.1101/2021.10.27.466045", + "references_doi": "10.1101/2021.10.27.466045", + "references_bibtex": null, "code_url": "https://github.com/PMBio/SpatialDE", - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spatialde2/config.vsh.yaml", - "code_version": null, + "documentation_url": "https://pmbio.github.io/SpatialDE/", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/methods/spatialde2:build_main", + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/methods/spatialde2", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce" } ] diff --git a/results/spatially_variable_genes/data/metric_info.json b/results/spatially_variable_genes/data/metric_info.json index 66cd21b4c..308b131d4 100644 --- a/results/spatially_variable_genes/data/metric_info.json +++ b/results/spatially_variable_genes/data/metric_info.json @@ -5,11 +5,11 @@ "metric_name": "correlation", "metric_summary": "Correlation represents the agreement of true and predicted spatial variability.", "metric_description": "Kendall rank correlation coefficient measures the ordinal association between two measured quantities. 
The best score and upper bound is 1 (observations have an identical rank), while the lower bound is -1 (observations have a completely different rank).\n", - "paper_reference": { - "doi": "10.1093/biomet/30.1-2.81" - }, - "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/metrics/correlation/config.vsh.yaml", - "code_version": null, + "references_doi": "10.1093/biomet/30.1-2.81", + "references_bibtex": null, + "implementation_url": "https://github.com/openproblems-bio/task_spatially_variable_genes/blob/a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce/src/metrics/correlation", + "image": "https://ghcr.io/openproblems-bio/task_spatially_variable_genes/metrics/correlation:build_main", + "code_version": "build_main", "commit_sha": "a2cb02e6b6054ee0566843f2fd34fa6f63dc96ce", "maximize": true } diff --git a/results/spatially_variable_genes/data/quality_control.json b/results/spatially_variable_genes/data/quality_control.json index af4472568..32f1190e7 100644 --- a/results/spatially_variable_genes/data/quality_control.json +++ b/results/spatially_variable_genes/data/quality_control.json @@ -93,9 +93,9 @@ "task_id": "task_spatially_variable_genes", "category": "Method info", "name": "Pct 'paper_reference' missing", - "value": 0.0, - "severity": 0, - "severity_value": 0.0, + "value": 0.875, + "severity": 2, + "severity_value": 3.0, "code": "percent_missing(method_info, field)", "message": "Method metadata field 'paper_reference' should be defined\n Task id: task_spatially_variable_genes\n Field: paper_reference\n" }, @@ -163,9 +163,9 @@ "task_id": "task_spatially_variable_genes", "category": "Metric info", "name": "Pct 'paper_reference' missing", - "value": 0.0, - "severity": 0, - "severity_value": 0.0, + "value": 1.0, + "severity": 2, + "severity_value": 3.0, "code": "percent_missing(metric_info, field)", "message": "Metric metadata field 'paper_reference' should be defined\n Task id: 
task_spatially_variable_genes\n Field: paper_reference\n" }, @@ -183,9 +183,9 @@ "task_id": "task_spatially_variable_genes", "category": "Dataset info", "name": "Pct 'task_id' missing", - "value": 0.0, - "severity": 0, - "severity_value": 0.0, + "value": 1.0, + "severity": 2, + "severity_value": 3.0, "code": "percent_missing(dataset_info, field)", "message": "Dataset metadata field 'task_id' should be defined\n Task id: task_spatially_variable_genes\n Field: task_id\n" }, diff --git a/results/spatially_variable_genes/data/results.json b/results/spatially_variable_genes/data/results.json index 6c2fd2e75..9f092321f 100644 --- a/results/spatially_variable_genes/data/results.json +++ b/results/spatially_variable_genes/data/results.json @@ -17,8 +17,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -38,8 +37,7 @@ "peak_memory_mb": 39527, "disk_read_mb": 170, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -59,8 +57,7 @@ "peak_memory_mb": 20890, "disk_read_mb": 168, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -80,8 +77,7 @@ "peak_memory_mb": 102503, "disk_read_mb": 142, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -101,8 +97,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 103, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -122,8 +117,7 @@ "peak_memory_mb": 445543, "disk_read_mb": 381, "disk_write_mb": 255 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -143,8 +137,7 @@ 
"peak_memory_mb": 6861, "disk_read_mb": 168, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -164,8 +157,7 @@ "peak_memory_mb": 5632, "disk_read_mb": 131, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -185,8 +177,7 @@ "peak_memory_mb": 10445, "disk_read_mb": 185, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -206,8 +197,7 @@ "peak_memory_mb": 18432, "disk_read_mb": 131, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -227,8 +217,7 @@ "peak_memory_mb": 4096, "disk_read_mb": 438, "disk_write_mb": 59 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -248,8 +237,7 @@ "peak_memory_mb": 100762, "disk_read_mb": 585, "disk_write_mb": 469 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -269,8 +257,7 @@ "peak_memory_mb": 25805, "disk_read_mb": 117, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -290,8 +277,7 @@ "peak_memory_mb": 14029, "disk_read_mb": 132, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -311,8 +297,7 @@ "peak_memory_mb": 21607, "disk_read_mb": 196, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/human_colon_cancer", @@ -332,8 +317,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"tenx_visium/post_xenium/mouse_brain", @@ -353,8 +337,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -374,8 +357,7 @@ "peak_memory_mb": 39424, "disk_read_mb": 154, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -395,8 +377,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 152, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -416,8 +397,7 @@ "peak_memory_mb": 67277, "disk_read_mb": 126, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -437,8 +417,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 87, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -458,8 +437,7 @@ "peak_memory_mb": 220365, "disk_read_mb": 353, "disk_write_mb": 243 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -479,8 +457,7 @@ "peak_memory_mb": 4506, "disk_read_mb": 152, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -500,8 +477,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 115, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -521,8 +497,7 @@ "peak_memory_mb": 7373, "disk_read_mb": 169, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -542,8 +517,7 @@ "peak_memory_mb": 1332, "disk_read_mb": 114, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"tenx_visium/post_xenium/mouse_brain", @@ -563,8 +537,7 @@ "peak_memory_mb": 2253, "disk_read_mb": 241, "disk_write_mb": 53 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -584,8 +557,7 @@ "peak_memory_mb": 74240, "disk_read_mb": 546, "disk_write_mb": 447 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -605,8 +577,7 @@ "peak_memory_mb": 2151, "disk_read_mb": 100, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -626,8 +597,7 @@ "peak_memory_mb": 6144, "disk_read_mb": 116, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -647,8 +617,7 @@ "peak_memory_mb": 21607, "disk_read_mb": 180, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/post_xenium/mouse_brain", @@ -668,8 +637,7 @@ "peak_memory_mb": 782, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -689,8 +657,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -710,8 +677,7 @@ "peak_memory_mb": 40858, "disk_read_mb": 293, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -731,8 +697,7 @@ "peak_memory_mb": 7373, "disk_read_mb": 291, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -752,8 +717,7 @@ "peak_memory_mb": 59597, "disk_read_mb": 265, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"tenx_visium/visium/human_brain_cancer", @@ -773,8 +737,7 @@ "peak_memory_mb": 778, "disk_read_mb": 226, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -794,8 +757,7 @@ "peak_memory_mb": 449536, "disk_read_mb": 735, "disk_write_mb": 486 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -815,8 +777,7 @@ "peak_memory_mb": 11981, "disk_read_mb": 291, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -836,8 +797,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 254, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -857,8 +817,7 @@ "peak_memory_mb": 10752, "disk_read_mb": 308, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -878,8 +837,7 @@ "peak_memory_mb": 18535, "disk_read_mb": 254, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -899,8 +857,7 @@ "peak_memory_mb": 6042, "disk_read_mb": 594, "disk_write_mb": 310 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -920,8 +877,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -941,8 +897,7 @@ "peak_memory_mb": 6554, "disk_read_mb": 240, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -962,8 +917,7 @@ "peak_memory_mb": 23655, "disk_read_mb": 255, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"tenx_visium/visium/human_brain_cancer", @@ -983,8 +937,7 @@ "peak_memory_mb": 28263, "disk_read_mb": 319, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_brain_cancer", @@ -1004,8 +957,7 @@ "peak_memory_mb": 776, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1025,8 +977,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1046,8 +997,7 @@ "peak_memory_mb": 39220, "disk_read_mb": 174, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1067,8 +1017,7 @@ "peak_memory_mb": 20788, "disk_read_mb": 173, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1088,8 +1037,7 @@ "peak_memory_mb": 96052, "disk_read_mb": 147, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1109,8 +1057,7 @@ "peak_memory_mb": 780, "disk_read_mb": 108, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1130,8 +1077,7 @@ "peak_memory_mb": 226100, "disk_read_mb": 327, "disk_write_mb": 197 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1151,8 +1097,7 @@ "peak_memory_mb": 11572, "disk_read_mb": 173, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1172,8 +1117,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 136, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1193,8 +1137,7 @@ "peak_memory_mb": 15156, "disk_read_mb": 190, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1214,8 +1157,7 @@ "peak_memory_mb": 2151, "disk_read_mb": 135, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1235,8 +1177,7 @@ "peak_memory_mb": 11060, "disk_read_mb": 442, "disk_write_mb": 45 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1256,8 +1197,7 @@ "peak_memory_mb": 59188, "disk_read_mb": 484, "disk_write_mb": 363 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1277,8 +1217,7 @@ "peak_memory_mb": 2151, "disk_read_mb": 121, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1298,8 +1237,7 @@ "peak_memory_mb": 5120, "disk_read_mb": 137, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1319,8 +1257,7 @@ "peak_memory_mb": 37786, "disk_read_mb": 201, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_1", @@ -1340,8 +1277,7 @@ "peak_memory_mb": 2765, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1361,8 +1297,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1382,8 +1317,7 @@ "peak_memory_mb": 46285, "disk_read_mb": 
142, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1403,8 +1337,7 @@ "peak_memory_mb": 14132, "disk_read_mb": 140, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1424,8 +1357,7 @@ "peak_memory_mb": 59188, "disk_read_mb": 114, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1445,8 +1377,7 @@ "peak_memory_mb": 1844, "disk_read_mb": 75, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1466,8 +1397,7 @@ "peak_memory_mb": 114586, "disk_read_mb": 217, "disk_write_mb": 119 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1487,8 +1417,7 @@ "peak_memory_mb": 7988, "disk_read_mb": 140, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1508,8 +1437,7 @@ "peak_memory_mb": 7680, "disk_read_mb": 103, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1529,8 +1457,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 157, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1550,8 +1477,7 @@ "peak_memory_mb": 17408, "disk_read_mb": 103, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1571,8 +1497,7 @@ "peak_memory_mb": 10240, "disk_read_mb": 837, "disk_write_mb": 460 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1592,8 +1517,7 
@@ "peak_memory_mb": 64922, "disk_read_mb": 301, "disk_write_mb": 213 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1613,8 +1537,7 @@ "peak_memory_mb": 4096, "disk_read_mb": 88, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1634,8 +1557,7 @@ "peak_memory_mb": 7578, "disk_read_mb": 104, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1655,8 +1577,7 @@ "peak_memory_mb": 36864, "disk_read_mb": 168, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_breast_cancer_2", @@ -1676,8 +1597,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1697,8 +1617,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1718,8 +1637,7 @@ "peak_memory_mb": 39424, "disk_read_mb": 195, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1739,8 +1657,7 @@ "peak_memory_mb": 20992, "disk_read_mb": 193, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1760,8 +1677,7 @@ "peak_memory_mb": 65639, "disk_read_mb": 167, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1781,8 +1697,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 129, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ 
-1802,8 +1717,7 @@ "peak_memory_mb": 50893, "disk_read_mb": 379, "disk_write_mb": 229 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1823,8 +1737,7 @@ "peak_memory_mb": 11674, "disk_read_mb": 194, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1844,8 +1757,7 @@ "peak_memory_mb": 7783, "disk_read_mb": 157, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1865,8 +1777,7 @@ "peak_memory_mb": 7271, "disk_read_mb": 210, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1886,8 +1797,7 @@ "peak_memory_mb": 1536, "disk_read_mb": 156, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1907,8 +1817,7 @@ "peak_memory_mb": 5223, "disk_read_mb": 280, "disk_write_mb": 48 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1928,8 +1837,7 @@ "peak_memory_mb": 94004, "disk_read_mb": 560, "disk_write_mb": 419 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1949,8 +1857,7 @@ "peak_memory_mb": 16180, "disk_read_mb": 142, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1970,8 +1877,7 @@ "peak_memory_mb": 13415, "disk_read_mb": 157, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -1991,8 +1897,7 @@ "peak_memory_mb": 38196, "disk_read_mb": 222, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cerebellum", @@ -2012,8 
+1917,7 @@ "peak_memory_mb": 2765, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2033,8 +1937,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2054,8 +1957,7 @@ "peak_memory_mb": 38912, "disk_read_mb": 142, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2075,8 +1977,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 141, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2096,8 +1997,7 @@ "peak_memory_mb": 68813, "disk_read_mb": 115, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2117,8 +2017,7 @@ "peak_memory_mb": 781, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2138,8 +2037,7 @@ "peak_memory_mb": 433255, "disk_read_mb": 229, "disk_write_mb": 131 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2159,8 +2057,7 @@ "peak_memory_mb": 7988, "disk_read_mb": 141, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2180,8 +2077,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 104, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2201,8 +2097,7 @@ "peak_memory_mb": 10855, "disk_read_mb": 158, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"tenx_visium/visium/human_cervical_cancer", @@ -2222,8 +2117,7 @@ "peak_memory_mb": 2356, "disk_read_mb": 103, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2243,8 +2137,7 @@ "peak_memory_mb": 5837, "disk_read_mb": 875, "disk_write_mb": 500 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2264,8 +2157,7 @@ "peak_memory_mb": 67277, "disk_read_mb": 324, "disk_write_mb": 235 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2285,8 +2177,7 @@ "peak_memory_mb": 10650, "disk_read_mb": 89, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2306,8 +2197,7 @@ "peak_memory_mb": 7680, "disk_read_mb": 104, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2327,8 +2217,7 @@ "peak_memory_mb": 19968, "disk_read_mb": 169, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_cervical_cancer", @@ -2348,8 +2237,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2369,8 +2257,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2390,8 +2277,7 @@ "peak_memory_mb": 40346, "disk_read_mb": 270, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2411,8 +2297,7 @@ "peak_memory_mb": 7066, "disk_read_mb": 268, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2432,8 +2317,7 @@ "peak_memory_mb": 58983, "disk_read_mb": 242, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2453,8 +2337,7 @@ "peak_memory_mb": 775, "disk_read_mb": 203, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2474,8 +2357,7 @@ "peak_memory_mb": 285389, "disk_read_mb": 641, "disk_write_mb": 416 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2495,8 +2377,7 @@ "peak_memory_mb": 8397, "disk_read_mb": 268, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2516,8 +2397,7 @@ "peak_memory_mb": 8192, "disk_read_mb": 231, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2537,8 +2417,7 @@ "peak_memory_mb": 9626, "disk_read_mb": 285, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2558,8 +2437,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 230, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2579,8 +2457,7 @@ "peak_memory_mb": 12084, "disk_read_mb": 493, "disk_write_mb": 109 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2600,8 +2477,7 @@ "peak_memory_mb": 158618, "disk_read_mb": 980, "disk_write_mb": 765 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2621,8 +2497,7 @@ "peak_memory_mb": 
4506, "disk_read_mb": 217, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2642,8 +2517,7 @@ "peak_memory_mb": 12186, "disk_read_mb": 232, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2663,8 +2537,7 @@ "peak_memory_mb": 41165, "disk_read_mb": 296, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_colorectal_cancer", @@ -2684,8 +2557,7 @@ "peak_memory_mb": 3482, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2705,8 +2577,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2726,8 +2597,7 @@ "peak_memory_mb": 39322, "disk_read_mb": 154, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2747,8 +2617,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 153, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2768,8 +2637,7 @@ "peak_memory_mb": 71578, "disk_read_mb": 127, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2789,8 +2657,7 @@ "peak_memory_mb": 774, "disk_read_mb": 88, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2810,8 +2677,7 @@ "peak_memory_mb": 280474, "disk_read_mb": 305, "disk_write_mb": 195 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2831,8 +2697,7 @@ "peak_memory_mb": 7988, "disk_read_mb": 
153, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2852,8 +2717,7 @@ "peak_memory_mb": 10650, "disk_read_mb": 116, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2873,8 +2737,7 @@ "peak_memory_mb": 9728, "disk_read_mb": 170, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2894,8 +2757,7 @@ "peak_memory_mb": 1332, "disk_read_mb": 115, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2915,8 +2777,7 @@ "peak_memory_mb": 17101, "disk_read_mb": 603, "disk_write_mb": 43 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2936,8 +2797,7 @@ "peak_memory_mb": 82637, "disk_read_mb": 457, "disk_write_mb": 357 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2957,8 +2817,7 @@ "peak_memory_mb": 10855, "disk_read_mb": 101, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2978,8 +2837,7 @@ "peak_memory_mb": 8807, "disk_read_mb": 116, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -2999,8 +2857,7 @@ "peak_memory_mb": 37684, "disk_read_mb": 181, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_heart", @@ -3020,8 +2877,7 @@ "peak_memory_mb": 768, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3041,8 +2897,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3062,8 +2917,7 @@ "peak_memory_mb": 38912, "disk_read_mb": 138, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3083,8 +2937,7 @@ "peak_memory_mb": 20685, "disk_read_mb": 136, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3104,8 +2957,7 @@ "peak_memory_mb": 92263, "disk_read_mb": 110, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3125,8 +2977,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 71, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3146,8 +2997,7 @@ "peak_memory_mb": 431821, "disk_read_mb": 218, "disk_write_mb": 125 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3167,8 +3017,7 @@ "peak_memory_mb": 11469, "disk_read_mb": 136, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3188,8 +3037,7 @@ "peak_memory_mb": 10650, "disk_read_mb": 99, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3209,8 +3057,7 @@ "peak_memory_mb": 9319, "disk_read_mb": 153, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3230,8 +3077,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 99, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3251,8 +3097,7 @@ "peak_memory_mb": 8397, 
"disk_read_mb": 1434, "disk_write_mb": 1127 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3272,8 +3117,7 @@ "peak_memory_mb": 65434, "disk_read_mb": 309, "disk_write_mb": 225 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3293,8 +3137,7 @@ "peak_memory_mb": 2048, "disk_read_mb": 84, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3314,8 +3157,7 @@ "peak_memory_mb": 6349, "disk_read_mb": 100, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3335,8 +3177,7 @@ "peak_memory_mb": 36864, "disk_read_mb": 164, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_intestinal_cancer", @@ -3356,8 +3197,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3377,8 +3217,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3398,8 +3237,7 @@ "peak_memory_mb": 39527, "disk_read_mb": 184, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3419,8 +3257,7 @@ "peak_memory_mb": 13824, "disk_read_mb": 183, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3440,8 +3277,7 @@ "peak_memory_mb": 100045, "disk_read_mb": 157, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3461,8 +3297,7 @@ 
"peak_memory_mb": 5632, "disk_read_mb": 118, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3482,8 +3317,7 @@ "peak_memory_mb": 442471, "disk_read_mb": 412, "disk_write_mb": 272 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3503,8 +3337,7 @@ "peak_memory_mb": 4608, "disk_read_mb": 183, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3524,8 +3357,7 @@ "peak_memory_mb": 10752, "disk_read_mb": 146, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3545,8 +3377,7 @@ "peak_memory_mb": 10343, "disk_read_mb": 199, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3566,8 +3397,7 @@ "peak_memory_mb": 3584, "disk_read_mb": 145, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3587,8 +3417,7 @@ "peak_memory_mb": 6144, "disk_read_mb": 627, "disk_write_mb": 60 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3608,8 +3437,7 @@ "peak_memory_mb": 81613, "disk_read_mb": 630, "disk_write_mb": 501 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3629,8 +3457,7 @@ "peak_memory_mb": 6349, "disk_read_mb": 131, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3650,8 +3477,7 @@ "peak_memory_mb": 6964, "disk_read_mb": 146, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3671,8 +3497,7 @@ "peak_memory_mb": 22119, "disk_read_mb": 211, 
"disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_kidney", @@ -3692,8 +3517,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3713,8 +3537,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3734,8 +3557,7 @@ "peak_memory_mb": 39732, "disk_read_mb": 210, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3755,8 +3577,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 208, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3776,8 +3597,7 @@ "peak_memory_mb": 101786, "disk_read_mb": 182, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3797,8 +3617,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 143, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3818,8 +3637,7 @@ "peak_memory_mb": 117146, "disk_read_mb": 449, "disk_write_mb": 283 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3839,8 +3657,7 @@ "peak_memory_mb": 4608, "disk_read_mb": 208, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3860,8 +3677,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 171, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3881,8 +3697,7 @@ "peak_memory_mb": 10650, "disk_read_mb": 225, 
"disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3902,8 +3717,7 @@ "peak_memory_mb": 3687, "disk_read_mb": 171, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3923,8 +3737,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 670, "disk_write_mb": 74 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3944,8 +3757,7 @@ "peak_memory_mb": 111104, "disk_read_mb": 676, "disk_write_mb": 520 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3965,8 +3777,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 157, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -3986,8 +3797,7 @@ "peak_memory_mb": 9831, "disk_read_mb": 172, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -4007,8 +3817,7 @@ "peak_memory_mb": 38912, "disk_read_mb": 236, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lung_cancer", @@ -4028,8 +3837,7 @@ "peak_memory_mb": 2765, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4049,8 +3857,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4070,8 +3877,7 @@ "peak_memory_mb": 39220, "disk_read_mb": 181, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4091,8 +3897,7 @@ "peak_memory_mb": 13824, "disk_read_mb": 180, 
"disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4112,8 +3917,7 @@ "peak_memory_mb": 62362, "disk_read_mb": 154, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4133,8 +3937,7 @@ "peak_memory_mb": 767, "disk_read_mb": 115, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4154,8 +3957,7 @@ "peak_memory_mb": 66151, "disk_read_mb": 325, "disk_write_mb": 188 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4175,8 +3977,7 @@ "peak_memory_mb": 11572, "disk_read_mb": 180, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4196,8 +3997,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 143, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4217,8 +4017,7 @@ "peak_memory_mb": 15156, "disk_read_mb": 197, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4238,8 +4037,7 @@ "peak_memory_mb": 2560, "disk_read_mb": 142, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4259,8 +4057,7 @@ "peak_memory_mb": 2151, "disk_read_mb": 269, "disk_write_mb": 42 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4280,8 +4077,7 @@ "peak_memory_mb": 107725, "disk_read_mb": 469, "disk_write_mb": 341 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4301,8 +4097,7 @@ "peak_memory_mb": 11060, "disk_read_mb": 128, 
"disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4322,8 +4117,7 @@ "peak_memory_mb": 8704, "disk_read_mb": 143, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4343,8 +4137,7 @@ "peak_memory_mb": 20480, "disk_read_mb": 208, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_lymph_node", @@ -4364,8 +4157,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4385,8 +4177,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4406,8 +4197,7 @@ "peak_memory_mb": 38810, "disk_read_mb": 116, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4427,8 +4217,7 @@ "peak_memory_mb": 6554, "disk_read_mb": 114, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4448,8 +4237,7 @@ "peak_memory_mb": 66663, "disk_read_mb": 88, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4469,8 +4257,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 49, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4490,8 +4277,7 @@ "peak_memory_mb": 114688, "disk_read_mb": 191, "disk_write_mb": 119 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4511,8 +4297,7 @@ "peak_memory_mb": 
7885, "disk_read_mb": 114, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4532,8 +4317,7 @@ "peak_memory_mb": 10548, "disk_read_mb": 77, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4553,8 +4337,7 @@ "peak_memory_mb": 9216, "disk_read_mb": 131, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4574,8 +4357,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4595,8 +4377,7 @@ "peak_memory_mb": 5940, "disk_read_mb": 1229, "disk_write_mb": 672 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4616,8 +4397,7 @@ "peak_memory_mb": 40141, "disk_read_mb": 277, "disk_write_mb": 215 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4637,8 +4417,7 @@ "peak_memory_mb": 1946, "disk_read_mb": 62, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4658,8 +4437,7 @@ "peak_memory_mb": 3789, "disk_read_mb": 78, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4679,8 +4457,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_normal_prostate", @@ -4700,8 +4477,7 @@ "peak_memory_mb": 2765, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ 
-4721,8 +4497,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4742,8 +4517,7 @@ "peak_memory_mb": 39015, "disk_read_mb": 144, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4763,8 +4537,7 @@ "peak_memory_mb": 7783, "disk_read_mb": 142, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4784,8 +4557,7 @@ "peak_memory_mb": 94208, "disk_read_mb": 116, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4805,8 +4577,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 77, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4826,8 +4597,7 @@ "peak_memory_mb": 280269, "disk_read_mb": 260, "disk_write_mb": 161 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4847,8 +4617,7 @@ "peak_memory_mb": 7988, "disk_read_mb": 142, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4868,8 +4637,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 105, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4889,8 +4657,7 @@ "peak_memory_mb": 6964, "disk_read_mb": 159, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4910,8 +4677,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 104, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"tenx_visium/visium/human_prostate_cancer", @@ -4931,8 +4697,7 @@ "peak_memory_mb": 10343, "disk_read_mb": 1536, "disk_write_mb": 1127 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4952,8 +4717,7 @@ "peak_memory_mb": 73626, "disk_read_mb": 382, "disk_write_mb": 292 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4973,8 +4737,7 @@ "peak_memory_mb": 10957, "disk_read_mb": 90, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -4994,8 +4757,7 @@ "peak_memory_mb": 4404, "disk_read_mb": 106, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -5015,8 +4777,7 @@ "peak_memory_mb": 20480, "disk_read_mb": 170, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_prostate_cancer", @@ -5036,8 +4797,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5057,8 +4817,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5078,8 +4837,7 @@ "peak_memory_mb": 39015, "disk_read_mb": 155, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5099,8 +4857,7 @@ "peak_memory_mb": 20788, "disk_read_mb": 154, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5120,8 +4877,7 @@ "peak_memory_mb": 68813, "disk_read_mb": 128, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5141,8 +4897,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 89, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5162,8 +4917,7 @@ "peak_memory_mb": 272999, "disk_read_mb": 272, "disk_write_mb": 161 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5183,8 +4937,7 @@ "peak_memory_mb": 7988, "disk_read_mb": 154, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5204,8 +4957,7 @@ "peak_memory_mb": 10957, "disk_read_mb": 117, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5225,8 +4977,7 @@ "peak_memory_mb": 9933, "disk_read_mb": 171, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5246,8 +4997,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 116, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5267,8 +5017,7 @@ "peak_memory_mb": 16589, "disk_read_mb": 2458, "disk_write_mb": 1946 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5288,8 +5037,7 @@ "peak_memory_mb": 76800, "disk_read_mb": 393, "disk_write_mb": 292 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5309,8 +5057,7 @@ "peak_memory_mb": 10855, "disk_read_mb": 102, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5330,8 +5077,7 @@ "peak_memory_mb": 11981, "disk_read_mb": 117, 
"disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5351,8 +5097,7 @@ "peak_memory_mb": 37376, "disk_read_mb": 182, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/human_skin_melanoma", @@ -5372,8 +5117,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5393,8 +5137,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5414,8 +5157,7 @@ "peak_memory_mb": 38708, "disk_read_mb": 110, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5435,8 +5177,7 @@ "peak_memory_mb": 13927, "disk_read_mb": 108, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5456,8 +5197,7 @@ "peak_memory_mb": 91341, "disk_read_mb": 82, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5477,8 +5217,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 43, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5498,8 +5237,7 @@ "peak_memory_mb": 430183, "disk_read_mb": 173, "disk_write_mb": 107 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5519,8 +5257,7 @@ "peak_memory_mb": 11367, "disk_read_mb": 108, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5540,8 +5277,7 @@ "peak_memory_mb": 
10650, "disk_read_mb": 71, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5561,8 +5297,7 @@ "peak_memory_mb": 10445, "disk_read_mb": 125, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5582,8 +5317,7 @@ "peak_memory_mb": 2253, "disk_read_mb": 71, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5603,8 +5337,7 @@ "peak_memory_mb": 15668, "disk_read_mb": 1127, "disk_write_mb": 566 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5624,8 +5357,7 @@ "peak_memory_mb": 37888, "disk_read_mb": 251, "disk_write_mb": 195 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5645,8 +5377,7 @@ "peak_memory_mb": 3994, "disk_read_mb": 57, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5666,8 +5397,7 @@ "peak_memory_mb": 11162, "disk_read_mb": 72, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5687,8 +5417,7 @@ "peak_memory_mb": 36864, "disk_read_mb": 136, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_brain_coronal", @@ -5708,8 +5437,7 @@ "peak_memory_mb": 3482, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5729,8 +5457,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5750,8 +5477,7 @@ 
"peak_memory_mb": 39629, "disk_read_mb": 143, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5771,8 +5497,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 141, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5792,8 +5517,7 @@ "peak_memory_mb": 66663, "disk_read_mb": 115, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5813,8 +5537,7 @@ "peak_memory_mb": 2765, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5834,8 +5557,7 @@ "peak_memory_mb": 282010, "disk_read_mb": 387, "disk_write_mb": 289 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5855,8 +5577,7 @@ "peak_memory_mb": 7988, "disk_read_mb": 141, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5876,8 +5597,7 @@ "peak_memory_mb": 8295, "disk_read_mb": 104, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5897,8 +5617,7 @@ "peak_memory_mb": 10343, "disk_read_mb": 158, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5918,8 +5637,7 @@ "peak_memory_mb": 11469, "disk_read_mb": 103, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5939,8 +5657,7 @@ "peak_memory_mb": 19149, "disk_read_mb": 598, "disk_write_mb": 71 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5960,8 +5677,7 @@ "peak_memory_mb": 138957, "disk_read_mb": 632, 
"disk_write_mb": 543 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -5981,8 +5697,7 @@ "peak_memory_mb": 3482, "disk_read_mb": 90, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -6002,8 +5717,7 @@ "peak_memory_mb": 10138, "disk_read_mb": 105, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -6023,8 +5737,7 @@ "peak_memory_mb": 38810, "disk_read_mb": 169, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_embryo", @@ -6044,8 +5757,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6065,8 +5777,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 114, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6086,8 +5797,7 @@ "peak_memory_mb": 38605, "disk_read_mb": 114, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6107,8 +5817,7 @@ "peak_memory_mb": 10957, "disk_read_mb": 112, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6128,8 +5837,7 @@ "peak_memory_mb": 64615, "disk_read_mb": 86, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6149,8 +5857,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 48, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6170,8 +5877,7 @@ "peak_memory_mb": 113767, "disk_read_mb": 140, "disk_write_mb": 70 - }, - 
"task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6191,8 +5897,7 @@ "peak_memory_mb": 11367, "disk_read_mb": 113, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6212,8 +5917,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6233,8 +5937,7 @@ "peak_memory_mb": 9012, "disk_read_mb": 129, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6254,8 +5957,7 @@ "peak_memory_mb": 2253, "disk_read_mb": 75, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6275,8 +5977,7 @@ "peak_memory_mb": 5735, "disk_read_mb": 517, "disk_write_mb": 159 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6296,8 +5997,7 @@ "peak_memory_mb": 82637, "disk_read_mb": 182, "disk_write_mb": 122 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6317,8 +6017,7 @@ "peak_memory_mb": 5940, "disk_read_mb": 61, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6338,8 +6037,7 @@ "peak_memory_mb": 7476, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6359,8 +6057,7 @@ "peak_memory_mb": 36660, "disk_read_mb": 141, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_kidney_v1", @@ -6380,8 +6077,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6401,8 +6097,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 112, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6422,8 +6117,7 @@ "peak_memory_mb": 38503, "disk_read_mb": 112, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6443,8 +6137,7 @@ "peak_memory_mb": 2253, "disk_read_mb": 110, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6464,8 +6157,7 @@ "peak_memory_mb": 88884, "disk_read_mb": 84, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6485,8 +6177,7 @@ "peak_memory_mb": 1536, "disk_read_mb": 45, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6506,8 +6197,7 @@ "peak_memory_mb": 113664, "disk_read_mb": 127, "disk_write_mb": 59 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6527,8 +6217,7 @@ "peak_memory_mb": 4301, "disk_read_mb": 111, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6548,8 +6237,7 @@ "peak_memory_mb": 3072, "disk_read_mb": 73, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6569,8 +6257,7 @@ "peak_memory_mb": 6349, "disk_read_mb": 127, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6590,8 +6277,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 73, 
"disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6611,8 +6297,7 @@ "peak_memory_mb": 7783, "disk_read_mb": 527, "disk_write_mb": 225 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6632,8 +6317,7 @@ "peak_memory_mb": 30823, "disk_read_mb": 159, "disk_write_mb": 101 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6653,8 +6337,7 @@ "peak_memory_mb": 6042, "disk_read_mb": 59, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6674,8 +6357,7 @@ "peak_memory_mb": 6349, "disk_read_mb": 74, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6695,8 +6377,7 @@ "peak_memory_mb": 36557, "disk_read_mb": 139, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "tenx_visium/visium/mouse_olfactory_bulb", @@ -6716,8 +6397,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6737,8 +6417,7 @@ "peak_memory_mb": 2663, "disk_read_mb": 114, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6758,8 +6437,7 @@ "peak_memory_mb": 46080, "disk_read_mb": 114, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6779,8 +6457,7 @@ "peak_memory_mb": 6452, "disk_read_mb": 112, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6800,8 +6477,7 @@ "peak_memory_mb": 
57140, "disk_read_mb": 86, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6821,8 +6497,7 @@ "peak_memory_mb": 768, "disk_read_mb": 47, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6842,8 +6517,7 @@ "peak_memory_mb": 114484, "disk_read_mb": 153, "disk_write_mb": 84 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6863,8 +6537,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 112, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6884,8 +6557,7 @@ "peak_memory_mb": 3072, "disk_read_mb": 75, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6905,8 +6577,7 @@ "peak_memory_mb": 9012, "disk_read_mb": 129, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6926,8 +6597,7 @@ "peak_memory_mb": 11367, "disk_read_mb": 74, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6947,8 +6617,7 @@ "peak_memory_mb": 4301, "disk_read_mb": 296, "disk_write_mb": 119 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6968,8 +6637,7 @@ "peak_memory_mb": 34304, "disk_read_mb": 211, "disk_write_mb": 151 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -6989,8 +6657,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 60, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -7010,8 +6677,7 @@ 
"peak_memory_mb": 2253, "disk_read_mb": 73, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -7031,8 +6697,7 @@ "peak_memory_mb": 17408, "disk_read_mb": 137, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_brain", @@ -7052,8 +6717,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7073,8 +6737,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7094,8 +6757,7 @@ "peak_memory_mb": 38810, "disk_read_mb": 125, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7115,8 +6777,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 123, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7136,8 +6797,7 @@ "peak_memory_mb": 42292, "disk_read_mb": 97, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7157,8 +6817,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 59, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7178,8 +6837,7 @@ "peak_memory_mb": 216986, "disk_read_mb": 195, "disk_write_mb": 114 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7199,8 +6857,7 @@ "peak_memory_mb": 4404, "disk_read_mb": 124, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7220,8 
+6877,7 @@ "peak_memory_mb": 3072, "disk_read_mb": 87, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7241,8 +6897,7 @@ "peak_memory_mb": 6554, "disk_read_mb": 140, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7262,8 +6917,7 @@ "peak_memory_mb": 1229, "disk_read_mb": 86, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7283,8 +6937,7 @@ "peak_memory_mb": 2356, "disk_read_mb": 415, "disk_write_mb": 232 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7304,8 +6957,7 @@ "peak_memory_mb": 40039, "disk_read_mb": 282, "disk_write_mb": 211 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7325,8 +6977,7 @@ "peak_memory_mb": 1946, "disk_read_mb": 72, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7346,8 +6997,7 @@ "peak_memory_mb": 2560, "disk_read_mb": 84, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7367,8 +7017,7 @@ "peak_memory_mb": 17613, "disk_read_mb": 149, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_eye", @@ -7388,8 +7037,7 @@ "peak_memory_mb": 780, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7409,8 +7057,7 @@ "peak_memory_mb": 2663, "disk_read_mb": 100, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ 
-7430,8 +7077,7 @@ "peak_memory_mb": 38400, "disk_read_mb": 100, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7451,8 +7097,7 @@ "peak_memory_mb": 6452, "disk_read_mb": 98, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7472,8 +7117,7 @@ "peak_memory_mb": 38196, "disk_read_mb": 72, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7493,8 +7137,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 33, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7514,8 +7157,7 @@ "peak_memory_mb": 111514, "disk_read_mb": 100, "disk_write_mb": 45 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7535,8 +7177,7 @@ "peak_memory_mb": 4301, "disk_read_mb": 98, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7556,8 +7197,7 @@ "peak_memory_mb": 2970, "disk_read_mb": 61, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7577,8 +7217,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 115, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7598,8 +7237,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 61, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7619,8 +7257,7 @@ "peak_memory_mb": 2560, "disk_read_mb": 211, "disk_write_mb": 40 - }, - "task_id": "task_spatially_variable_genes" + } }, { 
"dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7640,8 +7277,7 @@ "peak_memory_mb": 29492, "disk_read_mb": 126, "disk_write_mb": 80 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7661,8 +7297,7 @@ "peak_memory_mb": 3584, "disk_read_mb": 46, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7682,8 +7317,7 @@ "peak_memory_mb": 1946, "disk_read_mb": 59, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7703,8 +7337,7 @@ "peak_memory_mb": 17306, "disk_read_mb": 124, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e10_whole_body", @@ -7724,8 +7357,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7745,8 +7377,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 115, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7766,8 +7397,7 @@ "peak_memory_mb": 46183, "disk_read_mb": 114, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7787,8 +7417,7 @@ "peak_memory_mb": 5940, "disk_read_mb": 113, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7808,8 +7437,7 @@ "peak_memory_mb": 90215, "disk_read_mb": 87, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7829,8 +7457,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 48, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7850,8 +7477,7 @@ "peak_memory_mb": 269108, "disk_read_mb": 156, "disk_write_mb": 86 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7871,8 +7497,7 @@ "peak_memory_mb": 7885, "disk_read_mb": 113, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7892,8 +7517,7 @@ "peak_memory_mb": 8090, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7913,8 +7537,7 @@ "peak_memory_mb": 9012, "disk_read_mb": 130, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7934,8 +7557,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 75, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7955,8 +7577,7 @@ "peak_memory_mb": 9626, "disk_read_mb": 602, "disk_write_mb": 246 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7976,8 +7597,7 @@ "peak_memory_mb": 60109, "disk_read_mb": 216, "disk_write_mb": 156 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -7997,8 +7617,7 @@ "peak_memory_mb": 8704, "disk_read_mb": 61, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -8018,8 +7637,7 @@ "peak_memory_mb": 4301, "disk_read_mb": 74, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -8039,8 +7657,7 @@ "peak_memory_mb": 17408, "disk_read_mb": 138, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_1", @@ -8060,8 +7677,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8081,8 +7697,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 119, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8102,8 +7717,7 @@ "peak_memory_mb": 38708, "disk_read_mb": 119, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8123,8 +7737,7 @@ "peak_memory_mb": 3584, "disk_read_mb": 117, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8144,8 +7757,7 @@ "peak_memory_mb": 57447, "disk_read_mb": 91, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8165,8 +7777,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 52, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8186,8 +7797,7 @@ "peak_memory_mb": 217908, "disk_read_mb": 172, "disk_write_mb": 97 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8207,8 +7817,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 117, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8228,8 +7837,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 80, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8249,8 +7857,7 @@ "peak_memory_mb": 9524, "disk_read_mb": 134, "disk_write_mb": 2 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8270,8 +7877,7 @@ "peak_memory_mb": 2253, "disk_read_mb": 80, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8291,8 +7897,7 @@ "peak_memory_mb": 8090, "disk_read_mb": 706, "disk_write_mb": 406 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8312,8 +7917,7 @@ "peak_memory_mb": 61543, "disk_read_mb": 243, "disk_write_mb": 178 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8333,8 +7937,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 65, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8354,8 +7957,7 @@ "peak_memory_mb": 4404, "disk_read_mb": 78, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8375,8 +7977,7 @@ "peak_memory_mb": 17613, "disk_read_mb": 142, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_2", @@ -8396,8 +7997,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8417,8 +8017,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 112, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8438,8 +8037,7 @@ "peak_memory_mb": 46080, "disk_read_mb": 112, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8459,8 +8057,7 @@ "peak_memory_mb": 11162, "disk_read_mb": 110, "disk_write_mb": 1 - }, - 
"task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8480,8 +8077,7 @@ "peak_memory_mb": 56320, "disk_read_mb": 84, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8501,8 +8097,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 45, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8522,8 +8117,7 @@ "peak_memory_mb": 215143, "disk_read_mb": 144, "disk_write_mb": 77 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8543,8 +8137,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 110, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8564,8 +8157,7 @@ "peak_memory_mb": 2970, "disk_read_mb": 73, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8585,8 +8177,7 @@ "peak_memory_mb": 9012, "disk_read_mb": 127, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8606,8 +8197,7 @@ "peak_memory_mb": 17306, "disk_read_mb": 72, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8627,8 +8217,7 @@ "peak_memory_mb": 14029, "disk_read_mb": 819, "disk_write_mb": 289 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8648,8 +8237,7 @@ "peak_memory_mb": 50176, "disk_read_mb": 198, "disk_write_mb": 141 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8669,8 +8257,7 @@ 
"peak_memory_mb": 5940, "disk_read_mb": 58, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8690,8 +8277,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 71, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8711,8 +8297,7 @@ "peak_memory_mb": 17408, "disk_read_mb": 135, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/dbitseq/mouse_e11_lower_body", @@ -8732,8 +8317,7 @@ "peak_memory_mb": 782, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8753,8 +8337,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8774,8 +8357,7 @@ "peak_memory_mb": 42906, "disk_read_mb": 159, "disk_write_mb": 261 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8795,8 +8377,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 158, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8816,8 +8397,7 @@ "peak_memory_mb": 54580, "disk_read_mb": 132, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8837,8 +8417,7 @@ "peak_memory_mb": 777, "disk_read_mb": 93, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8858,8 +8437,7 @@ "peak_memory_mb": 116122, "disk_read_mb": 340, "disk_write_mb": 224 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/merfish/human_cortex_1", @@ -8879,8 +8457,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 158, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8900,8 +8477,7 @@ "peak_memory_mb": 5632, "disk_read_mb": 121, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8921,8 +8497,7 @@ "peak_memory_mb": 11572, "disk_read_mb": 175, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8942,8 +8517,7 @@ "peak_memory_mb": 2560, "disk_read_mb": 120, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8963,8 +8537,7 @@ "peak_memory_mb": 2765, "disk_read_mb": 244, "disk_write_mb": 54 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -8984,8 +8557,7 @@ "peak_memory_mb": 145716, "disk_read_mb": 496, "disk_write_mb": 391 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -9005,8 +8577,7 @@ "peak_memory_mb": 8909, "disk_read_mb": 106, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -9026,8 +8597,7 @@ "peak_memory_mb": 11162, "disk_read_mb": 119, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -9047,8 +8617,7 @@ "peak_memory_mb": 36148, "disk_read_mb": 183, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_1", @@ -9068,8 +8637,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { 
"dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9089,8 +8657,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9110,8 +8677,7 @@ "peak_memory_mb": 38912, "disk_read_mb": 134, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9131,8 +8697,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 132, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9152,8 +8717,7 @@ "peak_memory_mb": 60109, "disk_read_mb": 106, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9173,8 +8737,7 @@ "peak_memory_mb": 772, "disk_read_mb": 67, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9194,8 +8757,7 @@ "peak_memory_mb": 114279, "disk_read_mb": 230, "disk_write_mb": 141 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9215,8 +8777,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 132, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9236,8 +8797,7 @@ "peak_memory_mb": 3072, "disk_read_mb": 95, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9257,8 +8817,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 149, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9278,8 +8837,7 @@ "peak_memory_mb": 1332, "disk_read_mb": 95, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" 
+ } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9299,8 +8857,7 @@ "peak_memory_mb": 8500, "disk_read_mb": 1741, "disk_write_mb": 1434 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9320,8 +8877,7 @@ "peak_memory_mb": 43930, "disk_read_mb": 337, "disk_write_mb": 257 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9341,8 +8897,7 @@ "peak_memory_mb": 3789, "disk_read_mb": 80, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9362,8 +8917,7 @@ "peak_memory_mb": 2970, "disk_read_mb": 93, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9383,8 +8937,7 @@ "peak_memory_mb": 17920, "disk_read_mb": 158, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_2", @@ -9404,8 +8957,7 @@ "peak_memory_mb": 768, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9425,8 +8977,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9446,8 +8997,7 @@ "peak_memory_mb": 46592, "disk_read_mb": 150, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9467,8 +9017,7 @@ "peak_memory_mb": 13927, "disk_read_mb": 148, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9488,8 +9037,7 @@ "peak_memory_mb": 62157, "disk_read_mb": 122, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9509,8 +9057,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 83, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9530,8 +9077,7 @@ "peak_memory_mb": 219034, "disk_read_mb": 290, "disk_write_mb": 184 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9551,8 +9097,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 148, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9572,8 +9117,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 111, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9593,8 +9137,7 @@ "peak_memory_mb": 10957, "disk_read_mb": 165, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9614,8 +9157,7 @@ "peak_memory_mb": 2151, "disk_read_mb": 111, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9635,8 +9177,7 @@ "peak_memory_mb": 8192, "disk_read_mb": 353, "disk_write_mb": 40 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9656,8 +9197,7 @@ "peak_memory_mb": 57754, "disk_read_mb": 437, "disk_write_mb": 341 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9677,8 +9217,7 @@ "peak_memory_mb": 10548, "disk_read_mb": 97, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9698,8 +9237,7 @@ "peak_memory_mb": 3687, "disk_read_mb": 109, "disk_write_mb": 1 - }, - 
"task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9719,8 +9257,7 @@ "peak_memory_mb": 32359, "disk_read_mb": 174, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_3", @@ -9740,8 +9277,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9761,8 +9297,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9782,8 +9317,7 @@ "peak_memory_mb": 39117, "disk_read_mb": 149, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9803,8 +9337,7 @@ "peak_memory_mb": 11981, "disk_read_mb": 147, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9824,8 +9357,7 @@ "peak_memory_mb": 95028, "disk_read_mb": 121, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9845,8 +9377,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 82, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9866,8 +9397,7 @@ "peak_memory_mb": 115815, "disk_read_mb": 286, "disk_write_mb": 182 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9887,8 +9417,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 147, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9908,8 +9437,7 @@ "peak_memory_mb": 5632, "disk_read_mb": 110, 
"disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9929,8 +9457,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 164, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9950,8 +9477,7 @@ "peak_memory_mb": 2048, "disk_read_mb": 110, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9971,8 +9497,7 @@ "peak_memory_mb": 4813, "disk_read_mb": 233, "disk_write_mb": 38 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -9992,8 +9517,7 @@ "peak_memory_mb": 72090, "disk_read_mb": 430, "disk_write_mb": 335 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -10013,8 +9537,7 @@ "peak_memory_mb": 3789, "disk_read_mb": 95, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -10034,8 +9557,7 @@ "peak_memory_mb": 3687, "disk_read_mb": 108, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -10055,8 +9577,7 @@ "peak_memory_mb": 32461, "disk_read_mb": 172, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/human_cortex_4", @@ -10076,8 +9597,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10097,8 +9617,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10118,8 +9637,7 @@ "peak_memory_mb": "NA", 
"disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10139,8 +9657,7 @@ "peak_memory_mb": 11469, "disk_read_mb": 190, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10160,8 +9677,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10181,8 +9697,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 125, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10202,8 +9717,7 @@ "peak_memory_mb": 222413, "disk_read_mb": 470, "disk_write_mb": 323 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10223,8 +9737,7 @@ "peak_memory_mb": 6964, "disk_read_mb": 190, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10244,8 +9757,7 @@ "peak_memory_mb": 7271, "disk_read_mb": 153, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10265,8 +9777,7 @@ "peak_memory_mb": 14541, "disk_read_mb": 207, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10286,8 +9797,7 @@ "peak_memory_mb": 2356, "disk_read_mb": 152, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10307,8 +9817,7 @@ "peak_memory_mb": 9524, "disk_read_mb": 1024, "disk_write_mb": 692 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10328,8 +9837,7 @@ 
"peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10349,8 +9857,7 @@ "peak_memory_mb": 9319, "disk_read_mb": 138, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10370,8 +9877,7 @@ "peak_memory_mb": 22733, "disk_read_mb": 151, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10391,8 +9897,7 @@ "peak_memory_mb": 25908, "disk_read_mb": 215, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/merfish/mouse_cortex", @@ -10412,8 +9917,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10433,8 +9937,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10454,8 +9957,7 @@ "peak_memory_mb": 36660, "disk_read_mb": 174, "disk_write_mb": 261 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10475,8 +9977,7 @@ "peak_memory_mb": 11469, "disk_read_mb": 173, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10496,8 +9997,7 @@ "peak_memory_mb": 87450, "disk_read_mb": 147, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10517,8 +10017,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 108, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } 
}, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10538,8 +10037,7 @@ "peak_memory_mb": 225383, "disk_read_mb": 602, "disk_write_mb": 471 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10559,8 +10057,7 @@ "peak_memory_mb": 8295, "disk_read_mb": 173, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10580,8 +10077,7 @@ "peak_memory_mb": 6042, "disk_read_mb": 136, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10601,8 +10097,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10622,8 +10117,7 @@ "peak_memory_mb": 2458, "disk_read_mb": 135, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10643,8 +10137,7 @@ "peak_memory_mb": 10343, "disk_read_mb": 852, "disk_write_mb": 558 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10664,8 +10157,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10685,8 +10177,7 @@ "peak_memory_mb": 5940, "disk_read_mb": 121, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10706,8 +10197,7 @@ "peak_memory_mb": 46080, "disk_read_mb": 137, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10727,8 +10217,7 @@ "peak_memory_mb": 53453, "disk_read_mb": 201, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/seqfish/mouse_organogenesis_seqfish", @@ -10748,8 +10237,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 20, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10769,8 +10257,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10790,8 +10277,7 @@ "peak_memory_mb": 33280, "disk_read_mb": 100, "disk_write_mb": 105 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10811,8 +10297,7 @@ "peak_memory_mb": 6554, "disk_read_mb": 100, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10832,8 +10317,7 @@ "peak_memory_mb": 75674, "disk_read_mb": 74, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10853,8 +10337,7 @@ "peak_memory_mb": 780, "disk_read_mb": 35, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10874,8 +10357,7 @@ "peak_memory_mb": 117556, "disk_read_mb": 343, "disk_write_mb": 286 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10895,8 +10377,7 @@ "peak_memory_mb": 4711, "disk_read_mb": 100, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10916,8 +10397,7 @@ "peak_memory_mb": 3584, "disk_read_mb": 
63, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10937,8 +10417,7 @@ "peak_memory_mb": 17920, "disk_read_mb": 117, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10958,8 +10437,7 @@ "peak_memory_mb": 2560, "disk_read_mb": 62, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -10979,8 +10457,7 @@ "peak_memory_mb": 4813, "disk_read_mb": 183, "disk_write_mb": 46 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -11000,8 +10477,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -11021,8 +10497,7 @@ "peak_memory_mb": 6042, "disk_read_mb": 48, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -11042,8 +10517,7 @@ "peak_memory_mb": 71168, "disk_read_mb": 64, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -11063,8 +10537,7 @@ "peak_memory_mb": 53556, "disk_read_mb": 128, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cerebellum", @@ -11084,8 +10557,7 @@ "peak_memory_mb": 782, "disk_read_mb": 20, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11105,8 +10577,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/slideseqv2/mouse_cortex", @@ -11126,8 +10597,7 @@ "peak_memory_mb": 33076, "disk_read_mb": 102, "disk_write_mb": 105 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11147,8 +10617,7 @@ "peak_memory_mb": 11162, "disk_read_mb": 101, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11168,8 +10637,7 @@ "peak_memory_mb": 80589, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11189,8 +10657,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 37, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11210,8 +10677,7 @@ "peak_memory_mb": 219341, "disk_read_mb": 262, "disk_write_mb": 203 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11231,8 +10697,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 101, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11252,8 +10717,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11273,8 +10737,7 @@ "peak_memory_mb": 14848, "disk_read_mb": 119, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11294,8 +10757,7 @@ "peak_memory_mb": 2560, "disk_read_mb": 64, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11315,8 +10777,7 @@ "peak_memory_mb": 8500, "disk_read_mb": 643, "disk_write_mb": 372 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11336,8 +10797,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11357,8 +10817,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 50, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11378,8 +10837,7 @@ "peak_memory_mb": 39424, "disk_read_mb": 65, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11399,8 +10857,7 @@ "peak_memory_mb": 36660, "disk_read_mb": 130, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_cortex", @@ -11420,8 +10877,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11441,8 +10897,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11462,8 +10917,7 @@ "peak_memory_mb": 33485, "disk_read_mb": 126, "disk_write_mb": 105 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11483,8 +10937,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 126, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11504,8 +10957,7 @@ "peak_memory_mb": 82944, "disk_read_mb": 100, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11525,8 
+10977,7 @@ "peak_memory_mb": 778, "disk_read_mb": 61, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11546,8 +10997,7 @@ "peak_memory_mb": 222720, "disk_read_mb": 428, "disk_write_mb": 344 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11567,8 +11017,7 @@ "peak_memory_mb": 7373, "disk_read_mb": 126, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11588,8 +11037,7 @@ "peak_memory_mb": 3892, "disk_read_mb": 89, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11609,8 +11057,7 @@ "peak_memory_mb": 22221, "disk_read_mb": 143, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11630,8 +11077,7 @@ "peak_memory_mb": 17204, "disk_read_mb": 88, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11651,8 +11097,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 327, "disk_write_mb": 52 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11672,8 +11117,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11693,8 +11137,7 @@ "peak_memory_mb": 3789, "disk_read_mb": 74, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11714,8 +11157,7 @@ "peak_memory_mb": 94925, "disk_read_mb": 89, "disk_write_mb": 1 
- }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11735,8 +11177,7 @@ "peak_memory_mb": 87143, "disk_read_mb": 154, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_hippocampus_puck", @@ -11756,8 +11197,7 @@ "peak_memory_mb": 782, "disk_read_mb": 20, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11777,8 +11217,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11798,8 +11237,7 @@ "peak_memory_mb": 33076, "disk_read_mb": 106, "disk_write_mb": 105 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11819,8 +11257,7 @@ "peak_memory_mb": 6554, "disk_read_mb": 105, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11840,8 +11277,7 @@ "peak_memory_mb": 85914, "disk_read_mb": 79, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11861,8 +11297,7 @@ "peak_memory_mb": 774, "disk_read_mb": 40, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11882,8 +11317,7 @@ "peak_memory_mb": 115712, "disk_read_mb": 310, "disk_write_mb": 248 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11903,8 +11337,7 @@ "peak_memory_mb": 4608, "disk_read_mb": 105, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } 
}, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11924,8 +11357,7 @@ "peak_memory_mb": 5940, "disk_read_mb": 68, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11945,8 +11377,7 @@ "peak_memory_mb": 14746, "disk_read_mb": 122, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11966,8 +11397,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 68, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -11987,8 +11417,7 @@ "peak_memory_mb": 4711, "disk_read_mb": 445, "disk_write_mb": 294 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -12008,8 +11437,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -12029,8 +11457,7 @@ "peak_memory_mb": 3687, "disk_read_mb": 53, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -12050,8 +11477,7 @@ "peak_memory_mb": 53044, "disk_read_mb": 69, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -12071,8 +11497,7 @@ "peak_memory_mb": 53248, "disk_read_mb": 133, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck", @@ -12092,8 +11517,7 @@ "peak_memory_mb": 778, "disk_read_mb": 20, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12113,8 +11537,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12134,8 +11557,7 @@ "peak_memory_mb": 33178, "disk_read_mb": 124, "disk_write_mb": 105 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12155,8 +11577,7 @@ "peak_memory_mb": 6554, "disk_read_mb": 124, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12176,8 +11597,7 @@ "peak_memory_mb": 81306, "disk_read_mb": 98, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12197,8 +11617,7 @@ "peak_memory_mb": 775, "disk_read_mb": 59, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12218,8 +11637,7 @@ "peak_memory_mb": 117453, "disk_read_mb": 337, "disk_write_mb": 256 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12239,8 +11657,7 @@ "peak_memory_mb": 6964, "disk_read_mb": 124, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12260,8 +11677,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 87, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12281,8 +11697,7 @@ "peak_memory_mb": 15053, "disk_read_mb": 141, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { 
"dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12302,8 +11717,7 @@ "peak_memory_mb": 1741, "disk_read_mb": 86, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12323,8 +11737,7 @@ "peak_memory_mb": 1946, "disk_read_mb": 207, "disk_write_mb": 41 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12344,8 +11757,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12365,8 +11777,7 @@ "peak_memory_mb": 14439, "disk_read_mb": 72, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12386,8 +11797,7 @@ "peak_memory_mb": 54477, "disk_read_mb": 87, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12407,8 +11817,7 @@ "peak_memory_mb": 53248, "disk_read_mb": 152, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck", @@ -12428,8 +11837,7 @@ "peak_memory_mb": 767, "disk_read_mb": 20, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12449,8 +11857,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12470,8 +11877,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { 
"dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12491,8 +11897,7 @@ "peak_memory_mb": 12800, "disk_read_mb": 117, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12512,8 +11917,7 @@ "peak_memory_mb": 57856, "disk_read_mb": 91, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12533,8 +11937,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 52, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12554,8 +11957,7 @@ "peak_memory_mb": 217703, "disk_read_mb": 143, "disk_write_mb": 69 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12575,8 +11977,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 117, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12596,8 +11997,7 @@ "peak_memory_mb": 6964, "disk_read_mb": 80, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12617,8 +12017,7 @@ "peak_memory_mb": 9319, "disk_read_mb": 133, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12638,8 +12037,7 @@ "peak_memory_mb": 16896, "disk_read_mb": 79, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12659,8 +12057,7 @@ "peak_memory_mb": 18535, "disk_read_mb": 783, "disk_write_mb": 478 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12680,8 +12077,7 
@@ "peak_memory_mb": 39629, "disk_read_mb": 181, "disk_write_mb": 117 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12701,8 +12097,7 @@ "peak_memory_mb": 16896, "disk_read_mb": 65, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12722,8 +12117,7 @@ "peak_memory_mb": 3789, "disk_read_mb": 80, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12743,8 +12137,7 @@ "peak_memory_mb": 19968, "disk_read_mb": 145, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep10_0", @@ -12764,8 +12157,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12785,8 +12177,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12806,8 +12197,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12827,8 +12217,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 140, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12848,8 +12237,7 @@ "peak_memory_mb": 45466, "disk_read_mb": 114, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12869,8 +12257,7 @@ "peak_memory_mb": 772, "disk_read_mb": 75, "disk_write_mb": 1 - }, - 
"task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12890,8 +12277,7 @@ "peak_memory_mb": 219751, "disk_read_mb": 212, "disk_write_mb": 115 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12911,8 +12297,7 @@ "peak_memory_mb": 4404, "disk_read_mb": 140, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12932,8 +12317,7 @@ "peak_memory_mb": 6452, "disk_read_mb": 103, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12953,8 +12337,7 @@ "peak_memory_mb": 7066, "disk_read_mb": 156, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12974,8 +12357,7 @@ "peak_memory_mb": 2048, "disk_read_mb": 102, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -12995,8 +12377,7 @@ "peak_memory_mb": 16282, "disk_read_mb": 499, "disk_write_mb": 299 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -13016,8 +12397,7 @@ "peak_memory_mb": 60826, "disk_read_mb": 284, "disk_write_mb": 197 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -13037,8 +12417,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 88, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -13058,8 +12437,7 @@ "peak_memory_mb": 5428, "disk_read_mb": 103, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -13079,8 +12457,7 @@ "peak_memory_mb": 20788, "disk_read_mb": 168, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/starmap/mouse_brain_2d_zstep15_0", @@ -13100,8 +12477,7 @@ "peak_memory_mb": 775, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13121,8 +12497,7 @@ "peak_memory_mb": 4711, "disk_read_mb": 93, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13142,8 +12517,7 @@ "peak_memory_mb": 38708, "disk_read_mb": 93, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13163,8 +12537,7 @@ "peak_memory_mb": 2253, "disk_read_mb": 92, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13184,8 +12557,7 @@ "peak_memory_mb": 38503, "disk_read_mb": 66, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13205,8 +12577,7 @@ "peak_memory_mb": 778, "disk_read_mb": 27, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13226,8 +12597,7 @@ "peak_memory_mb": 112640, "disk_read_mb": 104, "disk_write_mb": 54 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13247,8 +12617,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13268,8 +12637,7 @@ "peak_memory_mb": 
"NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13289,8 +12657,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 109, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13310,8 +12677,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 54, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13331,8 +12697,7 @@ "peak_memory_mb": 4096, "disk_read_mb": 215, "disk_write_mb": 53 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13352,8 +12717,7 @@ "peak_memory_mb": 29799, "disk_read_mb": 134, "disk_write_mb": 94 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13373,8 +12737,7 @@ "peak_memory_mb": 1946, "disk_read_mb": 40, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13394,8 +12757,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 55, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13415,8 +12777,7 @@ "peak_memory_mb": 19764, "disk_read_mb": 120, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e10", @@ -13436,8 +12797,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13457,8 +12817,7 @@ "peak_memory_mb": 2663, "disk_read_mb": 96, "disk_write_mb": 28 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13478,8 +12837,7 @@ "peak_memory_mb": 38708, "disk_read_mb": 96, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13499,8 +12857,7 @@ "peak_memory_mb": 11162, "disk_read_mb": 94, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13520,8 +12877,7 @@ "peak_memory_mb": 64000, "disk_read_mb": 68, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13541,8 +12897,7 @@ "peak_memory_mb": 772, "disk_read_mb": 29, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13562,8 +12917,7 @@ "peak_memory_mb": 113869, "disk_read_mb": 113, "disk_write_mb": 61 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13583,8 +12937,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13604,8 +12957,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13625,8 +12977,7 @@ "peak_memory_mb": 6349, "disk_read_mb": 111, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13646,8 +12997,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 56, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13667,8 +13017,7 @@ "peak_memory_mb": 4199, "disk_read_mb": 228, "disk_write_mb": 64 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13688,8 +13037,7 @@ "peak_memory_mb": 31028, "disk_read_mb": 149, "disk_write_mb": 107 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13709,8 +13057,7 @@ "peak_memory_mb": 3584, "disk_read_mb": 42, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13730,8 +13077,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 58, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13751,8 +13097,7 @@ "peak_memory_mb": 19764, "disk_read_mb": 122, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e5_6", @@ -13772,8 +13117,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13793,8 +13137,7 @@ "peak_memory_mb": 2663, "disk_read_mb": 94, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13814,8 +13157,7 @@ "peak_memory_mb": 38708, "disk_read_mb": 94, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13835,8 +13177,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 93, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13856,8 +13197,7 @@ 
"peak_memory_mb": 38810, "disk_read_mb": 67, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13877,8 +13217,7 @@ "peak_memory_mb": 772, "disk_read_mb": 28, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13898,8 +13237,7 @@ "peak_memory_mb": 112026, "disk_read_mb": 111, "disk_write_mb": 61 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13919,8 +13257,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13940,8 +13277,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13961,8 +13297,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 110, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -13982,8 +13317,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 55, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -14003,8 +13337,7 @@ "peak_memory_mb": 4199, "disk_read_mb": 226, "disk_write_mb": 63 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -14024,8 +13357,7 @@ "peak_memory_mb": 30720, "disk_read_mb": 146, "disk_write_mb": 106 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -14045,8 +13377,7 @@ "peak_memory_mb": 5940, "disk_read_mb": 41, "disk_write_mb": 1 - }, - 
"task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -14066,8 +13397,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 56, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -14087,8 +13417,7 @@ "peak_memory_mb": 19661, "disk_read_mb": 121, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e6_3", @@ -14108,8 +13437,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14129,8 +13457,7 @@ "peak_memory_mb": 2663, "disk_read_mb": 93, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14150,8 +13477,7 @@ "peak_memory_mb": 38605, "disk_read_mb": 93, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14171,8 +13497,7 @@ "peak_memory_mb": 2253, "disk_read_mb": 91, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14192,8 +13517,7 @@ "peak_memory_mb": 55092, "disk_read_mb": 65, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14213,8 +13537,7 @@ "peak_memory_mb": 775, "disk_read_mb": 26, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14234,8 +13557,7 @@ "peak_memory_mb": 111821, "disk_read_mb": 100, "disk_write_mb": 51 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14255,8 +13577,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14276,8 +13597,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14297,8 +13617,7 @@ "peak_memory_mb": 6247, "disk_read_mb": 108, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14318,8 +13637,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 54, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14339,8 +13657,7 @@ "peak_memory_mb": 4096, "disk_read_mb": 209, "disk_write_mb": 47 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14360,8 +13677,7 @@ "peak_memory_mb": 29594, "disk_read_mb": 127, "disk_write_mb": 88 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14381,8 +13697,7 @@ "peak_memory_mb": 1946, "disk_read_mb": 39, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14402,8 +13717,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 55, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14423,8 +13737,7 @@ "peak_memory_mb": 19764, "disk_read_mb": 119, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e7", @@ -14444,8 +13757,7 @@ "peak_memory_mb": 778, 
"disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14465,8 +13777,7 @@ "peak_memory_mb": 3994, "disk_read_mb": 93, "disk_write_mb": 28 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14486,8 +13797,7 @@ "peak_memory_mb": 38605, "disk_read_mb": 93, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14507,8 +13817,7 @@ "peak_memory_mb": 11162, "disk_read_mb": 91, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14528,8 +13837,7 @@ "peak_memory_mb": 55194, "disk_read_mb": 65, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14549,8 +13857,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 27, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14570,8 +13877,7 @@ "peak_memory_mb": 268391, "disk_read_mb": 102, "disk_write_mb": 53 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14591,8 +13897,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14612,8 +13917,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14633,8 +13937,7 @@ "peak_memory_mb": 8807, "disk_read_mb": 108, "disk_write_mb": 2 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14654,8 +13957,7 @@ "peak_memory_mb": 16896, "disk_read_mb": 54, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14675,8 +13977,7 @@ "peak_memory_mb": 7680, "disk_read_mb": 426, "disk_write_mb": 144 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14696,8 +13997,7 @@ "peak_memory_mb": 29799, "disk_read_mb": 131, "disk_write_mb": 92 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14717,8 +14017,7 @@ "peak_memory_mb": 3277, "disk_read_mb": 40, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14738,8 +14037,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 55, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14759,8 +14057,7 @@ "peak_memory_mb": 36557, "disk_read_mb": 120, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/stereoseq/drosophila_embryo_e9_1", @@ -14780,8 +14077,7 @@ "peak_memory_mb": 780, "disk_read_mb": 18, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14801,8 +14097,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14822,8 +14117,7 @@ "peak_memory_mb": 39220, "disk_read_mb": 181, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": 
"zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14843,8 +14137,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 180, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14864,8 +14157,7 @@ "peak_memory_mb": 62157, "disk_read_mb": 154, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14885,8 +14177,7 @@ "peak_memory_mb": 776, "disk_read_mb": 115, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14906,8 +14197,7 @@ "peak_memory_mb": 39424, "disk_read_mb": 334, "disk_write_mb": 197 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14927,8 +14217,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 180, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14948,8 +14237,7 @@ "peak_memory_mb": 3175, "disk_read_mb": 143, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14969,8 +14257,7 @@ "peak_memory_mb": 10036, "disk_read_mb": 197, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -14990,8 +14277,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 142, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -15011,8 +14297,7 @@ "peak_memory_mb": 9114, "disk_read_mb": 390, "disk_write_mb": 45 - }, - "task_id": "task_spatially_variable_genes" + } }, { 
"dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -15032,8 +14317,7 @@ "peak_memory_mb": 59392, "disk_read_mb": 487, "disk_write_mb": 360 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -15053,8 +14337,7 @@ "peak_memory_mb": 3584, "disk_read_mb": 128, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -15074,8 +14357,7 @@ "peak_memory_mb": 5120, "disk_read_mb": 143, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -15095,8 +14377,7 @@ "peak_memory_mb": 20788, "disk_read_mb": 208, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_1", @@ -15116,8 +14397,7 @@ "peak_memory_mb": 2868, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15137,8 +14417,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15158,8 +14437,7 @@ "peak_memory_mb": 39220, "disk_read_mb": 162, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15179,8 +14457,7 @@ "peak_memory_mb": 11367, "disk_read_mb": 160, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15200,8 +14477,7 @@ "peak_memory_mb": 46080, "disk_read_mb": 134, "disk_write_mb": 1 - }, - "task_id": 
"task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15221,8 +14497,7 @@ "peak_memory_mb": 2970, "disk_read_mb": 95, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15242,8 +14517,7 @@ "peak_memory_mb": 116532, "disk_read_mb": 326, "disk_write_mb": 208 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15263,8 +14537,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 160, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15284,8 +14557,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 123, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15305,8 +14577,7 @@ "peak_memory_mb": 10036, "disk_read_mb": 177, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15326,8 +14597,7 @@ "peak_memory_mb": 11572, "disk_read_mb": 123, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15347,8 +14617,7 @@ "peak_memory_mb": 2048, "disk_read_mb": 246, "disk_write_mb": 44 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15368,8 +14637,7 @@ "peak_memory_mb": 62055, "disk_read_mb": 489, "disk_write_mb": 381 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15389,8 +14657,7 @@ "peak_memory_mb": 5735, "disk_read_mb": 109, "disk_write_mb": 1 - }, - 
"task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15410,8 +14677,7 @@ "peak_memory_mb": 5325, "disk_read_mb": 124, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15431,8 +14697,7 @@ "peak_memory_mb": 20890, "disk_read_mb": 189, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial/visium/human_heart_myocardial_infarction_2", @@ -15452,8 +14717,7 @@ "peak_memory_mb": 780, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15473,8 +14737,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15494,8 +14757,7 @@ "peak_memory_mb": 39117, "disk_read_mb": 130, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15515,8 +14777,7 @@ "peak_memory_mb": 12596, "disk_read_mb": 128, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15536,8 +14797,7 @@ "peak_memory_mb": 61850, "disk_read_mb": 102, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15557,8 +14817,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 63, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15578,8 +14837,7 @@ "peak_memory_mb": 280372, "disk_read_mb": 272, "disk_write_mb": 186 - }, - "task_id": "task_spatially_variable_genes" + } }, 
{ "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15599,8 +14857,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 128, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15620,8 +14877,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 91, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15641,8 +14897,7 @@ "peak_memory_mb": 9626, "disk_read_mb": 145, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15662,8 +14917,7 @@ "peak_memory_mb": 2048, "disk_read_mb": 91, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15683,8 +14937,7 @@ "peak_memory_mb": 7885, "disk_read_mb": 334, "disk_write_mb": 40 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15704,8 +14957,7 @@ "peak_memory_mb": 58164, "disk_read_mb": 419, "disk_write_mb": 343 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15725,8 +14977,7 @@ "peak_memory_mb": 3380, "disk_read_mb": 76, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15746,8 +14997,7 @@ "peak_memory_mb": 4813, "disk_read_mb": 92, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15767,8 +15017,7 @@ "peak_memory_mb": 20480, "disk_read_mb": 156, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_cortex", @@ -15788,8 +15037,7 @@ 
"peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15809,8 +15057,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15830,8 +15077,7 @@ "peak_memory_mb": 46695, "disk_read_mb": 108, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15851,8 +15097,7 @@ "peak_memory_mb": 12493, "disk_read_mb": 106, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15872,8 +15117,7 @@ "peak_memory_mb": 64205, "disk_read_mb": 80, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15893,8 +15137,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 41, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15914,8 +15157,7 @@ "peak_memory_mb": 219956, "disk_read_mb": 282, "disk_write_mb": 218 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15935,8 +15177,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 106, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15956,8 +15197,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 69, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15977,8 +15217,7 @@ "peak_memory_mb": 
9421, "disk_read_mb": 123, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -15998,8 +15237,7 @@ "peak_memory_mb": 16896, "disk_read_mb": 69, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -16019,8 +15257,7 @@ "peak_memory_mb": 8500, "disk_read_mb": 316, "disk_write_mb": 47 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -16040,8 +15277,7 @@ "peak_memory_mb": 114791, "disk_read_mb": 460, "disk_write_mb": 405 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -16061,8 +15297,7 @@ "peak_memory_mb": 6042, "disk_read_mb": 54, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -16082,8 +15317,7 @@ "peak_memory_mb": 5530, "disk_read_mb": 70, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -16103,8 +15337,7 @@ "peak_memory_mb": 21504, "disk_read_mb": 134, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_skin_melanoma", @@ -16124,8 +15357,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16145,8 +15377,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16166,8 +15397,7 @@ "peak_memory_mb": 39527, "disk_read_mb": 154, 
"disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16187,8 +15417,7 @@ "peak_memory_mb": 11264, "disk_read_mb": 153, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16208,8 +15437,7 @@ "peak_memory_mb": 66765, "disk_read_mb": 127, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16229,8 +15457,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 88, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16250,8 +15477,7 @@ "peak_memory_mb": 222004, "disk_read_mb": 372, "disk_write_mb": 262 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16271,8 +15497,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 153, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16292,8 +15517,7 @@ "peak_memory_mb": 5632, "disk_read_mb": 116, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16313,8 +15537,7 @@ "peak_memory_mb": 9933, "disk_read_mb": 169, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16334,8 +15557,7 @@ "peak_memory_mb": 2151, "disk_read_mb": 115, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16355,8 +15577,7 @@ "peak_memory_mb": 5018, "disk_read_mb": 359, "disk_write_mb": 57 - }, - "task_id": "task_spatially_variable_genes" + } }, { 
"dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16376,8 +15597,7 @@ "peak_memory_mb": 103015, "disk_read_mb": 588, "disk_write_mb": 487 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16397,8 +15617,7 @@ "peak_memory_mb": 9216, "disk_read_mb": 101, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16418,8 +15637,7 @@ "peak_memory_mb": 6759, "disk_read_mb": 116, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16439,8 +15657,7 @@ "peak_memory_mb": 21607, "disk_read_mb": 181, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/human_tonsil", @@ -16460,8 +15677,7 @@ "peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16481,8 +15697,7 @@ "peak_memory_mb": "NA", "disk_read_mb": "NA", "disk_write_mb": "NA" - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16502,8 +15717,7 @@ "peak_memory_mb": 39220, "disk_read_mb": 126, "disk_write_mb": 521 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16523,8 +15737,7 @@ "peak_memory_mb": 11469, "disk_read_mb": 124, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16544,8 +15757,7 @@ "peak_memory_mb": 62669, "disk_read_mb": 98, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16565,8 
+15777,7 @@ "peak_memory_mb": 2765, "disk_read_mb": 59, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16586,8 +15797,7 @@ "peak_memory_mb": 222208, "disk_read_mb": 292, "disk_write_mb": 210 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16607,8 +15817,7 @@ "peak_memory_mb": 6656, "disk_read_mb": 124, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16628,8 +15837,7 @@ "peak_memory_mb": 6861, "disk_read_mb": 87, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16649,8 +15857,7 @@ "peak_memory_mb": 9728, "disk_read_mb": 141, "disk_write_mb": 2 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16670,8 +15877,7 @@ "peak_memory_mb": 1946, "disk_read_mb": 87, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16691,8 +15897,7 @@ "peak_memory_mb": 11162, "disk_read_mb": 390, "disk_write_mb": 45 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16712,8 +15917,7 @@ "peak_memory_mb": 63079, "disk_read_mb": 462, "disk_write_mb": 390 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16733,8 +15937,7 @@ "peak_memory_mb": 8909, "disk_read_mb": 73, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16754,8 +15957,7 @@ "peak_memory_mb": 13005, "disk_read_mb": 88, "disk_write_mb": 1 - }, - 
"task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16775,8 +15977,7 @@ "peak_memory_mb": 20890, "disk_read_mb": 152, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } }, { "dataset_id": "zenodo_spatial_slidetags/slidetags/mouse_embryo", @@ -16796,7 +15997,6 @@ "peak_memory_mb": 1434, "disk_read_mb": 19, "disk_write_mb": 1 - }, - "task_id": "task_spatially_variable_genes" + } } ] diff --git a/results/spatially_variable_genes/data/task_info.json b/results/spatially_variable_genes/data/task_info.json index 82ce9800e..c2ff01b8e 100644 --- a/results/spatially_variable_genes/data/task_info.json +++ b/results/spatially_variable_genes/data/task_info.json @@ -3,8 +3,9 @@ "commit_sha": null, "task_name": "Spatially Variable Genes", "task_summary": "Spatially variable genes (SVGs) are genes whose expression levels vary significantly across different spatial regions within a tissue or across cells in a spatially structured context.", - "task_description": "\n\nRecent years have witnessed significant progress in spatially-resolved transcriptome profiling techniques that simultaneously characterize cellular gene expression and their physical position, generating spatial transcriptomic (ST) data. The application of these techniques has dramatically advanced our understanding of disease and developmental biology. One common task for all ST profiles, regardless of the employed protocols, is to identify genes that exhibit spatial patterns. These genes, defined as spatially variable genes (SVGs), contain additional information about the spatial structure of the tissues of interest, compared to highly variable genes (HVGs).\n\nIdentification of spatially variable genes is crucial to for studying spatial domains within tissue microenvironmnets, developmental gradients and cell signaling pathways. 
In this task we attempt to evaluate various methods for detecting SVGs using a number of realistic simulated datasets with diverse patterns derived from real-world spatial transcriptomics data using scDesign3. Synthetic data is generated by mixing a Gaussian Process (GP) model and a non-spatial model (obtained by shuffling mean parameters of the GP model to remove spatial correlation between spots) to generate gene expressions with various spatial variability. For more details, please refer to our [manuscript](https://www.biorxiv.org/content/10.1101/2023.12.02.569717v1) and [Github](https://github.com/pinellolab/SVG_Benchmarking).\n", - "repo": "openproblems-bio/openproblems", + "task_description": "Recent years have witnessed significant progress in spatially-resolved transcriptome profiling techniques that simultaneously characterize cellular gene expression and their physical position, generating spatial transcriptomic (ST) data. The application of these techniques has dramatically advanced our understanding of disease and developmental biology. One common task for all ST profiles, regardless of the employed protocols, is to identify genes that exhibit spatial patterns. These genes, defined as spatially variable genes (SVGs), contain additional information about the spatial structure of the tissues of interest, compared to highly variable genes (HVGs).\n\nIdentification of spatially variable genes is crucial for studying spatial domains within tissue microenvironments, developmental gradients and cell signaling pathways. In this task we attempt to evaluate various methods for detecting SVGs using a number of realistic simulated datasets with diverse patterns derived from real-world spatial transcriptomics data using scDesign3. 
Synthetic data is generated by mixing a Gaussian Process (GP) model and a non-spatial model (obtained by shuffling mean parameters of the GP model to remove spatial correlation between spots) to generate gene expressions with various spatial variability. For more details, please refer to our [manuscript](https://www.biorxiv.org/content/10.1101/2023.12.02.569717v1) and [Github](https://github.com/pinellolab/SVG_Benchmarking).\n", + "repo": "https://github.com/openproblems-bio/task_spatially_variable_genes", + "issue_tracker": "https://github.com/openproblems-bio/task_spatially_variable_genes/issues", "authors": [ { "name": "Zhijian Li", @@ -62,5 +63,7 @@ "orcid": "0009-0003-6319-9803" } } - ] + ], + "version": "build_main", + "license": "MIT" } diff --git a/results/spatially_variable_genes/index.qmd b/results/spatially_variable_genes/index.qmd index a2f9d7e3c..e7acedd38 100644 --- a/results/spatially_variable_genes/index.qmd +++ b/results/spatially_variable_genes/index.qmd @@ -1,12 +1,15 @@ --- title: "Spatially Variable Genes" -subtitle: "Detecting genes whose expression levels vary across spatial regions." +subtitle: "Spatially variable genes (SVGs) are genes whose expression levels vary significantly across different spatial regions within a tissue or across cells in a spatially structured context." image: thumbnail.svg page-layout: full css: ../_include/task_template.css engine: knitr fig-cap-location: bottom citation-location: document +bibliography: + - library.bib + - ../../library.bib toc: false ---