diff --git a/.vscode/settings.json b/.vscode/settings.json index e662fc6472..7695e2a406 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,10 +1,4 @@ { "yaml.schemas": { - "src/common/schemas/api_component.yaml": "src/**/api/comp_*.yaml", - "src/common/schemas/api_file.yaml": "src/**/api/file_*.yaml", - "src/common/schemas/task_info.yaml": "src/**/api/task_info.yaml", - "src/common/schemas/task_method.yaml": "src/tasks/**/methods/**/config.vsh.yaml", - "src/common/schemas/task_control_method.yaml": "src/tasks/**/control_methods/**/config.vsh.yaml", - "src/common/schemas/task_metric.yaml": "src/tasks/**/metrics/**/config.vsh.yaml" } } \ No newline at end of file diff --git a/src/common/check_dataset_schema/config.vsh.yaml b/src/common/check_dataset_schema/config.vsh.yaml deleted file mode 100644 index 08449c3e7d..0000000000 --- a/src/common/check_dataset_schema/config.vsh.yaml +++ /dev/null @@ -1,45 +0,0 @@ -functionality: - name: check_dataset_schema - namespace: common - description: Checks if the dataset has the necessary slots that are predefined in a schema. - argument_groups: - - name: Inputs - arguments: - - name: --input - type: file - required: true - description: A h5ad file. - - name: --schema - type: file - required: true - description: A schema file for the h5ad object. - - name: Arguments - arguments: - - name: --stop_on_error - type: boolean - default: false - description: Whether or not to stop with exit code 1 if the input file does not adhere to the schema. - - name: Output - arguments: - - name: --output - type: file - required: true - description: If specified, this file will contain a structured log of which checks succeeded (or not). 
- example: checks.json - direction: output - resources: - - type: python_script - path: script.py - test_resources: - - path: /resources_test/common/pancreas - - type: python_script - path: test.py -platforms: - - type: docker - image: openproblems/base_python:1.0.0 - test_setup: - - type: python - packages: viashpy - - type: nextflow - directives: - label: [midtime, midmem, midcpu] diff --git a/src/common/check_dataset_schema/script.py b/src/common/check_dataset_schema/script.py deleted file mode 100644 index cd84f9cdcf..0000000000 --- a/src/common/check_dataset_schema/script.py +++ /dev/null @@ -1,60 +0,0 @@ -import anndata as ad -import yaml -import json - -## VIASH START -par = { - 'input': 'work/d4/f4fabc8aa4f2308841d4ab57bcff62/_viash_par/input_1/dataset.h5ad', - 'schema': 'work/d4/f4fabc8aa4f2308841d4ab57bcff62/_viash_par/schema_1/schema.yaml', - 'stop_on_error': False, - 'output': 'work/d4/f4fabc8aa4f2308841d4ab57bcff62/out.yaml', -} -## VIASH END - -def check_structure(slot, slot_info, adata_slot): - missing = [] - if slot == "X": - slot_info["name"] = "X" - slot_info = [slot_info] - for obj in slot_info: - adata_data = adata_slot.get(obj['name']) if slot != 'X' else adata_slot - if obj.get('required') and adata_data is None: - missing.append(obj['name']) - # todo: check types - return missing - -print('Load data', flush=True) -adata = ad.read_h5ad(par['input']) - -# create data structure -out = { - "exit_code": 0, - "error": {}, - "data_schema": "ok" -} - -print("Check AnnData against schema", flush=True) -with open(par["schema"], "r") as f: - data_struct = yaml.safe_load(f) - -def_slots = data_struct['info']['slots'] - -out = { - "exit_code": 0, - "error": {}, - "data_schema": "ok" -} -for slot in def_slots: - print("Checking slot", slot, flush=True) - missing = check_structure(slot, def_slots[slot], getattr(adata, slot)) - if missing: - print(f"Dataset is missing {slot} {missing}", flush=True) - out['exit_code'] = 1 - out['data_schema'] = 'not ok' - 
out['error'][slot] = missing - -with open(par["output"], "w") as f: - json.dump(out, f, indent=2) - -if par['stop_on_error']: - exit(out['exit_code']) diff --git a/src/common/check_yaml_schema/config.vsh.yaml b/src/common/check_yaml_schema/config.vsh.yaml deleted file mode 100644 index b87bec5429..0000000000 --- a/src/common/check_yaml_schema/config.vsh.yaml +++ /dev/null @@ -1,26 +0,0 @@ -functionality: - name: check_yaml_schema - namespace: common - description: Checks if a YAML file adheres to a custom schema file. - argument_groups: - - name: Inputs - arguments: - - name: --input - type: file - required: true - description: A yaml file. - - name: --schema - type: file - required: true - description: A schema file for the yaml file. - resources: - - type: python_script - path: script.py -platforms: - - type: docker - image: openproblems/base_python:1.0.0 - setup: - - type: python - pypi: - - jsonschema - - type: nextflow diff --git a/src/common/create_component/config.vsh.yaml b/src/common/create_component/config.vsh.yaml deleted file mode 100644 index 58303a1ca8..0000000000 --- a/src/common/create_component/config.vsh.yaml +++ /dev/null @@ -1,72 +0,0 @@ -functionality: - name: create_component - status: disabled - namespace: common - description: | - Create a component Viash component. - - Usage: - ``` - bin/create_component --task denoising --type method --language r --name foo - bin/create_component --task denoising --type metric --language python --name bar - ``` - arguments: - - type: string - name: --task - description: Which task the component will be added to. - example: denoising - - type: string - name: --type - example: metric - description: The type of component to create. Typically must be one of 'method', 'control_method' or 'metric'. - - type: string - name: --language - description: Which scripting language to use. Options are 'python', 'r'. 
- default: python - choices: [python, r] - - type: string - name: --name - example: new_comp - description: Name of the new method, formatted in snake case. - - type: file - name: --output - direction: output - # required: true - description: Path to the component directory. Suggested location is `src//s/`. - default: src/tasks/${VIASH_PAR_TASK}/${VIASH_PAR_TYPE}s/${VIASH_PAR_NAME} - - type: file - name: --api_file - description: | - Which API file to use. Defaults to `src//api/comp_.yaml`. - In tasks with different subtypes of method, this location might not exist and you might need - to manually specify a different API file to inherit from. - must_exist: false - # required: true - default: src/tasks/${VIASH_PAR_TASK}/api/comp_${VIASH_PAR_TYPE}.yaml - - type: file - name: --viash_yaml - description: | - Path to the project config file. Needed for knowing the relative location of a file to the project root. - # required: true - default: "_viash.yaml" - resources: - - type: python_script - path: script.py - - path: /src/common/helper_functions/read_and_merge_yaml.py - test_resources: - - type: python_script - path: test.py - - path: /src - dest: openproblems/src - - path: /_viash.yaml - dest: openproblems/_viash.yaml -platforms: - - type: docker - image: python:3.10-slim - setup: - - type: python - pypi: ruamel.yaml - - type: native - - type: nextflow - - diff --git a/src/common/create_component/script.sh b/src/common/create_component/script.sh deleted file mode 100755 index 9fef9ef3a7..0000000000 --- a/src/common/create_component/script.sh +++ /dev/null @@ -1,5 +0,0 @@ -TASK=dimensionality_reduction -viash run src/common/create_component/config.vsh.yaml -- --task $TASK --type metric --name foor --language r -viash run src/common/create_component/config.vsh.yaml -- --task $TASK --type method --name foor --language r -viash run src/common/create_component/config.vsh.yaml -- --task $TASK --type method --name foopy -viash run src/common/create_component/config.vsh.yaml 
-- --task $TASK --type metric --name foopy \ No newline at end of file diff --git a/src/common/create_component/test.py b/src/common/create_component/test.py deleted file mode 100644 index a53e54a18e..0000000000 --- a/src/common/create_component/test.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import subprocess -from os import path -from ruamel.yaml import YAML - -## VIASH START -meta = { - 'executable': 'foo' -} -## VIASH END - -opv2 = f"{meta['resources_dir']}/openproblems" -output_path = f"{opv2}/src/tasks/label_projection/methods/test_method" - -cmd = [ - meta['executable'], - '--task', 'label_projection', - '--type', 'method', - '--name', 'test_method', - '--language', 'python' -] - -print('>> Running the script as test', flush=True) -out = subprocess.run(cmd, stderr=subprocess.STDOUT, cwd=opv2) - -if out.stdout: - print(out.stdout) - -if out.returncode: - print(f"script: '{cmd}' exited with an error.") - exit(out.returncode) - -print('>> Checking whether output files exist', flush=True) -assert os.path.exists(output_path), "Output dir does not exist" - -conf_f = path.join(output_path, 'config.vsh.yaml') -assert os.path.exists(conf_f), "Config file does not exist" - -script_f = path.join(output_path, "script.py") -assert os.path.exists(script_f), "Script file does not exist" - -print('>> Checking file contents', flush=True) -yaml = YAML(typ='safe', pure=True) -with open(conf_f) as f: - conf_data = yaml.load(f) - -assert conf_data['functionality']['name'] == 'test_method', "Name should be equal to 'test_method'" -# assert conf_data['platforms'][0]['image'] == 'python:3.10', "Python image should be equal to python:3.10" - - -print('All checks succeeded!', flush=True) - diff --git a/src/common/create_task_readme/config.vsh.yaml b/src/common/create_task_readme/config.vsh.yaml deleted file mode 100644 index 273e196ffb..0000000000 --- a/src/common/create_task_readme/config.vsh.yaml +++ /dev/null @@ -1,70 +0,0 @@ -functionality: - name: create_task_readme - status: 
disabled - namespace: common - description: | - Create a README for the task. - argument_groups: - - name: Inputs - arguments: - - type: string - name: --task - description: Which task the component will be added to. - example: denoising - required: false - - type: file - name: --task_dir - description: Path to the task directory. - default: src/tasks/${VIASH_PAR_TASK} - required: false - - type: file - name: --viash_yaml - description: | - Path to the project config file. Needed for knowing the relative location of a file to the project root. - default: "_viash.yaml" - - type: string - name: --github_url - description: | - URL to the GitHub repository. Needed for linking to the source code. - default: "https://github.com/openproblems-bio/openproblems/tree/main/" - - name: Outputs - arguments: - - type: file - name: --output - direction: output - description: Path to the component directory. Suggested location is `src/tasks//README.md`. - default: src/tasks/${VIASH_PAR_TASK}/README.md - resources: - - type: r_script - path: script.R - - path: /src/common/helper_functions/read_and_merge_yaml.R - - path: /src/common/helper_functions/read_api_files.R - - path: /src/common/helper_functions/strip_margin.R - test_resources: - - type: r_script - path: test.R - - path: /src - dest: openproblems/src - - path: /_viash.yaml - dest: openproblems/_viash.yaml -platforms: - - type: docker - image: openproblems/base_r:1.0.0 - setup: - - type: r - packages: [dplyr, purrr, rlang, glue, yaml, fs, cli, igraph, rmarkdown, processx] - - type: apt - packages: [jq, curl] - - type: docker - # download and install quarto-*-linux-amd64.deb from latest release - run: | - release_info=$(curl -s https://api.github.com/repos/quarto-dev/quarto-cli/releases/latest) && \ - download_url=$(printf "%s" "$release_info" | jq -r '.assets[] | select(.name | test("quarto-.*-linux-amd64.deb")) | .browser_download_url') && \ - curl -sL "$download_url" -o /opt/quarto.deb && \ - dpkg -i /opt/quarto.deb && \ - 
rm /opt/quarto.deb - - type: native - - type: nextflow - directives: - label: [midtime, lowmem, lowcpu] - diff --git a/src/common/create_task_readme/render_all.sh b/src/common/create_task_readme/render_all.sh deleted file mode 100755 index e44195c1ed..0000000000 --- a/src/common/create_task_readme/render_all.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -e - -TASK_IDS=`ls src/tasks` - -for task_id in $TASK_IDS; do - echo ">> Processing $task_id" - viash run src/common/create_task_readme/config.vsh.yaml -- --task $task_id -done \ No newline at end of file diff --git a/src/common/create_task_readme/script.R b/src/common/create_task_readme/script.R deleted file mode 100644 index 35320e4d97..0000000000 --- a/src/common/create_task_readme/script.R +++ /dev/null @@ -1,134 +0,0 @@ -library(rlang, quietly = TRUE, warn.conflicts = FALSE) -library(purrr, quietly = TRUE, warn.conflicts = FALSE) -library(dplyr, quietly = TRUE, warn.conflicts = FALSE) - -## VIASH START -par <- list( - "task" = "batch_integration", - "task_dir" = "src/tasks/batch_integration", - "output" = "src/tasks/batch_integration/README.md", - "viash_yaml" = "_viash.yaml", - "github_url" = "https://github.com/openproblems-bio/openproblems/tree/main/" -) -meta <- list( - "resources_dir" = "src/common/helper_functions", - "temp_dir" = "temp/" -) -## VIASH END - -if (is.null(par$task) && is.null(par$task_dir)) { - stop("Either 'task' or 'task_dir' must be provided") -} -if (is.null(par$viash_yaml)) { - stop("Argument 'viash_yaml' must be provided") -} -if (is.null(par$output)) { - stop("Argument 'output' must be provided") -} - -# import helper function -source(paste0(meta["resources_dir"], "/read_and_merge_yaml.R")) -source(paste0(meta["resources_dir"], "/strip_margin.R")) -source(paste0(meta["resources_dir"], "/read_api_files.R")) - -cat("Read task info\n") -task_api <- read_task_api(par[["task_dir"]]) - -# determine ordering -root <- .task_graph_get_root(task_api) - -r_graph <- 
render_task_graph(task_api, root) - -cat("Render API details\n") -order <- names(igraph::bfs(task_api$task_graph, root)$order) -r_details <- map_chr( - order, - function(file_name) { - if (file_name %in% names(task_api$comp_specs)) { - render_component(task_api$comp_specs[[file_name]]) - } else { - render_file(task_api$file_specs[[file_name]]) - } - } -) - -cat("Render authors\n") -authors_str <- - if (nrow(task_api$authors) > 0) { - paste0( - "\n## Authors & contributors\n\n", - task_api$authors %>% knitr::kable() %>% paste(collapse = "\n"), - "\n" - ) - } else { - "" - } -readme_str <- - if (is.null(task_api$task_info$readme) || is.na(task_api$task_info$readme)) { - "" - } else { - paste0( - "\n## README\n\n", - task_api$task_info$readme, - "\n" - ) - } - -cat("Generate qmd content\n") -relative_path <- par[["task_dir"]] %>% - gsub(paste0(dirname(par[["viash_yaml"]]), "/*"), "", .) %>% - gsub("/*$", "", .) -source_url <- paste0(par[["github_url"]], relative_path) -qmd_content <- strip_margin(glue::glue(" - §--- - §title: \"{task_api$task_info$label}\" - §format: gfm - §--- - § - § - § - §{task_api$task_info$summary} - § - §Path to source: [`{relative_path}`]({source_url}) - § - §{readme_str} - § - §## Motivation - § - §{task_api$task_info$motivation} - § - §## Description - § - §{task_api$task_info$description} - §{authors_str} - §## API - § - §{r_graph} - § - §{paste(r_details, collapse = '\n\n')} - § - §"), symbol = "§") - -cat("Write README.qmd to file\n") -qmd_file <- tempfile( - pattern = "README_", - fileext = ".qmd", - tmpdir = meta$temp_dir -) - -if (!dir.exists(meta$temp_dir)) { - dir.create(meta$temp_dir, recursive = TRUE) -} -writeLines(qmd_content, qmd_file) - -cat("Render README.qmd to README.md\n") -out <- processx::run( - command = "quarto", - args = c("render", qmd_file, "--output", "-"), - echo = TRUE -) - -writeLines(out$stdout, par$output) diff --git a/src/common/create_task_readme/test.R b/src/common/create_task_readme/test.R deleted file mode 
100644 index 3a981fe7ca..0000000000 --- a/src/common/create_task_readme/test.R +++ /dev/null @@ -1,30 +0,0 @@ -requireNamespace("assertthat", quietly = TRUE) - -## VIASH START -## VIASH END - -opv2 <- paste0(meta$resources_dir, "/openproblems") -output_path <- "output.md" - -cat(">> Running the script as test\n") -system(paste( - meta["executable"], - "--task", "label_projection", - "--output", output_path, - "--task_dir", paste0(opv2, "/src/tasks/label_projection"), - "--viash_yaml", paste0(opv2, "/_viash.yaml") -)) - -cat(">> Checking whether output files exist\n") -assertthat::assert_that(file.exists(output_path)) - -cat(">> Checking file contents\n") -lines <- readLines(output_path) -assertthat::assert_that(any(grepl("# Label projection", lines))) -assertthat::assert_that(any(grepl("# Description", lines))) -assertthat::assert_that(any(grepl("# Motivation", lines))) -assertthat::assert_that(any(grepl("# Authors", lines))) -assertthat::assert_that(any(grepl("flowchart LR", lines))) -assertthat::assert_that(any(grepl("# File format:", lines))) - -cat("All checks succeeded!\n") diff --git a/src/common/extract_metadata/config.vsh.yaml b/src/common/extract_metadata/config.vsh.yaml deleted file mode 100644 index 76e73cb975..0000000000 --- a/src/common/extract_metadata/config.vsh.yaml +++ /dev/null @@ -1,40 +0,0 @@ -functionality: - name: extract_metadata - namespace: common - description: Extract the metadata from an h5ad file. - argument_groups: - - name: Inputs - arguments: - - name: --input - type: file - required: true - description: A h5ad file. - - name: --schema - type: file - required: false - description: An optional schema with which to annotate the output - - name: Output - arguments: - - name: --output - type: file - required: true - description: A yaml file containing the metadata. 
- example: output_meta.yaml - direction: output - resources: - - type: python_script - path: script.py - test_resources: - - path: /resources_test/common/pancreas - - path: /src/datasets/api/file_raw.yaml - - type: python_script - path: test.py -platforms: - - type: docker - image: openproblems/base_python:1.0.0 - test_setup: - - type: python - packages: viashpy - - type: nextflow - directives: - label: [midtime, midmem, midcpu] diff --git a/src/common/extract_metadata/test.py b/src/common/extract_metadata/test.py deleted file mode 100644 index 8af023d8f6..0000000000 --- a/src/common/extract_metadata/test.py +++ /dev/null @@ -1,26 +0,0 @@ -import sys -import re -import pytest -import json -import subprocess - -## VIASH START -## VIASH END - -input_path = meta["resources_dir"] + "/pancreas/dataset.h5ad" -schema_path = meta["resources_dir"] + "/file_raw.yaml" - -def test_run(run_component, tmp_path): - output_path = tmp_path / "meta.yaml" - - run_component([ - "--input", input_path, - "--schema", schema_path, - "--output", str(output_path), - ]) - - assert output_path.exists(), "Output path does not exist" - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/src/common/extract_scores/config.vsh.yaml b/src/common/extract_scores/config.vsh.yaml deleted file mode 100644 index 72270b7a95..0000000000 --- a/src/common/extract_scores/config.vsh.yaml +++ /dev/null @@ -1,35 +0,0 @@ -functionality: - name: "extract_scores" - status: disabled - namespace: "common" - description: "Extract evaluation data frame on output" - arguments: - - name: "--input" - alternatives: ["-i"] - type: "file" - multiple: true - default: "input.h5ad" - description: "Input h5ad files containing metadata and metrics in adata.uns" - - name: "--column_names" - type: "string" - multiple: true - default: [ "dataset_id", "method_id", "metric_ids", "metric_values" ] - description: "Which fields from adata.uns to extract and store as a data frame." 
- - name: "--output" - alternatives: ["-o"] - type: "file" - direction: "output" - default: "output.tsv" - description: "Output tsv" - resources: - - type: r_script - path: script.R -platforms: - - type: docker - image: openproblems/base_r:1.0.0 - setup: - - type: r - cran: [ tidyverse ] - - type: nextflow - directives: - label: [midtime, lowmem, lowcpu] diff --git a/src/common/extract_scores/script.R b/src/common/extract_scores/script.R deleted file mode 100644 index 6b540380ab..0000000000 --- a/src/common/extract_scores/script.R +++ /dev/null @@ -1,30 +0,0 @@ -cat("Loading dependencies\n") -library(anndata, warn.conflicts = FALSE) -options(tidyverse.quiet = TRUE) -library(tidyverse) -library(assertthat) - -## VIASH START -par <- list( - input = "resources_test/label_projection/pancreas/knn_accuracy.h5ad", - output = "scores.tsv" -) -inp <- par$input[[1]] -## VIASH END - -cat("Reading input h5ad files\n") -scores <- map_df(par$input, function(inp) { - cat("Reading '", inp, "'\n", sep = "") - ad <- read_h5ad(inp) - - for (uns_name in par$column_names) { - assert_that( - uns_name %in% names(ad$uns), - msg = paste0("File ", inp, " must contain `uns['", uns_name, "']`") - ) - } - - data.frame(ad$uns[par$column_names]) -}) - -write_tsv(scores, par$output) diff --git a/src/common/library.bib b/src/common/library.bib deleted file mode 100644 index af730fe8cd..0000000000 --- a/src/common/library.bib +++ /dev/null @@ -1,2191 +0,0 @@ -@misc{10x2018pbmc, - title = {1k PBMCs from a Healthy Donor (v3 chemistry)}, - author = {{10x Genomics}}, - year = {2018}, - url = {https://www.10xgenomics.com/resources/datasets/1-k-pbm-cs-from-a-healthy-donor-v-3-chemistry-3-standard-3-0-0} -} - - -@misc{10x2019heart, - title = {Human Heart}, - author = {{10x Genomics}}, - year = {2019}, - url = {https://www.10xgenomics.com/datasets/human-heart-1-standard-1-0-0} -} - - -@misc{10x2019lymph, - title = {Human Lymph Node}, - author = {{10x Genomics}}, - year = {2019}, - url = 
{https://www.10xgenomics.com/datasets/human-lymph-node-1-standard-1-0-0} -} - - -@misc{10x2019pbmc, - title = {5k Peripheral Blood Mononuclear Cells (PBMCs) from a Healthy Donor with a Panel of TotalSeq-B Antibodies (v3 chemistry)}, - author = {{10x Genomics}}, - year = {2019}, - url = {https://www.10xgenomics.com/resources/datasets/5-k-peripheral-blood-mononuclear-cells-pbm-cs-from-a-healthy-donor-with-cell-surface-proteins-v-3-chemistry-3-1-standard-3-1-0} -} - - -@misc{10x2020breast, - title = {Human Breast Cancer: Whole Transcriptome Analysis}, - author = {{10x Genomics}}, - year = {2020}, - url = {https://www.10xgenomics.com/datasets/human-breast-cancer-whole-transcriptome-analysis-1-standard-1-2-0} -} - - -@misc{10x2020cerebellum, - title = {Human Cerebellum: Whole Transcriptome Analysis}, - author = {{10x Genomics}}, - year = {2020}, - url = {https://www.10xgenomics.com/datasets/human-cerebellum-whole-transcriptome-analysis-1-standard-1-2-0} -} - - -@misc{10x2020kidney, - title = {Mouse Kidney Section (Coronal)}, - author = {{10x Genomics}}, - year = {2020}, - url = {https://www.10xgenomics.com/datasets/mouse-kidney-section-coronal-1-standard-1-1-0} -} - - -@misc{10x2021breast, - title = {Human Breast Cancer: Ductal Carcinoma In Situ, Invasive Carcinoma (FFPE)}, - author = {{10x Genomics}}, - year = {2021}, - url = {https://www.10xgenomics.com/datasets/human-breast-cancer-ductal-carcinoma-in-situ-invasive-carcinoma-ffpe-1-standard-1-3-0} -} - - -@misc{10x2021prostate, - title = {Normal Human Prostate (FFPE)}, - author = {{10x Genomics}}, - year = {2021}, - url = {https://www.10xgenomics.com/datasets/normal-human-prostate-ffpe-1-standard-1-3-0} -} - - -@misc{10x2022brain, - title = {Mouse Brain Coronal Section 1 (FFPE)}, - author = {{10x Genomics}}, - year = {2022}, - url = {https://www.10xgenomics.com/datasets/mouse-brain-coronal-section-1-ffpe-2-standard} -} - - -@misc{10x2022cervical, - title = {Human Cervical Cancer (FFPE)}, - author = {{10x Genomics}}, - 
year = {2022}, - url = {https://www.10xgenomics.com/datasets/human-cervical-cancer-1-standard} -} - - -@misc{10x2022olfactory, - title = {Adult Mouse Olfactory Bulb}, - author = {{10x Genomics}}, - year = {2022}, - url = {https://www.10xgenomics.com/datasets/adult-mouse-olfactory-bulb-1-standard-1} -} - - -@misc{10x2022intestine, - title = {Human Intestine Cancer (FPPE)}, - author = {{10x Genomics}}, - year = {2022}, - url = {https://www.10xgenomics.com/datasets/human-intestine-cancer-1-standard} -} - - -@misc{10x2022melanoma, - title = {Human Melanoma, IF Stained (FFPE)}, - author = {{10x Genomics}}, - year = {2022}, - url = {https://www.10xgenomics.com/datasets/human-melanoma-if-stained-ffpe-2-standard} -} - - -@misc{10x2022prostate, - title = {Human Prostate Cancer, Adjacent Normal Section with IF Staining (FFPE)}, - author = {{10x Genomics}}, - year = {2022}, - url = {https://www.10xgenomics.com/datasets/human-prostate-cancer-adjacent-normal-section-with-if-staining-ffpe-1-standard} -} - - -@misc{10x2023brain, - title = {Human Brain Cancer, 11 mm Capture Area (FFPE)}, - author = {{10x Genomics}}, - year = {2023}, - url = {https://www.10xgenomics.com/datasets/human-brain-cancer-11-mm-capture-area-ffpe-2-standard} -} - - -@misc{10x2023colon, - title = {Visium CytAssist Gene Expression Libraries of Post-Xenium Human Colon Cancer (FFPE)}, - author = {{10x Genomics}}, - year = {2023}, - url = {https://www.10xgenomics.com/datasets/visium-cytassist-gene-expression-libraries-of-post-xenium-human-colon-cancer-ffpe-using-the-human-whole-transcriptome-probe-set-2-standard} -} - - -@misc{10x2023colorectal, - title = {Human Colorectal Cancer, 11 mm Capture Area (FFPE)}, - author = {{10x Genomics}}, - year = {2023}, - url = {https://www.10xgenomics.com/datasets/human-colorectal-cancer-11-mm-capture-area-ffpe-2-standard} -} - - -@misc{10x2023embryo, - title = {Visium CytAssist, Mouse Embryo, 11 mm Capture Area (FFPE)}, - author = {{10x Genomics}}, - year = {2023}, - url = 
{https://www.10xgenomics.com/datasets/visium-cytassist-mouse-embryo-11-mm-capture-area-ffpe-2-standard} -} - - -@misc{10x2023kidney, - title = {Human Kidney, 11 mm Capture Area (FFPE)}, - author = {{10x Genomics}}, - year = {2023}, - url = {https://www.10xgenomics.com/datasets/human-kidney-11-mm-capture-area-ffpe-2-standard} -} - - -@misc{10x2023lung, - title = {Human Lung Cancer, 11 mm Capture Area (FFPE)}, - author = {{10x Genomics}}, - year = {2023}, - url = {https://www.10xgenomics.com/datasets/human-lung-cancer-11-mm-capture-area-ffpe-2-standard} -} - - -@misc{10x2023mousebrain, - title = {Visium CytAssist Gene Expression Libraries of Post-Xenium Mouse Brain (FF)}, - author = {{10x Genomics}}, - year = {2023}, - url = {https://www.10xgenomics.com/datasets/visium-cytassist-gene-expression-libraries-of-post-xenium-mouse-brain-ff-using-the-mouse-whole-transcriptome-probe-set-2-standard} -} - - -@article{agostinis2022newwave, - doi = {10.1093/bioinformatics/btac149}, - url = {https://doi.org/10.1093/bioinformatics/btac149}, - year = {2022}, - month = {Mar.}, - publisher = {Oxford University Press ({OUP})}, - volume = {38}, - number = {9}, - pages = {2648--2650}, - author = {Federico Agostinis and Chiara Romualdi and Gabriele Sales and Davide Risso}, - editor = {Yann Ponty}, - title = {NewWave: a scalable R/Bioconductor package for the dimensionality reduction and batch effect removal of single-cell {RNA}-seq data}, - journal = {Bioinformatics} -} - - -@article{agrawal2021mde, - title = {Minimum-Distortion Embedding}, - author = {Akshay Agrawal and Alnur Ali and Stephen Boyd}, - year = {2021}, - journal = {Foundations and Trends{\textregistered} in Machine Learning}, - publisher = {Now Publishers}, - volume = {14}, - number = {3}, - pages = {211--378}, - doi = {10.1561/2200000090}, - url = {https://doi.org/10.1561/2200000090} -} - - -@article{aliee2021autogenes, - title = {{AutoGeneS}: Automatic gene selection using multi-objective optimization for {RNA}-seq 
deconvolution}, - author = {Hananeh Aliee and Fabian J. Theis}, - year = {2021}, - month = {Jul.}, - journal = {Cell Systems}, - publisher = {Elsevier {BV}}, - volume = {12}, - number = {7}, - pages = {706--715.e4}, - doi = {10.1016/j.cels.2021.05.006}, - url = {https://doi.org/10.1016/j.cels.2021.05.006} -} - - -@inproceedings{amelio2015normalized, - doi = {10.1145/2808797.2809344}, - url = {https://doi.org/10.1145/2808797.2809344}, - year = {2015}, - month = {Aug.}, - publisher = {{ACM}}, - author = {Alessia Amelio and Clara Pizzuti}, - title = {Is Normalized Mutual Information a Fair Measure for Comparing Community Detection Methods?}, - booktitle = {Proceedings of the 2015 {IEEE}/{ACM} International Conference on Advances in Social Networks Analysis and Mining 2015} -} - - -@article{andersson2020single, - title = {Single-cell and spatial transcriptomics enables probabilistic inference of cell type topography}, - author = {Alma Andersson and Joseph Bergenstr{\aa}hle and Michaela Asp and Ludvig Bergenstr{\aa}hle and Aleksandra Jurek and Jos{\'{e}} Fern{\'{a}}ndez Navarro and Joakim Lundeberg}, - year = {2020}, - month = {Oct.}, - journal = {Communications Biology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {3}, - number = {1}, - doi = {10.1038/s42003-020-01247-y}, - url = {https://doi.org/10.1038/s42003-020-01247-y} -} - - -@article{andersson2021sepal, - title={sepal: Identifying transcript profiles with spatial patterns by diffusion-based modeling}, - author={Andersson, Alma and Lundeberg, Joakim}, - journal={Bioinformatics}, - volume={37}, - number={17}, - pages={2644--2650}, - year={2021}, - publisher={Oxford University Press}, - doi={10.1093/bioinformatics/btab164} -} - - -@string{apr = {Apr.}} - - -@string{aug = {Aug.}} - - -@article{batson2019molecular, - title = {Molecular Cross-Validation for Single-Cell RNA-seq}, - author = {Batson, Joshua and Royer, Lo{\"\i}c and Webber, James}, - year = {2019}, - journal = {bioRxiv}, - 
publisher = {Cold Spring Harbor Laboratory}, - doi = {10.1101/786269}, - url = {https://www.biorxiv.org/content/early/2019/09/30/786269}, - elocation-id = {786269}, - eprint = {https://www.biorxiv.org/content/early/2019/09/30/786269.full.pdf} -} - - -@article{biancalani2021deep, - title = {Deep learning and alignment of spatially resolved single-cell transcriptomes with Tangram}, - author = {Tommaso Biancalani and Gabriele Scalia and Lorenzo Buffoni and Raghav Avasthi and Ziqing Lu and Aman Sanger and Neriman Tokcan and Charles R. Vanderburg and {\AA}sa Segerstolpe and Meng Zhang and Inbal Avraham-Davidi and Sanja Vickovic and Mor Nitzan and Sai Ma and Ayshwarya Subramanian and Michal Lipinski and Jason Buenrostro and Nik Bear Brown and Duccio Fanelli and Xiaowei Zhuang and Evan Z. Macosko and Aviv Regev}, - year = {2021}, - month = {Oct.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {18}, - number = {11}, - pages = {1352--1362}, - doi = {10.1038/s41592-021-01264-7}, - url = {https://doi.org/10.1038/s41592-021-01264-7} -} - - -@article{bintayyash2021non, - author = {BinTayyash, Nuha and Georgaka, Sokratia and John, S T and Ahmed, Sumon and Boukouvalas, Alexis and Hensman, James and Rattray, Magnus}, - title = "{Non-parametric modelling of temporal and spatial counts data from RNA-seq experiments}", - journal = {Bioinformatics}, - volume = {37}, - number = {21}, - pages = {3788-3795}, - year = {2021}, - month = {07}, - issn = {1367-4803}, - doi = {10.1093/bioinformatics/btab486}, - url = {https://doi.org/10.1093/bioinformatics/btab486}, - eprint = {https://academic.oup.com/bioinformatics/article-pdf/37/21/3788/50336570/btab486.pdf}, -} - - -@article{bland2000odds, - title = {Statistics Notes: The odds ratio}, - author = {J. M. 
Bland}, - year = {2000}, - month = {May}, - journal = {{BMJ}}, - publisher = {{BMJ}}, - volume = {320}, - number = {7247}, - pages = {1468--1468}, - doi = {10.1136/bmj.320.7247.1468}, - url = {https://doi.org/10.1136/bmj.320.7247.1468} -} - - -@article{breiman2001random, - doi = {10.1023/a:1010933404324}, - url = {https://doi.org/10.1023/a:1010933404324}, - year = {2001}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {45}, - number = {1}, - pages = {5--32}, - author = {Leo Breiman}, - journal = {Machine Learning} -} - - -@article{bttner2018test, - title = {A test metric for assessing single-cell {RNA}-seq batch correction}, - author = {Maren B\"{u}ttner and Zhichao Miao and F. Alexander Wolf and Sarah A. Teichmann and Fabian J. Theis}, - year = {2018}, - month = {Dec.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {16}, - number = {1}, - pages = {43--49}, - doi = {10.1038/s41592-018-0254-1}, - url = {https://doi.org/10.1038/s41592-018-0254-1} -} - - -@article{cabello2020singlecellsignalr, - title = {{SingleCellSignalR}: inference of intercellular networks from single-cell transcriptomics}, - author = {Simon Cabello-Aguilar and M{\'{e}}lissa Alame and Fabien Kon-Sun-Tack and Caroline Fau and Matthieu Lacroix and Jacques Colinge}, - year = {2020}, - month = {Mar.}, - journal = {Nucleic Acids Research}, - publisher = {Oxford University Press ({OUP})}, - volume = {48}, - number = {10}, - pages = {e55--e55}, - doi = {10.1093/nar/gkaa183}, - url = {https://doi.org/10.1093/nar/gkaa183} -} - - -@article{cable2021robust, - title = {Robust decomposition of cell type mixtures in spatial transcriptomics}, - author = {Dylan M. Cable and Evan Murray and Luli S. Zou and Aleksandrina Goeva and Evan Z. Macosko and Fei Chen and Rafael A. 
Irizarry}, - year = {2021}, - month = {Feb.}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {40}, - number = {4}, - pages = {517--526}, - doi = {10.1038/s41587-021-00830-w}, - url = {https://doi.org/10.1038/s41587-021-00830-w} -} - - -@misc{cannoodt2021viashfromscripts, - doi = {10.48550/ARXIV.2110.11494}, - url = {https://arxiv.org/abs/2110.11494}, - author = {Cannoodt, Robrecht and Cannoodt, Hendrik and Van de Kerckhove, Eric and Boschmans, Andy and De Maeyer, Dries and Verbeiren, Toni}, - keywords = {Software Engineering (cs.SE), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {Viash: from scripts to pipelines}, - publisher = {arXiv}, - year = {2021}, - copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} -} - - -@article{cai2023spanve, - title={Spanve: an Statistical Method to Detect Clustering-friendly Spatially Variable Genes in Large-scale Spatial Transcriptomics Data}, - author={Cai, Guoxin and Chen, Yichang and Chen, Shuqing and Gu, Xun and Zhou, Zhan}, - journal={bioRxiv}, - pages={2023--02}, - year={2023}, - publisher={Cold Spring Harbor Laboratory}, - doi={10.1101/2023.02.08.527623} -} - - -@article{cao2018joint, - title = {Joint profiling of chromatin accessibility and gene expression in thousands of single cells}, - author = {Junyue Cao and Darren A. Cusanovich and Vijay Ramani and Delasa Aghamirzaie and Hannah A. Pliner and Andrew J. Hill and Riza M. Daza and Jose L. McFaline-Figueroa and Jonathan S. Packer and Lena Christiansen and Frank J. Steemers and Andrew C. 
Adey and Cole Trapnell and Jay Shendure}, - year = {2018}, - month = {Sept.}, - journal = {Science}, - publisher = {American Association for the Advancement of Science ({AAAS})}, - volume = {361}, - number = {6409}, - pages = {1380--1385}, - doi = {10.1126/science.aau0730}, - url = {https://doi.org/10.1126/science.aau0730} -} - - -@article{cao2020human, - title = {A human cell atlas of fetal gene expression}, - author = {Junyue Cao and Diana R. O'Day and Hannah A. Pliner and Paul D. Kingsley and Mei Deng and Riza M. Daza and Michael A. Zager and Kimberly A. Aldinger and Ronnie Blecher-Gonen and Fan Zhang and Malte Spielmann and James Palis and Dan Doherty and Frank J. Steemers and Ian A. Glass and Cole Trapnell and Jay Shendure}, - year = {2020}, - month = {Nov.}, - journal = {Science}, - publisher = {American Association for the Advancement of Science ({AAAS})}, - volume = {370}, - number = {6518}, - doi = {10.1126/science.aba7721}, - url = {https://doi.org/10.1126/science.aba7721} -} - - -@article{chai2014root, - doi = {10.5194/gmdd-7-1525-2014}, - url = {https://doi.org/10.5194/gmdd-7-1525-2014}, - year = {2014}, - month = {Feb.}, - publisher = {Copernicus {GmbH}}, - author = {T. Chai and R. R. 
Draxler}, - title = {Root mean square error ({RMSE}) or mean absolute error ({MAE})?} -} - - -@article{chang2022spatial, - title={Spatial omics representation and functional tissue module inference using graph Fourier transform}, - author={Chang, Yuzhou and Liu, Jixin and Ma, Anjun and Jiang, Sizun and Krull, Jordan and Yeo, Yao Yu and Liu, Yang and Rodig, Scott J and Barouch, Dan H and Fan, Rong and others}, - journal={bioRxiv}, - pages={2022--12}, - year={2022}, - publisher={Cold Spring Harbor Laboratory}, - doi={10.1101/2022.12.10.519929} -} - - -@article{chazarragil2021flexible, - doi = {10.1093/nar/gkab004}, - url = {https://doi.org/10.1093/nar/gkab004}, - year = {2021}, - month = {Feb.}, - publisher = {Oxford University Press ({OUP})}, - volume = {49}, - number = {7}, - pages = {e42--e42}, - author = {Ruben Chazarra-Gil and Stijn van~Dongen and Vladimir~Yu Kiselev and Martin Hemberg}, - title = {Flexible comparison of batch correction methods for single-cell {RNA}-seq using {BatchBench}}, - journal = {Nucleic Acids Research} -} - - -@article{chen2009local, - title = {Local Multidimensional Scaling for Nonlinear Dimension Reduction, Graph Drawing, and Proximity Analysis}, - author = {Lisha Chen and Andreas Buja}, - year = {2009}, - month = {Mar.}, - journal = {Journal of the American Statistical Association}, - publisher = {Informa {UK} Limited}, - volume = {104}, - number = {485}, - pages = {209--219}, - doi = {10.1198/jasa.2009.0111}, - url = {https://doi.org/10.1198/jasa.2009.0111} -} - - -@inproceedings{chen2016xgboost, - title = {{XGBoost}}, - author = {Tianqi Chen and Carlos Guestrin}, - year = {2016}, - month = {Aug.}, - booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining}, - publisher = {{Acm}}, - doi = {10.1145/2939672.2939785}, - url = {https://doi.org/10.1145/2939672.2939785} -} - - -@article{cichocki2009fast, - title = {Fast Local Algorithms for Large Scale Nonnegative Matrix and 
Tensor Factorizations}, - author = {Andrzej Cichocki and Anh-Huy Phan}, - year = {2009}, - journal = {{IEICE} Transactions on Fundamentals of Electronics, Communications and Computer Sciences}, - publisher = {Institute of Electronics, Information and Communications Engineers ({IEICE})}, - volume = {E92-a}, - number = {3}, - pages = {708--721}, - doi = {10.1587/transfun.e92.a.708}, - url = {https://doi.org/10.1587/transfun.e92.a.708} -} - - -@article{coifman2006diffusion, - title = {Diffusion maps}, - author = {Ronald R. Coifman and St{\'{e}}phane Lafon}, - year = {2006}, - month = {Jul.}, - journal = {Applied and Computational Harmonic Analysis}, - publisher = {Elsevier {BV}}, - volume = {21}, - number = {1}, - pages = {5--30}, - doi = {10.1016/j.acha.2006.04.006}, - url = {https://doi.org/10.1016/j.acha.2006.04.006} -} - - -@article{cover1967nearest, - title = {Nearest neighbor pattern classification}, - author = {T. Cover and P. Hart}, - year = {1967}, - month = {Jan}, - journal = {{IEEE} Transactions on Information Theory}, - publisher = {Institute of Electrical and Electronics Engineers ({IEEE})}, - volume = {13}, - number = {1}, - pages = {21--27}, - doi = {10.1109/tit.1967.1053964}, - url = {https://doi.org/10.1109/tit.1967.1053964} -} - - -@inproceedings{davis2006prauc, - title = {The relationship between Precision-Recall and {ROC} curves}, - author = {Jesse Davis and Mark Goadrich}, - year = {2006}, - booktitle = {Proceedings of the 23rd international conference on Machine learning - {ICML} {\textquotesingle}06}, - publisher = {{ACM} Press}, - doi = {10.1145/1143844.1143874}, - url = {https://doi.org/10.1145/1143844.1143874} -} - - -@string{dec = {Dec.}} - -@article{Demetci2020scot, - author = {Pinar Demetci and Rebecca Santorella and Bj{\"o}rn Sandstede and William Stafford Noble and Ritambhara Singh}, - title = {Gromov-Wasserstein optimal transport to align single-cell multi-omics data}, - elocation-id = {2020.04.28.066787}, - year = {2020}, - doi = 
{10.1101/2020.04.28.066787}, - publisher = {Cold Spring Harbor Laboratory}, - URL = {https://www.biorxiv.org/content/early/2020/11/11/2020.04.28.066787}, - eprint = {https://www.biorxiv.org/content/early/2020/11/11/2020.04.28.066787.full.pdf}, - journal = {bioRxiv} -} - - -@article{dimitrov2022comparison, - title = {Comparison of methods and resources for cell-cell communication inference from single-cell {RNA}-Seq data}, - author = {Daniel Dimitrov and D{\'{e}}nes T\"{u}rei and Martin Garrido-Rodriguez and Paul L. Burmedi and James S. Nagai and Charlotte Boys and Ricardo O. Ramirez Flores and Hyojin Kim and Bence Szalai and Ivan G. Costa and Alberto Valdeolivas and Aur{\'{e}}lien Dugourd and Julio Saez-Rodriguez}, - year = {2022}, - month = {Jun.}, - journal = {Nature Communications}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {13}, - number = {1}, - doi = {10.1038/s41467-022-30755-0}, - url = {https://doi.org/10.1038/s41467-022-30755-0} -} - - -@article{donoho2017yearsdatascience, - doi = {10.1080/10618600.2017.1384734}, - url = {https://doi.org/10.1080/10618600.2017.1384734}, - year = {2017}, - month = {Oct.}, - publisher = {Informa {UK} Limited}, - volume = {26}, - number = {4}, - pages = {745--766}, - author = {David Donoho}, - title = {50 Years of Data Science}, - journal = {Journal of Computational and Graphical Statistics} -} - - -@article{efremova2020cellphonedb, - title = {{CellPhoneDB}: inferring cell{\textendash}cell communication from combined expression of multi-subunit ligand{\textendash}receptor complexes}, - author = {Mirjana Efremova and Miquel Vento-Tormo and Sarah A. 
Teichmann and Roser Vento-Tormo}, - year = {2020}, - month = {Feb.}, - journal = {Nature Protocols}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {15}, - number = {4}, - pages = {1484--1506}, - doi = {10.1038/s41596-020-0292-x}, - url = {https://doi.org/10.1038/s41596-020-0292-x} -} - - -@article{emmons2016analysis, - title = {Analysis of Network Clustering Algorithms and Cluster Quality Metrics at Scale}, - volume = {11}, - ISSN = {1932-6203}, - url = {http://dx.doi.org/10.1371/journal.pone.0159161}, - doi = {10.1371/journal.pone.0159161}, - number = {7}, - journal = {PLOS ONE}, - publisher = {Public Library of Science (PLoS)}, - author = {Emmons, Scott and Kobourov, Stephen and Gallant, Mike and B\"{o}rner, Katy}, - editor = {Dovrolis, Constantine}, - year = {2016}, - month = jul, - pages = {e0159161} -} - - -@article{eraslan2019single, - title = {Single-cell {RNA}-seq denoising using a deep count autoencoder}, - author = {G\"{o}kcen Eraslan and Lukas M. Simon and Maria Mircea and Nikola S. Mueller and Fabian J. Theis}, - year = {2019}, - month = {Jan}, - journal = {Nature Communications}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {10}, - number = {1}, - doi = {10.1038/s41467-018-07931-2}, - url = {https://doi.org/10.1038/s41467-018-07931-2} -} - - -@article{fang2022conservation, - title = {Conservation and divergence of cortical cell organization in human and mouse revealed by MERFISH}, - volume = {377}, - ISSN = {1095-9203}, - url = {http://dx.doi.org/10.1126/science.abm1741}, - DOI = {10.1126/science.abm1741}, - number = {6601}, - journal = {Science}, - publisher = {American Association for the Advancement of Science (AAAS)}, - author = {Fang, Rongxin and Xia, Chenglong and Close, Jennie L. and Zhang, Meng and He, Jiang and Huang, Zhengkai and Halpern, Aaron R. and Long, Brian and Miller, Jeremy A. and Lein, Ed S. 
and Zhuang, Xiaowei}, - year = {2022}, - month = jul, - pages = {56-62} -} - - -@string{feb = {Feb.}} - - -@article{fix1989discriminatory, - doi = {10.2307/1403797}, - url = {https://doi.org/10.2307/1403797}, - year = {1989}, - month = {Dec.}, - publisher = {{JSTOR}}, - volume = {57}, - number = {3}, - pages = {238}, - author = {Evelyn Fix and J. L. Hodges}, - title = {Discriminatory Analysis. Nonparametric Discrimination: Consistency Properties}, - journal = {International Statistical Review / Revue Internationale de Statistique} -} - - -@article{gower1975generalized, - title = {Generalized procrustes analysis}, - author = {J. C. Gower}, - year = {1975}, - month = {Mar.}, - journal = {Psychometrika}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {40}, - number = {1}, - pages = {33--51}, - doi = {10.1007/bf02291478}, - url = {https://doi.org/10.1007/bf02291478} -} - - -@article{grandini2020metrics, - title = {Metrics for Multi-Class Classification: an Overview}, - author = {Grandini, Margherita and Bagli, Enrico and Visani, Giorgio}, - year = {2020}, - journal = {arXiv}, - publisher = {Cornell University}, - doi = {10.48550/arxiv.2008.05756}, - url = {https://arxiv.org/abs/2008.05756}, - copyright = {arXiv.org perpetual, non-exclusive license}, - keywords = {Machine Learning (stat.ML), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences} -} - - -@article{granja2021archr, - title = {{ArchR} is a scalable software package for integrative single-cell chromatin accessibility analysis}, - author = {Jeffrey M. Granja and M. Ryan Corces and Sarah E. Pierce and S. Tansu Bagdatli and Hani Choudhry and Howard Y. Chang and William J. 
Greenleaf}, - year = {2021}, - month = {Feb.}, - journal = {Nature Genetics}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {53}, - number = {3}, - pages = {403--411}, - doi = {10.1038/s41588-021-00790-6}, - url = {https://doi.org/10.1038/s41588-021-00790-6} -} - - -@article{grn2014validation, - title = {Validation of noise models for single-cell transcriptomics}, - author = {Dominic Gr\"{u}n and Lennart Kester and Alexander van Oudenaarden}, - year = {2014}, - month = {Apr.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {11}, - number = {6}, - pages = {637--640}, - doi = {10.1038/nmeth.2930}, - url = {https://doi.org/10.1038/nmeth.2930} -} - - -@article{haghverdi2018batch, - title = {Batch effects in single-cell {RNA}-sequencing data are corrected by matching mutual nearest neighbors}, - author = {Laleh Haghverdi and Aaron T L Lun and Michael D Morgan and John C Marioni}, - year = {2018}, - month = {Apr.}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {36}, - number = {5}, - pages = {421--427}, - doi = {10.1038/nbt.4091}, - url = {https://doi.org/10.1038/nbt.4091} -} - - -@article{hammarlund2018cengen, - title = {The {CeNGEN} Project: The Complete Gene Expression Map of an Entire Nervous System}, - author = {Marc Hammarlund and Oliver Hobert and David M. Miller and Nenad Sestan}, - year = {2018}, - month = {Aug.}, - journal = {Neuron}, - publisher = {Elsevier {BV}}, - volume = {99}, - number = {3}, - pages = {430--433}, - doi = {10.1016/j.neuron.2018.07.042}, - url = {https://doi.org/10.1016/j.neuron.2018.07.042} -} - - -@article{hansen2012removing, - title = {Adjusting batch effects in microarray expression data using empirical Bayes methods}, - author = {W. 
Evan Johnson and Cheng Li and Ariel Rabinovic}, - year = {2006}, - month = {Apr.}, - journal = {Biostatistics}, - publisher = {Oxford University Press ({OUP})}, - volume = {8}, - number = {1}, - pages = {118--127}, - doi = {10.1093/biostatistics/kxj037}, - url = {https://doi.org/10.1093/biostatistics/kxj037} -} - - -@article{hao2021integrated, - title = {Integrated analysis of multimodal single-cell data}, - author = {Yuhan Hao and Stephanie Hao and Erica Andersen-Nissen and William M. Mauck and Shiwei Zheng and Andrew Butler and Maddie J. Lee and Aaron J. Wilk and Charlotte Darby and Michael Zager and Paul Hoffman and Marlon Stoeckius and Efthymia Papalexi and Eleni P. Mimitou and Jaison Jain and Avi Srivastava and Tim Stuart and Lamar M. Fleming and Bertrand Yeung and Angela J. Rogers and Juliana M. McElrath and Catherine A. Blish and Raphael Gottardo and Peter Smibert and Rahul Satija}, - year = {2021}, - month = {Jun.}, - journal = {Cell}, - publisher = {Elsevier {BV}}, - volume = {184}, - number = {13}, - pages = {3573--3587.e29}, - doi = {10.1016/j.cell.2021.04.048}, - url = {https://doi.org/10.1016/j.cell.2021.04.048} -} - - -@article{hao2021somde, - title={SOMDE: a scalable method for identifying spatially variable genes with self-organizing map}, - author={Hao, Minsheng and Hua, Kui and Zhang, Xuegong}, - journal={Bioinformatics}, - volume={37}, - number={23}, - pages={4392--4398}, - year={2021}, - publisher={Oxford University Press}, - doi={10.1093/bioinformatics/btab471} -} - - -@article{hie2019efficient, - title = {Efficient integration of heterogeneous single-cell transcriptomes using Scanorama}, - author = {Brian Hie and Bryan Bryson and Bonnie Berger}, - year = {2019}, - month = {May}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {37}, - number = {6}, - pages = {685--691}, - doi = {10.1038/s41587-019-0113-3}, - url = {https://doi.org/10.1038/s41587-019-0113-3} -} - - 
-@article{hinton1989connectionist, - title = {Connectionist learning procedures}, - author = {Geoffrey E. Hinton}, - year = {1989}, - month = {Sept.}, - journal = {Artificial Intelligence}, - publisher = {Elsevier {BV}}, - volume = {40}, - number = {1-3}, - pages = {185--234}, - doi = {10.1016/0004-3702(89)90049-0}, - url = {https://doi.org/10.1016/0004-3702(89)90049-0} -} - - -@book{hosmer2013applied, - title = {Applied logistic regression}, - author = {Hosmer Jr, D.W. and Lemeshow, S. and Sturdivant, R.X.}, - year = {2013}, - publisher = {John Wiley \& Sons}, - volume = {398} -} - - -@article{hou2019scmatch, - title = {{scMatch}: a single-cell gene expression profile annotation tool using reference datasets}, - author = {Rui Hou and Elena Denisenko and Alistair R R Forrest}, - year = {2019}, - month = {Apr.}, - journal = {Bioinformatics}, - publisher = {Oxford University Press ({OUP})}, - volume = {35}, - number = {22}, - pages = {4688--4695}, - doi = {10.1093/bioinformatics/btz292}, - url = {https://doi.org/10.1093/bioinformatics/btz292}, - editor = {Janet Kelso} -} - - -@article{hou2020predicting, - title = {Predicting cell-to-cell communication networks using {NATMI}}, - author = {Rui Hou and Elena Denisenko and Huan Ting Ong and Jordan A. Ramilowski and Alistair R. R. Forrest}, - year = {2020}, - month = {Oct.}, - journal = {Nature Communications}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {11}, - number = {1}, - doi = {10.1038/s41467-020-18873-z}, - url = {https://doi.org/10.1038/s41467-020-18873-z} -} - - -@article{hou2020systematic, - title = {A systematic evaluation of single-cell {RNA}-sequencing imputation methods}, - author = {Wenpin Hou and Zhicheng Ji and Hongkai Ji and Stephanie C. 
Hicks}, - year = {2020}, - month = {Aug.}, - journal = {Genome Biology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {21}, - number = {1}, - doi = {10.1186/s13059-020-02132-x}, - url = {https://doi.org/10.1186/s13059-020-02132-x} -} - - -@article{hubert1985comparing, - doi = {10.1007/bf01908075}, - url = {https://doi.org/10.1007/bf01908075}, - year = {1985}, - month = {Dec.}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {2}, - number = {1}, - pages = {193--218}, - author = {Lawrence Hubert and Phipps Arabie}, - title = {Comparing partitions}, - journal = {Journal of Classification} -} - - -@article{hu2021spagcn, - title={SpaGCN: Integrating gene expression, spatial location and histology to identify spatial domains and spatially variable genes by graph convolutional network}, - author={Hu, Jian and Li, Xiangjie and Coleman, Kyle and Schroeder, Amelia and Ma, Nan and Irwin, David J and Lee, Edward B and Shinohara, Russell T and Li, Mingyao}, - journal={Nature methods}, - volume={18}, - number={11}, - pages={1342--1351}, - year={2021}, - publisher={Nature Publishing Group US New York}, - doi={10.1038/s41592-021-01255-8} -} - - -@string{jan = {Jan}} - - -@string{jul = {Jul.}} - - -@string{jun = {Jun.}} - - -@article{kats2021spatialde2, - title={SpatialDE2: fast and localized variance component analysis of spatial transcriptomics}, - author={Kats, Ilia and Vento-Tormo, Roser and Stegle, Oliver}, - journal={Biorxiv}, - pages={2021--10}, - year={2021}, - publisher={Cold Spring Harbor Laboratory}, - doi={10.1101/2021.10.27.466045} -} - - -@article{kendall1938new, - doi = {10.1093/biomet/30.1-2.81}, - url = {https://doi.org/10.1093/biomet/30.1-2.81}, - year = {1938}, - month = {Jun.}, - publisher = {Oxford University Press ({OUP})}, - volume = {30}, - number = {1-2}, - pages = {81--93}, - author = {M. G. 
KENDALL}, - title = {A new measure of rank correlation}, - journal = {Biometrika} -} - - -@article{kiselev2019challenges, - title = {Challenges in unsupervised clustering of single-cell {RNA}-seq data}, - author = {Vladimir Yu Kiselev and Tallulah S. Andrews and Martin Hemberg}, - year = {2019}, - month = {Jan}, - journal = {Nature Reviews Genetics}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {20}, - number = {5}, - pages = {273--282}, - doi = {10.1038/s41576-018-0088-9}, - url = {https://doi.org/10.1038/s41576-018-0088-9} -} - - -@article{kleshchevnikov2022cell2location, - title = {Cell2location maps fine-grained cell types in spatial transcriptomics}, - author = {Vitalii Kleshchevnikov and Artem Shmatko and Emma Dann and Alexander Aivazidis and Hamish W. King and Tong Li and Rasa Elmentaite and Artem Lomakin and Veronika Kedlian and Adam Gayoso and Mika Sarkin Jain and Jun Sung Park and Lauma Ramona and Elizabeth Tuck and Anna Arutyunyan and Roser Vento-Tormo and Moritz Gerstung and Louisa James and Oliver Stegle and Omer Ali Bayraktar}, - year = {2022}, - month = {Jan}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {40}, - number = {5}, - pages = {661--671}, - doi = {10.1038/s41587-021-01139-4}, - url = {https://doi.org/10.1038/s41587-021-01139-4} -} - - -@article{korsunsky2019fast, - title = {Fast, sensitive and accurate integration of single-cell data with Harmony}, - author = {Ilya Korsunsky and Nghia Millard and Jean Fan and Kamil Slowikowski and Fan Zhang and Kevin Wei and Yuriy Baglaenko and Michael Brenner and Po-ru Loh and Soumya Raychaudhuri}, - year = {2019}, - month = {Nov.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {16}, - number = {12}, - pages = {1289--1296}, - doi = {10.1038/s41592-019-0619-0}, - url = {https://doi.org/10.1038/s41592-019-0619-0} -} - - -@article{kraemer2018dimred, - title = {{dimRed} and 
{coRanking} - Unifying Dimensionality Reduction in R}, - author = {Guido Kraemer and Markus Reichstein and Miguel, D. Mahecha}, - year = {2018}, - journal = {The R Journal}, - publisher = {The R Foundation}, - volume = {10}, - number = {1}, - pages = {342}, - doi = {10.32614/rj-2018-039}, - url = {https://doi.org/10.32614/rj-2018-039} -} - - -@article{kruskal1964mds, - title = {Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis}, - author = {J. B. Kruskal}, - year = {1964}, - month = {Mar.}, - journal = {Psychometrika}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {29}, - number = {1}, - pages = {1--27}, - doi = {10.1007/bf02289565}, - url = {https://doi.org/10.1007/bf02289565} -} - - -@article{kuppe2022spatial, - title={Spatial multi-omic map of human myocardial infarction}, - author={Kuppe, Christoph and Ramirez Flores, Ricardo O and Li, Zhijian and Hayat, Sikander and Levinson, Rebecca T and Liao, Xian and Hannani, Monica T and Tanevski, Jovan and W{\"u}nnemann, Florian and Nagai, James S and others}, - journal={Nature}, - volume={608}, - number={7924}, - pages={766--777}, - year={2022}, - publisher={Nature Publishing Group UK London} -} - - -@article{lance2022multimodal, - title = {Multimodal single cell data integration challenge: results and lessons learned}, - author = {Lance, Christopher and Luecken, Malte D. and Burkhardt, Daniel B. 
and Cannoodt, Robrecht and Rautenstrauch, Pia and Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh, Nikolay and Ryazantsev, Gleb and Ohler, Uwe and , and Pisco, Angela Oliveira and Bloom, Jonathan and Krishnaswamy, Smita and Theis, Fabian J.}, - year = {2022}, - journal = {bioRxiv}, - publisher = {Cold Spring Harbor Laboratory}, - doi = {10.1101/2022.04.11.487796}, - url = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796}, - elocation-id = {2022.04.11.487796}, - eprint = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796.full.pdf} -} - - -@article{lance2024predicting, - title = {Predicting cellular profiles across modalities in longitudinal single-cell data: An Open Problems competition}, - author = {...}, - year = {2024}, - journal = {In preparation}, -} - - -@book{lawson1995solving, - title = {Solving Least Squares Problems}, - author = {Charles L. Lawson and Richard J. Hanson}, - year = {1995}, - month = {Jan}, - publisher = {Society for Industrial and Applied Mathematics}, - doi = {10.1137/1.9781611971217}, - url = {https://doi.org/10.1137/1.9781611971217} -} - - -@article{lee2009quality, - title = {Quality assessment of dimensionality reduction: Rank-based criteria}, - author = {John A. 
Lee and Michel Verleysen}, - year = {2009}, - month = {Mar.}, - journal = {Neurocomputing}, - publisher = {Elsevier {BV}}, - volume = {72}, - number = {7-9}, - pages = {1431--1443}, - doi = {10.1016/j.neucom.2008.12.017}, - url = {https://doi.org/10.1016/j.neucom.2008.12.017} -} - - -@article{li2021bayesian, - author = {Li, Qiwei and Zhang, Minzhe and Xie, Yang and Xiao, Guanghua}, - title = "{Bayesian modeling of spatial molecular profiling data via Gaussian process}", - journal = {Bioinformatics}, - volume = {37}, - number = {22}, - pages = {4129-4136}, - year = {2021}, - month = {06}, - abstract = "{The location, timing and abundance of gene expression (both mRNA and proteins) within a tissue define the molecular mechanisms of cell functions. Recent technology breakthroughs in spatial molecular profiling, including imaging-based technologies and sequencing-based technologies, have enabled the comprehensive molecular characterization of single cells while preserving their spatial and morphological contexts. This new bioinformatics scenario calls for effective and robust computational methods to identify genes with spatial patterns.We represent a novel Bayesian hierarchical model to analyze spatial transcriptomics data, with several unique characteristics. It models the zero-inflated and over-dispersed counts by deploying a zero-inflated negative binomial model that greatly increases model stability and robustness. Besides, the Bayesian inference framework allows us to borrow strength in parameter estimation in a de novo fashion. As a result, the proposed model shows competitive performances in accuracy and robustness over existing methods in both simulation studies and two real data applications.The related R/C++ source code is available at https://github.com/Minzhe/BOOST-GP.Supplementary data are available at Bioinformatics online. 
}", - issn = {1367-4803}, - doi = {10.1093/bioinformatics/btab455}, - url = {https://doi.org/10.1093/bioinformatics/btab455}, - eprint = {https://academic.oup.com/bioinformatics/article-pdf/37/22/4129/50335106/btab455.pdf}, -} - - -@article{linderman2018zero, - title = {Zero-preserving imputation of scRNA-seq data using low-rank approximation}, - author = {Linderman, George C. and Zhao, Jun and Kluger, Yuval}, - year = {2018}, - journal = {bioRxiv}, - publisher = {Cold Spring Harbor Laboratory}, - doi = {10.1101/397588}, - url = {https://www.biorxiv.org/content/early/2018/08/22/397588}, - elocation-id = {397588}, - eprint = {https://www.biorxiv.org/content/early/2018/08/22/397588.full.pdf} -} - - -@article{liu2020high, - title = {High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue}, - volume = {183}, - ISSN = {0092-8674}, - url = {http://dx.doi.org/10.1016/j.cell.2020.10.026}, - DOI = {10.1016/j.cell.2020.10.026}, - number = {6}, - journal = {Cell}, - publisher = {Elsevier BV}, - author = {Liu, Yang and Yang, Mingyu and Deng, Yanxiang and Su, Graham and Enninful, Archibald and Guo, Cindy C. and Tebaldi, Toma and Zhang, Di and Kim, Dongjoo and Bai, Zhiliang and Norris, Eileen and Pan, Alisia and Li, Jiatong and Xiao, Yang and Halene, Stephanie and Fan, Rong}, - year = {2020}, - month = dec, - pages = {1665--1681.e18} -} - - -@article{lohoff2021integration, - title = {Integration of spatial and single-cell transcriptomic data elucidates mouse organogenesis}, - volume = {40}, - ISSN = {1546-1696}, - url = {http://dx.doi.org/10.1038/s41587-021-01006-2}, - DOI = {10.1038/s41587-021-01006-2}, - number = {1}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media LLC}, - author = {Lohoff, T. and Ghazanfar, S. and Missarova, A. and Koulena, N. and Pierson, N. and Griffiths, J. A. and Bardot, E. S. and Eng, C.-H. L. and Tyser, R. C. V. and Argelaguet, R. and Guibentif, C. and Srinivas, S. and Briscoe, J. 
and Simons, B. D. and Hadjantonakis, A.-K. and G\"{o}ttgens, B. and Reik, W. and Nichols, J. and Cai, L. and Marioni, J. C.}, - year = {2021}, - month = sep, - pages = {74-85} -} - - -@article{lopez2018deep, - title = {Deep generative modeling for single-cell transcriptomics}, - author = {Romain Lopez and Jeffrey Regier and Michael B. Cole and Michael I. Jordan and Nir Yosef}, - year = {2018}, - month = {Nov.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {15}, - number = {12}, - pages = {1053--1058}, - doi = {10.1038/s41592-018-0229-2}, - url = {https://doi.org/10.1038/s41592-018-0229-2} -} - - -@article{lopez2022destvi, - title = {{DestVI} identifies continuums of cell types in spatial transcriptomics data}, - author = {Romain Lopez and Baoguo Li and Hadas Keren-Shaul and Pierre Boyeau and Merav Kedmi and David Pilzer and Adam Jelinski and Ido Yofe and Eyal David and Allon Wagner and Can Ergen and Yoseph Addadi and Ofra Golani and Franca Ronchese and Michael I. Jordan and Ido Amit and Nir Yosef}, - year = {2022}, - month = {Apr.}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {40}, - number = {9}, - pages = {1360--1369}, - doi = {10.1038/s41587-022-01272-8}, - url = {https://doi.org/10.1038/s41587-022-01272-8} -} - - -@article{lotfollahi2020query, - title = {Query to reference single-cell integration with transfer learning}, - author = {Lotfollahi, Mohammad and Naghipourfar, Mohsen and Luecken, Malte D. and Khajavi, Matin and B{\"u}ttner, Maren and Avsec, Ziga and Misharin, Alexander V. 
and Theis, Fabian J.}, - year = {2020}, - journal = {bioRxiv}, - publisher = {Cold Spring Harbor Laboratory}, - doi = {10.1101/2020.07.16.205997}, - url = {https://doi.org/10.1101/2020.07.16.205997}, - elocation-id = {2020.07.16.205997}, - eprint = {https://www.biorxiv.org/content/early/2020/07/16/2020.07.16.205997.full.pdf} -} - - -@article{luecken2022benchmarking, - title = {Benchmarking atlas-level data integration in single-cell genomics}, - author = {Malte D. Luecken and M. B\"{u}ttner and K. Chaichoompu and A. Danese and M. Interlandi and M. F. Mueller and D. C. Strobl and L. Zappia and M. Dugas and M. Colom{\'{e}}-Tatch{\'{e}} and Fabian J. Theis}, - year = {2021}, - month = {Dec.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {19}, - number = {1}, - pages = {41--50}, - doi = {10.1038/s41592-021-01336-8}, - url = {https://doi.org/10.1038/s41592-021-01336-8} -} - - -@article{lueks2011evaluate, - title = {How to Evaluate Dimensionality Reduction? 
- Improving the Co-ranking Matrix}, - author = {Lueks, Wouter and Mokbel, Bassam and Biehl, Michael and Hammer, Barbara}, - year = {2011}, - journal = {arXiv}, - doi = {10.48550/ARXIV.1110.3917}, - url = {https://arxiv.org/abs/1110.3917}, - copyright = {arXiv.org perpetual, non-exclusive license}, - keywords = {Machine Learning (cs.LG), Information Retrieval (cs.IR), FOS: Computer and information sciences, FOS: Computer and information sciences} -} - - -@misc{lun2019fastmnn, - title = {A description of the theory behind the fastMNN algorithm}, - author = {Lun, Aaron}, - year = {2019}, - url = {https://marionilab.github.io/FurtherMNN2018/theory/description.html} -} - - -@string{mar = {Mar.}} - - -@string{may = {May}} - - -@article{mcinnes2018umap, - title = {UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction}, - author = {McInnes, Leland and Healy, John and Melville, James}, - year = {2018}, - journal = {arXiv}, - publisher = {Cornell University}, - doi = {10.48550/arxiv.1802.03426}, - url = {https://arxiv.org/abs/1802.03426}, - copyright = {arXiv.org perpetual, non-exclusive license}, - keywords = {Machine Learning (stat.ML), Computational Geometry (cs.CG), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences} -} - - -@article{mereu2020benchmarking, - doi = {10.1038/s41587-020-0469-4}, - author = {Mereu, Elisabetta and Lafzi, Atefeh and Moutinho, Catia and Ziegenhain, Christoph and McCarthy, Davis J and Alvarez-Varela, Adrian and Batlle, Eduard and Sagar and Gruen, Dominic and Lau, Julia K and others}, - journal = {Nature biotechnology}, - number = {6}, - pages = {747--755}, - publisher = {Nature Publishing Group US New York}, - title = {Benchmarking single-cell {RNA}-sequencing protocols for cell atlas projects}, - volume = {38}, - year = {2020} -} - - -@inbook{miles2005rsquared, - title = {Encyclopedia of Statistics in Behavioral Science}, - author = {Jeremy Miles}, - year = {2005}, - 
month = {Oct.}, - publisher = {John Wiley {\&} Sons, Ltd}, - doi = {10.1002/0470013192.bsa526}, - url = {https://doi.org/10.1002/0470013192.bsa526}, - chapter = {{R-Squared}, Adjusted {R-Squared}} -} - - -@article{moon2019visualizing, - title = {Visualizing structure and transitions in high-dimensional biological data}, - author = {Kevin R. Moon and David van Dijk and Zheng Wang and Scott Gigante and Daniel B. Burkhardt and William S. Chen and Kristina Yim and Antonia van den Elzen and Matthew J. Hirn and Ronald R. Coifman and Natalia B. Ivanova and Guy Wolf and Smita Krishnaswamy}, - year = {2019}, - month = {Dec.}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {37}, - number = {12}, - pages = {1482--1492}, - doi = {10.1038/s41587-019-0336-3}, - url = {https://doi.org/10.1038/s41587-019-0336-3} -} - - -@article{narayan2021assessing, - title = {Assessing single-cell transcriptomic variability through density-preserving data visualization}, - author = {Ashwin Narayan and Bonnie Berger and Hyunghoon Cho}, - year = {2021}, - month = {Jan}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {39}, - number = {6}, - pages = {765--774}, - doi = {10.1038/s41587-020-00801-7}, - url = {https://doi.org/10.1038/s41587-020-00801-7} -} - - -@article{nestorowa2016single, - title = {A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation}, - author = {Sonia Nestorowa and Fiona K. Hamey and Blanca Pijuan Sala and Evangelia Diamanti and Mairi Shepherd and Elisa Laurenti and Nicola K. Wilson and David G. 
Kent and Berthold G\"{o}ttgens}, - year = {2016}, - month = {Aug.}, - journal = {Blood}, - publisher = {American Society of Hematology}, - volume = {128}, - number = {8}, - pages = {e20--e31}, - doi = {10.1182/blood-2016-05-716480}, - url = {https://doi.org/10.1182/blood-2016-05-716480} -} - - -@inproceedings{luecken2021neurips, - author = {Luecken, Malte and Burkhardt, Daniel and Cannoodt, Robrecht and Lance, Christopher and Agrawal, Aditi and Aliee, Hananeh and Chen, Ann and Deconinck, Louise and Detweiler, Angela and Granados, Alejandro and Huynh, Shelly and Isacco, Laura and Kim, Yang and Klein, Dominik and DE KUMAR, BONY and Kuppasani, Sunil and Lickert, Heiko and McGeever, Aaron and Melgarejo, Joaquin and Mekonen, Honey and Morri, Maurizio and M\"{u}ller, Michaela and Neff, Norma and Paul, Sheryl and Rieck, Bastian and Schneider, Kaylie and Steelman, Scott and Sterr, Michael and Treacy, Daniel and Tong, Alexander and Villani, Alexandra-Chloe and Wang, Guilin and Yan, Jia and Zhang, Ce and Pisco, Angela and Krishnaswamy, Smita and Theis, Fabian and Bloom, Jonathan M}, - booktitle = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks}, - editor = {J. Vanschoren and S. Yeung}, - pages = {}, - publisher = {Curran}, - title = {A sandbox for prediction and integration of DNA, RNA, and proteins in single cells}, - url = {https://datasets-benchmarks-proceedings.neurips.cc/paper_files/paper/2021/file/158f3069a435b314a80bdcb024f8e422-Paper-round2.pdf}, - volume = {1}, - year = {2021} -} - - -@string{nov = {Nov.}} - - -@string{oct = {Oct.}} - - -@article{olsson2016single, - title = {Single-cell analysis of mixed-lineage states leading to a binary cell fate choice}, - author = {Andre Olsson and Meenakshi Venkatasubramanian and Viren K. Chaudhri and Bruce J. Aronow and Nathan Salomonis and Harinder Singh and H. 
Leighton Grimes}, - year = {2016}, - month = {Aug.}, - journal = {Nature}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {537}, - number = {7622}, - pages = {698--702}, - doi = {10.1038/nature19348}, - url = {https://doi.org/10.1038/nature19348} -} - - -@misc{openproblems, - title = {Open Problems}, - author = {{Open Problems for Single Cell Analysis Consortium}}, - year = {2022}, - url = {https://openproblems.bio} -} - - -@article{palla2022squidpy, - title={Squidpy: a scalable framework for spatial omics analysis}, - author={Palla, Giovanni and Spitzer, Hannah and Klein, Michal and Fischer, David and Schaar, Anna Christina and Kuemmerle, Louis Benedikt and Rybakov, Sergei and Ibarra, Ignacio L and Holmberg, Olle and Virshup, Isaac and others}, - journal={Nature methods}, - volume={19}, - number={2}, - pages={171--178}, - year={2022}, - publisher={Nature Publishing Group US New York}, - doi={10.1038/s41592-021-01358-2} -} - - -@article{pearson1895regression, - doi = {10.1098/rspl.1895.0041}, - title = {VII. Note on regression and inheritance in the case of two parents}, - author = {Pearson, Karl}, - journal = {proceedings of the royal society of London}, - volume = {58}, - number = {347-352}, - pages = {240--242}, - year = {1895}, - publisher = {The Royal Society London} -} - - -@article{pearson1901pca, - title = {On lines and planes of closest fit to systems of points in space}, - author = {Karl Pearson}, - year = {1901}, - month = {Nov.}, - journal = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science}, - publisher = {Informa {UK} Limited}, - volume = {2}, - number = {11}, - pages = {559--572}, - doi = {10.1080/14786440109462720}, - url = {https://doi.org/10.1080/14786440109462720} -} - - -@article{pliner2019supervised, - title = {Supervised classification enables rapid annotation of cell atlases}, - author = {Hannah A. 
Pliner and Jay Shendure and Cole Trapnell}, - year = {2019}, - month = {Sept.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {16}, - number = {10}, - pages = {983--986}, - doi = {10.1038/s41592-019-0535-3}, - url = {https://doi.org/10.1038/s41592-019-0535-3} -} - - -@article{polanski2020bbknn, - title = {{BBKNN}: fast batch alignment of single cell transcriptomes}, - author = {Krzysztof Pola{\'{n}}ski and Matthew D Young and Zhichao Miao and Kerstin B Meyer and Sarah A Teichmann and Jong-Eun Park}, - year = {2019}, - month = {Aug.}, - journal = {Bioinformatics}, - publisher = {Oxford University Press ({OUP})}, - doi = {10.1093/bioinformatics/btz625}, - url = {https://doi.org/10.1093/bioinformatics/btz625}, - editor = {Bonnie Berger} -} - - -@article{raredon2022computation, - title = {Computation and visualization of cell{\textendash}cell signaling topologies in single-cell systems data using Connectome}, - author = {Micha Sam Brickman Raredon and Junchen Yang and James Garritano and Meng Wang and Dan Kushnir and Jonas Christian Schupp and Taylor S. Adams and Allison M. Greaney and Katherine L. Leiby and Naftali Kaminski and Yuval Kluger and Andre Levchenko and Laura E. Niklason}, - year = {2022}, - month = {Mar.}, - journal = {Scientific Reports}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {12}, - number = {1}, - doi = {10.1038/s41598-022-07959-x}, - url = {https://doi.org/10.1038/s41598-022-07959-x} -} - - -@article{rodriques2019slide, - title = {Slide-seq: A scalable technology for measuring genome-wide expression at high spatial resolution}, - author = {Samuel G. Rodriques and Robert R. Stickels and Aleksandrina Goeva and Carly A. Martin and Evan Murray and Charles R. Vanderburg and Joshua Welch and Linlin M. Chen and Fei Chen and Evan Z. 
Macosko}, - year = {2019}, - month = {Mar.}, - journal = {Science}, - publisher = {American Association for the Advancement of Science ({AAAS})}, - volume = {363}, - number = {6434}, - pages = {1463--1467}, - doi = {10.1126/science.aaw1219}, - url = {https://doi.org/10.1126/science.aaw1219} -} - - -@article{russell2023slide, - title = {Slide-tags enables single-nucleus barcoding for multimodal spatial genomics}, - volume = {625}, - ISSN = {1476-4687}, - url = {http://dx.doi.org/10.1038/s41586-023-06837-4}, - DOI = {10.1038/s41586-023-06837-4}, - number = {7993}, - journal = {Nature}, - publisher = {Springer Science and Business Media LLC}, - author = {Russell, Andrew J. C. and Weir, Jackson A. and Nadaf, Naeem M. and Shabet, Matthew and Kumar, Vipin and Kambhampati, Sandeep and Raichur, Ruth and Marrero, Giovanni J. and Liu, Sophia and Balderrama, Karol S. and Vanderburg, Charles R. and Shanmugam, Vignesh and Tian, Luyi and Iorgulescu, J. Bryan and Yoon, Charles H. and Wu, Catherine J. and Macosko, Evan Z. and Chen, Fei}, - year = {2023}, - month = dec, - pages = {101–109} -} - - -@InProceedings{santos2009on, - author = {Santos, Jorge M. 
and Embrechts, Mark"}, - editor = {Alippi, Cesare and Polycarpou, Marios and Panayiotou, Christos and Ellinas, Georgios}, - title = {On the Use of the Adjusted Rand Index as a Metric for Evaluating Supervised Classification}, - booktitle = {Artificial Neural Networks -- ICANN 2009}, - year = {2009}, - publisher = {Springer Berlin Heidelberg}, - address = {Berlin, Heidelberg}, - pages = {175--184}, - isbn = {978-3-642-04277-5}, - doi = {10.1007/978-3-642-04277-5_18}, - url = {https://doi.org/10.1007/978-3-642-04277-5_18} -} - - -@article{sarkar2021separating, - title = {Separating measurement and expression models clarifies confusion in single-cell {RNA} sequencing analysis}, - author = {Abhishek Sarkar and Matthew Stephens}, - year = {2021}, - month = {May}, - journal = {Nature Genetics}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {53}, - number = {6}, - pages = {770--777}, - doi = {10.1038/s41588-021-00873-4}, - url = {https://doi.org/10.1038/s41588-021-00873-4} -} - - -@article{schober2018correlation, - title = {Correlation Coefficients}, - author = {Patrick Schober and Christa Boer and Lothar A. Schwarte}, - year = {2018}, - month = {May}, - journal = {Anesthesia {\&} Analgesia}, - publisher = {Ovid Technologies (Wolters Kluwer Health)}, - volume = {126}, - number = {5}, - pages = {1763--1768}, - doi = {10.1213/ane.0000000000002864}, - url = {https://doi.org/10.1213/ane.0000000000002864} -} - - -@string{sep = {Sept.}} - - -@inproceedings{stanley2020harmonic, - title = {Harmonic Alignment}, - author = {Jay S. 
Stanley and Scott Gigante and Guy Wolf and Smita Krishnaswamy}, - year = {2020}, - month = {Jan}, - booktitle = {Proceedings of the 2020 {SIAM} International Conference on Data Mining}, - publisher = {Society for Industrial and Applied Mathematics}, - pages = {316--324}, - doi = {10.1137/1.9781611976236.36}, - url = {https://doi.org/10.1137/1.9781611976236.36} -} - - -@article{stickels2020highly, - title = {Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2}, - volume = {39}, - ISSN = {1546-1696}, - url = {http://dx.doi.org/10.1038/s41587-020-0739-1}, - DOI = {10.1038/s41587-020-0739-1}, - number = {3}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media LLC}, - author = {Stickels, Robert R. and Murray, Evan and Kumar, Pawan and Li, Jilong and Marshall, Jamie L. and Di Bella, Daniela J. and Arlotta, Paola and Macosko, Evan Z. and Chen, Fei}, - year = {2020}, - month = dec, - pages = {313–319} -} - - -@article{stoeckius2017simultaneous, - title = {Simultaneous epitope and transcriptome measurement in single cells}, - author = {Marlon Stoeckius and Christoph Hafemeister and William Stephenson and Brian Houck-Loomis and Pratip K Chattopadhyay and Harold Swerdlow and Rahul Satija and Peter Smibert}, - year = {2017}, - month = {Jul.}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {14}, - number = {9}, - pages = {865--868}, - doi = {10.1038/nmeth.4380}, - url = {https://doi.org/10.1038/nmeth.4380} -} - - -@article{stuart2019comprehensive, - title = {Comprehensive Integration of Single-Cell Data}, - author = {Stuart, T. and Butler, A. and Hoffman, P. and Hafemeister, C. and Papalexi, E. and Mauck, W.M. and Hao, Y. and Stoeckius, M. and Smibert, P. 
and Satija, R.}, - year = {2019}, - journal = {Cell}, - volume = {177}, - number = {7}, - pages = {1888--1902.e21}, - doi = {10.1016/j.cell.2019.05.031} -} - - -@article{sun2020statistical, - title={Statistical analysis of spatial expression patterns for spatially resolved transcriptomic studies}, - author={Sun, Shiquan and Zhu, Jiaqiang and Zhou, Xiang}, - journal={Nature methods}, - volume={17}, - number={2}, - pages={193--200}, - year={2020}, - publisher={Nature Publishing Group US New York}, - doi={10.1038/s41592-019-0701-7} -} - - -@article{svensson2018spatialde, - title={SpatialDE: identification of spatially variable genes}, - author={Svensson, Valentine and Teichmann, Sarah A and Stegle, Oliver}, - journal={Nature methods}, - volume={15}, - number={5}, - pages={343--346}, - year={2018}, - publisher={Nature Publishing Group}, - doi={10.1038/nmeth.4636} -} - - -@article{szubert2019structurepreserving, - title = {Structure-preserving visualisation of high dimensional single-cell datasets}, - author = {Benjamin Szubert and Jennifer E. 
Cole and Claudia Monaco and Ignat Drozdov}, - year = {2019}, - month = {Jun.}, - journal = {Scientific Reports}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {9}, - number = {1}, - doi = {10.1038/s41598-019-45301-0}, - url = {https://doi.org/10.1038/s41598-019-45301-0} -} - - -@article{tabula2018single, - title = {Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris}, - author = {{Tabula Muris Consortium}}, - year = {2018}, - month = {Oct.}, - journal = {Nature}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {562}, - number = {7727}, - pages = {367--372}, - doi = {10.1038/s41586-018-0590-4}, - url = {https://doi.org/10.1038/s41586-018-0590-4} -} - - -@article{tabula2020single, - title = {A single-cell transcriptomic atlas characterizes ageing tissues in the mouse}, - author = {{Tabula Muris Consortium}}, - year = {2020}, - month = {Jul.}, - journal = {Nature}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {583}, - number = {7817}, - pages = {590--595}, - doi = {10.1038/s41586-020-2496-1}, - url = {https://doi.org/10.1038/s41586-020-2496-1} -} - - -@article{tasic2016adult, - title = {Adult mouse cortical cell taxonomy revealed by single cell transcriptomics}, - author = {Bosiljka Tasic and Vilas Menon and Thuc Nghi Nguyen and Tae Kyung Kim and Tim Jarsky and Zizhen Yao and Boaz Levi and Lucas T Gray and Staci A Sorensen and Tim Dolbeare and Darren Bertagnolli and Jeff Goldy and Nadiya Shapovalova and Sheana Parry and Changkyu Lee and Kimberly Smith and Amy Bernard and Linda Madisen and Susan M Sunkin and Michael Hawrylycz and Christof Koch and Hongkui Zeng}, - year = {2016}, - month = {Jan}, - journal = {Nature Neuroscience}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {19}, - number = {2}, - pages = {335--346}, - doi = {10.1038/nn.4216}, - url = {https://doi.org/10.1038/nn.4216} -} - - -@article{tian2019benchmarking, - title = {Benchmarking single 
cell {RNA}-sequencing analysis pipelines using mixture control experiments}, - author = {Luyi Tian and Xueyi Dong and Saskia Freytag and Kim-Anh L{\^{e}} Cao and Shian Su and Abolfazl JalalAbadi and Daniela Amann-Zalcenstein and Tom S. Weber and Azadeh Seidi and Jafar S. Jabbari and Shalin H. Naik and Matthew E. Ritchie}, - year = {2019}, - month = {May}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {16}, - number = {6}, - pages = {479--487}, - doi = {10.1038/s41592-019-0425-8}, - url = {https://doi.org/10.1038/s41592-019-0425-8} -} - - -@article{tran2020benchmark, - doi = {10.1186/s13059-019-1850-9}, - url = {https://doi.org/10.1186/s13059-019-1850-9}, - year = {2020}, - month = {Jan}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {21}, - number = {1}, - author = {Hoa Thi Nhu Tran and Kok Siong Ang and Marion Chevrier and Xiaomeng Zhang and Nicole Yee Shin Lee and Michelle Goh and Jinmiao Chen}, - title = {A benchmark of batch-effect correction methods for single-cell {RNA} sequencing data}, - journal = {Genome Biology} -} - - -@article{van2018recovering, - title = {Recovering Gene Interactions from Single-Cell Data Using Data Diffusion}, - author = {David van Dijk and Roshan Sharma and Juozas Nainys and Kristina Yim and Pooja Kathail and Ambrose J. Carr and Cassandra Burdziak and Kevin R. Moon and Christine L. 
Chaffer and Diwakar Pattabiraman and Brian Bierie and Linas Mazutis and Guy Wolf and Smita Krishnaswamy and Dana Pe'er}, - year = {2018}, - month = {Jul.}, - journal = {Cell}, - publisher = {Elsevier {BV}}, - volume = {174}, - number = {3}, - pages = {716--729.e27}, - doi = {10.1016/j.cell.2018.05.061}, - url = {https://doi.org/10.1016/j.cell.2018.05.061} -} - - -@article{vandermaaten2008visualizing, - title = {Visualizing Data using t-SNE}, - author = {{van der} Maaten, Laurens and Hinton, Geoffrey}, - year = {2008}, - journal = {Journal of Machine Learning Research}, - volume = {9}, - number = {86}, - pages = {2579--2605}, - url = {http://jmlr.org/papers/v9/vandermaaten08a.html} -} - - -@inproceedings{venna2001neighborhood, - title = {Neighborhood Preservation in Nonlinear Projection Methods: An Experimental Study}, - author = {Jarkko Venna and Samuel Kaski}, - year = {2001}, - booktitle = {Artificial Neural Networks {\textemdash} {ICANN} 2001}, - publisher = {Springer Berlin Heidelberg}, - pages = {485--491}, - doi = {{10.1007/3-540-44668-0\_68}}, - url = {{https://doi.org/10.1007/3-540-44668-0\_68}} -} - - -@article{venna2006local, - title = {Local multidimensional scaling}, - author = {Jarkko Venna and Samuel Kaski}, - year = {2006}, - month = {Jul.}, - journal = {Neural Networks}, - publisher = {Elsevier {BV}}, - volume = {19}, - number = {6-7}, - pages = {889--899}, - doi = {10.1016/j.neunet.2006.05.014}, - url = {https://doi.org/10.1016/j.neunet.2006.05.014} -} - - -@article{virshup2021anndataannotateddata, - doi = {10.1101/2021.12.16.473007}, - url = {https://doi.org/10.1101/2021.12.16.473007}, - year = {2021}, - month = {Dec.}, - publisher = {Cold Spring Harbor Laboratory}, - author = {Isaac Virshup and Sergei Rybakov and Fabian J. Theis and Philipp Angerer and F. 
Alexander Wolf}, - title = {anndata: Annotated data} -} - - -@article{wagner2018knearest, - title = {K-nearest neighbor smoothing for high-throughput single-cell RNA-Seq data}, - author = {Wagner, Florian and Yan, Yun and Yanai, Itai}, - year = {2018}, - journal = {bioRxiv}, - publisher = {Cold Spring Harbor Laboratory}, - doi = {10.1101/217737}, - url = {https://www.biorxiv.org/content/early/2018/04/09/217737}, - elocation-id = {217737}, - eprint = {https://www.biorxiv.org/content/early/2018/04/09/217737.full.pdf} -} - - -@article{wagner2018single, - title = {Single-cell mapping of gene expression landscapes and lineage in the zebrafish embryo}, - author = {Daniel E. Wagner and Caleb Weinreb and Zach M. Collins and James A. Briggs and Sean G. Megason and Allon M. Klein}, - year = {2018}, - month = {Jun.}, - journal = {Science}, - publisher = {American Association for the Advancement of Science ({AAAS})}, - volume = {360}, - number = {6392}, - pages = {981--987}, - doi = {10.1126/science.aar4362}, - url = {https://doi.org/10.1126/science.aar4362} -} - - -@article{wang2013target, - title = {Target analysis by integration of transcriptome and {ChIP}-seq data with {BETA}}, - author = {Su Wang and Hanfei Sun and Jian Ma and Chongzhi Zang and Chenfei Wang and Juan Wang and Qianzi Tang and Clifford A Meyer and Yong Zhang and X Shirley Liu}, - year = {2013}, - month = {Nov.}, - journal = {Nature Protocols}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {8}, - number = {12}, - pages = {2502--2515}, - doi = {10.1038/nprot.2013.150}, - url = {https://doi.org/10.1038/nprot.2013.150} -} - - -@article{wang2017visualization, - title = {Visualization and analysis of single-cell {RNA}-seq data by kernel-based similarity learning}, - volume = {14}, - copyright = {2017 Springer Nature America, Inc.}, - issn = {1548-7105}, - url = {https://www.nature.com/articles/nmeth.4207}, - doi = {10.1038/nmeth.4207}, - abstract = {The SIMLR software identifies 
similarities between cells across a range of single-cell RNA-seq data, enabling effective dimension reduction, clustering and visualization.}, - language = {en}, - number = {4}, - journal = {Nature Methods}, - author = {Wang, Bo and Zhu, Junjie and Pierson, Emma and Ramazzotti, Daniele and Batzoglou, Serafim}, - month = apr, - year = {2017}, - publisher = {Nature Publishing Group}, - keywords = {Gene expression, Genome informatics, Machine learning, Statistical methods}, - pages = {414--416}, -} - - -@article{wang2018three, - title = {Three-dimensional intact-tissue sequencing of single-cell transcriptional states}, - volume = {361}, - ISSN = {1095-9203}, - url = {http://dx.doi.org/10.1126/science.aat5691}, - DOI = {10.1126/science.aat5691}, - number = {6400}, - journal = {Science}, - publisher = {American Association for the Advancement of Science (AAAS)}, - author = {Wang, Xiao and Allen, William E. and Wright, Matthew A. and Sylwestrak, Emily L. and Samusik, Nikolay and Vesuna, Sam and Evans, Kathryn and Liu, Cindy and Ramakrishnan, Charu and Liu, Jia and Nolan, Garry P. 
and Bava, Felice-Alessio and Deisseroth, Karl}, - year = {2018}, - month = jul -} - - -@article{wang2022high, - title = {High-resolution 3D spatiotemporal transcriptomic maps of developing Drosophila embryos and larvae}, - volume = {57}, - ISSN = {1534-5807}, - url = {http://dx.doi.org/10.1016/j.devcel.2022.04.006}, - DOI = {10.1016/j.devcel.2022.04.006}, - number = {10}, - journal = {Developmental Cell}, - publisher = {Elsevier BV}, - author = {Wang, Mingyue and Hu, Qinan and Lv, Tianhang and Wang, Yuhang and Lan, Qing and Xiang, Rong and Tu, Zhencheng and Wei, Yanrong and Han, Kai and Shi, Chang and Guo, Junfu and Liu, Chao and Yang, Tao and Du, Wensi and An, Yanru and Cheng, Mengnan and Xu, Jiangshan and Lu, Haorong and Li, Wangsheng and Zhang, Shaofang and Chen, Ao and Chen, Wei and Li, Yuxiang and Wang, Xiaoshan and Xu, Xun and Hu, Yuhui and Liu, Longqi}, - year = {2022}, - month = may, - pages = {1271--1283.e4} -} - - -@article{weber2023nnsvg, - title={nnSVG for the scalable identification of spatially variable genes using nearest-neighbor Gaussian processes}, - author={Weber, Lukas M and Saha, Arkajyoti and Datta, Abhirup and Hansen, Kasper D and Hicks, Stephanie C}, - journal={Nature communications}, - volume={14}, - number={1}, - pages={4059}, - year={2023}, - publisher={Nature Publishing Group UK London}, - doi={10.1038/s41467-023-39748-z} -} - - -@article{welch2019single, - title = {Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity}, - author = {Joshua D. Welch and Velina Kozareva and Ashley Ferreira and Charles Vanderburg and Carly Martin and Evan Z. 
Macosko}, - year = {2019}, - month = {Jun.}, - journal = {Cell}, - publisher = {Elsevier {BV}}, - volume = {177}, - number = {7}, - pages = {1873--1887.e17}, - doi = {10.1016/j.cell.2019.05.006}, - url = {https://doi.org/10.1016/j.cell.2019.05.006} -} - - -@article{wilkinson1973symbolic, - doi = {10.2307/2346786}, - url = {https://doi.org/10.2307/2346786}, - year = {1973}, - publisher = {{JSTOR}}, - volume = {22}, - number = {3}, - pages = {392}, - author = {G. N. Wilkinson and C. E. Rogers}, - title = {Symbolic Description of Factorial Models for Analysis of Variance}, - journal = {Applied Statistics} -} - - -@article{wu2021single, - title = {A single-cell and spatially resolved atlas of human breast cancers}, - author = {Sunny Z. Wu and Ghamdan Al-Eryani and Daniel Lee Roden and Simon Junankar and Kate Harvey and Alma Andersson and Aatish Thennavan and Chenfei Wang and James R. Torpy and Nenad Bartonicek and Taopeng Wang and Ludvig Larsson and Dominik Kaczorowski and Neil I. Weisenfeld and Cedric R. Uytingco and Jennifer G. Chew and Zachary W. Bent and Chia-Ling Chan and Vikkitharan Gnanasambandapillai and Charles-Antoine Dutertre and Laurence Gluch and Mun N. Hui and Jane Beith and Andrew Parker and Elizabeth Robbins and Davendra Segara and Caroline Cooper and Cindy Mak and Belinda Chan and Sanjay Warrier and Florent Ginhoux and Ewan Millar and Joseph E. Powell and Stephen R. Williams and X. Shirley Liu and Sandra O'Toole and Elgene Lim and Joakim Lundeberg and Charles M. 
Perou and Alexander Swarbrick}, - year = {2021}, - month = {Sept.}, - journal = {Nature Genetics}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {53}, - number = {9}, - pages = {1334--1347}, - doi = {10.1038/s41588-021-00911-1}, - url = {https://doi.org/10.1038/s41588-021-00911-1} -} - - -@article{xiong2020neuralee, - title = {{NeuralEE}: A {GPU}-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale {scRNA}-Seq Data}, - author = {Jiankang Xiong and Fuzhou Gong and Lin Wan and Liang Ma}, - year = {2020}, - month = {Oct.}, - journal = {Frontiers in Genetics}, - publisher = {Frontiers Media {SA}}, - volume = {11}, - doi = {10.3389/fgene.2020.00786}, - url = {https://doi.org/10.3389/fgene.2020.00786} -} - - -@article{xiong2021online, - title = {Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space}, - author = {Lei Xiong and Kang Tian and Yuzhe Li and Weixi Ning and Xin Gao and Qiangfeng Cliff Zhang}, - year = {2022}, - month = {Oct.}, - journal = {Nature Communications}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {13}, - number = {1}, - doi = {10.1038/s41467-022-33758-z}, - url = {https://doi.org/10.1038/s41467-022-33758-z} -} - - -@article{xu2021probabilistic, - title = {Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models}, - author = {Chenling Xu and Romain Lopez and Edouard Mehlman and Jeffrey Regier and Michael I Jordan and Nir Yosef}, - year = {2021}, - month = {Jan}, - journal = {Molecular Systems Biology}, - publisher = {{Embo}}, - volume = {17}, - number = {1}, - doi = {10.15252/msb.20209620}, - url = {https://doi.org/10.15252/msb.20209620} -} - - -@article{zappia2018exploring, - doi = {10.1371/journal.pcbi.1006245}, - url = {https://doi.org/10.1371/journal.pcbi.1006245}, - year = {2018}, - month = {Jun.}, - publisher = {Public Library of Science ({PLoS})}, - 
volume = {14}, - number = {6}, - pages = {e1006245}, - author = {Luke Zappia and Belinda Phipson and Alicia Oshlack}, - editor = {Dina Schneidman}, - title = {Exploring the single-cell {RNA}-seq analysis landscape with the {scRNA}-tools database}, - journal = {{PLOS} Computational Biology} -} - - -@article{zhang2021pydrmetrics, - title = {{pyDRMetrics} - A Python toolkit for dimensionality reduction quality assessment}, - author = {Yinsheng Zhang and Qian Shang and Guoming Zhang}, - year = {2021}, - month = {Feb.}, - journal = {Heliyon}, - publisher = {Elsevier {BV}}, - volume = {7}, - number = {2}, - pages = {e06199}, - doi = {10.1016/j.heliyon.2021.e06199}, - url = {https://doi.org/10.1016/j.heliyon.2021.e06199} -} - - -@article{zhang2022identification, - title={Identification of spatially variable genes with graph cuts}, - author={Zhang, Ke and Feng, Wanwan and Wang, Peng}, - journal={Nature Communications}, - volume={13}, - number={1}, - pages={5488}, - year={2022}, - publisher={Nature Publishing Group UK London}, - doi={10.1038/s41467-022-33182-3} -} - - -@article{zhu2021spark, - title={SPARK-X: non-parametric modeling enables scalable and robust detection of spatial expression patterns for large spatial transcriptomic studies}, - author={Zhu, Jiaqiang and Sun, Shiquan and Zhou, Xiang}, - journal={Genome biology}, - volume={22}, - number={1}, - pages={184}, - year={2021}, - publisher={Springer}, - doi={10.1186/s13059-021-02404-0} -} - - -@article {hrovatin2023delineating, - author = {Karin Hrovatin and Aim{\'e}e Bastidas-Ponce and Mostafa Bakhti and Luke Zappia and Maren B{\"u}ttner and Ciro Sallino and Michael Sterr and Anika B{\"o}ttcher and Adriana Migliorini and Heiko Lickert and Fabian J. 
Theis}, - title = {Delineating mouse β-cell identity during lifetime and in diabetes with a single cell atlas}, - elocation-id = {2022.12.22.521557}, - year = {2023}, - doi = {10.1101/2022.12.22.521557}, - publisher = {Cold Spring Harbor Laboratory}, - URL = {https://www.biorxiv.org/content/early/2023/04/25/2022.12.22.521557}, - eprint = {https://www.biorxiv.org/content/early/2023/04/25/2022.12.22.521557.full.pdf}, - journal = {bioRxiv} -} - -@article{sikkema2023integrated, - title = {An integrated cell atlas of the lung in health and disease}, - volume = {29}, - ISSN = {1546-170X}, - url = {http://dx.doi.org/10.1038/s41591-023-02327-2}, - DOI = {10.1038/s41591-023-02327-2}, - number = {6}, - journal = {Nature Medicine}, - publisher = {Springer Science and Business Media LLC}, - author = {Sikkema, Lisa and Ramírez-Suástegui, Ciro and Strobl, Daniel C. and Gillett, Tessa E. and Zappia, Luke and Madissoon, Elo and Markov, Nikolay S. and Zaragosi, Laure-Emmanuelle and Ji, Yuge and Ansari, Meshal and Arguel, Marie-Jeanne and Apperloo, Leonie and Banchero, Martin and Bécavin, Christophe and Berg, Marijn and Chichelnitskiy, Evgeny and Chung, Mei-i and Collin, Antoine and Gay, Aurore C. A. and Gote-Schniering, Janine and Hooshiar Kashani, Baharak and Inecik, Kemal and Jain, Manu and Kapellos, Theodore S. and Kole, Tessa M. and Leroy, Sylvie and Mayr, Christoph H. and Oliver, Amanda J. and von Papen, Michael and Peter, Lance and Taylor, Chase J. and Walzthoeni, Thomas and Xu, Chuan and Bui, Linh T. and De Donno, Carlo and Dony, Leander and Faiz, Alen and Guo, Minzhe and Gutierrez, Austin J. and Heumos, Lukas and Huang, Ni and Ibarra, Ignacio L. and Jackson, Nathan D. and Kadur Lakshminarasimha Murthy, Preetish and Lotfollahi, Mohammad and Tabib, Tracy and Talavera-López, Carlos and Travaglini, Kyle J. and Wilbrey-Clark, Anna and Worlock, Kaylee B. and Yoshida, Masahiro and Chen, Yuexin and Hagood, James S. 
and Agami, Ahmed and Horvath, Peter and Lundeberg, Joakim and Marquette, Charles-Hugo and Pryhuber, Gloria and Samakovlis, Chistos and Sun, Xin and Ware, Lorraine B. and Zhang, Kun and van den Berge, Maarten and Bossé, Yohan and Desai, Tushar J. and Eickelberg, Oliver and Kaminski, Naftali and Krasnow, Mark A. and Lafyatis, Robert and Nikolic, Marko Z. and Powell, Joseph E. and Rajagopal, Jayaraj and Rojas, Mauricio and Rozenblatt-Rosen, Orit and Seibold, Max A. and Sheppard, Dean and Shepherd, Douglas P. and Sin, Don D. and Timens, Wim and Tsankov, Alexander M. and Whitsett, Jeffrey and Xu, Yan and Banovich, Nicholas E. and Barbry, Pascal and Duong, Thu Elizabeth and Falk, Christine S. and Meyer, Kerstin B. and Kropski, Jonathan A. and Pe’er, Dana and Schiller, Herbert B. and Tata, Purushothama Rao and Schultze, Joachim L. and Teichmann, Sara A. and Misharin, Alexander V. and Nawijn, Martijn C. and Luecken, Malte D. and Theis, Fabian J.}, - year = {2023}, - month = jun, - pages = {1563–1577} -} - -@article{consortium2022tabula, - title = {The Tabula Sapiens: A multiple-organ, single-cell transcriptomic atlas of humans}, - volume = {376}, - ISSN = {1095-9203}, - url = {http://dx.doi.org/10.1126/science.abl4896}, - DOI = {10.1126/science.abl4896}, - number = {6594}, - journal = {Science}, - publisher = {American Association for the Advancement of Science (AAAS)}, - author = {Jones, Robert C. and Karkanias, Jim and Krasnow, Mark A. and Pisco, Angela Oliveira and Quake, Stephen R. and Salzman, Julia and Yosef, Nir and Bulthaup, Bryan and Brown, Phillip and Harper, William and Hemenez, Marisa and Ponnusamy, Ravikumar and Salehi, Ahmad and Sanagavarapu, Bhavani A. and Spallino, Eileen and Aaron, Ksenia A. and Concepcion, Waldo and Gardner, James M. and Kelly, Burnett and Neidlinger, Nikole and Wang, Zifa and Crasta, Sheela and Kolluru, Saroja and Morri, Maurizio and Pisco, Angela Oliveira and Tan, Serena Y. and Travaglini, Kyle J. 
and Xu, Chenling and Alcántara-Hernández, Marcela and Almanzar, Nicole and Antony, Jane and Beyersdorf, Benjamin and Burhan, Deviana and Calcuttawala, Kruti and Carter, Matthew M. and Chan, Charles K. F. and Chang, Charles A. and Chang, Stephen and Colville, Alex and Crasta, Sheela and Culver, Rebecca N. and Cvijović, Ivana and D’Amato, Gaetano and Ezran, Camille and Galdos, Francisco X. and Gillich, Astrid and Goodyer, William R. and Hang, Yan and Hayashi, Alyssa and Houshdaran, Sahar and Huang, Xianxi and Irwin, Juan C. and Jang, SoRi and Juanico, Julia Vallve and Kershner, Aaron M. and Kim, Soochi and Kiss, Bernhard and Kolluru, Saroja and Kong, William and Kumar, Maya E. and Kuo, Angera H. and Leylek, Rebecca and Li, Baoxiang and Loeb, Gabriel B. and Lu, Wan-Jin and Mantri, Sruthi and Markovic, Maxim and McAlpine, Patrick L. and de Morree, Antoine and Morri, Maurizio and Mrouj, Karim and Mukherjee, Shravani and Muser, Tyler and Neuh\"{o}fer, Patrick and Nguyen, Thi D. and Perez, Kimberly and Phansalkar, Ragini and Pisco, Angela Oliveira and Puluca, Nazan and Qi, Zhen and Rao, Poorvi and Raquer-McKay, Hayley and Schaum, Nicholas and Scott, Bronwyn and Seddighzadeh, Bobak and Segal, Joe and Sen, Sushmita and Sikandar, Shaheen and Spencer, Sean P. and Steffes, Lea C. and Subramaniam, Varun R. and Swarup, Aditi and Swift, Michael and Travaglini, Kyle J. and Van Treuren, Will and Trimm, Emily and Veizades, Stefan and Vijayakumar, Sivakamasundari and Vo, Kim Chi and Vorperian, Sevahn K. and Wang, Wanxin and Weinstein, Hannah N. W. and Winkler, Juliane and Wu, Timothy T. H. and Xie, Jamie and Yung, Andrea R. and Zhang, Yue and Detweiler, Angela M. and Mekonen, Honey and Neff, Norma F. and Sit, Rene V. and Tan, Michelle and Yan, Jia and Bean, Gregory R. and Charu, Vivek and Forgó, Erna and Martin, Brock A. and Ozawa, Michael G. and Silva, Oscar and Tan, Serena Y. and Toland, Angus and Vemuri, Venkata N. P. 
and Afik, Shaked and Awayan, Kyle and Botvinnik, Olga Borisovna and Byrne, Ashley and Chen, Michelle and Dehghannasiri, Roozbeh and Detweiler, Angela M. and Gayoso, Adam and Granados, Alejandro A. and Li, Qiqing and Mahmoudabadi, Gita and McGeever, Aaron and de Morree, Antoine and Olivieri, Julia Eve and Park, Madeline and Pisco, Angela Oliveira and Ravikumar, Neha and Salzman, Julia and Stanley, Geoff and Swift, Michael and Tan, Michelle and Tan, Weilun and Tarashansky, Alexander J. and Vanheusden, Rohan and Vorperian, Sevahn K. and Wang, Peter and Wang, Sheng and Xing, Galen and Xu, Chenling and Yosef, Nir and Alcántara-Hernández, Marcela and Antony, Jane and Chan, Charles K. F. and Chang, Charles A. and Colville, Alex and Crasta, Sheela and Culver, Rebecca and Dethlefsen, Les and Ezran, Camille and Gillich, Astrid and Hang, Yan and Ho, Po-Yi and Irwin, Juan C. and Jang, SoRi and Kershner, Aaron M. and Kong, William and Kumar, Maya E. and Kuo, Angera H. and Leylek, Rebecca and Liu, Shixuan and Loeb, Gabriel B. and Lu, Wan-Jin and Maltzman, Jonathan S. and Metzger, Ross J. and de Morree, Antoine and Neuh\"{o}fer, Patrick and Perez, Kimberly and Phansalkar, Ragini and Qi, Zhen and Rao, Poorvi and Raquer-McKay, Hayley and Sasagawa, Koki and Scott, Bronwyn and Sinha, Rahul and Song, Hanbing and Spencer, Sean P. and Swarup, Aditi and Swift, Michael and Travaglini, Kyle J. and Trimm, Emily and Veizades, Stefan and Vijayakumar, Sivakamasundari and Wang, Bruce and Wang, Wanxin and Winkler, Juliane and Xie, Jamie and Yung, Andrea R. and Artandi, Steven E. and Beachy, Philip A. and Clarke, Michael F. and Giudice, Linda C. and Huang, Franklin W. and Huang, Kerwyn Casey and Idoyaga, Juliana and Kim, Seung K. and Krasnow, Mark and Kuo, Christin S. and Nguyen, Patricia and Quake, Stephen R. and Rando, Thomas A. and Red-Horse, Kristy and Reiter, Jeremy and Relman, David A. and Sonnenburg, Justin L. and Wang, Bruce and Wu, Albert and Wu, Sean M. 
and Wyss-Coray, Tony}, - year = {2022}, - month = may -} - -@article{dominguez2022crosstissue, - title = {Cross-tissue immune cell analysis reveals tissue-specific features in humans}, - volume = {376}, - ISSN = {1095-9203}, - url = {http://dx.doi.org/10.1126/science.abl5197}, - DOI = {10.1126/science.abl5197}, - number = {6594}, - journal = {Science}, - publisher = {American Association for the Advancement of Science (AAAS)}, - author = {Domínguez Conde, C. and Xu, C. and Jarvis, L. B. and Rainbow, D. B. and Wells, S. B. and Gomes, T. and Howlett, S. K. and Suchanek, O. and Polanski, K. and King, H. W. and Mamanova, L. and Huang, N. and Szabo, P. A. and Richardson, L. and Bolt, L. and Fasouli, E. S. and Mahbubani, K. T. and Prete, M. and Tuck, L. and Richoz, N. and Tuong, Z. K. and Campos, L. and Mousa, H. S. and Needham, E. J. and Pritchard, S. and Li, T. and Elmentaite, R. and Park, J. and Rahmani, E. and Chen, D. and Menon, D. K. and Bayraktar, O. A. and James, L. K. and Meyer, K. B. and Yosef, N. and Clatworthy, M. R. and Sims, P. A. and Farber, D. L. and Saeb-Parsy, K. and Jones, J. L. and Teichmann, S. A.}, - year = {2022}, - month = may -} - -@article{eraslan2022singlenucleus, - title = {Single-nucleus cross-tissue molecular reference maps toward understanding disease gene function}, - volume = {376}, - ISSN = {1095-9203}, - url = {http://dx.doi.org/10.1126/science.abl4290}, - DOI = {10.1126/science.abl4290}, - number = {6594}, - journal = {Science}, - publisher = {American Association for the Advancement of Science (AAAS)}, - author = {Eraslan, G\"{o}kcen and Drokhlyansky, Eugene and Anand, Shankara and Fiskin, Evgenij and Subramanian, Ayshwarya and Slyper, Michal and Wang, Jiali and Van Wittenberghe, Nicholas and Rouhana, John M. and Waldman, Julia and Ashenberg, Orr and Lek, Monkol and Dionne, Danielle and Win, Thet Su and Cuoco, Michael S. and Kuksenko, Olena and Tsankov, Alexander M. and Branton, Philip A. and Marshall, Jamie L. 
and Greka, Anna and Getz, Gad and Segrè, Ayellet V. and Aguet, Fran\c{c}ois and Rozenblatt-Rosen, Orit and Ardlie, Kristin G. and Regev, Aviv}, - year = {2022}, - month = may -} - -@article{li2023integrated, - title = {Integrated multi-omics single cell atlas of the human retina}, - url = {http://dx.doi.org/10.1101/2023.11.07.566105}, - DOI = {10.1101/2023.11.07.566105}, - publisher = {Cold Spring Harbor Laboratory}, - author = {Li, Jin and Wang, Jun and Ibarra, Ignacio L and Cheng, Xuesen and Luecken, Malte D and Lu, Jiaxiong and Monavarfeshani, Aboozar and Yan, Wenjun and Zheng, Yiqiao and Zuo, Zhen and Zayas Colborn, Samantha Lynn and Cortez, Berenice Sarahi and Owen, Leah A and Tran, Nicholas M and Shekhar, Karthik and Sanes, Joshua R and Stout, J Timothy and Chen, Shiming and Li, Yumei and DeAngelis, Margaret M and Theis, Fabian J and Chen, Rui}, - year = {2023}, - month = nov -} - -@article{wilson2022multimodal, - title = {Multimodal single cell sequencing implicates chromatin accessibility and genetic background in diabetic kidney disease progression}, - volume = {13}, - ISSN = {2041-1723}, - url = {http://dx.doi.org/10.1038/s41467-022-32972-z}, - DOI = {10.1038/s41467-022-32972-z}, - number = {1}, - journal = {Nature Communications}, - publisher = {Springer Science and Business Media LLC}, - author = {Wilson, Parker C. and Muto, Yoshiharu and Wu, Haojia and Karihaloo, Anil and Waikar, Sushrut S. and Humphreys, Benjamin D.}, - year = {2022}, - month = sep -} - -@article{steuernagel2022hypomap, - title = {HypoMap—a unified single-cell gene expression atlas of the murine hypothalamus}, - volume = {4}, - ISSN = {2522-5812}, - url = {http://dx.doi.org/10.1038/s42255-022-00657-y}, - DOI = {10.1038/s42255-022-00657-y}, - number = {10}, - journal = {Nature Metabolism}, - publisher = {Springer Science and Business Media LLC}, - author = {Steuernagel, Lukas and Lam, Brian Y. H. and Klemm, Paul and Dowsett, Georgina K. C. and Bauder, Corinna A. and Tadross, John A. 
and Hitschfeld, Tamara Sotelo and del Rio Martin, Almudena and Chen, Weiyi and de Solis, Alain J. and Fenselau, Henning and Davidsen, Peter and Cimino, Irene and Kohnke, Sara N. and Rimmington, Debra and Coll, Anthony P. and Beyer, Andreas and Yeo, Giles S. H. and Br\"{u}ning, Jens C.}, - year = {2022}, - month = oct, - pages = {1402–1419} -} - -@article{tian2023singlecell, - title = {Single-cell DNA methylation and 3D genome architecture in the human brain}, - volume = {382}, - ISSN = {1095-9203}, - url = {http://dx.doi.org/10.1126/science.adf5357}, - DOI = {10.1126/science.adf5357}, - number = {6667}, - journal = {Science}, - publisher = {American Association for the Advancement of Science (AAAS)}, - author = {Tian, Wei and Zhou, Jingtian and Bartlett, Anna and Zeng, Qiurui and Liu, Hanqing and Castanon, Rosa G. and Kenworthy, Mia and Altshul, Jordan and Valadon, Cynthia and Aldridge, Andrew and Nery, Joseph R. and Chen, Huaming and Xu, Jiaying and Johnson, Nicholas D. and Lucero, Jacinta and Osteen, Julia K. and Emerson, Nora and Rink, Jon and Lee, Jasper and Li, Yang E. and Siletti, Kimberly and Liem, Michelle and Claffey, Naomi and O’Connor, Carolyn and Yanny, Anna Marie and Nyhus, Julie and Dee, Nick and Casper, Tamara and Shapovalova, Nadiya and Hirschstein, Daniel and Ding, Song-Lin and Hodge, Rebecca and Levi, Boaz P. and Keene, C. Dirk and Linnarsson, Sten and Lein, Ed and Ren, Bing and Behrens, M. 
Margarita and Ecker, Joseph R.}, - year = {2023}, - month = oct -} - - -@article{sonrel2023metaanalysis, - title = {Meta-analysis of (single-cell method) benchmarks reveals the need for extensibility and interoperability}, - volume = {24}, - ISSN = {1474-760X}, - url = {http://dx.doi.org/10.1186/s13059-023-02962-5}, - DOI = {10.1186/s13059-023-02962-5}, - number = {1}, - journal = {Genome Biology}, - publisher = {Springer Science and Business Media LLC}, - author = {Sonrel, Anthony and Luetge, Almut and Soneson, Charlotte and Mallona, Izaskun and Germain, Pierre-Luc and Knyazev, Sergey and Gilis, Jeroen and Gerber, Reto and Seurinck, Ruth and Paul, Dominique and Sonder, Emanuel and Crowell, Helena L. and Fanaswala, Imran and Al-Ajami, Ahmad and Heidari, Elyas and Schmeing, Stephan and Milosavljevic, Stefan and Saeys, Yvan and Mangul, Serghei and Robinson, Mark D.}, - year = {2023}, - month = may -} - - -@article{saelens2019comparison, - title = {A comparison of single-cell trajectory inference methods}, - volume = {37}, - ISSN = {1546-1696}, - url = {http://dx.doi.org/10.1038/s41587-019-0071-9}, - DOI = {10.1038/s41587-019-0071-9}, - number = {5}, - journal = {Nature Biotechnology}, - publisher = {Springer Science and Business Media LLC}, - author = {Saelens, Wouter and Cannoodt, Robrecht and Todorov, Helena and Saeys, Yvan}, - year = {2019}, - month = apr, - pages = {547–554} -} - - -@article{huang2018savergene, - title = {SAVER: gene expression recovery for single-cell RNA sequencing}, - volume = {15}, - ISSN = {1548-7105}, - url = {http://dx.doi.org/10.1038/s41592-018-0033-z}, - DOI = {10.1038/s41592-018-0033-z}, - number = {7}, - journal = {Nature Methods}, - publisher = {Springer Science and Business Media LLC}, - author = {Huang, Mo and Wang, Jingshu and Torre, Eduardo and Dueck, Hannah and Shaffer, Sydney and Bonasio, Roberto and Murray, John I. 
and Raj, Arjun and Li, Mingyao and Zhang, Nancy R.}, - year = {2018}, - month = jun, - pages = {539–542} -} - - -@article{chari2023speciousart, - title = {The specious art of single-cell genomics}, - volume = {19}, - ISSN = {1553-7358}, - url = {http://dx.doi.org/10.1371/journal.pcbi.1011288}, - DOI = {10.1371/journal.pcbi.1011288}, - number = {8}, - journal = {PLOS Computational Biology}, - publisher = {Public Library of Science (PLoS)}, - author = {Chari, Tara and Pachter, Lior}, - editor = {Papin, Jason A.}, - year = {2023}, - month = aug, - pages = {e1011288} -} - diff --git a/src/common/sync_test_resources/config.vsh.yaml b/src/common/sync_test_resources/config.vsh.yaml deleted file mode 100644 index f443d634e8..0000000000 --- a/src/common/sync_test_resources/config.vsh.yaml +++ /dev/null @@ -1,44 +0,0 @@ -functionality: - name: "sync_test_resources" - namespace: "common" - version: "dev" - description: Synchronise the test resources from s3 to resources_test - usage: | - sync_test_resources - sync_test_resources --input s3://openproblems-data/resources_test --output resources_test - arguments: - - name: "--input" - alternatives: ["-i"] - type: string - description: "Path to the S3 bucket to sync from." - default: "s3://openproblems-data/resources_test" - - name: "--output" - alternatives: ["-o"] - type: file - default: resources_test - direction: output - description: "Path to the test resource directory." - - name: "--quiet" - type: boolean_true - description: "Displays the operations that would be performed using the specified command without actually running them." - - name: "--dryrun" - type: boolean_true - description: "Does not display the operations performed from the specified command." - - name: "--delete" - type: boolean_true - description: "Files that exist in the destination but not in the source are deleted during sync." 
- - name: "--exclude" - type: "string" - multiple: true - description: Exclude all files or objects from the command that matches the specified pattern. - resources: - - type: bash_script - path: script.sh - test_resources: - - type: bash_script - path: run_test.sh -platforms: - - type: docker - image: "amazon/aws-cli:2.7.12" - - type: native - - type: nextflow diff --git a/src/common/sync_test_resources/script.sh b/src/common/sync_test_resources/script.sh deleted file mode 100644 index c97b9fcdfd..0000000000 --- a/src/common/sync_test_resources/script.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -## VIASH START -par_input='s3://openproblems-data/resources_test' -par_output='resources_test' -## VIASH END - -extra_params=( ) - -if [ "$par_quiet" == "true" ]; then - extra_params+=( "--quiet" ) -fi -if [ "$par_dryrun" == "true" ]; then - extra_params+=( "--dryrun" ) -fi -if [ "$par_delete" == "true" ]; then - extra_params+=( "--delete" ) -fi - -if [ ! -z ${par_exclude+x} ]; then - IFS=":" - for var in $par_exclude; do - unset IFS - extra_params+=( "--exclude" "$var" ) - done -fi - - -# Disable the use of the Amazon EC2 instance metadata service (IMDS). -# see https://florian.ec/blog/github-actions-awscli-errors/ -# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 -export AWS_EC2_METADATA_DISABLED=true - -aws s3 sync "$par_input" "$par_output" --no-sign-request "${extra_params[@]}" diff --git a/src/files/extract_uns_metadata/config.vsh.yaml b/src/files/extract_uns_metadata/config.vsh.yaml new file mode 100644 index 0000000000..e415b25eac --- /dev/null +++ b/src/files/extract_uns_metadata/config.vsh.yaml @@ -0,0 +1,45 @@ +name: extract_uns_metadata +namespace: files +description: Extract .uns metadata from an h5ad file and write it to a yaml file. +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + required: true + description: A h5ad file. 
+ - name: --schema + type: file + required: false + description: An optional schema with which to annotate the output + - name: --uns_length_cutoff + type: integer + required: false + description: The maximum length of the .uns metadata to extract. If a value in uns is a list or a dictionary with more elements than the provided cutoff, it will not be extracted. + default: 10 + - name: Output + arguments: + - name: --output + type: file + required: true + description: A yaml file containing the metadata. + example: output_meta.yaml + direction: output +resources: + - type: python_script + path: script.py +test_resources: + - path: /resources_test/common/pancreas + - type: python_script + path: test.py +engines: + - type: docker + image: openproblems/base_python:1.0.0 + test_setup: + - type: python + packages: viashpy +runners: + - type: executable + - type: nextflow + directives: + label: [midtime, midmem, midcpu] diff --git a/src/common/extract_metadata/script.py b/src/files/extract_uns_metadata/script.py similarity index 88% rename from src/common/extract_metadata/script.py rename to src/files/extract_uns_metadata/script.py index 7a55b50e21..5d759b60a6 100644 --- a/src/common/extract_metadata/script.py +++ b/src/files/extract_uns_metadata/script.py @@ -21,6 +21,14 @@ print("Load schema", flush=True) with open(par["schema"], "r") as f: schema = yaml.safe_load(f) + + schema_info = schema.get("info") or {} + assert schema_info, "Schema must contain an 'info' field" + + schema_info_format = schema_info.get("format") or {} + assert schema_info_format, "Schema must contain a '.info.format' field" + + assert schema_info_format.get("type") == "h5ad", ".info.format.type must be 'h5ad'" else: schema = None @@ -114,7 +122,8 @@ def get_structure_dtype(obj) -> str: def get_structure_schema_info(struct, key) -> dict: if schema is None: return {} - struct_args = schema.get("info", {}).get("slots", {}).get(struct, {}) + + struct_args = schema_info_format.get(struct, {}) if 
struct_args is None: return {} if struct == "X": @@ -149,10 +158,15 @@ def get_structure(adata, struct): # see if the schema has information about this struct schema_info = get_structure_schema_info(struct, key) - if schema_info.get("description"): - out["description"] = schema_info.get("description") - if schema_info.get("type"): - out["schema_type"] = schema_info.get("type") + copy = { + "description": "description", + "summary": "summary", + "label": "label", + "schema_type": "type" + } + for k, v in copy.items(): + if schema_info.get(v): + out[k] = schema_info.get(v) output.append(out) @@ -176,16 +190,15 @@ def get_file_creation_time(path: str) -> str: creation_time = creation_time.strftime('%d-%m-%Y') return str(creation_time) - print("Extract metadata from object", flush=True) # Extract metadata about the adata object uns = {} for key, val in adata.uns.items(): if is_atomic(val): uns[key] = to_atomic(val) - elif is_list_of_atomics(val) and len(val) <= 10: + elif is_list_of_atomics(val) and len(val) <= par["uns_length_cutoff"]: uns[key] = to_list_of_atomics(val) - elif is_dict_of_atomics(val) and len(val) <= 10: + elif is_dict_of_atomics(val) and len(val) <= par["uns_length_cutoff"]: uns[key] = to_dict_of_atomics(val) uns["file_size"] = get_file_size(par["input"]) diff --git a/src/files/extract_uns_metadata/test.py b/src/files/extract_uns_metadata/test.py new file mode 100644 index 0000000000..1884bbd47a --- /dev/null +++ b/src/files/extract_uns_metadata/test.py @@ -0,0 +1,57 @@ +import sys +import pytest +import yaml + +## VIASH START +## VIASH END + +input_path = meta["resources_dir"] + "/pancreas/dataset.h5ad" + +@pytest.fixture +def file_raw(tmp_path): + file_raw_content = { + "type": "file", + "label": "Raw dataset", + "summary": "An unprocessed dataset as output by a dataset loader.", + "description": "This dataset contains raw counts and metadata as output by a dataset loader.", + "info": { + "format": { + "type": "h5ad", + "layers": [ + { + "type": 
"integer", + "name": "counts", + "description": "Raw counts", + "required": True + } + ], + "obs": [ + { + "type": "string", + "name": "celltype", + "description": "Classification of the cell type based on its characteristics and function within the tissue or organism.", + "required": True + } + ] + } + } + } + file_raw_path = tmp_path / "file_raw.yaml" + with open(file_raw_path, "w") as f: + f.write(yaml.dump(file_raw_content)) + + return file_raw_path + +def test_run(run_component, file_raw, tmp_path): + output_path = tmp_path / "meta.yaml" + + run_component([ + "--input", input_path, + "--schema", str(file_raw), + "--output", str(output_path), + ]) + + assert output_path.exists(), "Output path does not exist" + +if __name__ == "__main__": + sys.exit(pytest.main([__file__])) diff --git a/src/files/validate_dataset_with_schema/config.vsh.yaml b/src/files/validate_dataset_with_schema/config.vsh.yaml new file mode 100644 index 0000000000..bc5e1bb3e4 --- /dev/null +++ b/src/files/validate_dataset_with_schema/config.vsh.yaml @@ -0,0 +1,47 @@ +name: validate_dataset_with_schema +namespace: files +summary: Check the format of a file against a schema +description: Checks if the file has the necessary data structures as defined in a schema. +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + required: true + description: An input file. Can be an .h5ad, .parquet, .csv, or .tsv file. + - name: --schema + type: file + required: true + description: A schema file for the input object. + - name: Arguments + arguments: + - name: --stop_on_error + type: boolean + default: false + description: Whether or not to stop with exit code 1 if the input file does not adhere to the schema. + - name: Output + arguments: + - name: --output + type: file + required: true + description: If specified, this file will contain a structured log of which checks succeeded (or not). 
+ example: checks.json + direction: output +resources: + - type: python_script + path: script.py +test_resources: + - path: /resources_test/common/pancreas + - type: python_script + path: test.py +engines: + - type: docker + image: openproblems/base_python:1.0.0 + test_setup: + - type: python + packages: viashpy +runners: + - type: executable + - type: nextflow + directives: + label: [midtime, midmem, midcpu] diff --git a/src/files/validate_dataset_with_schema/script.py b/src/files/validate_dataset_with_schema/script.py new file mode 100644 index 0000000000..1768287fa9 --- /dev/null +++ b/src/files/validate_dataset_with_schema/script.py @@ -0,0 +1,93 @@ +import anndata as ad +import pandas as pd +import yaml +import json + +## VIASH START +par = { + 'input': 'work/d4/f4fabc8aa4f2308841d4ab57bcff62/_viash_par/input_1/dataset.h5ad', + 'schema': 'work/d4/f4fabc8aa4f2308841d4ab57bcff62/_viash_par/schema_1/schema.yaml', + 'stop_on_error': False, + 'output': 'work/d4/f4fabc8aa4f2308841d4ab57bcff62/out.yaml', +} +## VIASH END + +# TODO: need to refactor to reuse the same helper functions as in 'run_and_check_output.py'. 
+ +def check_h5ad_struct(struc, struc_fields, adata_slot): + missing = [] + if struc == "X": + struc_fields["name"] = "X" + struc_fields = [struc_fields] + for obj in struc_fields: + adata_data = adata_slot.get(obj['name']) if struc != 'X' else adata_slot + if obj.get('required') and adata_data is None: + missing.append(obj['name']) + # todo: check types + return missing + +def check_df_columns(df, columns): + missing = [] + for col in columns: + if col not in df.columns: + missing.append(col) + return missing + +print("Load schema", flush=True) +with open(par["schema"], "r") as f: + schema = yaml.safe_load(f) + +schema_info = schema.get("info") +assert schema_info, "Schema must contain an 'info' field" + +schema_info_format = schema_info.get("format") +assert schema_info_format, "Schema must contain a '.info.format' field" + +format_type = schema_info_format.get("type") +assert format_type == "h5ad", ".info.format.type must be 'h5ad'" + +# create output data structure +out = { + "exit_code": 0, + "error": {}, + "data_schema": "ok" +} + +print('Load data', flush=True) +if format_type == "h5ad": + data = ad.read_h5ad(par['input']) +elif format_type == "csv": + data = pd.read_csv(par['input']) +elif format_type == "tsv": + data = pd.read_csv(par['input'], sep="\t") +elif format_type == "parquet": + data = pd.read_parquet(par['input']) +else: + raise ValueError(f"Unknown .info.format.type '{format_type}'") + +out = { + "exit_code": 0, + "error": {}, + "data_schema": "ok" +} +print("Check file against schema", flush=True) +if format_type == "h5ad": + for struc, struc_fields in schema_info_format.items(): + if struc == "type": + continue + print("Checking slot", struc, flush=True) + missing = check_h5ad_struct(struc, struc_fields, getattr(data, struc)) + if missing: + print(f"Dataset is missing {struc} {missing}", flush=True) + out['exit_code'] = 1 + out['data_schema'] = 'not ok' + out['error'][struc] = missing +elif format_type in ["csv", "tsv", "parquet"]: + columns = 
schema_info_format.get("columns") or [] + missing = check_df_columns(data, columns) + +with open(par["output"], "w") as f: + json.dump(out, f, indent=2) + +if par['stop_on_error']: + exit(out['exit_code']) diff --git a/src/common/check_dataset_schema/test.py b/src/files/validate_dataset_with_schema/test.py similarity index 95% rename from src/common/check_dataset_schema/test.py rename to src/files/validate_dataset_with_schema/test.py index 1e7b5eb1e9..384f9d149d 100644 --- a/src/common/check_dataset_schema/test.py +++ b/src/files/validate_dataset_with_schema/test.py @@ -16,9 +16,10 @@ def schema(tmp_path): type: file description: "A preprocessed dataset" example: "preprocessed.h5ad" +label: "Preprocessed dataset" info: - label: "Preprocessed dataset" - slots: + format: + type: h5ad layers: - type: integer name: counts @@ -39,9 +40,10 @@ def error_schema(tmp_path): type: file description: "A preprocessed dataset" example: "preprocessed.h5ad" +label: "Preprocessed dataset" info: - label: "Preprocessed dataset" - slots: + format: + type: h5ad X: type: double description: Normalized expression values diff --git a/src/files/validate_yaml_with_schema/config.vsh.yaml b/src/files/validate_yaml_with_schema/config.vsh.yaml new file mode 100644 index 0000000000..a05c9b0092 --- /dev/null +++ b/src/files/validate_yaml_with_schema/config.vsh.yaml @@ -0,0 +1,28 @@ +name: validate_yaml_with_schema +namespace: files +summary: Check the format of a YAML file against a schema +description: Checks if the YAML file has the necessary data structures as defined in a schema. +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + required: true + description: A yaml file. + - name: --schema + type: file + required: true + description: A schema file for the yaml file. 
+resources: + - type: python_script + path: script.py +engines: + - type: docker + image: openproblems/base_python:1.0.0 + setup: + - type: python + pypi: + - jsonschema +runners: + - type: executable + - type: nextflow diff --git a/src/common/check_yaml_schema/script.py b/src/files/validate_yaml_with_schema/script.py similarity index 100% rename from src/common/check_yaml_schema/script.py rename to src/files/validate_yaml_with_schema/script.py diff --git a/src/project/sync_resources/config.vsh.yaml b/src/project/sync_resources/config.vsh.yaml new file mode 100644 index 0000000000..908f1a7d8c --- /dev/null +++ b/src/project/sync_resources/config.vsh.yaml @@ -0,0 +1,56 @@ +name: sync_resources +namespace: project +description: Sync test resources to the local filesystem +usage: | + sync_resources + sync_resources --input _viash.yaml --output . +argument_groups: + - name: Inputs + arguments: + - name: "--input" + alternatives: ["-i"] + type: file + description: "Path to the _viash.yaml project configuration file." + default: _viash.yaml + - name: Outputs + arguments: + - name: "--output" + alternatives: ["-o"] + type: file + default: . + direction: output + description: "Path to the directory where the resources will be synced to." + - name: Arguments + arguments: + - name: "--quiet" + type: boolean_true + description: "Displays the operations that would be performed using the specified command without actually running them." + - name: "--dryrun" + type: boolean_true + description: "Does not display the operations performed from the specified command." + - name: "--delete" + type: boolean_true + description: "Files that exist in the destination but not in the source are deleted during sync." + - name: "--exclude" + type: "string" + multiple: true + description: Exclude all files or objects from the command that matches the specified pattern. 
+resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh +engines: + - type: docker + image: "amazon/aws-cli:2.17.11" + setup: + - type: yum + packages: [wget] + - type: docker + run : | + wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && \ + chmod +x /usr/bin/yq +runners: + - type: executable + - type: nextflow diff --git a/src/project/sync_resources/script.sh b/src/project/sync_resources/script.sh new file mode 100644 index 0000000000..29afb29b9e --- /dev/null +++ b/src/project/sync_resources/script.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +## VIASH START +par_input='_viash.yaml' +par_output='.' +## VIASH END + +extra_params=( ) + +if [ "$par_quiet" == "true" ]; then + extra_params+=( "--quiet" ) +fi +if [ "$par_dryrun" == "true" ]; then + extra_params+=( "--dryrun" ) +fi +if [ "$par_delete" == "true" ]; then + extra_params+=( "--delete" ) +fi + +if [ ! -z ${par_exclude+x} ]; then + IFS=";" + for var in $par_exclude; do + unset IFS + extra_params+=( "--exclude" "$var" ) + done +fi + +function sync_s3() { + local s3_path="$1" + local dest_path="$2" + AWS_EC2_METADATA_DISABLED=true \ + aws s3 sync \ + "$s3_path" \ + "$dest_path" \ + --no-sign-request \ + "${extra_params[@]}" +} + +yq e \ + '.info.test_resources[] | "{type: " + (.type // "s3") + ", path: " + .path + ", dest: " + .dest + "}"' \ + "${par_input}" | \ + while read -r line; do + type=$(echo "$line" | yq e '.type') + path=$(echo "$line" | yq e '.path') + dest=$(echo "$line" | yq e '.dest') + + echo "Syncing '$path' to '$dest'..." 
+ + if [ "$type" == "s3" ]; then + sync_s3 "$path" "$par_output/$dest" + fi + done diff --git a/src/common/sync_test_resources/run_test.sh b/src/project/sync_resources/test.sh similarity index 52% rename from src/common/sync_test_resources/run_test.sh rename to src/project/sync_resources/test.sh index 67f2504531..55034d720d 100755 --- a/src/common/sync_test_resources/run_test.sh +++ b/src/project/sync_resources/test.sh @@ -3,10 +3,18 @@ ## VIASH START ## VIASH END +cat > _viash.yaml << EOM +info: + test_resources: + - type: s3 + path: s3://openproblems-data/resources_test/common/pancreas + dest: foo +EOM + echo ">> Run aws s3 sync" -./$meta_functionality_name \ - --input s3://openproblems-data/resources_test/common/pancreas \ - --output foo \ +"$meta_executable" \ + --input _viash.yaml \ + --output . \ --quiet echo ">> Check whether the right files were copied" diff --git a/src/project/upgrade_config/config.vsh.yaml b/src/project/upgrade_config/config.vsh.yaml new file mode 100644 index 0000000000..1c3d75346d --- /dev/null +++ b/src/project/upgrade_config/config.vsh.yaml @@ -0,0 +1,38 @@ +name: upgrade_config +namespace: project +description: | + Upgrade a component config from viash version 0.8 to version 0.9. +usage: + viash run upgrade_config/config.vsh.yaml -- --input method --output foo + +arguments: + - type: file + name: --input + direction: input + description: Path to the input config. + example: input.vsh.yaml + - type: file + name: --output + description: Path to the output config. 
+ example: input.vsh.yaml + direction: output + +resources: + - type: python_script + path: script.py + # - path: library.bib + +test_resources: + - type: python_script + path: test.py + +engines: + - type: docker + image: openproblems/base_python:1.0.0 + setup: + - type: python + packages: ruamel.yaml +runners: + - type: executable + - type: nextflow + diff --git a/src/project/upgrade_config/script.py b/src/project/upgrade_config/script.py new file mode 100644 index 0000000000..c04af6c5c7 --- /dev/null +++ b/src/project/upgrade_config/script.py @@ -0,0 +1,91 @@ +import ruamel.yaml +# import re + +## VIASH START +par = { + "input": "input.vsh.yaml", + "output": "output.vsh.yaml", +} +## VIASH END + +yaml = ruamel.yaml.YAML() + +# Set indentation rules +yaml.indent(mapping=2, sequence=4, offset=2) + +# Load input config +with open(par["input"], "r") as file: + data = yaml.load(file) + +transformed_yaml_content = ruamel.yaml.CommentedMap() + +# Add __merge__, if necessary +if "__merge__" in data: + transformed_yaml_content["__merge__"] = data["__merge__"] + +# Remove .functionality +if "functionality" in data: + if "info" in data["functionality"]: + info_content = data["functionality"]["info"] + label = info_content.pop("label", None) + summary = info_content.pop("summary", None) + description = info_content.pop("description", None) + reference = info_content.pop("reference", None) + repository = info_content.pop("repository_url", None) + documentation = info_content.pop("documentation_url", None) + + # Remove 'info' if it becomes empty + if not info_content: + data["functionality"].pop("info") + + updated_functionality = ruamel.yaml.CommentedMap() + updated_functionality["name"] = data["functionality"].pop("name") + + # Move out of info + if label is not None: + updated_functionality["label"] = label + if summary is not None: + updated_functionality["summary"] = summary + if description is not None: + updated_functionality["description"] = description + + # Fetch 
doi using reference key + if reference is not None: + updated_functionality["references"] = {} + # with open(f"library.bib", "r") as file: + # bib = file.read() + # entry_pattern = r"(@\w+{[^}]*" + reference + r"[^}]*}(.|\n)*?)(?=@)" + # bib_entry = re.search(entry_pattern, bib) + # if bib_entry: + # doi_pattern = r"(?=[Dd][Oo][Ii]\s*=\s*{([^,}]+)})" + # entry_doi = re.search(doi_pattern, bib_entry.group(1)) + # updated_functionality["references"]["doi"] = entry_doi.group(1) + updated_functionality["references"]["bibtex"] = reference + + # Add links + updated_functionality["links"] = {} + if repository is not None: + updated_functionality["links"]["repository"] = repository + if documentation is not None: + updated_functionality["links"]["documentation"] = documentation + + # Add remaining contents from .functionality + updated_functionality.update(data["functionality"]) + + transformed_yaml_content.update(updated_functionality) + +# Mapping platforms to engines and runners +transformed_yaml_content["engines"] = [] +transformed_yaml_content["runners"] = [] +for platform in data["platforms"]: + if platform["type"] == "docker": + transformed_yaml_content["engines"].append(platform) + elif platform["type"] == "nextflow": + transformed_yaml_content["runners"].append(platform) + +# Insert `type: executable` into runners +transformed_yaml_content["runners"].insert(0, {"type": "executable"}) + +# Write the transformed YAML to a new file +with open(par["output"], 'w') as file: + yaml.dump(transformed_yaml_content, file) \ No newline at end of file diff --git a/src/project/upgrade_config/test.py b/src/project/upgrade_config/test.py new file mode 100644 index 0000000000..19bc5084bc --- /dev/null +++ b/src/project/upgrade_config/test.py @@ -0,0 +1,64 @@ +from openproblems.utils import strip_margin +from os import path +import subprocess +import yaml + +test_data = strip_margin(f'''\ + |functionality: + | name: "phate" + | info: + | label: PHATE + | summary: Preservating 
trajectories in a dataset by using heat diffusion potential. + | description: | + | PHATE uses the potential of heat diffusion to preserve trajectories in a dataset via a diffusion process + | reference: "moon2019visualizing" + | repository_url: "https://github.com/KrishnaswamyLab/PHATE" + | documentation_url: "https://github.com/KrishnaswamyLab/PHATE#readme" + | preferred_normalization: sqrt_cp10k + | # component specific arguments + | arguments: + | - name: '--n_pca_dims' + | type: integer + | description: Number of principal components of PCA to use. + | resources: + | - type: python_script + | path: script.py + |platforms: + | - type: docker + | image: ghcr.io/openproblems-bio/base_python:1.0.4 + | - type: nextflow + | directives: + | label: [midtime, highmem, highcpu] + |''' +) + +input = "input.vsh.yaml" +with open(input, "w") as file: + file.write(test_data) + +output = "output.vsh.yaml" + +cmd = [ + meta['executable'], + '--input', input, + '--output', output +] + +print('>> Running the script as test', flush=True) +out = subprocess.run(cmd, stderr=subprocess.STDOUT) + +if out.returncode: + print(f"script: '{cmd}' exited with an error.") + exit(out.returncode) + +print('>> Checking whether output files exist', flush=True) +assert path.exists(output), "Output file does not exist" + +print('>> Checking file contents', flush=True) +with open(output) as f: + conf_data = yaml.safe_load(f) + +assert "functionality" not in conf_data, ".functionality not removed" +assert "engines" in conf_data, ".platforms not updated" + +print('All checks succeeded!', flush=True) diff --git a/src/task/create_component/config.vsh.yaml b/src/task/create_component/config.vsh.yaml new file mode 100644 index 0000000000..e4e9183086 --- /dev/null +++ b/src/task/create_component/config.vsh.yaml @@ -0,0 +1,64 @@ +name: create_component +namespace: task +description: | + Create a new component +usage: + create_component --type method --language r --name foo + create_component --type metric 
--language python --name bar +argument_groups: + - name: Inputs + arguments: + - type: file + name: --input + direction: input + description: Path to the root of the project. + default: "." + - type: file + name: --api_file + description: | + Which API file to use. Defaults to `src/api/comp_{type}.yaml`. + In tasks with different subtypes of method, this location might not exist and you might need + to manually specify a different API file to inherit from. + must_exist: false + default: src/api/comp_${VIASH_PAR_TYPE}.yaml + - name: Arguments + arguments: + - type: string + name: --type + example: metric + description: The type of component to create. Typically must be one of 'method', 'control_method' or 'metric'. + - type: string + name: --language + description: Which scripting language to use. Options are 'python', 'r'. + default: python + choices: [python, r] + - type: string + name: --name + example: new_comp + description: Name of the new method, formatted in snake case. + - name: Outputs + arguments: + - type: file + name: --output + direction: output + description: Path to the component directory. Suggested location is `src/{type}s/{name}`. 
+ default: src/${VIASH_PAR_TYPE}s/${VIASH_PAR_NAME} +resources: + - type: python_script + path: script.py +test_resources: + - type: python_script + path: test.py +engines: + - type: docker + image: openproblems/base_python:1.0.0 + test_setup: + - type: apt + packages: git + - type: docker + run: | + git clone https://github.com/openproblems-bio/task_template.git /opt/task_template +runners: + - type: executable + - type: nextflow + diff --git a/src/common/create_component/script.py b/src/task/create_component/script.py similarity index 73% rename from src/common/create_component/script.py rename to src/task/create_component/script.py index 8c954a66d4..bc1dcdc410 100644 --- a/src/common/create_component/script.py +++ b/src/task/create_component/script.py @@ -3,6 +3,8 @@ import sys import os import re +from openproblems.utils import strip_margin +from openproblems.project import read_nested_yaml, find_project_root ## VIASH START par = { @@ -10,23 +12,17 @@ "type": "method", "language": "python", "name": "new_comp", - "output": "src/tasks/denoising/methods/new_comp", - "api_file": "src/tasks/denoising/api/comp_method.yaml", + "output": "src/methods/new_comp", + "api_file": "src/api/comp_method.yaml", "viash_yaml": "_viash.yaml" } ## VIASH END -# import helper function -sys.path.append(meta["resources_dir"]) -from read_and_merge_yaml import read_and_merge_yaml - -def strip_margin(text: str) -> str: - return re.sub("(^|\n)[ \t]*\|", "\\1", text) - def create_config(par, component_type, pretty_name, script_path) -> str: + general_str = generate_general_info(par, component_type, pretty_name) info_str = generate_info(par, component_type, pretty_name) resources_str = generate_resources(par, script_path) - docker_platform = generate_docker_platform(par) + docker_engine = generate_docker_engine(par) return strip_margin(f'''\ |# The API specifies which type of component this is. 
@@ -36,68 +32,89 @@ def create_config(par, component_type, pretty_name, script_path) -> str: |# - A unit test |__merge__: {os.path.relpath(par["api_file"], par["output"])} | - |functionality: - | # A unique identifier for your component (required). - | # Can contain only lowercase letters or underscores. - | name: {par["name"]} + |{general_str} + | | - | # Metadata for your component - | info: + |# Metadata for your component + |info: |{info_str} - | # Component-specific parameters (optional) - | # arguments: - | # - name: "--n_neighbors" - | # type: "integer" - | # default: 5 - | # description: Number of neighbors to use. + |# Component-specific parameters (optional) + |# arguments: + |# - name: "--n_neighbors" + |# type: "integer" + |# default: 5 + |# description: Number of neighbors to use. | - | # Resources required to run the component - | resources: + |# Resources required to run the component + |resources: |{resources_str} - |platforms: + |engines: | # Specifications for the Docker image for this component. - |{docker_platform} + |{docker_engine} + |runners: | # This platform allows running the component natively - | - type: native + | - type: executable | # Allows turning the component into a Nextflow module / pipeline. | - type: nextflow | directives: - | label: [midtime,midmem, midcpu] + | label: [midtime,midmem,midcpu] |''' ) -def generate_info(par, component_type, pretty_name) -> str: - """Generate the functionality info for a component.""" +def generate_general_info(par, component_type, pretty_name) -> str: + """Generate the general info for a method.""" + str = strip_margin(f'''\ + |# A unique identifier for your component (required). + |# Can contain only lowercase letters or underscores. 
+ |name: {par["name"]} + |''') if component_type in ["method", "control_method"]: - str = strip_margin(f'''\ - | # A relatively short label, used when rendering visualisarions (required) - | label: {pretty_name} - | # A one sentence summary of how this method works (required). Used when - | # rendering summary tables. - | summary: "FILL IN: A one sentence summary of this method." - | # A multi-line description of how this component works (required). Used - | # when rendering reference documentation. - | description: | - | FILL IN: A (multi-line) description of how this method works. - | # Which normalisation method this component prefers to use (required). - | preferred_normalization: log_cp10k + str += strip_margin(f'''\ + |# A relatively short label, used when rendering visualisations (required) + |label: {pretty_name} + |# A one sentence summary of how this method works (required). Used when + |# rendering summary tables. + |summary: "FILL IN: A one sentence summary of this method." + |# A multi-line description of how this component works (required). Used + |# when rendering reference documentation. + |description: | + | FILL IN: A (multi-line) description of how this method works. |''') if component_type == "method": str += strip_margin(f'''\ - | # A reference key from the bibtex library at src/common/library.bib (required). - | reference: bibtex_reference_key - | # URL to the documentation for this method (required). - | documentation_url: https://url.to/the/documentation - | # URL to the code repository for this method (required). - | repository_url: https://github.com/organisation/repository + |# references: + |# doi: + |# - 10.1000/xx.123456.789 + |# bibtex: + |# - | + |# @article{{foo, + |# title={{Foo}}, + |# author={{Bar}}, + |# journal={{Baz}}, + |# year={{2024}} + |# }} + |links: + | # URL to the documentation for this method (required). + | documentation: https://url.to/the/documentation + | # URL to the code repository for this method (required). 
+ | repository: https://github.com/organisation/repository |''') + return str + +def generate_info(par, component_type, pretty_name) -> str: + """Generate the info for a component.""" + if component_type in ["method", "control_method"]: + str = strip_margin(f'''\ + | # Which normalisation method this component prefers to use (required). + | preferred_normalization: log_cp10k + |''') return str elif component_type == "metric": return strip_margin(f'''\ - | metrics: + | metrics: | # A unique identifier for your metric (required). | # Can contain only lowercase letters or underscores. - | name: {par["name"]} + | - name: {par["name"]} | # A relatively short label, used when rendering visualisarions (required) | label: {pretty_name} | # A one sentence summary of how this metric works (required). Used when @@ -107,12 +124,22 @@ def generate_info(par, component_type, pretty_name) -> str: | # when rendering reference documentation. | description: | | FILL IN: A (multi-line) description of how this metric works. - | # A reference key from the bibtex library at src/common/library.bib (required). - | reference: bibtex_reference_key - | # URL to the documentation for this metric (required). - | documentation_url: https://url.to/the/documentation - | # URL to the code repository for this metric (required). - | repository_url: https://github.com/organisation/repository + | # references: + | # doi: + | # - 10.1000/xx.123456.789 + | # bibtex: + | # - | + | # @article{{foo, + | # title={{Foo}}, + | # author={{Bar}}, + | # journal={{Baz}}, + | # year={{2024}} + | # }} + | links: + | # URL to the documentation for this metric (required). + | documentation: https://url.to/the/documentation + | # URL to the code repository for this metric (required). 
+ | repository: https://github.com/organisation/repository | # The minimum possible value for this metric (required) | min: 0 | # The maximum possible value for this metric (required) @@ -123,36 +150,36 @@ def generate_info(par, component_type, pretty_name) -> str: def generate_resources(par, script_path) -> str: - """Add the script to the functionality resources.""" + """Add the script to the resources.""" if par["language"] == "python": type_str = "python_script" elif par["language"] == "r": type_str = "r_script" return strip_margin(f'''\ - | # The script of your component (required) - | - type: {type_str} - | path: {script_path} - | # Additional resources your script needs (optional) - | # - type: file - | # path: weights.pt + | # The script of your component (required) + | - type: {type_str} + | path: {script_path} + | # Additional resources your script needs (optional) + | # - type: file + | # path: weights.pt |''') -def generate_docker_platform(par) -> str: - """Set up the docker platform for Python.""" +def generate_docker_engine(par) -> str: + """Set up the docker engine for Python.""" if par["language"] == "python": image_str = "openproblems/base_python:1.0.0" setup_type = "python" - package_example = "scib==1.1.5" + package_example = "numpy<2" elif par["language"] == "r": image_str = "openproblems/base_r:1.0.0" setup_type = "r" - package_example = "tidyverse" + package_example = "tibble" return strip_margin(f'''\ | - type: docker | image: {image_str} | # Add custom dependencies here (optional). For more information, see - | # https://viash.io/reference/config/platforms/docker/#setup . + | # https://viash.io/reference/config/engines/docker/#setup . 
| # setup: | # - type: {setup_type} | # packages: {package_example} @@ -160,7 +187,7 @@ def generate_docker_platform(par) -> str: def set_par_values(config) -> None: """Adds values to each of the arguments in a config file.""" - args = config['functionality']['arguments'] + args = config['arguments'] for argi, arg in enumerate(args): key = re.sub("^-*", "", arg['name']) @@ -169,14 +196,14 @@ def set_par_values(config) -> None: value = arg.get("default", arg.get("example", "...")) elif arg.get("direction", "input") == "input": key_strip = key.replace("input_", "") - value = f'resources_test/{par["task"]}/pancreas/{key_strip}.h5ad' + value = f'resources_test/.../{key_strip}.h5ad' else: key_strip = key.replace("output_", "") value = f'{key_strip}.h5ad' # store key and value - config['functionality']['arguments'][argi]["key"] = key - config['functionality']['arguments'][argi]["value"] = value + config['arguments'][argi]["key"] = key + config['arguments'][argi]["value"] = value def look_for_adata_arg(args, uns_field): """Look for an argument that has a .uns[uns_field] in its info.slots.""" @@ -200,7 +227,7 @@ def write_output_python(arg, copy_from_adata, is_metric): if is_metric: value = f"{copy_from_adata}.uns['{slot['name']}']" else: - value = "meta['functionality_name']" + value = "meta['name']" else: value = group_name + "_" + slot["name"] inner.append(f"'{slot['name']}': {value}") @@ -229,7 +256,7 @@ def write_output_r(arg, copy_from_adata, is_metric): if is_metric: value = f"{copy_from_adata}$uns[[\"{slot['name']}\"]]" else: - value = "meta[[\"functionality_name\"]]" + value = "meta[[\"name\"]]" else: value = group_name + "_" + slot["name"] inner.append(f"{slot['name']} = {value}") @@ -246,7 +273,7 @@ def write_output_r(arg, copy_from_adata, is_metric): ) def create_python_script(par, config, type): - args = config['functionality']['arguments'] + args = config['arguments'] # create the arguments of the par string par_string = ",\n ".join(f"'{arg['key']}': 
'{arg['value']}'" for arg in args) @@ -298,7 +325,7 @@ def create_python_script(par, config, type): | {par_string} |}} |meta = {{ - | 'functionality_name': '{par["name"]}' + | 'name': '{par["name"]}' |}} |## VIASH END | @@ -313,7 +340,7 @@ def create_python_script(par, config, type): return script def create_r_script(par, api_spec, type): - args = api_spec['functionality']['arguments'] + args = api_spec['arguments'] # create the arguments of the par string par_string = ",\n ".join(f'{arg["key"]} = "{arg["value"]}"' for arg in args) @@ -363,7 +390,7 @@ def create_r_script(par, api_spec, type): | {par_string} |) |meta <- list( - | functionality_name = "{par["name"]}" + | name = "{par["name"]}" |) |## VIASH END | @@ -377,25 +404,6 @@ def create_r_script(par, api_spec, type): return script -# def read_viash_config(file): -# file = file.absolute() - -# # read in config -# command = ["viash", "config", "view", str(file)] - -# # Execute the command and capture the output -# output = subprocess.check_output( -# command, -# universal_newlines=True, -# cwd=str(file.parent) -# ) - -# # Parse the output as YAML -# config = yaml.load(output) - -# return config - - def main(par): ####### CHECK INPUTS ####### print("Check inputs", flush=True) @@ -417,8 +425,7 @@ def main(par): ## CHECK API FILE print("Check API file", flush=True) api_file = Path(par["api_file"]) - viash_yaml = Path(par["viash_yaml"]) - project_dir = viash_yaml.parent + project_dir = find_project_root(api_file) if not api_file.exists(): comp_types = [x.with_suffix("").name.removeprefix("comp_") for x in api_file.parent.glob("**/comp_*.y*ml")] list.sort(comp_types) @@ -429,12 +436,12 @@ def main(par): ## READ API FILE print("Read API file", flush=True) - api = read_and_merge_yaml(api_file) - comp_type = api.get("functionality", {}).get("info", {}).get("type", {}) + api = read_nested_yaml(api_file) + comp_type = api.get("info", {}).get("type", {}) if not comp_type: sys.exit(strip_margin(f"""\ |Error: API file is 
incorrectly formatted. - | Reason: Could not find component type at `.functionality.info.type`.' + | Reason: Could not find component type at `.info.type`.' | Please fix the formatting of the API file.""")) ####### CREATE OUTPUT DIR ####### @@ -473,4 +480,4 @@ def main(par): if __name__ == "__main__": - main(par) + main(par) \ No newline at end of file diff --git a/src/task/create_component/test.py b/src/task/create_component/test.py new file mode 100644 index 0000000000..6e9236d61b --- /dev/null +++ b/src/task/create_component/test.py @@ -0,0 +1,51 @@ +import subprocess +from os import path +import yaml + +## VIASH START +meta = { + 'executable': 'foo' +} +## VIASH END + +task_template = "/opt/task_template" +output_path = f"{task_template}/src/methods/test_method" + +assert path.exists(task_template), "Task template does not exist" + +cmd = [ + meta['executable'], + '--type', 'method', + '--name', 'test_method', + '--language', 'python', + '--api_file', 'src/api/comp_method.yaml', + '--output', 'src/methods/test_method' +] + +print('>> Running the script as test', flush=True) +out = subprocess.run(cmd, stderr=subprocess.STDOUT, cwd=task_template) + +if out.stdout: + print(out.stdout) + +if out.returncode: + print(f"script: '{cmd}' exited with an error.") + exit(out.returncode) + +print('>> Checking whether output files exist', flush=True) +assert path.exists(output_path), "Output dir does not exist" + +conf_f = path.join(output_path, 'config.vsh.yaml') +assert path.exists(conf_f), "Config file does not exist" + +script_f = path.join(output_path, "script.py") +assert path.exists(script_f), "Script file does not exist" + +print('>> Checking file contents', flush=True) +with open(conf_f) as f: + conf_data = yaml.safe_load(f) + +assert conf_data['name'] == 'test_method', "Name should be equal to 'test_method'" + +print('All checks succeeded!', flush=True) + diff --git a/src/task/render_readme/config.vsh.yaml b/src/task/render_readme/config.vsh.yaml new file mode 
100644 index 0000000000..cb46ae5ee9 --- /dev/null +++ b/src/task/render_readme/config.vsh.yaml @@ -0,0 +1,50 @@ +name: render_readme +namespace: task +description: | + Render the task README +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: Path to the root directory + default: "." + required: false + - name: Outputs + arguments: + - type: file + name: --output + direction: output + description: Path to the rendered README file. Suggested location is `README.md`. + default: README.md + required: false +resources: + - type: r_script + path: script.R +test_resources: + - type: r_script + path: test.R +engines: + - type: docker + image: openproblems/base_r:1.0.0 + setup: + - type: r + cran: + - processx + github: + - openproblems-bio/core/packages/r/openproblems.utils + - openproblems-bio/core/packages/r/openproblems + - openproblems-bio/core/packages/r/openproblems.docs + - type: apt + packages: [jq, curl] + - type: docker + # download and install quarto-*-linux-amd64.deb from latest release + run: | + release_info=$(curl -s https://api.github.com/repos/quarto-dev/quarto-cli/releases/latest) && \ + download_url=$(printf "%s" "$release_info" | jq -r '.assets[] | select(.name | test("quarto-.*-linux-amd64.deb")) | .browser_download_url') && \ + curl -sL "$download_url" -o /opt/quarto.deb && \ + dpkg -i /opt/quarto.deb && \ + rm /opt/quarto.deb +runners: + - type: executable + - type: nextflow diff --git a/src/task/render_readme/script.R b/src/task/render_readme/script.R new file mode 100644 index 0000000000..dcd394df4d --- /dev/null +++ b/src/task/render_readme/script.R @@ -0,0 +1,35 @@ +requireNamespace("openproblems.docs", quietly = TRUE) +requireNamespace("processx", quietly = TRUE) + +## VIASH START +par <- list( + "input" = "path/to/input", + "output" = "path/to/input/README.md" +) +## VIASH END + +cat("Read task metadata\n") +metadata <- openproblems.docs::read_task_metadata(par$input) + +cat("Render README.qmd 
content\n") +qmd_content <- openproblems.docs::render_task_readme_qmd(metadata) + +cat("Write README.qmd to file\n") +if (!dir.exists(meta$temp_dir)) { + dir.create(meta$temp_dir, recursive = TRUE) +} +qmd_file <- tempfile( + pattern = "README_", + fileext = ".qmd", + tmpdir = meta$temp_dir +) +writeLines(qmd_content, qmd_file) + +cat("Render README.qmd to README.md\n") +out <- processx::run( + command = "quarto", + args = c("render", qmd_file, "--output", "-"), + echo = TRUE +) + +writeLines(out$stdout, par$output) diff --git a/src/task/render_readme/test.R b/src/task/render_readme/test.R new file mode 100644 index 0000000000..96b029d1c5 --- /dev/null +++ b/src/task/render_readme/test.R @@ -0,0 +1,27 @@ +requireNamespace("assertthat", quietly = TRUE) + +## VIASH START +## VIASH END + +input <- system.file("extdata", "example_project", "api", package = "openproblems.docs") + +output_path <- "output.md" + +cat(">> Running the script as test\n") +out <- processx::run( + meta[["executable"]], + args = c("--input", input, "--output", output_path) +) + +cat(">> Checking whether output files exist\n") +assertthat::assert_that(file.exists(output_path)) + +cat(">> Checking file contents\n") +lines <- readLines(output_path) +assertthat::assert_that(any(grepl("# Template", lines))) +assertthat::assert_that(any(grepl("## Description", lines))) +# assertthat::assert_that(any(grepl("## Authors", lines))) +assertthat::assert_that(any(grepl("flowchart TB", lines))) +assertthat::assert_that(any(grepl("## File format:", lines))) + +cat("All checks succeeded!\n") diff --git a/src/common/decompress_gzip/config.vsh.yaml b/src/utils/decompress_gzip/config.vsh.yaml similarity index 96% rename from src/common/decompress_gzip/config.vsh.yaml rename to src/utils/decompress_gzip/config.vsh.yaml index 2716dc554d..7afe1a3d57 100644 --- a/src/common/decompress_gzip/config.vsh.yaml +++ b/src/utils/decompress_gzip/config.vsh.yaml @@ -1,6 +1,6 @@ functionality: name: decompress_gzip - namespace: 
common + namespace: utils arguments: - name: --input type: file diff --git a/src/common/decompress_gzip/script.sh b/src/utils/decompress_gzip/script.sh similarity index 100% rename from src/common/decompress_gzip/script.sh rename to src/utils/decompress_gzip/script.sh diff --git a/src/common/decompress_gzip/test.sh b/src/utils/decompress_gzip/test.sh similarity index 100% rename from src/common/decompress_gzip/test.sh rename to src/utils/decompress_gzip/test.sh