From e65002e921ef0db7a085fcd208acbe676bc32e82 Mon Sep 17 00:00:00 2001 From: Nirmayi Date: Fri, 20 Sep 2024 13:08:17 +0200 Subject: [PATCH 1/3] update dataset id --- src/datasets/loaders/tenx_visium/script.py | 2 +- src/datasets/loaders/tenx_visium/test.py | 2 +- src/datasets/resource_scripts/tenx_visium.sh | 46 ++++++++-------- .../resource_scripts/zenodo_spatial.sh.sh | 52 +++++++++---------- .../zenodo_spatial_slidetags.sh | 8 +-- ...nal_section1.sh => mouse_brain_coronal.sh} | 4 +- .../api/comp_control_method.yaml | 4 +- .../api/comp_method.yaml | 4 +- .../api/comp_metric.yaml | 4 +- .../api/comp_process_dataset.yaml | 4 +- .../api/file_common_dataset.yaml | 2 +- .../api/file_dataset.yaml | 2 +- .../api/file_output.yaml | 2 +- .../api/file_score.yaml | 2 +- .../api/file_simulated_dataset.yaml | 2 +- .../api/file_solution.yaml | 2 +- .../control_methods/random_ranking/script.py | 4 +- .../control_methods/true_ranking/script.py | 4 +- .../methods/boostgp/script.R | 2 +- .../methods/gpcounts/script.py | 2 +- .../methods/moran_i/script.py | 2 +- .../methods/nnsvg/script.R | 2 +- .../methods/scgco/script.py | 2 +- .../methods/sepal/script.py | 2 +- .../methods/somde/script.py | 2 +- .../methods/spagcn/script.py | 2 +- .../methods/spagft/script.py | 2 +- .../methods/spanve/script.py | 2 +- .../methods/spark/script.R | 2 +- .../methods/spark_x/script.R | 2 +- .../methods/spatialde/script.py | 2 +- .../methods/spatialde2/script.py | 2 +- .../metrics/correlation/script.py | 4 +- .../select_reference/config.vsh.yaml | 4 +- .../select_reference/script.py | 2 +- .../simulate_svg/config.vsh.yaml | 4 +- .../process_dataset/simulate_svg/script.R | 2 +- .../split_dataset/config.vsh.yaml | 4 +- .../process_dataset/split_dataset/script.py | 2 +- .../resources_scripts/process_datasets.sh | 42 ++++++++------- .../resources_scripts/run_benchmark.sh | 32 ++++++------ .../mouse_brain_coronal_section1.sh | 18 +++---- .../workflows/process_datasets/run_test.sh | 6 +-- 43 files changed, 
150 insertions(+), 148 deletions(-) rename src/datasets/resource_test_scripts/{mouse_brain_coronal_section1.sh => mouse_brain_coronal.sh} (95%) diff --git a/src/datasets/loaders/tenx_visium/script.py b/src/datasets/loaders/tenx_visium/script.py index 7de04e6b5e..100bfde555 100644 --- a/src/datasets/loaders/tenx_visium/script.py +++ b/src/datasets/loaders/tenx_visium/script.py @@ -7,7 +7,7 @@ par = { "input_expression": "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_filtered_feature_bc_matrix.h5", "input_spatial": "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_spatial.tar.gz", - "dataset_id": "tenx_visium/mouse_brain_coronal_section1_visium", + "dataset_id": "mouse_brain_coronal", "dataset_name": "Mouse Brain Coronal Section 1 (FFPE)", "dataset_url": "https://www.10xgenomics.com/datasets/mouse-brain-coronal-section-1-ffpe-2-standard", "dataset_summary": "Gene expression library of Mouse Brain (CytAssist FFPE) using the Mouse Whole Transcriptome Probe Set", diff --git a/src/datasets/loaders/tenx_visium/test.py b/src/datasets/loaders/tenx_visium/test.py index a559ae1d3d..228880cc64 100644 --- a/src/datasets/loaders/tenx_visium/test.py +++ b/src/datasets/loaders/tenx_visium/test.py @@ -4,7 +4,7 @@ input_expression ="https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_filtered_feature_bc_matrix.h5" input_spatial = "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_spatial.tar.gz" -dataset_id = "10x_visium/mouse_brain_coronal_section1" +dataset_id = "mouse_brain_coronal" dataset_name = "Mouse Brain Coronal Section 1 (FFPE)" dataset_url = "https://www.10xgenomics.com/datasets/mouse-brain-coronal-section-1-ffpe-2-standard" dataset_summary = "Gene expression library of Mouse Brain (CytAssist FFPE) using 
the Mouse Whole Transcriptome Probe Set" diff --git a/src/datasets/resource_scripts/tenx_visium.sh b/src/datasets/resource_scripts/tenx_visium.sh index d5b54e7ef5..79e5a53cc8 100755 --- a/src/datasets/resource_scripts/tenx_visium.sh +++ b/src/datasets/resource_scripts/tenx_visium.sh @@ -2,7 +2,7 @@ # cat > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: tenx_visium/mouse_brain_coronal_section1_visium +# - id: tenx_visium/visium/mouse_brain_coronal # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_spatial.tar.gz" # dataset_name: 10X Visium - Mouse Brain Coronal @@ -15,7 +15,7 @@ # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: tenx_visium/human_colorectal_cancer_visium +# - id: tenx_visium/visium/human_colorectal_cancer # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Colorectal_Cancer/CytAssist_11mm_FFPE_Human_Colorectal_Cancer_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Colorectal_Cancer/CytAssist_11mm_FFPE_Human_Colorectal_Cancer_spatial.tar.gz" # dataset_name: 10X Visium - Human Colorectal Cancer @@ -28,7 +28,7 @@ # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: tenx_visium/human_heart_visium +# - id: tenx_visium/visium/human_heart # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Heart/V1_Human_Heart_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Heart/V1_Human_Heart_spatial.tar.gz" # dataset_name: 10X Visium - Human Heart @@ -41,7 +41,7 @@ # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: tenx_visium/mouse_embryo_visium +# - id: 
tenx_visium/visium/mouse_embryo # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_11mm_FFPE_Mouse_Embryo/CytAssist_11mm_FFPE_Mouse_Embryo_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_11mm_FFPE_Mouse_Embryo/CytAssist_11mm_FFPE_Mouse_Embryo_spatial.tar.gz" # dataset_name: 10X Visium - Mouse Embryo @@ -54,7 +54,7 @@ # gene_filter_min_spots: 50 # remove_mitochondrial: false -# - id: tenx_visium/mouse_olfactory_bulb_visium +# - id: tenx_visium/visium/mouse_olfactory_bulb # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_Mouse_Olfactory_Bulb/Visium_Mouse_Olfactory_Bulb_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_Mouse_Olfactory_Bulb/Visium_Mouse_Olfactory_Bulb_spatial.tar.gz" # dataset_name: 10X Visium - Mouse Olfactory Bulb @@ -67,7 +67,7 @@ # gene_filter_min_spots: 30 # remove_mitochondrial: false -# - id: tenx_visium/human_breast_cancer_1_visium +# - id: tenx_visium/visium/human_breast_cancer_1 # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_BreastCancer/Parent_Visium_Human_BreastCancer_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_BreastCancer/Parent_Visium_Human_BreastCancer_spatial.tar.gz" # dataset_name: 10X Visium - Human Breast Cancer 1 @@ -80,7 +80,7 @@ # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: tenx_visium/human_lymph_node_visium +# - id: tenx_visium/visium/human_lymph_node # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.0.0/V1_Human_Lymph_Node/V1_Human_Lymph_Node_spatial.tar.gz" # dataset_name: 10X Visium - Human Lymph Node @@ -93,7 +93,7 @@ # 
gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: tenx_visium/human_normal_prostate_visium +# - id: tenx_visium/visium/human_normal_prostate # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Normal_Prostate/Visium_FFPE_Human_Normal_Prostate_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Normal_Prostate/Visium_FFPE_Human_Normal_Prostate_spatial.tar.gz" # dataset_name: 10X Visium - Human Normal Prostate @@ -106,7 +106,7 @@ # gene_filter_min_spots: 30 # remove_mitochondrial: true -# - id: tenx_visium/human_prostate_cancer_visium +# - id: tenx_visium/visium/human_prostate_cancer # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Prostate_IF/Visium_FFPE_Human_Prostate_IF_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Prostate_IF/Visium_FFPE_Human_Prostate_IF_spatial.tar.gz" # dataset_name: 10X Visium - Human Prostate Cancer @@ -130,7 +130,7 @@ cat > "/tmp/params.yaml" << 'HERE' param_list: - - id: tenx_visium/human_cerebellum_visium + - id: tenx_visium/visium/human_cerebellum input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_Cerebellum/Parent_Visium_Human_Cerebellum_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_Cerebellum/Parent_Visium_Human_Cerebellum_spatial.tar.gz" dataset_name: 10X Visium - Adult Human Cerebellum @@ -143,7 +143,7 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: tenx_visium/mouse_kidney_v1_visium + - id: tenx_visium/visium/mouse_kidney_v1 input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Kidney/V1_Mouse_Kidney_filtered_feature_bc_matrix.h5" input_spatial: 
"https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Kidney/V1_Mouse_Kidney_spatial.tar.gz" dataset_name: 10X Visium - Mouse Kidney 1 @@ -156,7 +156,7 @@ param_list: gene_filter_min_spots: 30 remove_mitochondrial: false - - id: tenx_visium/human_lung_cancer_visium + - id: tenx_visium/visium/human_lung_cancer input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Lung_Cancer/CytAssist_11mm_FFPE_Human_Lung_Cancer_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Lung_Cancer/CytAssist_11mm_FFPE_Human_Lung_Cancer_spatial.tar.gz" dataset_name: 10X Visium - Human Lung Cancer @@ -169,7 +169,7 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: tenx_visium/human_brain_cancer_visium + - id: tenx_visium/visium/human_brain_cancer input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Glioblastoma/CytAssist_11mm_FFPE_Human_Glioblastoma_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Glioblastoma/CytAssist_11mm_FFPE_Human_Glioblastoma_spatial.tar.gz" dataset_name: 10X Visium - Human Brain Cancer @@ -182,7 +182,7 @@ param_list: gene_filter_min_spots: 100 remove_mitochondrial: true - - id: tenx_visium/human_kidney_visium + - id: tenx_visium/visium/human_kidney input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Kidney/CytAssist_11mm_FFPE_Human_Kidney_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Kidney/CytAssist_11mm_FFPE_Human_Kidney_spatial.tar.gz" dataset_name: 10X Visium - Human Kidney @@ -195,7 +195,7 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: tenx_visium/human_intestinal_cancer_visium + - id: tenx_visium/visium/human_intestinal_cancer 
input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Intestinal_Cancer/Visium_FFPE_Human_Intestinal_Cancer_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Intestinal_Cancer/Visium_FFPE_Human_Intestinal_Cancer_spatial.tar.gz" dataset_name: 10X Visium - Human Intestine Cancer @@ -208,7 +208,7 @@ param_list: gene_filter_min_spots: 30 remove_mitochondrial: true - - id: tenx_visium/human_skin_melanoma_visium + - id: tenx_visium/visium/human_skin_melanoma input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Human_Skin_Melanoma/CytAssist_FFPE_Human_Skin_Melanoma_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Human_Skin_Melanoma/CytAssist_FFPE_Human_Skin_Melanoma_spatial.tar.gz" dataset_name: 10X Visium - Human Skin Melanoma @@ -221,7 +221,7 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: tenx_visium/human_cervical_cancer_visium + - id: tenx_visium/visium/human_cervical_cancer input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Cervical_Cancer/Visium_FFPE_Human_Cervical_Cancer_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Cervical_Cancer/Visium_FFPE_Human_Cervical_Cancer_spatial.tar.gz" dataset_name: 10X Visium - Human Cervical Cancer @@ -234,13 +234,13 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: tenx_visium/human_breast_cancer_2_visium + - id: tenx_visium/visium/human_breast_cancer_2 input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Breast_Cancer/Visium_FFPE_Human_Breast_Cancer_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Breast_Cancer/Visium_FFPE_Human_Breast_Cancer_spatial.tar.gz" 
dataset_name: 10X Visium - Human Breast Cancer 2 dataset_url: "https://www.10xgenomics.com/datasets/human-breast-cancer-ductal-carcinoma-in-situ-invasive-carcinoma-ffpe-1-standard-1-3-0" dataset_summary: Gene expression library of Human Breast Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "10x Genomics obtained FFPE human breast tissue from BioIVT Asterand Human Tissue Specimens. The tissue was annotated with Ductal Carcinoma In Situ, Invasive Carcinoma. The tissue was sectioned as described in Visium Spatial Gene Expression for FFPE – Tissue Preparation Guide Demonstrated Protocol (CG000408). Tissue sections of 5 µm were placed on Visium Gene Expression slides, then stained following Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000409)." + dataset_description: "10x Genomics obtained FFPE human breast tissue from BioIVT Asterand Human Tissue Specimens. The tissue was annotated with Ductal Carcinoma In Situ, Invasive Carcinoma. The tissue was sectioned as described in Visium Spatial Gene Expression for FFPE - Tissue Preparation Guide Demonstrated Protocol (CG000408). Tissue sections of 5 µm were placed on Visium Gene Expression slides, then stained following Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000409)." 
dataset_reference: 10x2021breast dataset_organism: Homo sapiens spot_filter_min_genes: 100 @@ -258,10 +258,10 @@ HERE # cat > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: tenx_visium/human_colon_cancer_xenium +# - id: tenx_visium/post_xenium/human_colon_cancer # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Human_Colon_Post_Xenium_Rep1/CytAssist_FFPE_Human_Colon_Post_Xenium_Rep1_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Human_Colon_Post_Xenium_Rep1/CytAssist_FFPE_Human_Colon_Post_Xenium_Rep1_spatial.tar.gz" -# dataset_name: 10X Xenium - Human Colon +# dataset_name: 10X Post Xenium - Human Colon # dataset_url: "https://www.10xgenomics.com/datasets/visium-cytassist-gene-expression-libraries-of-post-xenium-human-colon-cancer-ffpe-using-the-human-whole-transcriptome-probe-set-2-standard" # dataset_summary: Gene expression library of Post Xenium Human Colon Cancer (CytAssist FFPE) using the Human Whole Transcriptome Probe Set - Replicate 1 # dataset_description: "This dataset is provided as part of the Technical Note: Post-Xenium In Situ Applications: Immunofluorescence, H&E, and Visium CytAssist Spatial Gene Expression (CG000709). Post-Xenium samples were compared to controls (samples not processed through the Xenium workflow) using 5 µm (FFPE) serial sections." 
@@ -271,10 +271,10 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: tenx_visium/mouse_brain_xenium +# - id: tenx_visium/post_xenium/mouse_brain # input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FreshFrozen_Mouse_Brain_Post_Xenium_Rep1/CytAssist_FreshFrozen_Mouse_Brain_Post_Xenium_Rep1_filtered_feature_bc_matrix.h5" # input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FreshFrozen_Mouse_Brain_Post_Xenium_Rep1/CytAssist_FreshFrozen_Mouse_Brain_Post_Xenium_Rep1_spatial.tar.gz" -# dataset_name: 10X Xenium - Mouse Brain +# dataset_name: 10X Post Xenium - Mouse Brain # dataset_url: "https://www.10xgenomics.com/datasets/visium-cytassist-gene-expression-libraries-of-post-xenium-mouse-brain-ff-using-the-mouse-whole-transcriptome-probe-set-2-standard" # dataset_summary: Gene expression library of Post Xenium Mouse Brain (CytAssist Fresh Frozen) using the Mouse Whole Transcriptome Probe Set - Replicate 1 # dataset_description: "This dataset is provided as part of the Technical Note: Post-Xenium In Situ Applications: Immunofluorescence, H&E, and Visium CytAssist Spatial Gene Expression (CG000709). Post-Xenium samples were compared to controls (samples not processed through the Xenium workflow) using 10 µm fresh-frozen (FF) serial sections." 
diff --git a/src/datasets/resource_scripts/zenodo_spatial.sh.sh b/src/datasets/resource_scripts/zenodo_spatial.sh.sh index 7842b4368f..d51e193435 100755 --- a/src/datasets/resource_scripts/zenodo_spatial.sh.sh +++ b/src/datasets/resource_scripts/zenodo_spatial.sh.sh @@ -2,7 +2,7 @@ cat > "/tmp/params.yaml" << 'HERE' param_list: - - id: zenodo_spatial/human_heart_myocardial_infarction_1_visium + - id: zenodo_spatial/visium/human_heart_myocardial_infarction_1 input_data: "https://zenodo.org/records/13328275/files/10X0018.h5ad?download=1" dataset_name: 10X Visium - Human Heart MI 1 dataset_url: "https://www.nature.com/articles/s41586-022-05060-x" @@ -14,7 +14,7 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: zenodo_spatial/human_heart_myocardial_infarction_2_visium + - id: zenodo_spatial/visium/human_heart_myocardial_infarction_2 input_data: "https://zenodo.org/records/13328275/files/10X009.h5ad?download=1" dataset_name: 10X Visium - Human Heart MI 2 dataset_url: "https://www.nature.com/articles/s41586-022-05060-x" @@ -38,7 +38,7 @@ HERE # catt > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: zenodo_spatial/mouse_e10_brain_dbitseq +# - id: zenodo_spatial/dbitseq/mouse_e10_brain # input_data: "https://zenodo.org/records/12785822/files/DBiT-seq_liu2020high_E10_brain_gene_25um_data.h5ad?download=1" # dataset_name: DBiT-seq - Mouse Brain (E10) # dataset_url: "https://www.cell.com/cell/fulltext/S0092-8674(20)31390-8" @@ -50,7 +50,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_e10_eye_dbitseq +# - id: zenodo_spatial/dbitseq/mouse_e10_eye # input_data: "https://zenodo.org/records/12785822/files/DBiT-seq_liu2020high_E10_eye_and_nearby_data.h5ad?download=1" # dataset_name: DBiT-seq - Mouse Eye (E10) # dataset_url: "https://www.cell.com/cell/fulltext/S0092-8674(20)31390-8" @@ -62,7 +62,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: 
zenodo_spatial/mouse_e10_whole_body_dbitseq +# - id: zenodo_spatial/dbitseq/mouse_e10_whole_body # input_data: "https://zenodo.org/records/12785822/files/DBiT-seq_liu2020high_E10_whole_gene_best_data.h5ad?download=1" # dataset_name: DBiT-seq - Mouse Whole Body (E10) # dataset_url: "https://www.cell.com/cell/fulltext/S0092-8674(20)31390-8" @@ -74,7 +74,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_e11_lower_body_dbitseq +# - id: zenodo_spatial/dbitseq/mouse_e11_lower_body # input_data: "https://zenodo.org/records/12785822/files/DBiT-seq_liu2020high_E11_lower_body_data.h5ad?download=1" # dataset_name: DBiT-seq - Mouse Lower Body (E11) # dataset_url: "https://www.cell.com/cell/fulltext/S0092-8674(20)31390-8" @@ -86,7 +86,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_e11_1_dbitseq +# - id: zenodo_spatial/dbitseq/mouse_e11_1 # input_data: "https://zenodo.org/records/12785822/files/DBiT-seq_liu2020high_GSM4364244_E11-FL-1L_gene_data.h5ad?download=1" # dataset_name: DBiT-seq - Mouse Whole Body 1 (E11) # dataset_url: "https://www.cell.com/cell/fulltext/S0092-8674(20)31390-8" @@ -98,7 +98,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_e11_2_dbitseq +# - id: zenodo_spatial/dbitseq/mouse_e11_2 # input_data: "https://zenodo.org/records/12785822/files/DBiT-seq_liu2020high_GSM4364245_E11-FL-2L_gene_data.h5ad?download=1" # dataset_name: DBiT-seq - Mouse Whole Body 2 (E11) # dataset_url: "https://www.cell.com/cell/fulltext/S0092-8674(20)31390-8" @@ -121,7 +121,7 @@ HERE # cat > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: zenodo_spatial/human_cortex_1_merfish +# - id: zenodo_spatial/merfish/human_cortex_1 # input_data: "https://zenodo.org/records/12785822/files/MERFISH_Fang2022Conservation_H18.06.006.MTG.250.expand.rep1_data.h5ad?download=1" # dataset_name: MERFISH - Human Cortex 1 # dataset_url: 
"https://www.science.org/doi/10.1126/science.abm1741" @@ -133,7 +133,7 @@ HERE # gene_filter_min_spots: 100 # remove_mitochondrial: false -# - id: zenodo_spatial/human_cortex_2_merfish +# - id: zenodo_spatial/merfish/human_cortex_2 # input_data: "https://zenodo.org/records/12785822/files/MERFISH_Fang2022Conservation_H18.06.006.MTG.4000.expand.rep1_data.h5ad?download=1" # dataset_name: MERFISH - Human Cortex 2 # dataset_url: "https://www.science.org/doi/10.1126/science.abm1741" @@ -145,7 +145,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: false -# - id: zenodo_spatial/human_cortex_3_merfish +# - id: zenodo_spatial/merfish/human_cortex_3 # input_data: "https://zenodo.org/records/12785822/files/MERFISH_Fang2022Conservation_H18.06.006.MTG.4000.expand.rep2_data.h5ad?download=1" # dataset_name: MERFISH - Human Cortex 3 # dataset_url: "https://www.science.org/doi/10.1126/science.abm1741" @@ -157,7 +157,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: false -# - id: zenodo_spatial/human_cortex_4_merfish +# - id: zenodo_spatial/merfish/human_cortex_4 # input_data: "https://zenodo.org/records/12785822/files/MERFISH_Fang2022Conservation_H18.06.006.MTG.4000.expand.rep3_data.h5ad?download=1" # dataset_name: MERFISH - Human Cortex 4 # dataset_url: "https://www.science.org/doi/10.1126/science.abm1741" @@ -169,7 +169,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: false -# - id: zenodo_spatial/mouse_cortex_merfish +# - id: zenodo_spatial/merfish/mouse_cortex # input_data: "https://zenodo.org/records/12785822/files/MERFISH_Fang2022Conservation_mouse1.AUD_TEA_VIS.242.unexpand_data.h5ad?download=1" # dataset_name: MERFISH - Mouse Cortex # dataset_url: "https://www.science.org/doi/10.1126/science.abm1741" @@ -192,7 +192,7 @@ HERE # cat > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: zenodo_spatial/mouse_organogenesis_seqfish +# - id: zenodo_spatial/seqfish/mouse_organogenesis_seqfish # input_data: 
"https://zenodo.org/records/12785822/files/seqfish.h5ad?download=1" # dataset_name: Seqfish - Mouse Organogenesis # dataset_url: "https://www.nature.com/articles/s41587-021-01006-2" @@ -216,7 +216,7 @@ HERE # cat > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: zenodo_spatial/mouse_olfactory_bulb_puck_slideseqv2 +# - id: zenodo_spatial/slideseqv2/mouse_olfactory_bulb_puck # input_data: "https://zenodo.org/records/12785822/files/Slide-seqV2_stickels2020highly_stickels2021highly_SlideSeqV2_Mouse_Olfactory_bulb_Puck_200127_15_data_whole.h5ad?download=1" # dataset_name: Slide-seqV2 - Mouse Olfactory Bulb Puck # dataset_url: "https://singlecell.broadinstitute.org/single_cell/study/SCP815/sensitive-spatial-genome-wide-expression-profiling-at-cellular-resolution#study-summary" @@ -228,7 +228,7 @@ HERE # gene_filter_min_spots: 500 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_cortex_slideseqv2 +# - id: zenodo_spatial/slideseqv2/mouse_cortex # input_data: "https://zenodo.org/records/12785822/files/Slide-seqV2_stickels2020highly_palla2021squidpy_Slide-seqV2_Mouse_Cortex_data_whole.h5ad?download=1" # dataset_name: Slide-seqV2 - Mouse Cortex # dataset_url: "https://singlecell.broadinstitute.org/single_cell/study/SCP815/sensitive-spatial-genome-wide-expression-profiling-at-cellular-resolution#study-summary" @@ -240,7 +240,7 @@ HERE # gene_filter_min_spots: 500 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_cerebellum_slideseqv2 +# - id: zenodo_spatial/slideseqv2/mouse_cerebellum # input_data: "https://zenodo.org/records/12785822/files/Slide-seqV2_stickels2020highly_stickels2021highly_Slide-seqV2_Mouse_Cerebellum_SCP948_data_whole.h5ad?download=1" # dataset_name: Slide-seqV2 - Mouse Cerebellum # dataset_url: "https://singlecell.broadinstitute.org/single_cell/study/SCP815/sensitive-spatial-genome-wide-expression-profiling-at-cellular-resolution#study-summary" @@ -252,7 +252,7 @@ HERE # gene_filter_min_spots: 500 # remove_mitochondrial: true -# - id: 
zenodo_spatial/mouse_hippocampus_puck_slideseqv2 +# - id: zenodo_spatial/slideseqv2/mouse_hippocampus_puck # input_data: "https://zenodo.org/records/12785822/files/Slide-seqV2_stickels2020highly_stickels2021highly_Slide-seqV2_Mouse_Hippocampus_Puck_200115_08_data_whole.h5ad?download=1" # dataset_name: Slide-seqV2 - Mouse Hippocampus Puck # dataset_url: "https://singlecell.broadinstitute.org/single_cell/study/SCP815/sensitive-spatial-genome-wide-expression-profiling-at-cellular-resolution#study-summary" @@ -264,7 +264,7 @@ HERE # gene_filter_min_spots: 500 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_somatosensory_cortex_puck_slideseqv2 +# - id: zenodo_spatial/slideseqv2/mouse_somatosensory_cortex_puck # input_data: "https://zenodo.org/records/12785822/files/Slide-seqV2_stickels2020highly_stickels2021highly_Slide-seqV2_Mouse_SomatosensoryCortex_Puck_200306_03_data_whole.h5ad?download=1" # dataset_name: Slide-seqV2 - Mouse Somatosensory Cortex Puck # dataset_url: "https://singlecell.broadinstitute.org/single_cell/study/SCP815/sensitive-spatial-genome-wide-expression-profiling-at-cellular-resolution#study-summary" @@ -287,7 +287,7 @@ HERE # cat > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: zenodo_spatial/mouse_brain_2d_zstep10_0_starmap +# - id: zenodo_spatial/starmap/mouse_brain_2d_zstep10_0 # input_data: "https://zenodo.org/records/12785822/files/STARmap_Wang2018three_data_2D_zstep10_0_data.h5ad?download=1" # dataset_name: STARmap - Mouse Brain 1 # dataset_url: "https://www.science.org/doi/10.1126/science.aat5691" @@ -299,7 +299,7 @@ HERE # gene_filter_min_spots: 1 # remove_mitochondrial: true -# - id: zenodo_spatial/mouse_brain_2d_zstep15_0_starmap +# - id: zenodo_spatial/starmap/mouse_brain_2d_zstep15_0 # input_data: "https://zenodo.org/records/12785822/files/STARmap_Wang2018three_data_2D_zstep15_0_data.h5ad?download=1" # dataset_name: STARmap - Mouse Brain 2 # dataset_url: "https://www.science.org/doi/10.1126/science.aat5691" @@ -322,7 
+322,7 @@ HERE # cat > "/tmp/params.yaml" << 'HERE' # param_list: -# - id: zenodo_spatial/drosophila_embryo_e5_6_stereoseq +# - id: zenodo_spatial/stereoseq/drosophila_embryo_e5_6 # input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_5.6.h5ad?download=1" # dataset_name: Stereo-seq - Drosophila embryo E5_6 # dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" @@ -334,7 +334,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: zenodo_spatial/drosophila_embryo_e6_3_stereoseq +# - id: zenodo_spatial/stereoseq/drosophila_embryo_e6_3 # input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_6.3.h5ad?download=1" # dataset_name: Stereo-seq - Drosophila embryo E6_3 # dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" @@ -346,7 +346,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: zenodo_spatial/drosophila_embryo_e7_stereoseq +# - id: zenodo_spatial/stereoseq/drosophila_embryo_e7 # input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_7.h5ad?download=1" # dataset_name: Stereo-seq - Drosophila embryo E7 # dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" @@ -358,7 +358,7 @@ HERE # gene_filter_min_spots: 50 # remove_mitochondrial: true -# - id: zenodo_spatial/drosophila_embryo_e9_1_stereoseq +# - id: zenodo_spatial/stereoseq/drosophila_embryo_e9_1 # input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_9.1.h5ad?download=1" # dataset_name: Stereo-seq - Drosophila embryo E9_1 # dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" @@ -370,7 +370,7 @@ HERE # gene_filter_min_spots: 50 # 
remove_mitochondrial: true -# - id: zenodo_spatial/drosophila_embryo_e10_stereoseq +# - id: zenodo_spatial/stereoseq/drosophila_embryo_e10 # input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_10.5.h5ad?download=1" # dataset_name: Stereo-seq - Drosophila embryo E10 # dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" diff --git a/src/datasets/resource_scripts/zenodo_spatial_slidetags.sh b/src/datasets/resource_scripts/zenodo_spatial_slidetags.sh index 5ab4962240..aa4e7e094b 100755 --- a/src/datasets/resource_scripts/zenodo_spatial_slidetags.sh +++ b/src/datasets/resource_scripts/zenodo_spatial_slidetags.sh @@ -2,7 +2,7 @@ cat > "/tmp/params.yaml" << 'HERE' param_list: - - id: zenodo_spatial_slidetags/human_cortex_slidetags + - id: zenodo_spatial_slidetags/slidetags/human_cortex input_data: "https://zenodo.org/records/12785822/files/slidetag_human_cortex.tar.gz?download=1" dataset_name: Slide-tags - Human Cortex dataset_url: "https://www.nature.com/articles/s41586-023-06837-4" @@ -14,7 +14,7 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: zenodo_spatial_slidetags/human_skin_melanoma_slidetags + - id: zenodo_spatial_slidetags/slidetags/human_skin_melanoma input_data: "https://zenodo.org/records/12785822/files/slidetag_human_skin_melanoma.tar.gz?download=1" dataset_name: Slide-tags - Human Skin Melanoma dataset_url: "https://www.nature.com/articles/s41586-023-06837-4" @@ -26,7 +26,7 @@ param_list: gene_filter_min_spots: 50 remove_mitochondrial: true - - id: zenodo_spatial_slidetags/human_tonsil_slidetags + - id: zenodo_spatial_slidetags/slidetags/human_tonsil input_data: "https://zenodo.org/records/12785822/files/slidetag_human_tonsil.tar.gz?download=1" dataset_name: Slide-tags - Human Tonsil dataset_url: "https://www.nature.com/articles/s41586-023-06837-4" @@ -38,7 +38,7 @@ param_list: gene_filter_min_spots: 50 
remove_mitochondrial: true - - id: zenodo_spatial_slidetags/mouse_embryo_slidetags + - id: zenodo_spatial_slidetags/slidetags/mouse_embryo input_data: "https://zenodo.org/records/12785822/files/slidetag_mouse_embryo.tar.gz?download=1" dataset_name: Slide-tags - Mouse Embryo dataset_url: "https://www.nature.com/articles/s41586-023-06837-4" diff --git a/src/datasets/resource_test_scripts/mouse_brain_coronal_section1.sh b/src/datasets/resource_test_scripts/mouse_brain_coronal.sh similarity index 95% rename from src/datasets/resource_test_scripts/mouse_brain_coronal_section1.sh rename to src/datasets/resource_test_scripts/mouse_brain_coronal.sh index e4b889e063..962c4c067d 100755 --- a/src/datasets/resource_test_scripts/mouse_brain_coronal_section1.sh +++ b/src/datasets/resource_test_scripts/mouse_brain_coronal.sh @@ -4,10 +4,10 @@ set -e cat > /tmp/params.yaml << 'HERE' param_list: - - id: mouse_brain_coronal_section1 + - id: mouse_brain_coronal input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_filtered_feature_bc_matrix.h5" input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Mouse_Brain_Rep1/CytAssist_FFPE_Mouse_Brain_Rep1_spatial.tar.gz" - dataset_name: Mouse Brain Coronal Section 1 (FFPE) + dataset_name: 10X Visium - Mouse Brain Coronal dataset_url: "https://www.10xgenomics.com/datasets/mouse-brain-coronal-section-1-ffpe-2-standard" dataset_summary: Gene expression library of Mouse Brain (CytAssist FFPE) using the Mouse Whole Transcriptome Probe Set dataset_description: "FFPE Mouse Brain tissue blocks sectioned as described in Visium CytAssist Spatial Gene Expression for FFPE - Tissue Preparation Guide Demonstrated Protocol. The H&E stained glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression slide. 
The probe extension and library construction steps follow the standard Visium for FFPE workflow outside of the instrument. The H&E image was acquired using Olympus VS200 Slide Scanning Microscope. Sequencing depth was 53,497 reads per spot. Sequencing configuration: 28bp read 1 (16bp Visium spatial barcode, 12bp UMI), 90bp read 2 (transcript), 10bp i7 sample barcode and 10bp i5 sample barcode. Key metrics include: 2,310 spots detected under tissue; 6,736 median genes per spot; 24,862 median UMI counts per spot." diff --git a/src/tasks/spatially_variable_genes/api/comp_control_method.yaml b/src/tasks/spatially_variable_genes/api/comp_control_method.yaml index ee107bfd24..d61b147a2c 100644 --- a/src/tasks/spatially_variable_genes/api/comp_control_method.yaml +++ b/src/tasks/spatially_variable_genes/api/comp_control_method.yaml @@ -29,6 +29,6 @@ functionality: path: /src/common/comp_tests/check_method_config.py - type: python_script path: /src/common/comp_tests/run_and_check_adata.py - - path: /resources_test/spatially_variable_genes/mouse_brain_coronal_section1 - dest: resources_test/spatially_variable_genes/mouse_brain_coronal_section1 + - path: /resources_test/spatially_variable_genes/mouse_brain_coronal + dest: resources_test/spatially_variable_genes/mouse_brain_coronal - path: /src/common/library.bib diff --git a/src/tasks/spatially_variable_genes/api/comp_method.yaml b/src/tasks/spatially_variable_genes/api/comp_method.yaml index 52372f7b33..20cbf30869 100644 --- a/src/tasks/spatially_variable_genes/api/comp_method.yaml +++ b/src/tasks/spatially_variable_genes/api/comp_method.yaml @@ -20,6 +20,6 @@ functionality: path: /src/common/comp_tests/check_method_config.py - type: python_script path: /src/common/comp_tests/run_and_check_adata.py - - path: /resources_test/spatially_variable_genes/mouse_brain_coronal_section1 - dest: resources_test/spatially_variable_genes/mouse_brain_coronal_section1 + - path: /resources_test/spatially_variable_genes/mouse_brain_coronal + 
dest: resources_test/spatially_variable_genes/mouse_brain_coronal - path: /src/common/library.bib \ No newline at end of file diff --git a/src/tasks/spatially_variable_genes/api/comp_metric.yaml b/src/tasks/spatially_variable_genes/api/comp_metric.yaml index 73166a2160..b82243266d 100644 --- a/src/tasks/spatially_variable_genes/api/comp_metric.yaml +++ b/src/tasks/spatially_variable_genes/api/comp_metric.yaml @@ -25,7 +25,7 @@ functionality: path: /src/common/comp_tests/check_metric_config.py - type: python_script path: /src/common/comp_tests/run_and_check_adata.py - - path: /resources_test/spatially_variable_genes/mouse_brain_coronal_section1 - dest: resources_test/spatially_variable_genes/mouse_brain_coronal_section1 + - path: /resources_test/spatially_variable_genes/mouse_brain_coronal + dest: resources_test/spatially_variable_genes/mouse_brain_coronal - path: /src/common/library.bib \ No newline at end of file diff --git a/src/tasks/spatially_variable_genes/api/comp_process_dataset.yaml b/src/tasks/spatially_variable_genes/api/comp_process_dataset.yaml index b18780013d..e1043d8859 100644 --- a/src/tasks/spatially_variable_genes/api/comp_process_dataset.yaml +++ b/src/tasks/spatially_variable_genes/api/comp_process_dataset.yaml @@ -23,5 +23,5 @@ functionality: test_resources: - type: python_script path: /src/common/comp_tests/run_and_check_adata.py - - path: /resources_test/common/mouse_brain_coronal_section1 - dest: resources_test/common/mouse_brain_coronal_section1 \ No newline at end of file + - path: /resources_test/common/mouse_brain_coronal + dest: resources_test/common/mouse_brain_coronal \ No newline at end of file diff --git a/src/tasks/spatially_variable_genes/api/file_common_dataset.yaml b/src/tasks/spatially_variable_genes/api/file_common_dataset.yaml index 1837e45020..33ddccbed5 100644 --- a/src/tasks/spatially_variable_genes/api/file_common_dataset.yaml +++ b/src/tasks/spatially_variable_genes/api/file_common_dataset.yaml @@ -1,5 +1,5 @@ type: file 
-example: "resources_test/common/mouse_brain_coronal_section1/dataset.h5ad" +example: "resources_test/common/mouse_brain_coronal/dataset.h5ad" info: label: "Common Dataset" summary: A subset of the common dataset. diff --git a/src/tasks/spatially_variable_genes/api/file_dataset.yaml b/src/tasks/spatially_variable_genes/api/file_dataset.yaml index 1061720a11..3f2a127fee 100644 --- a/src/tasks/spatially_variable_genes/api/file_dataset.yaml +++ b/src/tasks/spatially_variable_genes/api/file_dataset.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad" +example: "resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad" info: label: "Dataset" summary: The dataset without spatially variable genes. diff --git a/src/tasks/spatially_variable_genes/api/file_output.yaml b/src/tasks/spatially_variable_genes/api/file_output.yaml index e1fb7f6eac..c34f45961f 100644 --- a/src/tasks/spatially_variable_genes/api/file_output.yaml +++ b/src/tasks/spatially_variable_genes/api/file_output.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/output.h5ad" +example: "resources_test/spatially_variable_genes/mouse_brain_coronal/output.h5ad" info: label: Output summary: "Anndata with estimate spatial variability." diff --git a/src/tasks/spatially_variable_genes/api/file_score.yaml b/src/tasks/spatially_variable_genes/api/file_score.yaml index 28b3a47e14..a62782cd80 100644 --- a/src/tasks/spatially_variable_genes/api/file_score.yaml +++ b/src/tasks/spatially_variable_genes/api/file_score.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/score.h5ad" +example: "resources_test/spatially_variable_genes/mouse_brain_coronal/score.h5ad" info: label: "Score" summary: Metric score file. 
diff --git a/src/tasks/spatially_variable_genes/api/file_simulated_dataset.yaml b/src/tasks/spatially_variable_genes/api/file_simulated_dataset.yaml index 043b459690..cc776514d0 100644 --- a/src/tasks/spatially_variable_genes/api/file_simulated_dataset.yaml +++ b/src/tasks/spatially_variable_genes/api/file_simulated_dataset.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/simulated_dataset.h5ad" +example: "resources_test/spatially_variable_genes/mouse_brain_coronal/simulated_dataset.h5ad" info: label: "Common Dataset" summary: A subset of the common dataset. diff --git a/src/tasks/spatially_variable_genes/api/file_solution.yaml b/src/tasks/spatially_variable_genes/api/file_solution.yaml index f26006bfd0..1f0f981a1d 100644 --- a/src/tasks/spatially_variable_genes/api/file_solution.yaml +++ b/src/tasks/spatially_variable_genes/api/file_solution.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/solution.h5ad" +example: "resources_test/spatially_variable_genes/mouse_brain_coronal/solution.h5ad" info: label: Solution summary: "Anndata with true spatial variability." 
diff --git a/src/tasks/spatially_variable_genes/control_methods/random_ranking/script.py b/src/tasks/spatially_variable_genes/control_methods/random_ranking/script.py index e43c4e5079..c6d615dcde 100644 --- a/src/tasks/spatially_variable_genes/control_methods/random_ranking/script.py +++ b/src/tasks/spatially_variable_genes/control_methods/random_ranking/script.py @@ -3,8 +3,8 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', - 'input_solution': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/solution.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', + 'input_solution': 'resources_test/spatially_variable_genes/mouse_brain_coronal/solution.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/control_methods/true_ranking/script.py b/src/tasks/spatially_variable_genes/control_methods/true_ranking/script.py index 2504fdc4f2..03617cc977 100644 --- a/src/tasks/spatially_variable_genes/control_methods/true_ranking/script.py +++ b/src/tasks/spatially_variable_genes/control_methods/true_ranking/script.py @@ -2,8 +2,8 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', - 'input_solution': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/solution.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', + 'input_solution': 'resources_test/spatially_variable_genes/mouse_brain_coronal/solution.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/methods/boostgp/script.R b/src/tasks/spatially_variable_genes/methods/boostgp/script.R index 4596bff2e6..5a5b3a81cd 100644 --- a/src/tasks/spatially_variable_genes/methods/boostgp/script.R +++ b/src/tasks/spatially_variable_genes/methods/boostgp/script.R @@ -5,7 +5,7 @@ dest <- getwd() 
# VIASH START par <- list( - "input_data" = "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad", + "input_data" = "resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad", "output" = "output.h5ad", "n_iter" = 10 ) diff --git a/src/tasks/spatially_variable_genes/methods/gpcounts/script.py b/src/tasks/spatially_variable_genes/methods/gpcounts/script.py index 9bcf0497be..07899ccf9f 100644 --- a/src/tasks/spatially_variable_genes/methods/gpcounts/script.py +++ b/src/tasks/spatially_variable_genes/methods/gpcounts/script.py @@ -10,7 +10,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad', 'n_features': 120 } diff --git a/src/tasks/spatially_variable_genes/methods/moran_i/script.py b/src/tasks/spatially_variable_genes/methods/moran_i/script.py index c158348dd5..cfaa43beb4 100644 --- a/src/tasks/spatially_variable_genes/methods/moran_i/script.py +++ b/src/tasks/spatially_variable_genes/methods/moran_i/script.py @@ -6,7 +6,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad', 'coord_type_moran_i': 'generic' diff --git a/src/tasks/spatially_variable_genes/methods/nnsvg/script.R b/src/tasks/spatially_variable_genes/methods/nnsvg/script.R index 44a95571d6..e10e53077f 100644 --- a/src/tasks/spatially_variable_genes/methods/nnsvg/script.R +++ b/src/tasks/spatially_variable_genes/methods/nnsvg/script.R @@ -6,7 +6,7 @@ suppressMessages(library(dplyr)) # VIASH START par = list( - 'input_data' = 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data' = 
'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output' = 'output.h5ad' ) meta = list( diff --git a/src/tasks/spatially_variable_genes/methods/scgco/script.py b/src/tasks/spatially_variable_genes/methods/scgco/script.py index 062a0dede3..ae7d658f01 100644 --- a/src/tasks/spatially_variable_genes/methods/scgco/script.py +++ b/src/tasks/spatially_variable_genes/methods/scgco/script.py @@ -12,7 +12,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/methods/sepal/script.py b/src/tasks/spatially_variable_genes/methods/sepal/script.py index b2672adaed..e005108b25 100644 --- a/src/tasks/spatially_variable_genes/methods/sepal/script.py +++ b/src/tasks/spatially_variable_genes/methods/sepal/script.py @@ -3,7 +3,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad', 'coord_type_sepal': 'grid', 'max_neighs_sepal': 6, diff --git a/src/tasks/spatially_variable_genes/methods/somde/script.py b/src/tasks/spatially_variable_genes/methods/somde/script.py index 4dc3b84c95..04040a0820 100644 --- a/src/tasks/spatially_variable_genes/methods/somde/script.py +++ b/src/tasks/spatially_variable_genes/methods/somde/script.py @@ -7,7 +7,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/methods/spagcn/script.py b/src/tasks/spatially_variable_genes/methods/spagcn/script.py index 
e60e08db61..1654eceaaf 100644 --- a/src/tasks/spatially_variable_genes/methods/spagcn/script.py +++ b/src/tasks/spatially_variable_genes/methods/spagcn/script.py @@ -8,7 +8,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/methods/spagft/script.py b/src/tasks/spatially_variable_genes/methods/spagft/script.py index 9968e5aad0..b12486cfd6 100644 --- a/src/tasks/spatially_variable_genes/methods/spagft/script.py +++ b/src/tasks/spatially_variable_genes/methods/spagft/script.py @@ -3,7 +3,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/methods/spanve/script.py b/src/tasks/spatially_variable_genes/methods/spanve/script.py index ea2c7a98e3..f2a0102930 100644 --- a/src/tasks/spatially_variable_genes/methods/spanve/script.py +++ b/src/tasks/spatially_variable_genes/methods/spanve/script.py @@ -3,7 +3,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/methods/spark/script.R b/src/tasks/spatially_variable_genes/methods/spark/script.R index 2de1f38bbb..50d48f2543 100644 --- a/src/tasks/spatially_variable_genes/methods/spark/script.R +++ b/src/tasks/spatially_variable_genes/methods/spark/script.R @@ -3,7 +3,7 @@ suppressMessages(library(anndata)) # VIASH START par <- list( - "input_data" = 
"resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad", + "input_data" = "resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad", "output" = "output.h5ad" ) meta <- list( diff --git a/src/tasks/spatially_variable_genes/methods/spark_x/script.R b/src/tasks/spatially_variable_genes/methods/spark_x/script.R index c5f9d8a96b..23ac5c31ad 100644 --- a/src/tasks/spatially_variable_genes/methods/spark_x/script.R +++ b/src/tasks/spatially_variable_genes/methods/spark_x/script.R @@ -3,7 +3,7 @@ suppressMessages(library(anndata)) # VIASH START par <- list( - "input_data" = "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad", + "input_data" = "resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad", "output" = "output.h5ad" ) meta <- list( diff --git a/src/tasks/spatially_variable_genes/methods/spatialde/script.py b/src/tasks/spatially_variable_genes/methods/spatialde/script.py index f5e0a9b21d..1e23ef3f79 100644 --- a/src/tasks/spatially_variable_genes/methods/spatialde/script.py +++ b/src/tasks/spatially_variable_genes/methods/spatialde/script.py @@ -8,7 +8,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/methods/spatialde2/script.py b/src/tasks/spatially_variable_genes/methods/spatialde2/script.py index fe82d40981..48dcf592a1 100644 --- a/src/tasks/spatially_variable_genes/methods/spatialde2/script.py +++ b/src/tasks/spatially_variable_genes/methods/spatialde2/script.py @@ -8,7 +8,7 @@ # VIASH START par = { - 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad', + 'input_data': 'resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad', 'output': 'output.h5ad' } meta = 
{ diff --git a/src/tasks/spatially_variable_genes/metrics/correlation/script.py b/src/tasks/spatially_variable_genes/metrics/correlation/script.py index f61ea17193..310a030f26 100644 --- a/src/tasks/spatially_variable_genes/metrics/correlation/script.py +++ b/src/tasks/spatially_variable_genes/metrics/correlation/script.py @@ -3,8 +3,8 @@ ## VIASH START par = { - 'input_method': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/output.h5ad', - 'input_solution': 'resources_test/spatially_variable_genes/mouse_brain_coronal_section1/solution.h5ad', + 'input_method': 'resources_test/spatially_variable_genes/mouse_brain_coronal/output.h5ad', + 'input_solution': 'resources_test/spatially_variable_genes/mouse_brain_coronal/solution.h5ad', 'output': 'score.h5ad' } meta = { diff --git a/src/tasks/spatially_variable_genes/process_dataset/select_reference/config.vsh.yaml b/src/tasks/spatially_variable_genes/process_dataset/select_reference/config.vsh.yaml index 229f039a62..d1ec607aa4 100644 --- a/src/tasks/spatially_variable_genes/process_dataset/select_reference/config.vsh.yaml +++ b/src/tasks/spatially_variable_genes/process_dataset/select_reference/config.vsh.yaml @@ -35,8 +35,8 @@ functionality: - type: python_script path: script.py test_resources: - - path: /resources_test/common/mouse_brain_coronal_section1 - dest: resources_test/common/mouse_brain_coronal_section1 + - path: /resources_test/common/mouse_brain_coronal + dest: resources_test/common/mouse_brain_coronal - type: python_script path: /src/common/comp_tests/run_and_check_adata.py platforms: diff --git a/src/tasks/spatially_variable_genes/process_dataset/select_reference/script.py b/src/tasks/spatially_variable_genes/process_dataset/select_reference/script.py index 481735c6fa..6c4e9bfd4f 100644 --- a/src/tasks/spatially_variable_genes/process_dataset/select_reference/script.py +++ b/src/tasks/spatially_variable_genes/process_dataset/select_reference/script.py @@ -3,7 +3,7 @@ ### VIASH START 
par = { - "input": "resources_test/common/mouse_brain_coronal_section1/dataset.h5ad", + "input": "resources_test/common/mouse_brain_coronal/dataset.h5ad", "input_layer": "normalized", "output": "reference_dataset.h5ad", "num_features": 50, diff --git a/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/config.vsh.yaml b/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/config.vsh.yaml index 825958d337..5c637c7088 100644 --- a/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/config.vsh.yaml +++ b/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/config.vsh.yaml @@ -33,8 +33,8 @@ functionality: test_resources: - type: python_script path: /src/common/comp_tests/run_and_check_adata.py - - path: /resources_test/common/mouse_brain_coronal_section1 - dest: resources_test/common/mouse_brain_coronal_section1 + - path: /resources_test/common/mouse_brain_coronal + dest: resources_test/common/mouse_brain_coronal platforms: - type: docker image: ghcr.io/openproblems-bio/base_r:1.0.4 diff --git a/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/script.R b/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/script.R index 43ea0476d8..a23afd8d4b 100644 --- a/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/script.R +++ b/src/tasks/spatially_variable_genes/process_dataset/simulate_svg/script.R @@ -9,7 +9,7 @@ set.seed(2024) ## VIASH START par <- list( - input = "resources_test/common/mouse_brain_coronal_section1/dataset.h5ad", + input = "resources_test/common/mouse_brain_coronal/dataset.h5ad", output = "dataset_sim.h5ad", gp_k = 50L, select_top_variable_genes = 50L diff --git a/src/tasks/spatially_variable_genes/process_dataset/split_dataset/config.vsh.yaml b/src/tasks/spatially_variable_genes/process_dataset/split_dataset/config.vsh.yaml index d99688d759..002b9bb3d2 100644 --- a/src/tasks/spatially_variable_genes/process_dataset/split_dataset/config.vsh.yaml +++ 
b/src/tasks/spatially_variable_genes/process_dataset/split_dataset/config.vsh.yaml @@ -28,8 +28,8 @@ functionality: test_resources: - type: python_script path: /src/common/comp_tests/run_and_check_adata.py - - path: /resources_test/spatially_variable_genes/mouse_brain_coronal_section1 - dest: resources_test/spatially_variable_genes/mouse_brain_coronal_section1 + - path: /resources_test/spatially_variable_genes/mouse_brain_coronal + dest: resources_test/spatially_variable_genes/mouse_brain_coronal platforms: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 diff --git a/src/tasks/spatially_variable_genes/process_dataset/split_dataset/script.py b/src/tasks/spatially_variable_genes/process_dataset/split_dataset/script.py index 97bf014fa5..ac2c0fea6d 100644 --- a/src/tasks/spatially_variable_genes/process_dataset/split_dataset/script.py +++ b/src/tasks/spatially_variable_genes/process_dataset/split_dataset/script.py @@ -3,7 +3,7 @@ ## VIASH START par = { - "input": "resources_test/spatially_variable_genes/mouse_brain_coronal_section1/simulated_dataset.h5ad", + "input": "resources_test/spatially_variable_genes/mouse_brain_coronal/simulated_dataset.h5ad", "output_dataset": "dataset.h5ad", "output_solution": "solution.h5ad", } diff --git a/src/tasks/spatially_variable_genes/resources_scripts/process_datasets.sh b/src/tasks/spatially_variable_genes/resources_scripts/process_datasets.sh index 74b18f465c..fce6c1d8b9 100755 --- a/src/tasks/spatially_variable_genes/resources_scripts/process_datasets.sh +++ b/src/tasks/spatially_variable_genes/resources_scripts/process_datasets.sh @@ -3,35 +3,39 @@ cat > /tmp/params.yaml << 'HERE' param_list: - id: svg_process_datasets_visium - input_states: "s3://openproblems-data/resources/datasets/spatial_10x_visium/**/state.yaml" + input_states: "s3://openproblems-data/resources/datasets/tenx_visium/visium/**/state.yaml" + settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", 
"dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, "select_top_variable_genes_sim": 50, "num_reference_genes": 200, "coord_type_proc": "grid"}' + + - id: svg_process_datasets_zenodo_visium + input_states: "s3://openproblems-data/resources/datasets/zenodo_spatial/visium/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, "select_top_variable_genes_sim": 50, "num_reference_genes": 200, "coord_type_proc": "grid"}' - - id: svg_process_datasets_xenium - input_states: "s3://openproblems-data/resources/datasets/spatial_10x_xenium/**/state.yaml" + - id: svg_process_datasets_post_xenium + input_states: "s3://openproblems-data/resources/datasets/tenx_visium/post_xenium/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, "select_top_variable_genes_sim": 50, "num_reference_genes": 100, "coord_type_proc": "grid"}' - id: svg_process_datasets_slidetags - input_states: "s3://openproblems-data/resources/datasets/spatial_slide_tags/**/state.yaml" + input_states: "s3://openproblems-data/resources/datasets/zenodo_spatial_slidetags/slidetags/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, "select_top_variable_genes_sim": 50, "num_reference_genes": 50, "coord_type_proc": "grid"}' - id: svg_process_datasets_slideseqv2 - input_states: "s3://openproblems-data/resources/datasets/spatial_slideseq_v2/**/state.yaml" + input_states: "s3://openproblems-data/resources/datasets/zenodo_spatial/slideseqv2/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", 
"gp_k_sim": 500, "select_top_variable_genes_sim": 10, "num_reference_genes": 10, "coord_type_proc": "generic"}' - id: svg_process_datasets_dbitseq - input_states: "s3://openproblems-data/resources/datasets/spatial_dbit_seq/**/state.yaml" + input_states: "s3://openproblems-data/resources/datasets/zenodo_spatial/dbitseq/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, "select_top_variable_genes_sim": 50, "num_reference_genes": 200, "coord_type_proc": "generic"}' - id: svg_process_datasets_seqfish - input_states: "s3://openproblems-data/resources/datasets/spatial_seqfish/**/state.yaml" + input_states: "s3://openproblems-data/resources/datasets/zenodo_spatial/seqfish/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, "select_top_variable_genes_sim": 25, "num_reference_genes": 25, "coord_type_proc": "generic"}' - id: svg_process_datasets_starmap - input_states: "s3://openproblems-data/resources/datasets/spatial_star_map/**/state.yaml" + input_states: "s3://openproblems-data/resources/datasets/zenodo_spatial/starmap/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, "select_top_variable_genes_sim": 25, "num_reference_genes": 25, "coord_type_proc": "generic"}' - id: svg_process_datasets_stereoseq - input_states: "s3://openproblems-data/resources/datasets/spatial_stereo_seq/**/state.yaml" + input_states: "s3://openproblems-data/resources/datasets/zenodo_spatial/stereoseq/**/state.yaml" settings: '{"output_dataset": "$id/dataset.h5ad", "output_solution": "$id/solution.h5ad", "dataset_simulated_normalized": "$id/simulated_dataset.h5ad", "gp_k_sim": 500, 
"select_top_variable_genes_sim": 50, "num_reference_genes": 50, "coord_type_proc": "generic"}' rename_keys: 'input:output_dataset' @@ -41,36 +45,36 @@ HERE # cat > /tmp/params.yaml << 'HERE' # param_list: -# - id: spatial_merfish/human_cortex_1 -# input: "s3://openproblems-data/resources/datasets/spatial_merfish/human_cortex_1/dataset.h5ad" +# - id: zenodo_spatial/merfish/human_cortex_1 +# input: "s3://openproblems-data/resources/datasets/zenodo_spatial/merfish/human_cortex_1/dataset.h5ad" # gp_k_sim: 500 # select_top_variable_genes_sim: 25 # num_reference_genes: 25 # coord_type_proc: generic -# - id: spatial_merfish/human_cortex_2 -# input: "s3://openproblems-data/resources/datasets/spatial_merfish/human_cortex_2/dataset.h5ad" +# - id: zenodo_spatial/merfish/human_cortex_2 +# input: "s3://openproblems-data/resources/datasets/zenodo_spatial/merfish/human_cortex_2/dataset.h5ad" # gp_k_sim: 500 # select_top_variable_genes_sim: 50 # num_reference_genes: 50 # coord_type_proc: generic -# - id: spatial_merfish/human_cortex_3 -# input: "s3://openproblems-data/resources/datasets/spatial_merfish/human_cortex_3/dataset.h5ad" +# - id: zenodo_spatial/merfish/human_cortex_3 +# input: "s3://openproblems-data/resources/datasets/zenodo_spatial/merfish/human_cortex_3/dataset.h5ad" # gp_k_sim: 500 # select_top_variable_genes_sim: 50 # num_reference_genes: 50 # coord_type_proc: generic -# - id: spatial_merfish/human_cortex_4 -# input: "s3://openproblems-data/resources/datasets/spatial_merfish/human_cortex_4/dataset.h5ad" +# - id: zenodo_spatial/merfish/human_cortex_4 +# input: "s3://openproblems-data/resources/datasets/zenodo_spatial/merfish/human_cortex_4/dataset.h5ad" # gp_k_sim: 500 # select_top_variable_genes_sim: 50 # num_reference_genes: 50 # coord_type_proc: generic -# - id: spatial_merfish/mouse_cortex -# input: "s3://openproblems-data/resources/datasets/spatial_merfish/mouse_cortex/dataset.h5ad" +# - id: zenodo_spatial/merfish/mouse_cortex +# input: 
"s3://openproblems-data/resources/datasets/zenodo_spatial/merfish/mouse_cortex/dataset.h5ad" # gp_k_sim: 500 # select_top_variable_genes_sim: 25 # num_reference_genes: 25 diff --git a/src/tasks/spatially_variable_genes/resources_scripts/run_benchmark.sh b/src/tasks/spatially_variable_genes/resources_scripts/run_benchmark.sh index 8620bbafe8..d511a9b92e 100755 --- a/src/tasks/spatially_variable_genes/resources_scripts/run_benchmark.sh +++ b/src/tasks/spatially_variable_genes/resources_scripts/run_benchmark.sh @@ -3,51 +3,49 @@ RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" publish_dir="s3://openproblems-data/resources/spatially_variable_genes/results/${RUN_ID}" -# cat > /tmp/params.yaml << HERE -# input_states: s3://openproblems-data/resources/spatially_variable_genes/datasets/**/state.yaml -# rename_keys: 'input_dataset:output_dataset,input_solution:output_solution' -# output_state: "state.yaml" -# publish_dir: "$publish_dir" -# HERE - cat > /tmp/params.yaml << HERE param_list: - id: svg_datasets_visium - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_10x_visium/**/state.yaml" + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/tenx_visium/visium/**/state.yaml" + settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 6}' + + - id: svg_datasets_zenodo_visium + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial/visium/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 6}' - - id: svg_datasets_xenium - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_10x_xenium/**/state.yaml" + - id: svg_datasets_post_xenium + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/tenx_visium/post_xenium/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 
6}' - id: svg_datasets_dbitseq - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_dbit_seq/**/state.yaml" + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial/dbitseq/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 4}' - id: svg_datasets_merfish - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_merfish/**/state.yaml" + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial/merfish/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 6}' - id: svg_datasets_seqfish - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_seqfish/**/state.yaml" + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial/seqfish/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 6}' - id: svg_datasets_slidetags - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_slide_tags/**/state.yaml" + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial_slidetags/slidetags/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 6}' - id: svg_datasets_slideseqv2 - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_slideseq_v2/**/state.yaml" + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial/slideseqv2/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 6}' - id: svg_datasets_starmap - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_star_map/**/state.yaml" + input_states: 
"s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial/starmap/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 6}' - id: svg_datasets_stereoseq - input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/spatial_stereo_seq/**/state.yaml" + input_states: "s3://openproblems-data/resources/spatially_variable_genes/datasets/zenodo_spatial/stereoseq/**/state.yaml" settings: '{"coord_type_moran_i": "generic", "coord_type_sepal": "grid", "max_neighs_sepal": 4}' + rename_keys: 'input_dataset:output_dataset,input_solution:output_solution' output_state: "state.yaml" publish_dir: "$publish_dir" diff --git a/src/tasks/spatially_variable_genes/resources_test_scripts/mouse_brain_coronal_section1.sh b/src/tasks/spatially_variable_genes/resources_test_scripts/mouse_brain_coronal_section1.sh index 2110d29f1b..e169420223 100755 --- a/src/tasks/spatially_variable_genes/resources_test_scripts/mouse_brain_coronal_section1.sh +++ b/src/tasks/spatially_variable_genes/resources_test_scripts/mouse_brain_coronal_section1.sh @@ -19,12 +19,12 @@ nextflow run . \ -main-script target/nextflow/spatially_variable_genes/workflows/process_datasets/main.nf \ -profile docker \ -c src/wf_utils/labels_ci.config \ - --id mouse_brain_coronal_section1 \ - --input $RAW_DATA/mouse_brain_coronal_section1/dataset.h5ad \ + --id mouse_brain_coronal \ + --input $RAW_DATA/mouse_brain_coronal/dataset.h5ad \ --output_dataset dataset.h5ad \ --output_solution solution.h5ad \ --dataset_simulated_normalized simulated_dataset.h5ad \ - --publish_dir $DATASET_DIR/mouse_brain_coronal_section1 \ + --publish_dir $DATASET_DIR/mouse_brain_coronal \ --output_state "state.yaml" \ --gp_k_sim 50 \ --select_top_variable_genes 50 \ @@ -32,12 +32,12 @@ nextflow run . 
\ echo "Running control method" viash run src/tasks/spatially_variable_genes/control_methods/true_ranking/config.vsh.yaml -- \ - --input_data $DATASET_DIR/mouse_brain_coronal_section1/dataset.h5ad \ - --input_solution $DATASET_DIR/mouse_brain_coronal_section1/solution.h5ad \ - --output $DATASET_DIR/mouse_brain_coronal_section1/output.h5ad + --input_data $DATASET_DIR/mouse_brain_coronal/dataset.h5ad \ + --input_solution $DATASET_DIR/mouse_brain_coronal/solution.h5ad \ + --output $DATASET_DIR/mouse_brain_coronal/output.h5ad echo "Running metric" viash run src/tasks/spatially_variable_genes/metrics/correlation/config.vsh.yaml -- \ - --input_method $DATASET_DIR/mouse_brain_coronal_section1/output.h5ad \ - --input_solution $DATASET_DIR/mouse_brain_coronal_section1/solution.h5ad \ - --output $DATASET_DIR/mouse_brain_coronal_section1/score.h5ad + --input_method $DATASET_DIR/mouse_brain_coronal/output.h5ad \ + --input_solution $DATASET_DIR/mouse_brain_coronal/solution.h5ad \ + --output $DATASET_DIR/mouse_brain_coronal/score.h5ad diff --git a/src/tasks/spatially_variable_genes/workflows/process_datasets/run_test.sh b/src/tasks/spatially_variable_genes/workflows/process_datasets/run_test.sh index b5df48aa92..ff410ea55d 100644 --- a/src/tasks/spatially_variable_genes/workflows/process_datasets/run_test.sh +++ b/src/tasks/spatially_variable_genes/workflows/process_datasets/run_test.sh @@ -20,12 +20,12 @@ nextflow run . 
\ -main-script target/nextflow/spatially_variable_genes/workflows/process_datasets/main.nf \ -profile docker \ -c src/wf_utils/labels_ci.config \ - --id mouse_brain_coronal_section1 \ - --input $RAW_DATA/mouse_brain_coronal_section1/dataset.h5ad \ + --id mouse_brain_coronal \ + --input $RAW_DATA/mouse_brain_coronal/dataset.h5ad \ --output_dataset dataset.h5ad \ --output_solution solution.h5ad \ --dataset_simulated_normalized simulated_dataset.h5ad \ - --publish_dir $DATASET_DIR/mouse_brain_coronal_section1 \ + --publish_dir $DATASET_DIR/mouse_brain_coronal \ --output_state "state.yaml" \ --gp_k_sim 50 \ --select_top_variable_genes 50 \ From e01dfcbb96d9156f412c24d25da7d67c70c4902f Mon Sep 17 00:00:00 2001 From: Nirmayi Date: Wed, 2 Oct 2024 10:42:06 +0200 Subject: [PATCH 2/3] update spatial resource scripts --- src/datasets/resource_scripts/tenx_visium.sh | 255 +++++++++--------- ...zenodo_spatial.sh.sh => zenodo_spatial.sh} | 208 +++++++------- src/tasks/spatially_variable_genes/README.md | 10 +- 3 files changed, 230 insertions(+), 243 deletions(-) rename src/datasets/resource_scripts/{zenodo_spatial.sh.sh => zenodo_spatial.sh} (66%) diff --git a/src/datasets/resource_scripts/tenx_visium.sh b/src/datasets/resource_scripts/tenx_visium.sh index 79e5a53cc8..3e2fb68a61 100755 --- a/src/datasets/resource_scripts/tenx_visium.sh +++ b/src/datasets/resource_scripts/tenx_visium.sh @@ -125,136 +125,136 @@ # output_state: '$id/state.yaml' # output_raw: force_null # output_normalized: force_null -# publish_dir: resources/datasets +# publish_dir: s3://openproblems-data/resources/datasets # HERE -cat > "/tmp/params.yaml" << 'HERE' -param_list: - - id: tenx_visium/visium/human_cerebellum - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_Cerebellum/Parent_Visium_Human_Cerebellum_filtered_feature_bc_matrix.h5" - input_spatial: 
"https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_Cerebellum/Parent_Visium_Human_Cerebellum_spatial.tar.gz" - dataset_name: 10X Visium - Adult Human Cerebellum - dataset_url: "https://www.10xgenomics.com/datasets/human-cerebellum-whole-transcriptome-analysis-1-standard-1-2-0" - dataset_summary: Human Cerebellum Whole Transcriptome Analysis - dataset_description: "10X Genomics obtained fresh frozen human cerebellum tissue from BioIVT Asterand. The tissue was embedded and cryosectioned as described in Visium Spatial Protocols Tissue Preparation Guide (Demonstrated Protocol CG000240). Tissue sections of 10µm were placed on Visium Gene Expression slides and fixed and stained following Methanol Fixation, H&E Staining & Imaging for Visium Spatial Protocols (CG000160)." - dataset_reference: 10x2020cerebellum - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# cat > "/tmp/params.yaml" << 'HERE' +# param_list: +# - id: tenx_visium/visium/human_cerebellum +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_Cerebellum/Parent_Visium_Human_Cerebellum_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.2.0/Parent_Visium_Human_Cerebellum/Parent_Visium_Human_Cerebellum_spatial.tar.gz" +# dataset_name: 10X Visium - Adult Human Cerebellum +# dataset_url: "https://www.10xgenomics.com/datasets/human-cerebellum-whole-transcriptome-analysis-1-standard-1-2-0" +# dataset_summary: Human Cerebellum Whole Transcriptome Analysis +# dataset_description: "10X Genomics obtained fresh frozen human cerebellum tissue from BioIVT Asterand. The tissue was embedded and cryosectioned as described in Visium Spatial Protocols Tissue Preparation Guide (Demonstrated Protocol CG000240). 
Tissue sections of 10µm were placed on Visium Gene Expression slides and fixed and stained following Methanol Fixation, H&E Staining & Imaging for Visium Spatial Protocols (CG000160)." +# dataset_reference: 10x2020cerebellum +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true - - id: tenx_visium/visium/mouse_kidney_v1 - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Kidney/V1_Mouse_Kidney_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Kidney/V1_Mouse_Kidney_spatial.tar.gz" - dataset_name: 10X Visium - Mouse Kidney 1 - dataset_url: "https://www.10xgenomics.com/datasets/mouse-kidney-section-coronal-1-standard-1-1-0" - dataset_summary: Mouse Kidney Whole Transcriptome Analysis - dataset_description: "10x Genomics obtained fresh frozen mouse kidney tissue from BioIVT Asterand. The tissue was embedded and cryosectioned as described in Visium Spatial Protocols - Tissue Preparation Guide Demonstrated Protocol (CG000240). Tissue sections of 10 µm thickness from a slice of the coronal plane were placed on Visium Gene Expression slides, then stained following the Methanol Fixation, H&E Staining & Imaging Demonstrated Protocol (CG000160)." 
- dataset_reference: 10x2020kidney - dataset_organism: Mus musculus - spot_filter_min_genes: 100 - gene_filter_min_spots: 30 - remove_mitochondrial: false +# - id: tenx_visium/visium/mouse_kidney_v1 +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Kidney/V1_Mouse_Kidney_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.1.0/V1_Mouse_Kidney/V1_Mouse_Kidney_spatial.tar.gz" +# dataset_name: 10X Visium - Mouse Kidney 1 +# dataset_url: "https://www.10xgenomics.com/datasets/mouse-kidney-section-coronal-1-standard-1-1-0" +# dataset_summary: Mouse Kidney Whole Transcriptome Analysis +# dataset_description: "10x Genomics obtained fresh frozen mouse kidney tissue from BioIVT Asterand. The tissue was embedded and cryosectioned as described in Visium Spatial Protocols - Tissue Preparation Guide Demonstrated Protocol (CG000240). Tissue sections of 10 µm thickness from a slice of the coronal plane were placed on Visium Gene Expression slides, then stained following the Methanol Fixation, H&E Staining & Imaging Demonstrated Protocol (CG000160)." 
+# dataset_reference: 10x2020kidney +# dataset_organism: Mus musculus +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 30 +# remove_mitochondrial: false - - id: tenx_visium/visium/human_lung_cancer - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Lung_Cancer/CytAssist_11mm_FFPE_Human_Lung_Cancer_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Lung_Cancer/CytAssist_11mm_FFPE_Human_Lung_Cancer_spatial.tar.gz" - dataset_name: 10X Visium - Human Lung Cancer - dataset_url: "https://www.10xgenomics.com/datasets/human-lung-cancer-11-mm-capture-area-ffpe-2-standard" - dataset_summary: Gene expression library of Human Lung Cancer (CytAssist FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "10x Genomics obtained FFPE human lung cancer tissue from Avaden Biosciences. The tissue was sectioned as described in the Visium CytAssist Spatial Gene Expression for FFPE Tissue Preparation Guide (CG000518). Tissue section of 5 µm was placed on a standard glass slide, then stained following the Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000520). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression Slide v2, with 11 mm capture areas following the Visium CytAssist Spatial Gene Expression Reagent Kits User Guide (CG000495)." 
- dataset_reference: 10x2023lung - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# - id: tenx_visium/visium/human_lung_cancer +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Lung_Cancer/CytAssist_11mm_FFPE_Human_Lung_Cancer_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Lung_Cancer/CytAssist_11mm_FFPE_Human_Lung_Cancer_spatial.tar.gz" +# dataset_name: 10X Visium - Human Lung Cancer +# dataset_url: "https://www.10xgenomics.com/datasets/human-lung-cancer-11-mm-capture-area-ffpe-2-standard" +# dataset_summary: Gene expression library of Human Lung Cancer (CytAssist FFPE) using the Human Whole Transcriptome Probe Set +# dataset_description: "10x Genomics obtained FFPE human lung cancer tissue from Avaden Biosciences. The tissue was sectioned as described in the Visium CytAssist Spatial Gene Expression for FFPE Tissue Preparation Guide (CG000518). Tissue section of 5 µm was placed on a standard glass slide, then stained following the Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000520). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression Slide v2, with 11 mm capture areas following the Visium CytAssist Spatial Gene Expression Reagent Kits User Guide (CG000495)." 
+# dataset_reference: 10x2023lung +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true - - id: tenx_visium/visium/human_brain_cancer - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Glioblastoma/CytAssist_11mm_FFPE_Human_Glioblastoma_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Glioblastoma/CytAssist_11mm_FFPE_Human_Glioblastoma_spatial.tar.gz" - dataset_name: 10X Visium - Human Brain Cancer - dataset_url: "https://www.10xgenomics.com/datasets/human-brain-cancer-11-mm-capture-area-ffpe-2-standard" - dataset_summary: Gene expression library of Human Glioblastoma (CytAssist FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "10x Genomics obtained FFPE human brain cancer tissue from Avaden Biosciences. The tissue was sectioned as described in the Visium CytAssist Spatial Gene Expression for FFPE - Tissue Preparation Guide (CG000518). Tissue section of 5 µm was placed on a standard glass slide, then stained following the Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000520). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression Slide v2, with 11 mm capture areas following the Visium CytAssist Spatial Gene Expression Reagent Kits User Guide (CG000495)." 
- dataset_reference: 10x2023brain - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 100 - remove_mitochondrial: true +# - id: tenx_visium/visium/human_brain_cancer +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Glioblastoma/CytAssist_11mm_FFPE_Human_Glioblastoma_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Glioblastoma/CytAssist_11mm_FFPE_Human_Glioblastoma_spatial.tar.gz" +# dataset_name: 10X Visium - Human Brain Cancer +# dataset_url: "https://www.10xgenomics.com/datasets/human-brain-cancer-11-mm-capture-area-ffpe-2-standard" +# dataset_summary: Gene expression library of Human Glioblastoma (CytAssist FFPE) using the Human Whole Transcriptome Probe Set +# dataset_description: "10x Genomics obtained FFPE human brain cancer tissue from Avaden Biosciences. The tissue was sectioned as described in the Visium CytAssist Spatial Gene Expression for FFPE - Tissue Preparation Guide (CG000518). Tissue section of 5 µm was placed on a standard glass slide, then stained following the Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000520). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression Slide v2, with 11 mm capture areas following the Visium CytAssist Spatial Gene Expression Reagent Kits User Guide (CG000495)." 
+# dataset_reference: 10x2023brain +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 100 +# remove_mitochondrial: true - - id: tenx_visium/visium/human_kidney - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Kidney/CytAssist_11mm_FFPE_Human_Kidney_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Kidney/CytAssist_11mm_FFPE_Human_Kidney_spatial.tar.gz" - dataset_name: 10X Visium - Human Kidney - dataset_url: "https://www.10xgenomics.com/datasets/human-kidney-11-mm-capture-area-ffpe-2-standard" - dataset_summary: Gene expression library of Human Kidney (CytAssist FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "10x Genomics obtained FFPE human kidney tissue from Avaden Biosciences. The tissue was sectioned as described in the Visium CytAssist Spatial Gene Expression for FFPE – Tissue Preparation Guide (CG000518). Tissue section of 5 µm was placed on a standard glass slide, then stained following the Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000520). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression Slide v2, with 11 mm capture areas following the Visium CytAssist Spatial Gene Expression Reagent Kits User Guide (CG000495)." 
- dataset_reference: 10x2023kidney - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# - id: tenx_visium/visium/human_kidney +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Kidney/CytAssist_11mm_FFPE_Human_Kidney_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Human_Kidney/CytAssist_11mm_FFPE_Human_Kidney_spatial.tar.gz" +# dataset_name: 10X Visium - Human Kidney +# dataset_url: "https://www.10xgenomics.com/datasets/human-kidney-11-mm-capture-area-ffpe-2-standard" +# dataset_summary: Gene expression library of Human Kidney (CytAssist FFPE) using the Human Whole Transcriptome Probe Set +# dataset_description: "10x Genomics obtained FFPE human kidney tissue from Avaden Biosciences. The tissue was sectioned as described in the Visium CytAssist Spatial Gene Expression for FFPE – Tissue Preparation Guide (CG000518). Tissue section of 5 µm was placed on a standard glass slide, then stained following the Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000520). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression Slide v2, with 11 mm capture areas following the Visium CytAssist Spatial Gene Expression Reagent Kits User Guide (CG000495)." 
+# dataset_reference: 10x2023kidney +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true - - id: tenx_visium/visium/human_intestinal_cancer - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Intestinal_Cancer/Visium_FFPE_Human_Intestinal_Cancer_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Intestinal_Cancer/Visium_FFPE_Human_Intestinal_Cancer_spatial.tar.gz" - dataset_name: 10X Visium - Human Intestine Cancer - dataset_url: "https://www.10xgenomics.com/datasets/human-intestine-cancer-1-standard" - dataset_summary: Gene expression library of Human Intestinal Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "5 µm section from Human Intestinal Cancer. FFPE tissue purchased from BioIVT Asterand Human Tissue Specimens. Libraries were prepared following the Visium Spatial Gene Expression Reagent Kits for FFPE User Guide (CG000407 Rev A)." - dataset_reference: 10x2022intestine - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 30 - remove_mitochondrial: true +# - id: tenx_visium/visium/human_intestinal_cancer +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Intestinal_Cancer/Visium_FFPE_Human_Intestinal_Cancer_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Intestinal_Cancer/Visium_FFPE_Human_Intestinal_Cancer_spatial.tar.gz" +# dataset_name: 10X Visium - Human Intestine Cancer +# dataset_url: "https://www.10xgenomics.com/datasets/human-intestine-cancer-1-standard" +# dataset_summary: Gene expression library of Human Intestinal Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set +# dataset_description: "5 µm section from Human Intestinal Cancer. 
FFPE tissue purchased from BioIVT Asterand Human Tissue Specimens. Libraries were prepared following the Visium Spatial Gene Expression Reagent Kits for FFPE User Guide (CG000407 Rev A)." +# dataset_reference: 10x2022intestine +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 30 +# remove_mitochondrial: true - - id: tenx_visium/visium/human_skin_melanoma - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Human_Skin_Melanoma/CytAssist_FFPE_Human_Skin_Melanoma_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Human_Skin_Melanoma/CytAssist_FFPE_Human_Skin_Melanoma_spatial.tar.gz" - dataset_name: 10X Visium - Human Skin Melanoma - dataset_url: "https://www.10xgenomics.com/datasets/human-melanoma-if-stained-ffpe-2-standard" - dataset_summary: Gene expression library of Human Skin Melanoma (CytAssist FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "10x Genomics obtained FFPE Human Melanoma tissue blocks from Avaden Biosciences. The tissue was sectioned as described in Visium CytAssist Spatial Gene Expression for FFPE Tissue Preparation Guide Demonstrated Protocol (CG000518). Tissue sections of 5 µm was placed on a standard glass slide, deparaffinized followed by immunofluorescence (IF) staining. Sections were coverslipped with 85% glycerol, imaged, decoverslipped, followed by dehydration & decrosslinking Demonstrated Protocol (CG000519). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression slide. The probe extension and library construction steps follow the standard Visium for FFPE workflow outside of the instrument." 
- dataset_reference: 10x2022melanoma - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# - id: tenx_visium/visium/human_skin_melanoma +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Human_Skin_Melanoma/CytAssist_FFPE_Human_Skin_Melanoma_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/2.0.0/CytAssist_FFPE_Human_Skin_Melanoma/CytAssist_FFPE_Human_Skin_Melanoma_spatial.tar.gz" +# dataset_name: 10X Visium - Human Skin Melanoma +# dataset_url: "https://www.10xgenomics.com/datasets/human-melanoma-if-stained-ffpe-2-standard" +# dataset_summary: Gene expression library of Human Skin Melanoma (CytAssist FFPE) using the Human Whole Transcriptome Probe Set +# dataset_description: "10x Genomics obtained FFPE Human Melanoma tissue blocks from Avaden Biosciences. The tissue was sectioned as described in Visium CytAssist Spatial Gene Expression for FFPE Tissue Preparation Guide Demonstrated Protocol (CG000518). Tissue sections of 5 µm was placed on a standard glass slide, deparaffinized followed by immunofluorescence (IF) staining. Sections were coverslipped with 85% glycerol, imaged, decoverslipped, followed by dehydration & decrosslinking Demonstrated Protocol (CG000519). The glass slide with tissue section was processed via Visium CytAssist instrument to transfer analytes to a Visium CytAssist Spatial Gene Expression slide. The probe extension and library construction steps follow the standard Visium for FFPE workflow outside of the instrument." 
+# dataset_reference: 10x2022melanoma +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true - - id: tenx_visium/visium/human_cervical_cancer - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Cervical_Cancer/Visium_FFPE_Human_Cervical_Cancer_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Cervical_Cancer/Visium_FFPE_Human_Cervical_Cancer_spatial.tar.gz" - dataset_name: 10X Visium - Human Cervical Cancer - dataset_url: "https://www.10xgenomics.com/datasets/human-cervical-cancer-1-standard" - dataset_summary: Gene expression library of Human Cervical Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "5 µm section from squamous cell carcinoma of human cervical cancer. FFPE tissue purchased from Discovery Life Sciences." - dataset_reference: 10x2022cervical - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# - id: tenx_visium/visium/human_cervical_cancer +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Cervical_Cancer/Visium_FFPE_Human_Cervical_Cancer_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Cervical_Cancer/Visium_FFPE_Human_Cervical_Cancer_spatial.tar.gz" +# dataset_name: 10X Visium - Human Cervical Cancer +# dataset_url: "https://www.10xgenomics.com/datasets/human-cervical-cancer-1-standard" +# dataset_summary: Gene expression library of Human Cervical Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set +# dataset_description: "5 µm section from squamous cell carcinoma of human cervical cancer. FFPE tissue purchased from Discovery Life Sciences." 
+# dataset_reference: 10x2022cervical +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true - - id: tenx_visium/visium/human_breast_cancer_2 - input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Breast_Cancer/Visium_FFPE_Human_Breast_Cancer_filtered_feature_bc_matrix.h5" - input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Breast_Cancer/Visium_FFPE_Human_Breast_Cancer_spatial.tar.gz" - dataset_name: 10X Visium - Human Breast Cancer 2 - dataset_url: "https://www.10xgenomics.com/datasets/human-breast-cancer-ductal-carcinoma-in-situ-invasive-carcinoma-ffpe-1-standard-1-3-0" - dataset_summary: Gene expression library of Human Breast Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set - dataset_description: "10x Genomics obtained FFPE human breast tissue from BioIVT Asterand Human Tissue Specimens. The tissue was annotated with Ductal Carcinoma In Situ, Invasive Carcinoma. The tissue was sectioned as described in Visium Spatial Gene Expression for FFPE - Tissue Preparation Guide Demonstrated Protocol (CG000408). Tissue sections of 5 µm were placed on Visium Gene Expression slides, then stained following Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000409)." 
- dataset_reference: 10x2021breast - dataset_organism: Homo sapiens - spot_filter_min_genes: 100 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# - id: tenx_visium/visium/human_breast_cancer_2 +# input_expression: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Breast_Cancer/Visium_FFPE_Human_Breast_Cancer_filtered_feature_bc_matrix.h5" +# input_spatial: "https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Breast_Cancer/Visium_FFPE_Human_Breast_Cancer_spatial.tar.gz" +# dataset_name: 10X Visium - Human Breast Cancer 2 +# dataset_url: "https://www.10xgenomics.com/datasets/human-breast-cancer-ductal-carcinoma-in-situ-invasive-carcinoma-ffpe-1-standard-1-3-0" +# dataset_summary: Gene expression library of Human Breast Cancer (Visium FFPE) using the Human Whole Transcriptome Probe Set +# dataset_description: "10x Genomics obtained FFPE human breast tissue from BioIVT Asterand Human Tissue Specimens. The tissue was annotated with Ductal Carcinoma In Situ, Invasive Carcinoma. The tissue was sectioned as described in Visium Spatial Gene Expression for FFPE - Tissue Preparation Guide Demonstrated Protocol (CG000408). Tissue sections of 5 µm were placed on Visium Gene Expression slides, then stained following Deparaffinization, H&E Staining, Imaging & Decrosslinking Demonstrated Protocol (CG000409)." 
+# dataset_reference: 10x2021breast +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 100 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true -normalization_methods: [log_cp10k] -output_dataset: '$id/dataset.h5ad' -output_meta: '$id/dataset_metadata.yaml' -output_state: '$id/state.yaml' -output_raw: force_null -output_normalized: force_null -publish_dir: resources/datasets -HERE +# normalization_methods: [log_cp10k] +# output_dataset: '$id/dataset.h5ad' +# output_meta: '$id/dataset_metadata.yaml' +# output_state: '$id/state.yaml' +# output_raw: force_null +# output_normalized: force_null +# publish_dir: s3://openproblems-data/resources/datasets +# HERE # cat > "/tmp/params.yaml" << 'HERE' # param_list: @@ -290,24 +290,11 @@ HERE # output_state: '$id/state.yaml' # output_raw: force_null # output_normalized: force_null -# publish_dir: resources/datasets +# publish_dir: s3://openproblems-data/resources/datasets # HERE -cat > /tmp/nextflow.config << HERE -process { - executor = 'awsbatch' - withLabel: highmem { - memory = '350GB' - } - withName: '.*publishStatesProc' { - memory = '16GB' - disk = '100GB' - } -} -HERE - tw launch https://github.com/openproblems-bio/openproblems.git \ - --revision integration_build \ + --revision main_build \ --pull-latest \ --main-script target/nextflow/datasets/workflows/process_tenx_visium/main.nf \ --workspace 53907369739130 \ diff --git a/src/datasets/resource_scripts/zenodo_spatial.sh.sh b/src/datasets/resource_scripts/zenodo_spatial.sh similarity index 66% rename from src/datasets/resource_scripts/zenodo_spatial.sh.sh rename to src/datasets/resource_scripts/zenodo_spatial.sh index d51e193435..c1386aeb84 100755 --- a/src/datasets/resource_scripts/zenodo_spatial.sh.sh +++ b/src/datasets/resource_scripts/zenodo_spatial.sh @@ -1,42 +1,42 @@ #!/bin/bash -cat > "/tmp/params.yaml" << 'HERE' -param_list: - - id: zenodo_spatial/visium/human_heart_myocardial_infarction_1 - input_data: 
"https://zenodo.org/records/13328275/files/10X0018.h5ad?download=1" - dataset_name: 10X Visium - Human Heart MI 1 - dataset_url: "https://www.nature.com/articles/s41586-022-05060-x" - dataset_summary: Gene expression library of human heart using 10x Visium. - dataset_description: "Frozen heart samples were embedded in OCT (Tissue-Tek) and cryosectioned (Thermo Cryostar). The 10-µm section was placed on the pre-chilled Optimization slides (Visium, 10X Genomics, PN-1000193) and the optimal lysis time was determined. The tissues were treated as recommended by 10X Genomics and the optimization procedure showed an optimal permeabilization time of 12 or 18 min of digestion and release of RNA from the tissue slide. Spatial gene expression slides (Visium, 10X Genomics, PN-1000187) were used for spatial transcriptomics following the Visium User Guides" - dataset_reference: kuppe2022spatial - dataset_organism: Homo sapiens - spot_filter_min_genes: 200 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# cat > "/tmp/params.yaml" << 'HERE' +# param_list: +# - id: zenodo_spatial/visium/human_heart_myocardial_infarction_1 +# input_data: "https://zenodo.org/records/13328275/files/10X0018.h5ad?download=1" +# dataset_name: 10X Visium - Human Heart MI 1 +# dataset_url: "https://www.nature.com/articles/s41586-022-05060-x" +# dataset_summary: Gene expression library of human heart using 10x Visium. +# dataset_description: "Frozen heart samples were embedded in OCT (Tissue-Tek) and cryosectioned (Thermo Cryostar). The 10-µm section was placed on the pre-chilled Optimization slides (Visium, 10X Genomics, PN-1000193) and the optimal lysis time was determined. The tissues were treated as recommended by 10X Genomics and the optimization procedure showed an optimal permeabilization time of 12 or 18 min of digestion and release of RNA from the tissue slide. 
Spatial gene expression slides (Visium, 10X Genomics, PN-1000187) were used for spatial transcriptomics following the Visium User Guides" +# dataset_reference: kuppe2022spatial +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 200 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true - - id: zenodo_spatial/visium/human_heart_myocardial_infarction_2 - input_data: "https://zenodo.org/records/13328275/files/10X009.h5ad?download=1" - dataset_name: 10X Visium - Human Heart MI 2 - dataset_url: "https://www.nature.com/articles/s41586-022-05060-x" - dataset_summary: Gene expression library of human heart using 10x Visium. - dataset_description: "Frozen heart samples were embedded in OCT (Tissue-Tek) and cryosectioned (Thermo Cryostar). The 10-µm section was placed on the pre-chilled Optimization slides (Visium, 10X Genomics, PN-1000193) and the optimal lysis time was determined. The tissues were treated as recommended by 10X Genomics and the optimization procedure showed an optimal permeabilization time of 12 or 18 min of digestion and release of RNA from the tissue slide. Spatial gene expression slides (Visium, 10X Genomics, PN-1000187) were used for spatial transcriptomics following the Visium User Guides" - dataset_reference: kuppe2022spatial - dataset_organism: Homo sapiens - spot_filter_min_genes: 200 - gene_filter_min_spots: 50 - remove_mitochondrial: true +# - id: zenodo_spatial/visium/human_heart_myocardial_infarction_2 +# input_data: "https://zenodo.org/records/13328275/files/10X009.h5ad?download=1" +# dataset_name: 10X Visium - Human Heart MI 2 +# dataset_url: "https://www.nature.com/articles/s41586-022-05060-x" +# dataset_summary: Gene expression library of human heart using 10x Visium. +# dataset_description: "Frozen heart samples were embedded in OCT (Tissue-Tek) and cryosectioned (Thermo Cryostar). The 10-µm section was placed on the pre-chilled Optimization slides (Visium, 10X Genomics, PN-1000193) and the optimal lysis time was determined. 
The tissues were treated as recommended by 10X Genomics and the optimization procedure showed an optimal permeabilization time of 12 or 18 min of digestion and release of RNA from the tissue slide. Spatial gene expression slides (Visium, 10X Genomics, PN-1000187) were used for spatial transcriptomics following the Visium User Guides" +# dataset_reference: kuppe2022spatial +# dataset_organism: Homo sapiens +# spot_filter_min_genes: 200 +# gene_filter_min_spots: 50 +# remove_mitochondrial: true -normalization_methods: [log_cp10k] -output_dataset: '$id/dataset.h5ad' -output_meta: '$id/dataset_metadata.yaml' -output_state: '$id/state.yaml' -output_raw: force_null -output_normalized: force_null -publish_dir: resources/datasets -remove_mitochondrial: true -HERE +# normalization_methods: [log_cp10k] +# output_dataset: '$id/dataset.h5ad' +# output_meta: '$id/dataset_metadata.yaml' +# output_state: '$id/state.yaml' +# output_raw: force_null +# output_normalized: force_null +# publish_dir: s3://openproblems-data/resources/datasets +# remove_mitochondrial: true +# HERE -# catt > "/tmp/params.yaml" << 'HERE' +# cat > "/tmp/params.yaml" << 'HERE' # param_list: # - id: zenodo_spatial/dbitseq/mouse_e10_brain # input_data: "https://zenodo.org/records/12785822/files/DBiT-seq_liu2020high_E10_brain_gene_25um_data.h5ad?download=1" @@ -116,7 +116,7 @@ HERE # output_state: '$id/state.yaml' # output_raw: force_null # output_normalized: force_null -# publish_dir: resources/datasets +# publish_dir: s3://openproblems-data/resources/datasets # HERE # cat > "/tmp/params.yaml" << 'HERE' @@ -187,7 +187,7 @@ HERE # output_state: '$id/state.yaml' # output_raw: force_null # output_normalized: force_null -# publish_dir: resources/datasets +# publish_dir: s3://openproblems-data/resources/datasets # HERE # cat > "/tmp/params.yaml" << 'HERE' @@ -210,7 +210,7 @@ HERE # output_state: '$id/state.yaml' # output_raw: force_null # output_normalized: force_null -# publish_dir: resources/datasets +# 
publish_dir: s3://openproblems-data/resources/datasets # remove_mitochondrial: true # HERE @@ -282,7 +282,7 @@ HERE # output_state: '$id/state.yaml' # output_raw: force_null # output_normalized: force_null -# publish_dir: resources/datasets +# publish_dir: s3://openproblems-data/resources/datasets # HERE # cat > "/tmp/params.yaml" << 'HERE' @@ -317,79 +317,79 @@ HERE # output_state: '$id/state.yaml' # output_raw: force_null # output_normalized: force_null -# publish_dir: resources/datasets +# publish_dir: s3://openproblems-data/resources/datasets # HERE -# cat > "/tmp/params.yaml" << 'HERE' -# param_list: -# - id: zenodo_spatial/stereoseq/drosophila_embryo_e5_6 -# input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_5.6.h5ad?download=1" -# dataset_name: Stereo-seq - Drosophila embryo E5_6 -# dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" -# dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. -# dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. 
(Data from an embryo collected 14-16 h after egg laying)" -# dataset_organism: Drosophila -# dataset_reference: wang2022high -# spot_filter_min_genes: 10 -# gene_filter_min_spots: 50 -# remove_mitochondrial: true +cat > "/tmp/params.yaml" << 'HERE' +param_list: + - id: zenodo_spatial/stereoseq/drosophila_embryo_e5_6 + input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_5.6.h5ad?download=1" + dataset_name: Stereo-seq - Drosophila embryo E5_6 + dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" + dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. + dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. 
(Data from an embryo collected 14-16 h after egg laying)" + dataset_organism: Drosophila + dataset_reference: wang2022high + spot_filter_min_genes: 10 + gene_filter_min_spots: 50 + remove_mitochondrial: true -# - id: zenodo_spatial/stereoseq/drosophila_embryo_e6_3 -# input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_6.3.h5ad?download=1" -# dataset_name: Stereo-seq - Drosophila embryo E6_3 -# dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" -# dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. -# dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. (Data from an embryo collected 14-16 h after egg laying)" -# dataset_organism: Drosophila -# dataset_reference: wang2022high -# spot_filter_min_genes: 10 -# gene_filter_min_spots: 50 -# remove_mitochondrial: true + - id: zenodo_spatial/stereoseq/drosophila_embryo_e6_3 + input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_6.3.h5ad?download=1" + dataset_name: Stereo-seq - Drosophila embryo E6_3 + dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" + dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. 
+ dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. (Data from an embryo collected 14-16 h after egg laying)" + dataset_organism: Drosophila + dataset_reference: wang2022high + spot_filter_min_genes: 10 + gene_filter_min_spots: 50 + remove_mitochondrial: true -# - id: zenodo_spatial/stereoseq/drosophila_embryo_e7 -# input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_7.h5ad?download=1" -# dataset_name: Stereo-seq - Drosophila embryo E7 -# dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" -# dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. -# dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. 
(Data from an embryo collected 14-16 h after egg laying)" -# dataset_organism: Drosophila -# dataset_reference: wang2022high -# spot_filter_min_genes: 10 -# gene_filter_min_spots: 50 -# remove_mitochondrial: true + - id: zenodo_spatial/stereoseq/drosophila_embryo_e7 + input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_7.h5ad?download=1" + dataset_name: Stereo-seq - Drosophila embryo E7 + dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" + dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. + dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. (Data from an embryo collected 14-16 h after egg laying)" + dataset_organism: Drosophila + dataset_reference: wang2022high + spot_filter_min_genes: 10 + gene_filter_min_spots: 50 + remove_mitochondrial: true -# - id: zenodo_spatial/stereoseq/drosophila_embryo_e9_1 -# input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_9.1.h5ad?download=1" -# dataset_name: Stereo-seq - Drosophila embryo E9_1 -# dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" -# dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. 
-# dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. (Data from an embryo collected 14-16 h after egg laying)" -# dataset_organism: Drosophila -# dataset_reference: wang2022high -# spot_filter_min_genes: 10 -# gene_filter_min_spots: 50 -# remove_mitochondrial: true + - id: zenodo_spatial/stereoseq/drosophila_embryo_e9_1 + input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_9.1.h5ad?download=1" + dataset_name: Stereo-seq - Drosophila embryo E9_1 + dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" + dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. + dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. 
(Data from an embryo collected 14-16 h after egg laying)" + dataset_organism: Drosophila + dataset_reference: wang2022high + spot_filter_min_genes: 10 + gene_filter_min_spots: 50 + remove_mitochondrial: true -# - id: zenodo_spatial/stereoseq/drosophila_embryo_e10 -# input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_10.5.h5ad?download=1" -# dataset_name: Stereo-seq - Drosophila embryo E10 -# dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" -# dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. -# dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. (Data from an embryo collected 14-16 h after egg laying)" -# dataset_organism: Drosophila -# dataset_reference: wang2022high -# spot_filter_min_genes: 10 -# gene_filter_min_spots: 50 -# remove_mitochondrial: true + - id: zenodo_spatial/stereoseq/drosophila_embryo_e10 + input_data: "https://zenodo.org/records/12785822/files/Stereo-seq_wang2022high_E14-16h_a_count_normal_stereoseq_data_whole_time_point_10.5.h5ad?download=1" + dataset_name: Stereo-seq - Drosophila embryo E10 + dataset_url: "https://www.sciencedirect.com/science/article/pii/S1534580722002465" + dataset_summary: Stereo-seq faithfully captures Drosophila spatial transcriptomes with high resolution. 
+ dataset_description: "Drosophila has long been a successful model organism in multiple biomedical fields. Spatial gene expression patterns are critical for the understanding of complex pathways and interactions, whereas temporal gene expression changes are vital for studying highly dynamic physiological activities. Systematic studies in Drosophila are still impeded by the lack of spatiotemporal transcriptomic information. Here, utilizing spatial enhanced resolution omics-sequencing (Stereo-seq), we dissected the spatiotemporal transcriptomic changes of developing Drosophila with high resolution and sensitivity. (Data from an embryo collected 14-16 h after egg laying)" + dataset_organism: Drosophila + dataset_reference: wang2022high + spot_filter_min_genes: 10 + gene_filter_min_spots: 50 + remove_mitochondrial: true -# normalization_methods: [log_cp10k] -# output_dataset: '$id/dataset.h5ad' -# output_meta: '$id/dataset_metadata.yaml' -# output_state: '$id/state.yaml' -# output_raw: force_null -# output_normalized: force_null -# publish_dir: resources/datasets -# HERE +normalization_methods: [log_cp10k] +output_dataset: '$id/dataset.h5ad' +output_meta: '$id/dataset_metadata.yaml' +output_state: '$id/state.yaml' +output_raw: force_null +output_normalized: force_null +publish_dir: s3://openproblems-data/resources/datasets +HERE cat > /tmp/nextflow.config << HERE process { diff --git a/src/tasks/spatially_variable_genes/README.md b/src/tasks/spatially_variable_genes/README.md index 5e9f43407d..d4fd1b4ea3 100644 --- a/src/tasks/spatially_variable_genes/README.md +++ b/src/tasks/spatially_variable_genes/README.md @@ -83,7 +83,7 @@ flowchart LR A subset of the common dataset. Example file: -`resources_test/common/mouse_brain_coronal_section1/dataset.h5ad` +`resources_test/common/mouse_brain_coronal/dataset.h5ad` Format: @@ -142,7 +142,7 @@ Arguments: The dataset without spatially variable genes. 
Example file: -`resources_test/spatially_variable_genes/mouse_brain_coronal_section1/dataset.h5ad` +`resources_test/spatially_variable_genes/mouse_brain_coronal/dataset.h5ad` Format: @@ -177,7 +177,7 @@ Slot description: Anndata with true spatial variability. Example file: -`resources_test/spatially_variable_genes/mouse_brain_coronal_section1/solution.h5ad` +`resources_test/spatially_variable_genes/mouse_brain_coronal/solution.h5ad` Description: @@ -274,7 +274,7 @@ Arguments: Anndata with estimate spatial variability. Example file: -`resources_test/spatially_variable_genes/mouse_brain_coronal_section1/output.h5ad` +`resources_test/spatially_variable_genes/mouse_brain_coronal/output.h5ad` Description: @@ -309,7 +309,7 @@ Slot description: Metric score file. Example file: -`resources_test/spatially_variable_genes/mouse_brain_coronal_section1/score.h5ad` +`resources_test/spatially_variable_genes/mouse_brain_coronal/score.h5ad` Format: From 9a412dc5799e846bd7eb811fdd60602725c5cc47 Mon Sep 17 00:00:00 2001 From: Nirmayi Date: Wed, 2 Oct 2024 12:16:22 +0200 Subject: [PATCH 3/3] add changelog entry --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10f0c38690..e54ad71d10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Minor changes - Add the CELLxGENE immune cell atlas dataset as a common test resource (PR #907) +- Update `dataset_id` for the `tenx_visium`, `zenodo_spatial`, and `zenodo_spatial_slidetags` datasets and use `mouse_brain_coronal` as a test resource in the `spatially_variable_genes` task (PR #908) # openproblems v2.0.0 @@ -25,7 +26,7 @@ Most relevant parts of the overall structure: - `openproblems_neurips2022_pbmc`: Fetch a dataset from the OpenProblems NeurIPS2022 competition - `openproblems_v1`: Fetch a legacy OpenProblems v1 dataset - `openproblems_v1_multimodal`: Fetch a legacy OpenProblems v1 multimodal dataset - - `tenx_vision`: Fetch a and convert 10x Visium dataset + - `tenx_visium`: Fetch and convert a 10x Visium dataset - `zenodo_spatial`: Fetch and 
process an Anndata file containing DBiT seq, MERFISH, seqFISH, Slide-seq v2, STARmap, and Stereo-seq data from Zenodo. - `zenodo_spatial_slidetags`: Download a compressed file containing gene expression matrix and spatial locations from zenodo.