Add spatially variable genes task (#462)

* add spatially variable genes task * add file_output.yaml * add control method * add metrics * update file format * fix control method * add task info and generate readme * update example path * fix slot name * add preferred normalization * update input data path * add author info * fix path * update api * update path in script * add dataset processor and simple workflows * update path * Update readme * update path * Update api * Minor updates to scripts * Drop feature_name column and group by index feature_name * Update arguments * Fix workflow * Update test scripts * Update task readme * add moranI for spatially variable gene detection task * add SpatialDE * Add 10x visium dataset loader * add comment * add wip simulate svg * extend wf * update output options * add Sepal * Add example * Add initial 10x visium workflow * Add resource scripts for 10x visium * Remove gene name from method output * wip upgrade script * Add subsampling and processing * make script executable * set n_obs to 600 * add SPARK and SPARK-X * update api * update workflow and components * add nnSVG * update resource test script * rename resource test script * update resource script * rename .var columns * fix nnSVG * fix typo * add Spanve * add SOMDE * add SpaGFT * add SpaGCN * add scGCO * add scGCO * add BOOST-GP * fix BOOST-GP * add spatialDE2 * add GPcounts * update library.bib * add spot or gene filters * remove subsetting * update test resources * fix dataset id * update svg simulator * add feature id to .var in dataset * optionally output simulated dataset * sort svg method references * add info * fix .var slot * add doi * fix .var slot name * fix metric * fix positive control * fix methods * add feature id as index * add methods * compute svg for reference * update common dataset loader * minor fixes * select top variable genes as reference for simulation * update test value * update dataset processor to select reference genes * Add info * use k 50 for test resource generation * 
update test value * fix output file destination * update dataset description * add spatial_10x_xenium.sh * fix import error in GPcounts * update GPcounts to all genes * update spatialDE2 config * Using normalized data in Sepal * update SpaGFT to use normalized data * rename component * add filters * fix command * rename components to match functionality name * minor fix * update benchmark workflow * minor fix * update component name * add slideseq v2 datasets * update dataset url * minor updates * add metadata * add DBiT-seq datasets * update DBiT-seq and slideseq v2 datasets * add MERFISH datasets * add data loader for DBiT-seq * add loaders for MERFISH datasets * add slide-tags datasets * add stereo-seq datasets * add STARmap datasets * add seqFISH dataset * add visium and xenium dataset references * add dataset reference * minor fix * update docker setup * minor updates * update test value * add dataset reference to .uns slot * update docker setup * add dataset reference * add dataset references * create single component for all datasets from zenodo * update dataset loader and processor for slide tags datasets * add feature name to .var * use feature name as var names when feature id not available * use feature name instead of feature id * add resource scripts * add feature name to .var * revert back to using feature id * add datasets * update nextflow directives * update .var slots * update input arguments * fix typo * change to hightime * update readme * add negative control * update test default * fix column name * add time label veryhightime * add negative control * revert back to label hightime * update time labels * add veryhightime label to test * minor updates * check if matrix is sparse before converting to dense matrix * minor updates * minor update * Add task description * update metric description * get author info * add doi to the preprint and update list of authors * add protocol to dataset name * add two 10x visium datasets * update datasets * 
update simulate script * update simulation script * rename gene id to feature id * update api * update dataset id * include boostgp * minor fix * update api and task readme * update dataset resource * update scripts for data resource * update normalization function to allow None as a parameter * update resource scripts * update parameters for each dataset * update parameter for sepal * add arguments * fix random seed for simulation * add coord_type and max_neighs arguments * update arguments * fix workflow * update intro * update links * update task info * update dataset ids and dataset loader scripts * rename dataset loader --------- Co-authored-by: lzj1769 <[email protected]> Co-authored-by: Robrecht Cannoodt <[email protected]> Former-commit-id: f02ba12
openproblems-bio · Sep 4, 2024 · d04a3fe · d04a3fe
1 parent adb40c7
commit d04a3fe
Show file tree

Hide file tree

Showing 88 changed files with 6,461 additions and 14 deletions.
diff --git a/_viash.yaml b/_viash.yaml
@@ -10,5 +10,5 @@ config_mods: |
   .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/openproblems-v2'
   .platforms[.type == "nextflow"].directives.tag := "$id"
   .platforms[.type == "nextflow"].auto.simplifyOutput := false
-  .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h" }
+  .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }
   .platforms[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'"
diff --git a/src/common/comp_tests/check_method_config.py b/src/common/comp_tests/check_method_config.py
@@ -15,7 +15,7 @@
 
 _MISSING_DOIS = ["vandermaaten2008visualizing", "hosmer2013applied"]
 
-TIME_LABELS = ["lowtime", "midtime", "hightime"]
+TIME_LABELS = ["lowtime", "midtime", "hightime", "veryhightime"]
 MEM_LABELS = ["lowmem", "midmem", "highmem"]
 CPU_LABELS = ["lowcpu", "midcpu", "highcpu"]
 

diff --git a/src/common/comp_tests/run_and_check_adata.py b/src/common/comp_tests/run_and_check_adata.py
@@ -77,6 +77,7 @@ def run_and_check(arguments, cmd):
 
 for arg in config["functionality"]["arguments"]:
     new_arg = arg.copy()
+    arg_info = new_arg.get("info") or {}
 
     # set clean name
     clean_name = re.sub("^--", "", arg["name"])
@@ -89,7 +90,9 @@ def run_and_check(arguments, cmd):
       else:
           value = f"{clean_name}.h5ad"
       new_arg["value"] = value
-
+    elif "test_default" in arg_info:
+        new_arg["value"] = arg_info["test_default"]
+
     arguments.append(new_arg)
 
 
@@ -115,7 +118,10 @@ def run_and_check(arguments, cmd):
     # construct command
     cmd = [ meta["executable"] ]
     for arg in argset_args:
-        if arg["type"] == "file":
-            cmd.extend([arg["name"], arg["value"]])
+        if "value" in arg:
+            value = arg["value"]
+            if arg["multiple"] and isinstance(value, list):
+                value = arg["multiple_sep"].join(value)
+            cmd.extend([arg["name"], str(value)])
 
     run_and_check(argset_args, cmd)