From a1735607e72b6db47fad21ea0cd1e81c96cf1235 Mon Sep 17 00:00:00 2001 From: mjosse Date: Fri, 13 Sep 2024 09:51:06 +0200 Subject: [PATCH 01/17] Add tools section in ro-crate --- lib/galaxy/model/store/ro_crate_utils.py | 46 ++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index f3592ac04ab9..77493354df16 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -222,6 +222,46 @@ def _add_workflows(self, crate: ROCrate): crate.mainEntity["name"] = self.workflow.name crate.mainEntity["subjectOf"] = cwl_wf + # Add tools used in the workflow + self._add_tools(crate) + + def _add_tools(self, crate: ROCrate): + tool_entities = [] + + # Iterate over each step in the workflow + for step in self.workflow.steps: + # Check if the step corresponds to a tool + if step.type == "tool": + tool_id = step.tool_id + tool_version = step.tool_version + tool_name = step.label or tool_id # use label if available, fallback to tool_id + + # Description can be part of the step inputs or other properties + tool_description = step.tool_inputs.get("description", "") if step.tool_inputs else "" + + # Add tool entity to the RO-Crate + tool_entity = crate.add( + ContextEntity( + crate, + tool_id, + properties={ + "@type": "SoftwareApplication", + "name": tool_name, + "version": tool_version, + "description": tool_description, + "url": f"https://toolshed.g2.bx.psu.edu/view/{tool_id}", # URL if relevant + }, + ) + ) + tool_entities.append(tool_entity) + + # Link tool entity with the workflow + crate.mainEntity.append_to("instrument", tool_entity) + + return tool_entities + + + def _add_create_action(self, crate: ROCrate): self.create_action = crate.add( ContextEntity( @@ -235,6 +275,12 @@ def _add_create_action(self, crate: ROCrate): }, ) ) + + # Append tools to the create action + tools = self._add_tools(crate) + for tool in tools: + self.create_action.append_to("instrument", tool) + crate.root_dataset.append_to("mentions", self.create_action) def _add_engine_run(self, crate: ROCrate): From 48cf14a7470e91b4f5e0a70db6936e5fff699344 Mon Sep 17 00:00:00 2001 From: mjosse Date: Fri, 13 Sep 2024 11:55:56 +0200 Subject: [PATCH 02/17] rm tool description and url --- lib/galaxy/model/store/ro_crate_utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index 77493354df16..bedbbdf1bf5b 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -214,13 +214,19 @@ def _add_workflows(self, crate: ROCrate): cls=workflow_cls, lang=lang, ) + self.workflow_entities[wf.id] = wf if lang == "cwl": cwl_wf = wf + # Add license if available crate.license = self.workflow.license or "" + + # Add main entity information crate.mainEntity["name"] = self.workflow.name - crate.mainEntity["subjectOf"] = cwl_wf + + # Add CWL workflow entity if exists + crate.mainEntity["subjectOf"] = cwl_wf if cwl_wf else "" # Add tools used in the workflow self._add_tools(crate) @@ -235,9 +241,6 @@ def _add_tools(self, crate: ROCrate): tool_id = step.tool_id tool_version = step.tool_version tool_name = step.label or tool_id # use label if available, fallback to tool_id - - # Description can be part of the step inputs or other properties - tool_description = step.tool_inputs.get("description", "") if step.tool_inputs else "" # Add tool entity to the RO-Crate tool_entity = crate.add( @@ -248,8 +251,7 @@ def _add_tools(self, crate: ROCrate): "@type": "SoftwareApplication", "name": tool_name, "version": tool_version, - "description": tool_description, - "url": f"https://toolshed.g2.bx.psu.edu/view/{tool_id}", # URL if relevant + "url": "https://toolshed.g2.bx.psu.edu", # URL if relevant }, ) ) From 16e28f02a26243430f327723591091c638b64822 Mon Sep 17 00:00:00 2001 From: mjosse Date: Mon, 16 Sep 2024 14:57:12 +0200 Subject: [PATCH 03/17] add tool description and author --- lib/galaxy/model/store/ro_crate_utils.py | 54 ++++++++++++++++++------ 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index bedbbdf1bf5b..d352a371764a 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -1,5 +1,6 @@ import logging import os +import json from typing import ( Any, Dict, @@ -214,23 +215,40 @@ def _add_workflows(self, crate: ROCrate): cls=workflow_cls, lang=lang, ) - self.workflow_entities[wf.id] = wf if lang == "cwl": cwl_wf = wf - # Add license if available crate.license = self.workflow.license or "" - - # Add main entity information crate.mainEntity["name"] = self.workflow.name + # Adding the creator information + if hasattr(self.workflow, 'creator_metadata') and self.workflow.creator_metadata: + creators = self.workflow.creator_metadata + if creators and isinstance(creators, list) and len(creators) > 0: + first_creator = creators[0] + creator_entity = crate.add( + ContextEntity( + crate, + first_creator.get('identifier', ''), # Default to empty string if identifier is missing + properties={ + "@type": "Person", + "name": first_creator.get('name', ''), # Default to empty string if name is missing + "orcid": first_creator.get('identifier', ''), # Assuming identifier as orcid, or adjust accordingly + }, + ) + ) + crate.mainEntity.append_to("creator", creator_entity) + # Add CWL workflow entity if exists crate.mainEntity["subjectOf"] = cwl_wf if cwl_wf else "" + workflow_dict = vars(self.workflow) # or self.workflow.__dict__ if vars() does not work + print(f"lol {workflow_dict}") # Add tools used in the workflow self._add_tools(crate) + def _add_tools(self, crate: ROCrate): tool_entities = [] @@ -242,6 +260,25 @@ def _add_tools(self, crate: ROCrate): tool_version = step.tool_version tool_name = step.label or tool_id # use label if available, fallback to tool_id + step_dict = vars(step) # or self.workflow.__dict__ if vars() does not work + print(f"TOOOOOOOOL {step_dict}") + + # Initialize tool description for each tool + tool_description = "" + + # Check if the tool step has annotations + if hasattr(step, 'annotations') and step.annotations: + # Assuming each annotation object has an 'annotation' attribute + annotations_list = [] + for annotation_obj in step.annotations: + annotation_text = getattr(annotation_obj, 'annotation', None) + if annotation_text: # Check if annotation_text is not None + annotations_list.append(annotation_text) + + # Join annotations into a single string or handle them individually, depending on your requirement + tool_description = " ".join(annotations_list) if annotations_list else "" + + # Add tool entity to the RO-Crate tool_entity = crate.add( ContextEntity( @@ -251,6 +288,7 @@ def _add_tools(self, crate: ROCrate): "@type": "SoftwareApplication", "name": tool_name, "version": tool_version, + "description": tool_description, "url": "https://toolshed.g2.bx.psu.edu", # URL if relevant }, ) @@ -262,8 +300,6 @@ def _add_tools(self, crate: ROCrate): return tool_entities - - def _add_create_action(self, crate: ROCrate): self.create_action = crate.add( ContextEntity( @@ -277,12 +313,6 @@ def _add_create_action(self, crate: ROCrate): }, ) ) - - # Append tools to the create action - tools = self._add_tools(crate) - for tool in tools: - self.create_action.append_to("instrument", tool) - crate.root_dataset.append_to("mentions", self.create_action) def _add_engine_run(self, crate: ROCrate): From 92360ec9f954872a547d488e8aa4712ec5ab820b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Mon, 16 Sep 2024 19:35:34 +0200 Subject: [PATCH 04/17] rm some useless print --- lib/galaxy/model/store/ro_crate_utils.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index d352a371764a..3e61a0263c08 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -1,6 +1,5 @@ import logging import os -import json from typing import ( Any, Dict, @@ -243,8 +242,6 @@ def _add_workflows(self, crate: ROCrate): # Add CWL workflow entity if exists crate.mainEntity["subjectOf"] = cwl_wf if cwl_wf else "" - workflow_dict = vars(self.workflow) # or self.workflow.__dict__ if vars() does not work - print(f"lol {workflow_dict}") # Add tools used in the workflow self._add_tools(crate) @@ -260,9 +257,6 @@ def _add_tools(self, crate: ROCrate): tool_version = step.tool_version tool_name = step.label or tool_id # use label if available, fallback to tool_id - step_dict = vars(step) # or self.workflow.__dict__ if vars() does not work - print(f"TOOOOOOOOL {step_dict}") - # Initialize tool description for each tool tool_description = "" From d1dc41d7374f4e2db9e377ebe3691c2542a50dac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Tue, 17 Sep 2024 09:31:12 +0200 Subject: [PATCH 05/17] fix lint --- lib/galaxy/model/store/ro_crate_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index 3e61a0263c08..574d2b6d4ea2 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -238,14 +238,13 @@ def _add_workflows(self, crate: ROCrate): ) ) crate.mainEntity.append_to("creator", creator_entity) - + # Add CWL workflow entity if exists crate.mainEntity["subjectOf"] = cwl_wf if cwl_wf else "" # Add tools used in the workflow self._add_tools(crate) - def _add_tools(self, crate: ROCrate): tool_entities = [] @@ -272,7 +271,6 @@ def _add_tools(self, crate: ROCrate): # Join annotations into a single string or handle them individually, depending on your requirement tool_description = " ".join(annotations_list) if annotations_list else "" - # Add tool entity to the RO-Crate tool_entity = crate.add( ContextEntity( From e50b2355256d0bef3117a26a6260dcd39a985198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 18 Sep 2024 09:14:34 +0200 Subject: [PATCH 06/17] Update lib/galaxy/model/store/ro_crate_utils.py Co-authored-by: Marius van den Beek --- lib/galaxy/model/store/ro_crate_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index 574d2b6d4ea2..2f375b2641a1 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -222,7 +222,7 @@ def _add_workflows(self, crate: ROCrate): crate.mainEntity["name"] = self.workflow.name # Adding the creator information - if hasattr(self.workflow, 'creator_metadata') and self.workflow.creator_metadata: + if self.workflow.creator_metadata: creators = self.workflow.creator_metadata if creators and isinstance(creators, list) and len(creators) > 0: first_creator = creators[0] From 9502cc035baa79167ec8bd2765cfff062127aad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 18 Sep 2024 09:14:49 +0200 Subject: [PATCH 07/17] Update lib/galaxy/model/store/ro_crate_utils.py Co-authored-by: Marius van den Beek --- lib/galaxy/model/store/ro_crate_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index 2f375b2641a1..a5ce6ec7065c 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -260,7 +260,7 @@ def _add_tools(self, crate: ROCrate): tool_description = "" # Check if the tool step has annotations - if hasattr(step, 'annotations') and step.annotations: + if step.annotations: # Assuming each annotation object has an 'annotation' attribute annotations_list = [] for annotation_obj in step.annotations: From 9d8336eeab319a901b5e21207e57c7057f12613d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 18 Sep 2024 09:15:01 +0200 Subject: [PATCH 08/17] Update lib/galaxy/model/store/ro_crate_utils.py Co-authored-by: Marius van den Beek --- lib/galaxy/model/store/ro_crate_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index a5ce6ec7065c..58ee201a9c42 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -257,7 +257,7 @@ def _add_tools(self, crate: ROCrate): tool_name = step.label or tool_id # use label if available, fallback to tool_id # Initialize tool description for each tool - tool_description = "" + tool_description: Optional[str] = None # Check if the tool step has annotations if step.annotations: From 6587ab402f03220267477218601ce8829e9c4bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 18 Sep 2024 09:15:40 +0200 Subject: [PATCH 09/17] Update lib/galaxy/model/store/ro_crate_utils.py Co-authored-by: Marius van den Beek --- lib/galaxy/model/store/ro_crate_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index 58ee201a9c42..cb9e543b4356 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -263,10 +263,10 @@ def _add_tools(self, crate: ROCrate): if step.annotations: # Assuming each annotation object has an 'annotation' attribute annotations_list = [] - for annotation_obj in step.annotations: - annotation_text = getattr(annotation_obj, 'annotation', None) - if annotation_text: # Check if annotation_text is not None - annotations_list.append(annotation_text) + for annotation_association in step.annotations: + annotation = annotation_association.annotation + if annotation: # Check if annotation_text is not None + annotations_list.append(annotation) # Join annotations into a single string or handle them individually, depending on your requirement tool_description = " ".join(annotations_list) if annotations_list else "" From 4fb42c294ae45a2bb732ee660493641d6cac0195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 18 Sep 2024 09:15:51 +0200 Subject: [PATCH 10/17] Update lib/galaxy/model/store/ro_crate_utils.py Co-authored-by: Marius van den Beek --- lib/galaxy/model/store/ro_crate_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index cb9e543b4356..616dae629a2b 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -269,7 +269,7 @@ def _add_tools(self, crate: ROCrate): annotations_list.append(annotation) # Join annotations into a single string or handle them individually, depending on your requirement - tool_description = " ".join(annotations_list) if annotations_list else "" + tool_description = " ".join(annotations_list) if annotations_list else None # Add tool entity to the RO-Crate tool_entity = crate.add( From eaca23a3e18a8fb365227bac651ef2c330e440bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 18 Sep 2024 09:23:47 +0200 Subject: [PATCH 11/17] keep tool id not label --- lib/galaxy/model/store/ro_crate_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index 616dae629a2b..fa37a18b21a7 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -254,7 +254,7 @@ def _add_tools(self, crate: ROCrate): if step.type == "tool": tool_id = step.tool_id tool_version = step.tool_version - tool_name = step.label or tool_id # use label if available, fallback to tool_id + tool_name = tool_id # label can de irrelevant or descritption better keep the tool id # Initialize tool description for each tool tool_description: Optional[str] = None From c86b4661a7f0f245c9cd87f01267784b9b600be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 18 Sep 2024 10:55:07 +0200 Subject: [PATCH 12/17] add multiple creator and organizations --- lib/galaxy/model/store/ro_crate_utils.py | 54 ++++++++++++++++-------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index fa37a18b21a7..bae8b7f25e6e 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -220,27 +220,47 @@ def _add_workflows(self, crate: ROCrate): crate.license = self.workflow.license or "" crate.mainEntity["name"] = self.workflow.name - - # Adding the creator information + + # Adding multiple creators if available if self.workflow.creator_metadata: - creators = self.workflow.creator_metadata - if creators and isinstance(creators, list) and len(creators) > 0: - first_creator = creators[0] - creator_entity = crate.add( - ContextEntity( - crate, - first_creator.get('identifier', ''), # Default to empty string if identifier is missing - properties={ - "@type": "Person", - "name": first_creator.get('name', ''), # Default to empty string if name is missing - "orcid": first_creator.get('identifier', ''), # Assuming identifier as orcid, or adjust accordingly - }, + for creator_data in self.workflow.creator_metadata: + if creator_data.get('class') == 'Person': + # Create the person entity + creator_entity = crate.add( + ContextEntity( + crate, + creator_data.get('identifier', ''), # Default to empty string if identifier is missing + properties={ + "@type": "Person", + "name": creator_data.get('name', ''), # Default to empty string if name is missing + "orcid": creator_data.get('identifier', ''), # Assuming identifier is ORCID, or adjust as needed + "url": creator_data.get('url', ''), # Add URL if available, otherwise empty string + "email": creator_data.get('email', ''), # Add email if available, otherwise empty string + }, + ) ) - ) - crate.mainEntity.append_to("creator", creator_entity) + # Append the person creator entity to the mainEntity + crate.mainEntity.append_to("creator", creator_entity) + + elif creator_data.get('class') == 'Organization': + # Create the organization entity + organization_entity = crate.add( + ContextEntity( + crate, + creator_data.get('url', ''), # Use URL as identifier if available, otherwise empty string + properties={ + "@type": "Organization", + "name": creator_data.get('name', ''), # Default to empty string if name is missing + "url": creator_data.get('url', ''), # Add URL if available, otherwise empty string + }, + ) + ) + # Append the organization entity to the mainEntity + crate.mainEntity.append_to("creator", organization_entity) + # Add CWL workflow entity if exists - crate.mainEntity["subjectOf"] = cwl_wf if cwl_wf else "" + crate.mainEntity["subjectOf"] = cwl_wf # Add tools used in the workflow self._add_tools(crate) From dee290f8891b1053293a7c782ca14bcea47ca04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:30:28 +0200 Subject: [PATCH 13/17] add tools info for subworkflows --- lib/galaxy/model/store/ro_crate_utils.py | 26 +++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index bae8b7f25e6e..b655a18c5f08 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -220,7 +220,7 @@ def _add_workflows(self, crate: ROCrate): crate.license = self.workflow.license or "" crate.mainEntity["name"] = self.workflow.name - + # Adding multiple creators if available if self.workflow.creator_metadata: for creator_data in self.workflow.creator_metadata: @@ -268,13 +268,22 @@ def _add_workflows(self, crate: ROCrate): def _add_tools(self, crate: ROCrate): tool_entities = [] - # Iterate over each step in the workflow - for step in self.workflow.steps: + # Call a recursive method to add tools for the main workflow and subworkflows + self._add_tools_recursive(self.workflow.steps, crate, tool_entities) + + return tool_entities + + def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): + """ + Recursively add tools from workflow steps and handle subworkflows. + """ + # Iterate over each step in the given workflow steps + for step in steps: # Check if the step corresponds to a tool if step.type == "tool": tool_id = step.tool_id tool_version = step.tool_version - tool_name = tool_id # label can de irrelevant or descritption better keep the tool id + tool_name = tool_id # label can de irrelevant or description better keep the tool id # Initialize tool description for each tool tool_description: Optional[str] = None @@ -310,7 +319,14 @@ def _add_tools(self, crate: ROCrate): # Link tool entity with the workflow crate.mainEntity.append_to("instrument", tool_entity) - return tool_entities + # Handle subworkflows + elif step.type == "subworkflow": + subworkflow = step.subworkflow + if subworkflow: + # Recursively add tools for the subworkflow steps + self._add_tools_recursive(subworkflow.steps, crate, tool_entities) + + def _add_create_action(self, crate: ROCrate): self.create_action = crate.add( From 00c805f5dfda4a14286e46606947bc871f0703b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Mon, 23 Sep 2024 14:40:21 +0200 Subject: [PATCH 14/17] Add a step function (avoid Softwarapp repetition) --- lib/galaxy/model/store/ro_crate_utils.py | 137 ++++++++++++++++------- 1 file changed, 94 insertions(+), 43 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index b655a18c5f08..aea7299d0285 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -85,6 +85,8 @@ def __init__(self, model_store: Any): self.file_entities: Dict[int, Any] = {} self.param_entities: Dict[int, Any] = {} self.pv_entities: Dict[str, Any] = {} + # Cache for tools to avoid duplicating entities for the same tool + self.tool_cache: Dict[str, ContextEntity] = {} def build_crate(self): crate = ROCrate() @@ -264,71 +266,120 @@ def _add_workflows(self, crate: ROCrate): # Add tools used in the workflow self._add_tools(crate) + self._add_steps(crate) + + def _add_steps(self, crate: ROCrate): + """ + Add workflow steps (HowToStep) to the RO-Crate. These are unique for each tool occurrence. + """ + step_entities = [] + # Initialize the position as a list with a single element to keep it mutable + position = [1] + self._add_steps_recursive(self.workflow.steps, crate, step_entities, position) + return step_entities + + def _add_steps_recursive(self, steps, crate: ROCrate, step_entities, position): + """ + Recursively add HowToStep entities from workflow steps, ensuring that + the position index is maintained across subworkflows. + """ + for step in steps: + if step.type == "tool": + # Create a unique HowToStep entity for each step + step_id = f"step_{position[0]}" + step_description = None + if step.annotations: + annotations_list = [annotation.annotation for annotation in step.annotations if annotation] + step_description = " ".join(annotations_list) if annotations_list else None + + # Add HowToStep entity to the crate + step_entity = crate.add( + ContextEntity( + crate, + step_id, + properties={ + "@type": "HowToStep", + "position": position[0], + "name": step.tool_id, + "description": step_description, + }, + ) + ) + + # Append the HowToStep entity to the workflow steps list + step_entities.append(step_entity) + crate.mainEntity.append_to("step", step_entity) + + # Increment the position counter + position[0] += 1 + + # Handle subworkflows recursively + elif step.type == "subworkflow": + subworkflow = step.subworkflow + if subworkflow: + self._add_steps_recursive(subworkflow.steps, crate, step_entities, position) def _add_tools(self, crate: ROCrate): tool_entities = [] - - # Call a recursive method to add tools for the main workflow and subworkflows self._add_tools_recursive(self.workflow.steps, crate, tool_entities) - return tool_entities def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): """ - Recursively add tools from workflow steps and handle subworkflows. + Recursively add SoftwareApplication entities from workflow steps, reusing tools when necessary. """ - # Iterate over each step in the given workflow steps for step in steps: - # Check if the step corresponds to a tool if step.type == "tool": tool_id = step.tool_id tool_version = step.tool_version - tool_name = tool_id # label can de irrelevant or description better keep the tool id - # Initialize tool description for each tool - tool_description: Optional[str] = None - - # Check if the tool step has annotations - if step.annotations: - # Assuming each annotation object has an 'annotation' attribute - annotations_list = [] - for annotation_association in step.annotations: - annotation = annotation_association.annotation - if annotation: # Check if annotation_text is not None - annotations_list.append(annotation) - - # Join annotations into a single string or handle them individually, depending on your requirement - tool_description = " ".join(annotations_list) if annotations_list else None - - # Add tool entity to the RO-Crate - tool_entity = crate.add( - ContextEntity( - crate, - tool_id, - properties={ - "@type": "SoftwareApplication", - "name": tool_name, - "version": tool_version, - "description": tool_description, - "url": "https://toolshed.g2.bx.psu.edu", # URL if relevant - }, + # Cache key based on tool ID and version + tool_key = f"{tool_id}:{tool_version}" + + # Check if tool entity is already in cache + if tool_key in self.tool_cache: + tool_entity = self.tool_cache[tool_key] + else: + # Create a new tool entity + tool_name = tool_id + tool_description = None + if step.annotations: + annotations_list = [annotation.annotation for annotation in step.annotations if annotation] + tool_description = " ".join(annotations_list) if annotations_list else None + + # Add tool entity to the RO-Crate + tool_entity = crate.add( + ContextEntity( + crate, + tool_id, + properties={ + "@type": "SoftwareApplication", + "name": tool_name, + "version": tool_version, + "description": tool_description, + "url": "https://toolshed.g2.bx.psu.edu", # URL if relevant + }, + ) ) - ) - tool_entities.append(tool_entity) - # Link tool entity with the workflow + # Store the tool entity in the cache + self.tool_cache[tool_key] = tool_entity + + # Append the tool entity to the workflow (instrument) and store it in the list + tool_entities.append(tool_entity) crate.mainEntity.append_to("instrument", tool_entity) - # Handle subworkflows + # Handle subworkflows recursively elif step.type == "subworkflow": subworkflow = step.subworkflow if subworkflow: - # Recursively add tools for the subworkflow steps self._add_tools_recursive(subworkflow.steps, crate, tool_entities) - - def _add_create_action(self, crate: ROCrate): + """ + Adds the CreateAction indicating the workflow invocation. + """ + # CreateAction for the entire workflow run self.create_action = crate.add( ContextEntity( crate, @@ -337,8 +388,8 @@ def _add_create_action(self, crate: ROCrate): "name": self.workflow.name, "startTime": self.invocation.workflow.create_time.isoformat(), "endTime": self.invocation.workflow.update_time.isoformat(), - "instrument": {"@id": crate.mainEntity["@id"]}, - }, + "instrument": {"@id": crate.mainEntity["@id"]} + } ) ) crate.root_dataset.append_to("mentions", self.create_action) From 761c45647b8449dc8f42ac08e281e7d660d6dfbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:07:03 +0200 Subject: [PATCH 15/17] add edem, xrefs, citations --- lib/galaxy/model/store/ro_crate_utils.py | 37 +++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index aea7299d0285..c13b5136f82b 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -43,8 +43,10 @@ class WorkflowRunCrateProfileBuilder: + from galaxy.tools import ToolBox def __init__(self, model_store: Any): self.model_store = model_store + self.toolbox: ToolBox self.invocation: WorkflowInvocation = model_store.included_invocations[0] self.workflow: Workflow = self.invocation.workflow self.param_type_mapping = { @@ -322,7 +324,6 @@ def _add_steps_recursive(self, steps, crate: ROCrate, step_entities, position): def _add_tools(self, crate: ROCrate): tool_entities = [] self._add_tools_recursive(self.workflow.steps, crate, tool_entities) - return tool_entities def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): """ @@ -347,6 +348,9 @@ def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): annotations_list = [annotation.annotation for annotation in step.annotations if annotation] tool_description = " ".join(annotations_list) if annotations_list else None + # Retrieve the tool metadata from the toolbox + tool_metadata = self._get_tool_metadata(tool_id) + # Add tool entity to the RO-Crate tool_entity = crate.add( ContextEntity( @@ -358,6 +362,9 @@ def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): "version": tool_version, "description": tool_description, "url": "https://toolshed.g2.bx.psu.edu", # URL if relevant + "citation": tool_metadata['citation'], + "identifier": tool_metadata['xref'], + "EDAM operation": tool_metadata['edam_operation'], }, ) ) @@ -375,6 +382,34 @@ def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): if subworkflow: self._add_tools_recursive(subworkflow.steps, crate, tool_entities) + def _get_tool_metadata(self, tool_id: str): + + """ + Retrieve the tool metadata (citations, xrefs, EDAM operations) using the ToolBox. + + Args: + toolbox (ToolBox): An instance of the Galaxy ToolBox. + tool_id (str): The ID of the tool to retrieve metadata for. + + Returns: + dict: A dictionary containing citations, xrefs, and EDAM operations for the tool. + """ + tool = toolbox.get_tool(tool_id) + if not tool: + return None + + # Extracting relevant metadata from the tool object + citation = tool.citation if tool.citation else None + xref = tool.xref if tool.xref else None + edam_operation = tool.edam_operation if tool.edam_operation else None + + return { + "citation": citation, + "xref": xref, + "edam_operation": edam_operation, + } + + def _add_create_action(self, crate: ROCrate): """ Adds the CreateAction indicating the workflow invocation. From 8ab241b0c6156ce16e9aac809edbeb714f5bc638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 16 Oct 2024 11:08:28 +0200 Subject: [PATCH 16/17] fix citations, edam, xrefs adding --- lib/galaxy/model/store/ro_crate_utils.py | 81 ++++++++++++++++-------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/lib/galaxy/model/store/ro_crate_utils.py b/lib/galaxy/model/store/ro_crate_utils.py index c13b5136f82b..730beffd287e 100644 --- a/lib/galaxy/model/store/ro_crate_utils.py +++ b/lib/galaxy/model/store/ro_crate_utils.py @@ -43,10 +43,10 @@ class WorkflowRunCrateProfileBuilder: - from galaxy.tools import ToolBox + # from galaxy.tools import ToolBox def __init__(self, model_store: Any): self.model_store = model_store - self.toolbox: ToolBox + self.toolbox = self.model_store.app.toolbox self.invocation: WorkflowInvocation = model_store.included_invocations[0] self.workflow: Workflow = self.invocation.workflow self.param_type_mapping = { @@ -228,41 +228,46 @@ def _add_workflows(self, crate: ROCrate): # Adding multiple creators if available if self.workflow.creator_metadata: for creator_data in self.workflow.creator_metadata: - if creator_data.get('class') == 'Person': + if creator_data.get("class") == "Person": # Create the person entity creator_entity = crate.add( ContextEntity( crate, - creator_data.get('identifier', ''), # Default to empty string if identifier is missing + creator_data.get("identifier", ""), # Default to empty string if identifier is missing properties={ "@type": "Person", - "name": creator_data.get('name', ''), # Default to empty string if name is missing - "orcid": creator_data.get('identifier', ''), # Assuming identifier is ORCID, or adjust as needed - "url": creator_data.get('url', ''), # Add URL if available, otherwise empty string - "email": creator_data.get('email', ''), # Add email if available, otherwise empty string + "name": creator_data.get("name", ""), # Default to empty string if name is missing + "orcid": creator_data.get( + "identifier", "" + ), # Assuming identifier is ORCID, or adjust as needed + "url": creator_data.get("url", ""), # Add URL if available, otherwise empty string + "email": creator_data.get( + "email", "" + ), # Add email if available, otherwise empty string }, ) ) # Append the person creator entity to the mainEntity crate.mainEntity.append_to("creator", creator_entity) - elif creator_data.get('class') == 'Organization': + elif creator_data.get("class") == "Organization": # Create the organization entity organization_entity = crate.add( ContextEntity( crate, - creator_data.get('url', ''), # Use URL as identifier if available, otherwise empty string + creator_data.get( + "url", "" + ), # Use URL as identifier if available, otherwise empty string properties={ "@type": "Organization", - "name": creator_data.get('name', ''), # Default to empty string if name is missing - "url": creator_data.get('url', ''), # Add URL if available, otherwise empty string + "name": creator_data.get("name", ""), # Default to empty string if name is missing + "url": creator_data.get("url", ""), # Add URL if available, otherwise empty string }, ) ) # Append the organization entity to the mainEntity crate.mainEntity.append_to("creator", organization_entity) - # Add CWL workflow entity if exists crate.mainEntity["subjectOf"] = cwl_wf @@ -282,7 +287,7 @@ def _add_steps(self, crate: ROCrate): def _add_steps_recursive(self, steps, crate: ROCrate, step_entities, position): """ - Recursively add HowToStep entities from workflow steps, ensuring that + Recursively add HowToStep entities from workflow steps, ensuring that the position index is maintained across subworkflows. """ for step in steps: @@ -362,9 +367,9 @@ def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): "version": tool_version, "description": tool_description, "url": "https://toolshed.g2.bx.psu.edu", # URL if relevant - "citation": tool_metadata['citation'], - "identifier": tool_metadata['xref'], - "EDAM operation": tool_metadata['edam_operation'], + "citation": tool_metadata["citations"], + "identifier": tool_metadata["xrefs"], + "EDAM operation": tool_metadata["edam_operations"], }, ) ) @@ -383,7 +388,6 @@ def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): self._add_tools_recursive(subworkflow.steps, crate, tool_entities) def _get_tool_metadata(self, tool_id: str): - """ Retrieve the tool metadata (citations, xrefs, EDAM operations) using the ToolBox. @@ -394,22 +398,43 @@ def _get_tool_metadata(self, tool_id: str): Returns: dict: A dictionary containing citations, xrefs, and EDAM operations for the tool. """ - tool = toolbox.get_tool(tool_id) + tool = self.toolbox.get_tool(tool_id) if not tool: return None # Extracting relevant metadata from the tool object - citation = tool.citation if tool.citation else None - xref = tool.xref if tool.xref else None - edam_operation = tool.edam_operation if tool.edam_operation else None + citations = [] + if tool.citations: + for citation in tool.citations: + citations.append( + { + "type": citation.type, # e.g., "doi" or "bibtex" + "value": citation.value, # The actual DOI, BibTeX, etc. + } + ) + + xrefs = [] + if tool.xrefs: + for xref in tool.xrefs: + xrefs.append( + { + "type": xref.type, # e.g., "registry", "repository", etc. + "value": xref.value, # The identifier or link + } + ) + + # Handling EDAM operations, which are simple values in your XML + edam_operations = [] + if tool.edam_operations: + for operation in tool.edam_operations: + edam_operations.append({"value": operation}) # Extract the operation code (e.g., "operation_3482") return { - "citation": citation, - "xref": xref, - "edam_operation": edam_operation, + "citations": citations, # List of structured citation entries + "xrefs": xrefs, # List of structured xref entries + "edam_operations": edam_operations, # List of structured EDAM operations } - def _add_create_action(self, crate: ROCrate): """ Adds the CreateAction indicating the workflow invocation. @@ -423,8 +448,8 @@ def _add_create_action(self, crate: ROCrate): "name": self.workflow.name, "startTime": self.invocation.workflow.create_time.isoformat(), "endTime": self.invocation.workflow.update_time.isoformat(), - "instrument": {"@id": crate.mainEntity["@id"]} - } + "instrument": {"@id": crate.mainEntity["@id"]}, + }, ) ) crate.root_dataset.append_to("mentions", self.create_action) From 0762d9905ec7d42d957b23f1c530f5dbbc60711c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marie=20Joss=C3=A9?= <84919248+Marie59@users.noreply.github.com> Date: Wed, 16 Oct 2024 12:01:44 +0200 Subject: [PATCH 17/17] add toolbox check in export ro-crate --- test/unit/data/model/test_model_store.py | 274 +++++++++-------------- 1 file changed, 100 insertions(+), 174 deletions(-) diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index 80dd3789ef1b..f247e378be18 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -1,5 +1,4 @@ """Unit tests for importing and exporting data from model stores.""" - import json import os import pathlib @@ -17,8 +16,8 @@ import pytest from rocrate.rocrate import ROCrate -from sqlalchemy import select from sqlalchemy.orm.scoping import scoped_session +from unittest.mock import MagicMock from galaxy import model from galaxy.model import store @@ -44,7 +43,6 @@ TESTCASE_DIRECTORY = pathlib.Path(__file__).parent TEST_PATH_1 = TESTCASE_DIRECTORY / "1.txt" TEST_PATH_2 = TESTCASE_DIRECTORY / "2.bed" -TEST_PATH_2_CONVERTED = TESTCASE_DIRECTORY / "2.txt" DEFAULT_OBJECT_STORE_BY = "id" @@ -75,7 +73,9 @@ def test_import_export_history_hidden_false_with_hidden_dataset(): u, h, d1, d2, j = _setup_simple_cat_job(app) d2.visible = False - app.commit() + session = app.model.session + with transaction(session): + session.commit() imported_history = _import_export_history(app, h, export_files="copy", include_hidden=False) assert d1.dataset.get_size() == imported_history.datasets[0].get_size() @@ -87,7 +87,9 @@ def test_import_export_history_hidden_true_with_hidden_dataset(): u, h, d1, d2, j = _setup_simple_cat_job(app) d2.visible = False - app.commit() + session = app.model.session + with transaction(session): + session.commit() imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True) assert d1.dataset.get_size() == imported_history.datasets[0].get_size() @@ -122,75 +124,6 @@ def test_import_export_history_allow_discarded_data(): assert imported_job.output_datasets[0].dataset == datasets[1] -def test_import_export_history_with_implicit_conversion(): - app = _mock_app() - - u, h, d1, d2, j = _setup_simple_cat_job(app) - - convert_ext = "fasta" - implicit_hda = model.HistoryDatasetAssociation(extension=convert_ext, create_dataset=True, flush=False, history=h) - implicit_hda.hid = d2.hid - # this adds and flushes the result... - d2.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext) - app.object_store.update_from_file(implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True) - - assert len(h.active_datasets) == 3 - imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True) - - assert len(imported_history.active_datasets) == 3 - recovered_hda_2 = imported_history.active_datasets[1] - assert recovered_hda_2.implicitly_converted_datasets - imported_conversion = recovered_hda_2.implicitly_converted_datasets[0] - assert imported_conversion.type == "fasta" - assert imported_conversion.dataset == imported_history.active_datasets[2] - - # implicit conversions have the same HID... ensure this property is recovered... - assert imported_history.active_datasets[2].hid == imported_history.active_datasets[1].hid - - -def test_import_export_history_with_implicit_conversion_and_extra_files(): - app = _mock_app() - - u, h, d1, d2, j = _setup_simple_cat_job(app) - - convert_ext = "fasta" - implicit_hda = model.HistoryDatasetAssociation(extension=convert_ext, create_dataset=True, flush=False, history=h) - implicit_hda.hid = d2.hid - # this adds and flushes the result... - d2.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext) - app.object_store.update_from_file(implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True) - - d2.dataset.create_extra_files_path() - implicit_hda.dataset.create_extra_files_path() - - app.write_primary_file(d2, "cool primary file 1") - app.write_composite_file(d2, "cool composite file", "child_file") - - app.write_primary_file(implicit_hda, "cool primary file implicit") - app.write_composite_file(implicit_hda, "cool composite file implicit", "child_file_converted") - - assert len(h.active_datasets) == 3 - imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True) - - assert len(imported_history.active_datasets) == 3 - recovered_hda_2 = imported_history.active_datasets[1] - assert recovered_hda_2.implicitly_converted_datasets - imported_conversion = recovered_hda_2.implicitly_converted_datasets[0] - assert imported_conversion.type == "fasta" - assert imported_conversion.dataset == imported_history.active_datasets[2] - - # implicit conversions have the same HID... ensure this property is recovered... - assert imported_history.active_datasets[2].hid == imported_history.active_datasets[1].hid - - _assert_extra_files_has_parent_directory_with_single_file_containing( - imported_history.active_datasets[1], "child_file", "cool composite file" - ) - - _assert_extra_files_has_parent_directory_with_single_file_containing( - imported_history.active_datasets[2], "child_file_converted", "cool composite file implicit" - ) - - def test_import_export_bag_archive(): """Test a simple job import/export using a BagIt archive.""" dest_parent = mkdtemp() @@ -263,9 +196,9 @@ def test_import_library_from_dict(): perform_import_from_store_dict(fixture_context, import_dict, import_options=import_options) sa_session = fixture_context.sa_session - all_libraries = sa_session.scalars(select(model.Library)).all() + all_libraries = sa_session.query(model.Library).all() assert len(all_libraries) == 1, len(all_libraries) - all_lddas = sa_session.scalars(select(model.LibraryDatasetDatasetAssociation)).all() + all_lddas = sa_session.query(model.LibraryDatasetDatasetAssociation).all() assert len(all_lddas) == 1, len(all_lddas) @@ -318,7 +251,8 @@ def test_import_library_require_permissions(): root_folder = model.LibraryFolder(name="my library 1", description="folder description") library.root_folder = root_folder sa_session.add_all((library, root_folder)) - app.commit() + with transaction(sa_session): + sa_session.commit() temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app) as export_store: @@ -346,7 +280,8 @@ def test_import_export_library(): root_folder = model.LibraryFolder(name="my library 1", description="folder description") library.root_folder = root_folder sa_session.add_all((library, root_folder)) - app.commit() + with transaction(sa_session): + sa_session.commit() subfolder = model.LibraryFolder(name="sub folder 1", description="sub folder") root_folder.add_folder(subfolder) @@ -360,7 +295,8 @@ def test_import_export_library(): sa_session.add(ld) sa_session.add(ldda) - app.commit() + with transaction(sa_session): + sa_session.commit() assert len(root_folder.datasets) == 1 assert len(root_folder.folders) == 1 @@ -373,9 +309,9 @@ def test_import_export_library(): ) import_model_store.perform_import() - all_libraries = sa_session.scalars(select(model.Library)).all() + all_libraries = sa_session.query(model.Library).all() assert len(all_libraries) == 2, len(all_libraries) - all_lddas = sa_session.scalars(select(model.LibraryDatasetDatasetAssociation)).all() + all_lddas = sa_session.query(model.LibraryDatasetDatasetAssociation).all() assert len(all_lddas) == 2, len(all_lddas) new_library = [lib for lib in all_libraries if lib.id != library.id][0] @@ -402,7 +338,8 @@ def test_import_export_invocation(): sa_session = app.model.context h2 = model.History(user=workflow_invocation.user) sa_session.add(h2) - app.commit() + with transaction(sa_session): + sa_session.commit() import_model_store = store.get_import_model_store_for_directory( temp_directory, app=app, user=workflow_invocation.user, import_options=store.ImportOptions() @@ -545,21 +482,6 @@ def validate_invocation_collection_crate_directory(crate_directory): assert dataset in root["hasPart"] -def test_export_history_with_missing_hid(): - # The dataset's hid was used to compose the file name during the export but it - # can be missing sometimes. We now use the dataset's encoded id instead. - app = _mock_app() - u, history, d1, d2, j = _setup_simple_cat_job(app) - - # Remove hid from d1 - d1.hid = None - app.commit() - - temp_directory = mkdtemp() - with store.DirectoryModelExportStore(temp_directory, app=app, export_files="copy") as export_store: - export_store.export_history(history) - - def test_export_history_to_ro_crate(tmp_path): app = _mock_app() u, history, d1, d2, j = _setup_simple_cat_job(app) @@ -580,11 +502,26 @@ def test_export_invocation_to_ro_crate(tmp_path): def test_export_simple_invocation_to_ro_crate(tmp_path): + # Mock the app, which includes a mock toolbox app = _mock_app() + + # Mock the toolbox behavior if needed + mock_tool = MagicMock() + mock_tool.id = "test_tool" + mock_tool.version = "1.0" + app.toolbox.get_tool.return_value = mock_tool # Simulate fetching a tool from the toolbox + + # Set up a simple workflow invocation workflow_invocation = _setup_simple_invocation(app) + + # Create a directory to export the RO-Crate to crate_directory = tmp_path / "crate" + + # Export the workflow invocation to the RO-Crate with store.ROCrateModelExportStore(crate_directory, app=app) as export_store: export_store.export_workflow_invocation(workflow_invocation) + + # Validate the exported crate validate_invocation_crate_directory(crate_directory) @@ -693,7 +630,9 @@ def test_import_export_edit_collection(): sa_session.add(hc1) sa_session.add(h) import_history = model.History(name="Test History for Import", user=u) - app.add_and_commit(import_history) + sa_session.add(import_history) + with transaction(sa_session): + sa_session.commit() temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app, for_edit=True) as export_store: @@ -766,38 +705,48 @@ def test_import_export_composite_datasets(): d1 = _create_datasets(sa_session, h, 1, extension="html")[0] d1.dataset.create_extra_files_path() - app.add_and_commit(h, d1) - - app.write_primary_file(d1, "cool primary file") - app.write_composite_file(d1, "cool composite file", "child_file") + sa_session.add_all((h, d1)) + with transaction(sa_session): + sa_session.commit() + + primary = NamedTemporaryFile("w") + primary.write("cool primary file") + primary.flush() + app.object_store.update_from_file(d1.dataset, file_name=primary.name, create=True, preserve_symlinks=True) + + composite1 = NamedTemporaryFile("w") + composite1.write("cool composite file") + composite1.flush() + + app.object_store.update_from_file( + d1.dataset, + extra_dir=os.path.normpath(os.path.join(d1.extra_files_path, "parent_dir")), + alt_name="child_file", + file_name=composite1.name, + create=True, + preserve_symlinks=True, + ) temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app, export_files="copy") as export_store: export_store.add_dataset(d1) import_history = model.History(name="Test History for Import", user=u) - app.add_and_commit(import_history) + sa_session.add(import_history) + with transaction(sa_session): + sa_session.commit() _perform_import_from_directory(temp_directory, app, u, import_history) assert len(import_history.datasets) == 1 import_dataset = import_history.datasets[0] - _assert_extra_files_has_parent_directory_with_single_file_containing( - import_dataset, "child_file", "cool composite file" - ) - - -def _assert_extra_files_has_parent_directory_with_single_file_containing( - dataset, expected_file_name, expected_contents -): - root_extra_files_path = dataset.extra_files_path + root_extra_files_path = import_dataset.extra_files_path assert len(os.listdir(root_extra_files_path)) == 1 assert os.listdir(root_extra_files_path)[0] == "parent_dir" composite_sub_dir = os.path.join(root_extra_files_path, "parent_dir") child_files = os.listdir(composite_sub_dir) assert len(child_files) == 1 - assert child_files[0] == expected_file_name with open(os.path.join(composite_sub_dir, child_files[0])) as f: contents = f.read() - assert contents == expected_contents + assert contents == "cool composite file" def test_edit_metadata_files(): @@ -808,7 +757,9 @@ def test_edit_metadata_files(): h = model.History(name="Test History", user=u) d1 = _create_datasets(sa_session, h, 1, extension="bam")[0] - app.add_and_commit(h, d1) + sa_session.add_all((h, d1)) + with transaction(sa_session): + sa_session.commit() index = NamedTemporaryFile("w") index.write("cool bam index") metadata_dict = {"bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name})} @@ -823,7 +774,9 @@ def test_edit_metadata_files(): export_store.add_dataset(d1) import_history = model.History(name="Test History for Import", user=u) - app.add_and_commit(import_history) + sa_session.add(import_history) + with transaction(sa_session): + sa_session.commit() _perform_import_from_directory(temp_directory, app, u, import_history, store.ImportOptions(allow_edit=True)) @@ -842,21 +795,6 @@ def test_sessionless_import_edit_datasets(): assert d2 is not None -def test_import_job_with_output_copy(): - app, h, temp_directory, import_history = _setup_simple_export({"for_edit": True}) - hda = h.active_datasets[-1] - # Simulate a copy being made of an output hda - copy = hda.copy(new_name="output copy") - # set extension to auto, should be changed to real extension when finalizing job - copy.extension = "auto" - app.add_and_commit(copy) - import_model_store = store.get_import_model_store_for_directory( - temp_directory, import_options=store.ImportOptions(allow_dataset_object_edit=True, allow_edit=True), app=app - ) - import_model_store.perform_import() - assert copy.extension == "txt" - - def test_import_datasets_with_ids_fails_if_not_editing_models(): app, h, temp_directory, import_history = _setup_simple_export({"for_edit": True}) u = h.user @@ -875,8 +813,12 @@ def _setup_simple_export(export_kwds): u, h, d1, d2, j = _setup_simple_cat_job(app) + sa_session = app.model.context + import_history = model.History(name="Test History for Import", user=u) - app.add_and_commit(import_history) + sa_session.add(import_history) + with transaction(sa_session): + sa_session.commit() temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app, **export_kwds) as export_store: @@ -901,9 +843,9 @@ def _assert_simple_cat_job_imported(imported_history, state="ok"): assert imported_job.input_datasets assert imported_job.input_datasets[0].dataset == datasets[0] - with open(datasets[0].get_file_name()) as f: + with open(datasets[0].file_name) as f: assert f.read().startswith("chr1 4225 19670") - with open(datasets[1].get_file_name()) as f: + with open(datasets[1].file_name) as f: assert f.read().startswith("chr1\t147962192\t147962580\tNM_005997_cds_0_0_chr1_147962193_r\t0\t-") @@ -924,7 +866,9 @@ def _setup_simple_cat_job(app, state="ok"): j.add_input_dataset("input1", d1) j.add_output_dataset("out_file1", d2) - app.add_and_commit(d1, d2, h, j) + sa_session.add_all((d1, d2, h, j)) + with transaction(sa_session): + sa_session.commit() app.object_store.update_from_file(d1, file_name=TEST_PATH_1, create=True) app.object_store.update_from_file(d2, file_name=TEST_PATH_2, create=True) @@ -959,7 +903,9 @@ def _setup_invocation(app): workflow_invocation.add_input(d1, step=workflow_step_1) wf_output = model.WorkflowOutput(workflow_step_1, label="output_label") workflow_invocation.add_output(wf_output, workflow_step_1, d2) - app.add_and_commit(workflow_invocation) + sa_session.add(workflow_invocation) + with transaction(sa_session): + sa_session.commit() return workflow_invocation @@ -1005,7 +951,8 @@ def _setup_simple_collection_job(app, state="ok"): sa_session.add(hc2) sa_session.add(hc3) sa_session.add(j) - app.commit() + with transaction(sa_session): + sa_session.commit() return u, h, c1, c2, c3, hc1, hc2, hc3, j @@ -1018,7 +965,7 @@ def _setup_collection_invocation(app): workflow_step_1 = model.WorkflowStep() workflow_step_1.order_index = 0 workflow_step_1.type = "data_collection_input" - workflow_step_1.tool_inputs = {} # type:ignore[assignment] + workflow_step_1.tool_inputs = {} sa_session.add(workflow_step_1) workflow_1 = _workflow_from_steps(u, [workflow_step_1]) workflow_1.license = "MIT" @@ -1031,7 +978,9 @@ def _setup_collection_invocation(app): wf_output = model.WorkflowOutput(workflow_step_1, label="output_label") workflow_invocation.add_output(wf_output, workflow_step_1, hc3) - app.add_and_commit(workflow_invocation) + sa_session.add(workflow_invocation) + with transaction(sa_session): + sa_session.commit() return workflow_invocation @@ -1044,7 +993,7 @@ def _setup_simple_invocation(app): workflow_step_1 = model.WorkflowStep() workflow_step_1.order_index = 0 workflow_step_1.type = "data_input" - workflow_step_1.tool_inputs = {} # type:ignore[assignment] + workflow_step_1.tool_inputs = {} sa_session.add(workflow_step_1) workflow = _workflow_from_steps(u, [workflow_step_1]) workflow.license = "MIT" @@ -1110,45 +1059,20 @@ def read_workflow_from_path(self, app, user, path, allow_in_directory=None): workflow = model.Workflow() workflow.steps = [workflow_step_1] stored_workflow.latest_workflow = workflow - app.add_and_commit(stored_workflow, workflow) + sa_session = app.model.context + sa_session.add_all((stored_workflow, workflow)) + with transaction(sa_session): + sa_session.commit() return workflow class TestApp(GalaxyDataTestApp): workflow_contents_manager = MockWorkflowContentsManager() - def add_and_commit(self, *objs): - session = self.model.session - session.add_all(objs) - self.commit() - - def commit(self): - session = self.model.session - with transaction(session): - session.commit() - - def write_primary_file(self, dataset_instance, contents): - primary = NamedTemporaryFile("w") - primary.write(contents) - primary.flush() - self.object_store.update_from_file( - dataset_instance.dataset, file_name=primary.name, create=True, preserve_symlinks=True - ) - - def write_composite_file(self, dataset_instance, contents, file_name): - composite1 = NamedTemporaryFile("w") - composite1.write(contents) - composite1.flush() - - dataset_instance.dataset.create_extra_files_path() - self.object_store.update_from_file( - dataset_instance.dataset, - extra_dir=os.path.normpath(os.path.join(dataset_instance.extra_files_path, "parent_dir")), - alt_name=file_name, - file_name=composite1.name, - create=True, - preserve_symlinks=True, - ) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Add a mock toolbox to the test app + self.toolbox = MagicMock() def _mock_app(store_by=DEFAULT_OBJECT_STORE_BY): @@ -1187,7 +1111,8 @@ def setup_fixture_context_with_history( app, sa_session, user = setup_fixture_context_with_user(**kwd) history = model.History(name=history_name, user=user) sa_session.add(history) - app.commit() + with transaction(sa_session): + sa_session.commit() return StoreFixtureContextWithHistory(app, sa_session, user, history) @@ -1215,6 +1140,7 @@ def import_archive(archive_path, app, user, import_options=None): dest_dir = CompressedFile(archive_path).extract(dest_parent) import_options = import_options or store.ImportOptions() + new_history = None model_store = store.get_import_model_store_for_directory( dest_dir, app=app,