Merge pull request #18036 from mvdbeek/implicit_conversion_no_hid_fix
[24.0] Fix History Dataset Association creation so that hid is always set
jdavcs authored Apr 23, 2024
2 parents 7722cf0 + 121f27e commit b97825b
Showing 12 changed files with 57 additions and 68 deletions.
7 changes: 4 additions & 3 deletions client/src/api/schema/schema.ts
@@ -3751,7 +3751,7 @@ export interface components {
              * Element Type
              * @description The type of the element. Used to interpret the `object` field.
              */
-            element_type: components["schemas"]["DCEType"];
+            element_type?: components["schemas"]["DCEType"] | null;
             /**
              * Dataset Collection Element ID
              * @example 0123456789ABCDEF
@@ -3767,10 +3767,11 @@ export interface components {
              * Object
              * @description The element's specific data depending on the value of `element_type`.
              */
-            object:
+            object?:
                 | components["schemas"]["HDAObject"]
                 | components["schemas"]["HDADetailed"]
-                | components["schemas"]["DCObject"];
+                | components["schemas"]["DCObject"]
+                | null;
         };
         /**
          * DCEType
6 changes: 5 additions & 1 deletion lib/galaxy/datatypes/data.py
@@ -842,7 +842,11 @@ def convert_dataset(
         # Make the target datatype available to the converter
         params["__target_datatype__"] = target_type
         # Run converter, job is dispatched through Queue
-        job, converted_datasets, *_ = converter.execute(trans, incoming=params, set_output_hid=visible, history=history)
+        job, converted_datasets, *_ = converter.execute(
+            trans, incoming=params, set_output_hid=visible, history=history, flush_job=False
+        )
+        for converted_dataset in converted_datasets.values():
+            original_dataset.attach_implicitly_converted_dataset(trans.sa_session, converted_dataset, target_type)
         trans.app.job_manager.enqueue(job, tool=converter)
         if len(params) > 0:
             trans.log_event(f"Converter params: {str(params)}", tool_id=converter.id)
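The net effect of this hunk: converted datasets are attached to their parent inside one unflushed unit of work, so the hid is assigned (when the output is added to a history) before anything is flushed or the job is dispatched. A runnable toy of that invariant; every class below is an illustrative stand-in, not one of Galaxy's models:

    # Toy model of the ordering this commit enforces: wire datasets to a
    # history *before* the first flush, so hids are assigned in time.
    from typing import List, Optional

    class Dataset:
        def __init__(self, name: str) -> None:
            self.name = name
            self.hid: Optional[int] = None

    class History:
        def __init__(self) -> None:
            self._hid_counter = 1
            self.datasets: List[Dataset] = []

        def add_dataset(self, dataset: Dataset) -> None:
            dataset.hid = self._hid_counter  # hid assigned on add...
            self._hid_counter += 1
            self.datasets.append(dataset)

    class Session:
        def __init__(self) -> None:
            self.pending: List[Dataset] = []

        def add(self, obj: Dataset) -> None:
            self.pending.append(obj)

        def flush(self) -> None:
            # ...so everything added before this single flush has a hid.
            assert all(d.hid is not None for d in self.pending), "hid must be set before flush"
            self.pending.clear()

    history, session = History(), Session()
    converted = Dataset("converted.bam")
    history.add_dataset(converted)  # the step the old flush-early code could skip
    session.add(converted)
    session.flush()                 # passes: hid was set before the flush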
18 changes: 1 addition & 17 deletions lib/galaxy/datatypes/display_applications/parameters.py
@@ -11,7 +11,6 @@

 from galaxy.datatypes.data import Data
 from galaxy.model import DatasetInstance
-from galaxy.model.base import transaction
 from galaxy.schema.schema import DatasetState
 from galaxy.util import string_as_bool
 from galaxy.util.template import fill_template
@@ -182,22 +181,7 @@ def prepare(self, other_values, dataset_hash, user_hash, trans):
         if target_ext and not converted_dataset:
             if isinstance(data, DisplayDataValueWrapper):
                 data = data.value
-            new_data = next(
-                iter(
-                    data.datatype.convert_dataset(
-                        trans, data, target_ext, return_output=True, visible=False
-                    ).values()
-                )
-            )
-            new_data.hid = data.hid
-            new_data.name = data.name
-            trans.sa_session.add(new_data)
-            assoc = trans.app.model.ImplicitlyConvertedDatasetAssociation(
-                parent=data, file_type=target_ext, dataset=new_data, metadata_safe=False
-            )
-            trans.sa_session.add(assoc)
-            with transaction(trans.sa_session):
-                trans.sa_session.commit()
+            data.datatype.convert_dataset(trans, data, target_ext, return_output=True, visible=False)
         elif converted_dataset and converted_dataset.state == DatasetState.ERROR:
             raise Exception(f"Dataset conversion failed for data parameter: {self.name}")
         return self.get_value(other_values, dataset_hash, user_hash, trans)
2 changes: 2 additions & 0 deletions lib/galaxy/datatypes/protocols.py
@@ -85,5 +85,7 @@ def has_data(self) -> bool: ...

     def set_peek(self) -> None: ...

+    def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext: str) -> None: ...
+

 class DatasetHasHidProtocol(DatasetProtocol, HasHid, Protocol): ...
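The protocol method only declares a shape for static typing; any class with a matching method satisfies it structurally, with no inheritance. A minimal sketch (the HasImplicitConversion protocol and attach_all helper are invented for the example):

    from typing import Iterable, Protocol

    class HasImplicitConversion(Protocol):
        # Structural type: anything with a matching method satisfies it.
        def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext: str) -> None: ...

    def attach_all(parent: HasImplicitConversion, session, datasets: Iterable, target_ext: str) -> None:
        # Accepts HDAs, LDDAs, or any test stand-in; no base class required.
        for dataset in datasets:
            parent.attach_implicitly_converted_dataset(session, dataset, target_ext)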
8 changes: 4 additions & 4 deletions lib/galaxy/managers/collections.py
@@ -95,7 +95,7 @@ def precreate_dataset_collection_instance(
         # TODO: prebuild all required HIDs and send them in so no need to flush in between.
         dataset_collection = self.precreate_dataset_collection(
             structure,
-            allow_unitialized_element=implicit_output_name is not None,
+            allow_uninitialized_element=implicit_output_name is not None,
             completed_collection=completed_collection,
             implicit_output_name=implicit_output_name,
         )
@@ -112,10 +112,10 @@ def precreate_dataset_collection_instance(
         return instance

     def precreate_dataset_collection(
-        self, structure, allow_unitialized_element=True, completed_collection=None, implicit_output_name=None
+        self, structure, allow_uninitialized_element=True, completed_collection=None, implicit_output_name=None
     ):
         has_structure = not structure.is_leaf and structure.children_known
-        if not has_structure and allow_unitialized_element:
+        if not has_structure and allow_uninitialized_element:
             dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
         elif not has_structure:
             collection_type_description = structure.collection_type_description
@@ -143,7 +143,7 @@ def precreate_dataset_collection(
                 element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
             else:
                 element = self.precreate_dataset_collection(
-                    substructure, allow_unitialized_element=allow_unitialized_element
+                    substructure, allow_uninitialized_element=allow_uninitialized_element
                 )

             element = model.DatasetCollectionElement(
9 changes: 2 additions & 7 deletions lib/galaxy/managers/hdas.py
@@ -202,7 +202,7 @@ def copy(
         if not isinstance(item, model.HistoryDatasetAssociation):
             raise TypeError()
         hda = item
-        copy = hda.copy(parent_id=kwargs.get("parent_id"), copy_hid=False, copy_tags=hda.tags, flush=flush)
+        copy = hda.copy(parent_id=kwargs.get("parent_id"), copy_hid=False, copy_tags=hda.tags, flush=False)
         if hide_copy:
             copy.visible = False
         if history:
@@ -221,12 +221,6 @@ def copy(

         return copy

-    def copy_ldda(self, history, ldda, **kwargs):
-        """
-        Copy this HDA as a LDDA and return.
-        """
-        return ldda.to_history_dataset_association(history, add_to_history=True)
-
     # .... deletion and purging
     def purge(self, hda, flush=True, **kwargs):
         if self.app.config.enable_celery_tasks:
@@ -561,6 +555,7 @@ def add_serializers(self):
         annotatable.AnnotatableSerializerMixin.add_serializers(self)

         serializers: Dict[str, base.Serializer] = {
+            "hid": lambda item, key, **context: item.hid if item.hid is not None else -1,
             "model_class": lambda item, key, **context: "HistoryDatasetAssociation",
             "history_content_type": lambda item, key, **context: "dataset",
             "hda_ldda": lambda item, key, **context: "hda",
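The added "hid" serializer keeps the API contract stable for copies that have not been flushed yet: an unassigned hid is reported as -1 instead of null. A self-contained sketch of how such a serializer map behaves; Item and serialize are stand-ins for Galaxy's serializer machinery:

    from typing import Any, Callable, Dict, Optional

    class Item:  # stand-in for an HDA-like object
        def __init__(self, hid: Optional[int]) -> None:
            self.hid = hid

    serializers: Dict[str, Callable[..., Any]] = {
        # Unflushed copies may not have a hid yet; report -1 rather than
        # null so API consumers always receive an integer.
        "hid": lambda item, key, **context: item.hid if item.hid is not None else -1,
    }

    def serialize(item: Item, keys: list) -> Dict[str, Any]:
        return {key: serializers[key](item, key) for key in keys if key in serializers}

    assert serialize(Item(hid=None), ["hid"]) == {"hid": -1}
    assert serialize(Item(hid=7), ["hid"]) == {"hid": 7}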
20 changes: 9 additions & 11 deletions lib/galaxy/model/__init__.py
@@ -4736,7 +4736,7 @@ def get_converted_dataset(self, trans, target_ext, target_context=None, history=
             raise NoConverterException(f"A dependency ({dependency}) is missing a converter.")
         except KeyError:
             pass  # No deps
-        new_dataset = next(
+        return next(
             iter(
                 self.datatype.convert_dataset(
                     trans,
@@ -4750,7 +4750,6 @@ def get_converted_dataset(self, trans, target_ext, target_context=None, history=
                 ).values()
             )
         )
-        return self.attach_implicitly_converted_dataset(trans.sa_session, new_dataset, target_ext)

     def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext: str):
         new_dataset.name = self.name
@@ -4760,9 +4759,6 @@ def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext:
         )
         session.add(new_dataset)
         session.add(assoc)
-        with transaction(session):
-            session.commit()
-        return new_dataset

     def copy_attributes(self, new_dataset):
         """
@@ -5134,7 +5130,8 @@ def copy_tags_to(self, copy_tags=None):
             self.tags.append(copied_tag)

     def copy_attributes(self, new_dataset):
-        new_dataset.hid = self.hid
+        if new_dataset.hid is None:
+            new_dataset.hid = self.hid

     def to_library_dataset_dataset_association(
         self,
@@ -5821,11 +5818,9 @@ def to_history_dataset_association(self, target_history, parent_id=None, add_to_

         tag_manager = galaxy.model.tags.GalaxyTagHandler(sa_session)
         src_ldda_tags = tag_manager.get_tags_str(self.tags)
-        tag_manager.apply_item_tags(user=self.user, item=hda, tags_str=src_ldda_tags)
+        tag_manager.apply_item_tags(user=self.user, item=hda, tags_str=src_ldda_tags, flush=False)
         sa_session.add(hda)
-        with transaction(sa_session):
-            sa_session.commit()
-        hda.metadata = self.metadata  # need to set after flushed, as MetadataFiles require dataset.id
+        hda.metadata = self.metadata
         if add_to_history and target_history:
             target_history.add_dataset(hda)
             with transaction(sa_session):
@@ -6362,6 +6357,8 @@ def has_deferred_data(self):
     @property
     def populated_optimized(self):
         if not hasattr(self, "_populated_optimized"):
            if not self.id:
                return self.populated
            _populated_optimized = True
            if ":" not in self.collection_type:
                _populated_optimized = self.populated_state == DatasetCollection.populated_states.OK
@@ -7149,7 +7146,8 @@ def __init__(
         self.element_identifier = element_identifier or str(element_index)

     def __strict_check_before_flush__(self):
-        assert self.element_object, "Dataset Collection Element without child entity detected, this is not valid"
+        if self.collection.populated_optimized:
+            assert self.element_object, "Dataset Collection Element without child entity detected, this is not valid"

     @property
     def element_type(self):
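The copy_attributes guard above is central to the fix: a hid already assigned on the target copy is no longer clobbered by the source's. A runnable illustration with stand-in objects (HDALike is invented for the example; Galaxy's real method lives on HistoryDatasetAssociation):

    class HDALike:
        def __init__(self, hid=None):
            self.hid = hid

        def copy_attributes(self, new_dataset):
            if new_dataset.hid is None:  # the new guard
                new_dataset.hid = self.hid

    src, dst = HDALike(hid=3), HDALike(hid=7)
    src.copy_attributes(dst)
    assert dst.hid == 7   # previously this would have been overwritten to 3

    dst2 = HDALike()
    src.copy_attributes(dst2)
    assert dst2.hid == 3  # an unassigned hid is still inherited as before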
32 changes: 23 additions & 9 deletions lib/galaxy/model/base.py
@@ -174,18 +174,32 @@ def versioned_objects_strict(iter):
             yield obj


-if os.environ.get("GALAXY_TEST_RAISE_EXCEPTION_ON_HISTORYLESS_HDA"):
-    log.debug("Using strict flush checks")
-    versioned_objects = versioned_objects_strict  # noqa: F811
+def get_before_flush_handler():
+    if os.environ.get("GALAXY_TEST_RAISE_EXCEPTION_ON_HISTORYLESS_HDA"):
+        log.debug("Using strict flush checks")
+
+        def before_flush(session, flush_context, instances):
+            for obj in session.new:
+                if hasattr(obj, "__strict_check_before_flush__"):
+                    obj.__strict_check_before_flush__()
+            for obj in versioned_objects_strict(session.dirty):
+                obj.__create_version__(session)
+            for obj in versioned_objects_strict(session.deleted):
+                obj.__create_version__(session, deleted=True)
+
+    else:
+
+        def before_flush(session, flush_context, instances):
+            for obj in versioned_objects(session.dirty):
+                obj.__create_version__(session)
+            for obj in versioned_objects(session.deleted):
+                obj.__create_version__(session, deleted=True)
+
+    return before_flush


 def versioned_session(session):
-    @event.listens_for(session, "before_flush")
-    def before_flush(session, flush_context, instances):
-        for obj in versioned_objects(session.dirty):
-            obj.__create_version__(session)
-        for obj in versioned_objects(session.deleted):
-            obj.__create_version__(session, deleted=True)
+    event.listens_for(session, "before_flush")(get_before_flush_handler())


 def ensure_object_added_to_session(object_to_add, *, object_in_session=None, session=None) -> bool:
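The flush listener is now built once by get_before_flush_handler() and registered through SQLAlchemy's event API; in strict test mode it also runs __strict_check_before_flush__ on newly added objects. A condensed, self-contained version of the same pattern, assuming only sqlalchemy (the module's versioned_objects generators are reduced here to a hasattr filter):

    import os
    from sqlalchemy import event

    def versioned(objects):
        return (obj for obj in objects if hasattr(obj, "__create_version__"))

    def get_before_flush_handler():
        strict = bool(os.environ.get("GALAXY_TEST_RAISE_EXCEPTION_ON_HISTORYLESS_HDA"))

        def before_flush(session, flush_context, instances):
            if strict:
                # Strict mode validates brand-new rows (e.g. collection
                # elements without a child entity) before they are flushed.
                for obj in session.new:
                    if hasattr(obj, "__strict_check_before_flush__"):
                        obj.__strict_check_before_flush__()
            for obj in versioned(session.dirty):
                obj.__create_version__(session)
            for obj in versioned(session.deleted):
                obj.__create_version__(session, deleted=True)

        return before_flush

    def versioned_session(session):
        # listens_for() returns a decorator; calling it directly registers
        # the prebuilt handler, exactly as the hunk above does.
        event.listens_for(session, "before_flush")(get_before_flush_handler())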
2 changes: 1 addition & 1 deletion lib/galaxy/model/deferred.py
@@ -165,7 +165,7 @@ def ensure_materialized(
         if transient_paths:
             metadata_tmp_files_dir = transient_paths.metadata_files_dir
         else:
-            # If metadata_tmp_files_dir is set we generate a MetdataTempFile,
+            # If metadata_tmp_files_dir is set we generate a MetadataTempFile,
             # which we don't want when we're generating an attached materialized dataset instance
             metadata_tmp_files_dir = None
         materialized_dataset_instance.set_meta(metadata_tmp_files_dir=metadata_tmp_files_dir)
6 changes: 2 additions & 4 deletions lib/galaxy/model/metadata.py
@@ -670,7 +670,7 @@ def from_external_value(self, value, parent, path_rewriter=None):
                 # directory. Correct.
                 file_name = path_rewriter(file_name)
             mf.update_from_file(file_name)
-            value = mf.id
+            value = str(mf.uuid)
         return value

     def to_external_value(self, value):
@@ -692,11 +692,9 @@ def new_file(self, dataset=None, metadata_tmp_files_dir=None, **kwds):
             sa_session = object_session(dataset)
             if sa_session:
                 sa_session.add(mf)
-                with transaction(sa_session):
-                    sa_session.commit()  # commit to assign id
                 return mf
         else:
-            # we need to make a tmp file that is accessable to the head node,
+            # we need to make a tmp file that is accessible to the head node,
             # we will be copying its contents into the MetadataFile objects filename after restoring from JSON
             # we do not include 'dataset' in the kwds passed, as from_JSON_value() will handle this for us
             return MetadataTempFile(metadata_tmp_files_dir=metadata_tmp_files_dir, **kwds)
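Keying external metadata by uuid rather than database id is what allows dropping the commit in the second hunk: with a SQLAlchemy-style model the id exists only after a flush, while a client-side uuid default exists the moment the object is constructed. A toy demonstration (MetadataFileLike is a stand-in for the mapped MetadataFile model):

    import uuid

    class MetadataFileLike:
        def __init__(self) -> None:
            self.id = None            # populated by the database at flush time
            self.uuid = uuid.uuid4()  # populated immediately, no flush needed

    mf = MetadataFileLike()
    assert mf.id is None              # would previously have forced a commit
    value = str(mf.uuid)              # usable right away as an external key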
8 changes: 4 additions & 4 deletions lib/galaxy/schema/schema.py
@@ -971,13 +971,13 @@ class DCESummary(Model, WithModelClass):
         title="Element Identifier",
         description="The actual name of this element.",
     )
-    element_type: DCEType = Field(
-        ...,
+    element_type: Optional[DCEType] = Field(
+        None,
         title="Element Type",
         description="The type of the element. Used to interpret the `object` field.",
     )
-    object: Union[HDAObject, HDADetailed, DCObject] = Field(
-        ...,
+    object: Optional[Union[HDAObject, HDADetailed, DCObject]] = Field(
+        None,
         title="Object",
         description="The element's specific data depending on the value of `element_type`.",
     )
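With both fields optional, a DCESummary can now be serialized for an element that is not yet populated, matching the uninitialized elements precreated in collections.py above. A reduced Pydantic sketch of the loosened contract (field set trimmed, DCEType replaced by str, HDADetailed omitted for brevity):

    from typing import Optional, Union
    from pydantic import BaseModel

    class HDAObject(BaseModel):
        id: str

    class DCObject(BaseModel):
        id: str

    class DCESummarySketch(BaseModel):
        element_identifier: str
        element_type: Optional[str] = None                    # was: required
        object: Optional[Union[HDAObject, DCObject]] = None   # was: required

    # Previously this would fail validation; now it parses cleanly.
    elem = DCESummarySketch(element_identifier="forward")
    assert elem.element_type is None and elem.object is None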
7 changes: 0 additions & 7 deletions test/unit/app/managers/test_HDAManager.py
@@ -133,13 +133,6 @@ def test_copy_from_hda(self):
         hda3_annotation = self.hda_manager.annotation(hda3)
         assert annotation == hda3_annotation

-    # def test_copy_from_ldda( self ):
-    #     owner = self.user_manager.create( self.trans, **user2_data )
-    #     history1 = self.history_mgr.create( self.trans, name='history1', user=owner )
-    #
-    #     self.log( "should be able to copy an HDA" )
-    #     hda2 = self.hda_manager.copy_ldda( history1, hda1 )
-
     def test_delete(self):
         owner = self.user_manager.create(**user2_data)
         history1 = self.history_manager.create(name="history1", user=owner)
