From a83cf727a039b85a9096e894f5b5a75a51421016 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 10:26:21 -0700 Subject: [PATCH 01/18] move has_calibration off of WorkflowExecution subclasses --- ...olomicsAnalysis-invalid-has-slot-used.yaml | 1 - src/data/valid/Database-interleaved.yaml | 2 - src/schema/nmdc.yaml | 89 +++++++++++++++++ src/schema/workflow_execution_activity.yaml | 95 ------------------- 4 files changed, 89 insertions(+), 98 deletions(-) diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml index 29ae53f810..e1e87cb9c3 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml @@ -2,7 +2,6 @@ id: nmdc:wfmb-11-547rwq94.1 ended_at_time: '2021-09-15T10:13:20+00:00' execution_resource: NERSC-Cori git_url: https://example.org/WorkflowExecutionActivity -has_calibration: calibration with 0.01% phosphoric acid was_informed_by: nmdc:omprc-11-d8a8da started_at_time: '2021-08-05T14:48:51+00:00' type: nmdc:MetabolomicsAnalysis diff --git a/src/data/valid/Database-interleaved.yaml b/src/data/valid/Database-interleaved.yaml index 649fd03785..f1b093ad66 100644 --- a/src/data/valid/Database-interleaved.yaml +++ b/src/data/valid/Database-interleaved.yaml @@ -3687,7 +3687,6 @@ workflow_execution_set: git_url: https://github.com/microbiomedata/metabolomics_analysis/releases/tag/v0.5.0 was_informed_by: nmdc:omprc-11-di84md started_at_time: '2023-08-02T09:00:00Z' - has_calibration: nmdc:calib-l2k-9d6j3 has_metabolite_identifications: - type: nmdc:MetaboliteIdentification highest_similarity_score: 0.88 @@ -3901,7 +3900,6 @@ workflow_execution_set: git_url: https://github.com/microbiomedata/nom_analysis/releases/tag/v0.3.2 was_informed_by: nmdc:dgms-12-dfa74b started_at_time: '2023-08-08T09:30:00Z' - has_calibration: nmdc:calib-99-v9w6 data_generation_set: - id: nmdc:dgms-99-zUCd5N type: nmdc:MassSpectrometry diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 6fd123e6ec..e2064cca4b 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -255,6 +255,48 @@ classes: syntax: "{id_nmdc_prefix}:chrcon-{id_shoulder}-{id_blade}$" interpolated: true + CalibrationInformation: + class_uri: nmdc:CalibrationInformation + is_a: InformationObject + description: A calibration object that is associated with a process. + slots: + - calibration_object + - internal_calibration + - calibration_target + - calibration_standard + rules: + - title: calibration_standard_if_rt + description: >- + If the calibration_target is retention_index, a calibration_standard is required. + preconditions: + slot_conditions: + calibration_target: + equals_string: retention_index + postconditions: + slot_conditions: + calibration_standard: + required: true + - title: calibration_object_if_not_internal_calibration + description: >- + If internal_calibration is false, a calibration_object is required. + preconditions: + slot_conditions: + internal_calibration: false + postconditions: + slot_conditions: + calibration_object: + required: true + slot_usage: + internal_calibration: + required: true + calibration_target: + required: true + id: + structured_pattern: + syntax: "{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$" + interpolated: true + + FunctionalAnnotationAggMember: class_uri: nmdc:FunctionalAnnotationAggMember slots: @@ -661,7 +703,28 @@ classes: interpolated: true enums: + CalibrationTargetEnum: + permissible_values: + mass_charge_ratio: + title: m/z + aliases: + - Mass + - m/z + retention_time: + aliases: + - RT + retention_index: + aliases: + - RI + CalibrationStandardEnum: + permissible_values: + fames: + aliases: + - FAMES + alkanes: + aliases: + - Alkanes StrandedOrientationEnum: description: This enumeration specifies information about stranded RNA library preparations. @@ -885,6 +948,32 @@ enums: slots: + has_calibration: + any_of: + - range: CalibrationInformation + description: a calibration instance associated with a process + notes: >- + has_calibration slot will be removed from all WorkflowExecution classes but remain on the + MassSpectrometry class after an ingest of the appropriate set has occurred. + Once this has occurred, this slot's range can be updated to CalibrationInformation and class/slot definitions can move to nmdc.yaml. + See PR #29 in Berkeley schema. + + calibration_object: + range: DataObject + description: the file containing calibration data object + + internal_calibration: + range: boolean + description: whether internal calibration was used, if false, external calibration was used + + calibration_target: + range: CalibrationTargetEnum + description: the target measurement of the calibration + + calibration_standard: + range: CalibrationStandardEnum + description: the reference standard(s) used for calibration + polarity_mode: range: PolarityModeEnum description: the polarity of which ions are generated and detected diff --git a/src/schema/workflow_execution_activity.yaml b/src/schema/workflow_execution_activity.yaml index 1e5814121f..5d7fcc1f0d 100644 --- a/src/schema/workflow_execution_activity.yaml +++ b/src/schema/workflow_execution_activity.yaml @@ -264,7 +264,6 @@ classes: in_subset: - workflow subset slots: - - has_calibration - has_metabolite_identifications slot_usage: id: @@ -300,8 +299,6 @@ classes: is_a: WorkflowExecution in_subset: - workflow subset - slots: - - has_calibration slot_usage: id: required: true @@ -313,47 +310,6 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$" interpolated: true - CalibrationInformation: - class_uri: nmdc:CalibrationInformation - is_a: InformationObject - description: A calibration object that is associated with a process. - slots: - - calibration_object - - internal_calibration - - calibration_target - - calibration_standard - rules: - - title: calibration_standard_if_rt - description: >- - If the calibration_target is retention_index, a calibration_standard is required. - preconditions: - slot_conditions: - calibration_target: - equals_string: retention_index - postconditions: - slot_conditions: - calibration_standard: - required: true - - title: calibration_object_if_not_internal_calibration - description: >- - If internal_calibration is false, a calibration_object is required. - preconditions: - slot_conditions: - internal_calibration: false - postconditions: - slot_conditions: - calibration_object: - required: true - slot_usage: - internal_calibration: - required: true - calibration_target: - required: true - id: - structured_pattern: - syntax: "{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$" - interpolated: true - slots: metagenome_assembly_parameter: @@ -594,58 +550,7 @@ slots: description: >- TODO - has_calibration: - any_of: - - range: CalibrationInformation - - range: string - description: a calibration instance associated with a process - notes: >- - has_calibration slot will be removed from all WorkflowExecution classes but remain on the - MassSpectrometry class after an ingest of the appropriate set has occurred. - Once this has occurred, this slot's range can be updated to CalibrationInformation and class/slot definitions can move to nmdc.yaml. - See PR #29 in Berkeley schema. - - calibration_object: - range: DataObject - description: the file containing calibration data object - - internal_calibration: - range: boolean - description: whether internal calibration was used, if false, external calibration was used - - calibration_target: - range: CalibrationTargetEnum - description: the target measurement of the calibration - - calibration_standard: - range: CalibrationStandardEnum - description: the reference standard(s) used for calibration - has_metabolite_identifications: range: MetaboliteIdentification multivalued: true inlined_as_list: true - -enums: - CalibrationTargetEnum: - permissible_values: - mass_charge_ratio: - title: m/z - aliases: - - Mass - - m/z - retention_time: - aliases: - - RT - retention_index: - aliases: - - RI - - CalibrationStandardEnum: - permissible_values: - fames: - aliases: - - FAMES - alkanes: - aliases: - - Alkanes From 9eca1f5e9c6ea39de18510e75ef3e599963e75a5 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 11:44:17 -0700 Subject: [PATCH 02/18] add first pass at migrator --- .../migrators/migrator_has_calibration.py | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 nmdc_schema/migrators/migrator_has_calibration.py diff --git a/nmdc_schema/migrators/migrator_has_calibration.py b/nmdc_schema/migrators/migrator_has_calibration.py new file mode 100644 index 0000000000..ecdf7c0ced --- /dev/null +++ b/nmdc_schema/migrators/migrator_has_calibration.py @@ -0,0 +1,170 @@ +from nmdc_schema.migrators.migrator_base import MigratorBase +import re + + +class Migrator(MigratorBase): + r""" + Migrates a database between two schemas. + + This migrator removes the `has_calibration` field from all documents that represent an instance of + the `NomAnalysis` and 'MetabolomicsAnalysis' class, and moves the information to its corresponding + 'MassSpectrometry` `has_calibration` slot. + + The creation of this migrator was in response to this issue: + https://github.com/microbiomedata/nmdc-schema/issues/2139 + + """ + + _from_version = "11.0.3" + _to_version = "11.1.0" + + def upgrade(self) -> None: + r""" + Migrates the database from conforming to the original schema, to conforming to the new schema. + + >>> from nmdc_schema.migrators.adapters.dictionary_adapter import DictionaryAdapter + >>> db = { + ... 'workflow_execution_set': [ + ... {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1', 'type': 'nmdc:MetabolomicsAnalysis'}, + ... {'id': 'nmdc:wfx2', 'has_calibration': 'false', 'was_informed_by': 'nmdc:dgen2', 'type': 'nmdc:NomAnalysis'}, + ... {'id': 'nmdc:wfx3', 'was_informed_by': 'nmdc:dgen3', 'type': 'nmdc:MetabolomicsAnalysis'} + ... ], + ... 'data_generation_set': [ + ... {'id': 'nmdc:dgen1'}, + ... {'id': 'nmdc:dgen2'}, + ... {'id': 'nmdc:dgen3'} + ... ], + ... 'data_object_set': [ + ... {'id': 'nmdc:dobj-13-abc123'} + ... ] + ... } + >>> a = DictionaryAdapter(database=db) + >>> m = Migrator(adapter=a) + >>> m.upgrade() + >>> any('has_calibration' in doc for doc in db['workflow_execution_set']) # Calibrations removed from workflow + False + >>> db['data_generation_set'][0] # Calibration moved to data generation + {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:dobj-13-abc123'} + >>> db['data_generation_set'][1] # No calibration added when value was 'false' + {'id': 'nmdc:dgen2'} + """ + + + self.adapter.process_each_document(collection_name="workflow_execution_set", pipeline=[self.store_and_remove_calibrations]) + self.adapter.process_each_document(collection_name="data_generation_set", pipeline=[self.update_data_gen_calibration]) + + def check_has_calibration(self, has_calibration_value) -> bool: + + pattern = r'^nmdc:dobj' + + return bool(re.match(pattern, has_calibration_value)) + + def check_for_valid_data_object(self, data_obj_id) -> bool: + + data_obj_doc = self.adapter.get_document_having_value_in_field( + collection_name="data_object_set", field_name="id", value=data_obj_id + ) + + return data_obj_doc is not None + + def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: + r""" + Moves the `has_calibration` field from the `WorkflowExecution` document to + the corresponding `DataGeneration` document. + + >>> from nmdc_schema.migrators.adapters.dictionary_adapter import DictionaryAdapter + >>> db = { + ... 'workflow_execution_set': [ + ... {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'}, + ... {'id': 'nmdc:wfx2', 'has_calibration': 'false', 'was_informed_by': 'nmdc:dgen2'} + ... ], + ... 'data_generation_set': [ + ... {'id': 'nmdc:dgen1'}, + ... {'id': 'nmdc:dgen2'} + ... ], + ... 'data_object_set': [ + ... {'id': 'nmdc:dobj-13-abc123'} + ... ] + ... } + >>> a = DictionaryAdapter(database=db) + >>> m = Migrator(adapter=a) + >>> doc = {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'} + >>> m.store_and_remove_calibrations(doc) + {'id': 'nmdc:wfx1', 'was_informed_by': 'nmdc:dgen1'} + >>> doc = {'id': 'nmdc:wfx2', 'has_calibration': 'false', 'was_informed_by': 'nmdc:dgen2'} + >>> m.store_and_remove_calibrations(doc) + {'id': 'nmdc:wfx2', 'was_informed_by': 'nmdc:dgen2'} + >>> doc = {'id': 'nmdc:wfx3', 'has_calibration': 'invalid', 'was_informed_by': 'nmdc:dgen3'} + >>> m.store_and_remove_calibrations(doc) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ValueError: The 'has_calibration' value (invalid) in document (nmdc:wfx3) is not recognized + """ + + calibration_mapping = {} #create dictionary to store mappings + + if "has_calibration" in workflow_execution_doc: + has_calibration = workflow_execution_doc.get("has_calibration") + + # If has_calibration has a string value of false, remove the slot altogether from the document + if has_calibration.lower() == 'false': + workflow_execution_doc.pop("has_calibration") + + # If the has_calibration value is not a data object id or does not have a value of "false" + # raise an error. + elif not self.check_has_calibration(has_calibration) and has_calibration.lower() != 'false': + raise ValueError(f"The 'has_calibration' value ({has_calibration}) in document " + f"({workflow_execution_doc['id']}) is not recognized") + + # If has_calibration is a nmdc data object identifier: + elif self.check_has_calibration(has_calibration): + calib_data_object = workflow_execution_doc.get("has_calibration") + + if not self.check_for_valid_data_object(calib_data_object): + raise ValueError(f"The 'has_calibration' value ({has_calibration}) in document " + f"({workflow_execution_doc['id']}) is not a valid data object. The data object + does not exist") + else: + data_gen_doc = self.adapter.get_document_having_value_in_field( + collection_name="data_generation_set", field_name="id", value=workflow_execution_doc["was_informed_by"] + ) + + # Store has_calibrations in calibration_mapping dictionary + calibration_mapping[data_gen_doc["id"]] = has_calibration + + if not hasattr(self, "calibration_mappings"): + self.calibration_mappings = {} + self.calibration_mappings.update(calibration_mapping) + + # Remove calibration slot after storing mappings + workflow_execution_doc.pop("has_calibration") + + return workflow_execution_doc + + def update_data_gen_calibration(self, data_gen_doc) -> dict: + r""" + Updates data generation documents with calibration information + + >>> from nmdc_schema.migrators.adapters.dictionary_adapter import DictionaryAdapter + >>> db = { + ... 'workflow_execution_set': [ + ... {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'} + ... ], + ... 'data_generation_set': [ + ... {'id': 'nmdc:dgen1'}, + ... {'id': 'nmdc:dgen2'} # doc without corresponding calibration + ... ] + ... } + >>> a = DictionaryAdapter(database=db) + >>> m = Migrator(adapter=a) + >>> # First store calibrations + >>> m.store_and_remove_calibrations({'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'}) + >>> # Then test update_data_gen_calibration + >>> m.update_data_gen_calibration({'id': 'nmdc:dgen1'}) # doc with corresponding calibration + {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:dobj-13-abc123'} + >>> m.update_data_gen_calibration({'id': 'nmdc:dgen2'}) # doc without corresponding calibration + {'id': 'nmdc:dgen2'} + """ + + if data_gen_doc["id"] in self.calibration_mappings: + data_gen_doc["has_calibration"] = self.calibration_mappings[data_gen_doc["id"]] + return data_gen_doc From 767faec3b803d73a792ca7e0b3a884fef34b9f45 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 11:47:32 -0700 Subject: [PATCH 03/18] update doc strings --- .../migrators/migrator_has_calibration.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/nmdc_schema/migrators/migrator_has_calibration.py b/nmdc_schema/migrators/migrator_has_calibration.py index ecdf7c0ced..ec86015b58 100644 --- a/nmdc_schema/migrators/migrator_has_calibration.py +++ b/nmdc_schema/migrators/migrator_has_calibration.py @@ -54,12 +54,43 @@ def upgrade(self) -> None: self.adapter.process_each_document(collection_name="data_generation_set", pipeline=[self.update_data_gen_calibration]) def check_has_calibration(self, has_calibration_value) -> bool: + r""" + Checks for a valid data object id format (starts with 'nmdc:dobj') + + >>> from nmdc_schema.migrators.adapters.dictionary_adapter import DictionaryAdapter + >>> db = {} + >>> a = DictionaryAdapter(database=db) + >>> m = Migrator(adapter=a) + >>> m.check_has_calibration('nmdc:dobj-13-abc123') # Valid format + True + >>> m.check_has_calibration('false') # Invalid format + False + >>> m.check_has_calibration('nmdc:something-else') # Invalid format + False + """ pattern = r'^nmdc:dobj' return bool(re.match(pattern, has_calibration_value)) def check_for_valid_data_object(self, data_obj_id) -> bool: + r""" + Checks database for valid data object. Returns False if not valid + + >>> from nmdc_schema.migrators.adapters.dictionary_adapter import DictionaryAdapter + >>> db = { + ... 'data_object_set': [ + ... {'id': 'nmdc:dobj-13-abc123'}, + ... {'id': 'nmdc:dobj-13-def456'} + ... ] + ... } + >>> a = DictionaryAdapter(database=db) + >>> m = Migrator(adapter=a) + >>> m.check_for_valid_data_object('nmdc:dobj-13-abc123') # Exists in database + True + >>> m.check_for_valid_data_object('nmdc:dobj-13-nonexistent') # Doesn't exist + False + """ data_obj_doc = self.adapter.get_document_having_value_in_field( collection_name="data_object_set", field_name="id", value=data_obj_id From f7bb087df4fd1f342b4dc6e83ffc0e275a16749c Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 12:53:03 -0700 Subject: [PATCH 04/18] fix string literal --- nmdc_schema/migrators/migrator_has_calibration.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nmdc_schema/migrators/migrator_has_calibration.py b/nmdc_schema/migrators/migrator_has_calibration.py index ec86015b58..e5dddedc9a 100644 --- a/nmdc_schema/migrators/migrator_has_calibration.py +++ b/nmdc_schema/migrators/migrator_has_calibration.py @@ -152,8 +152,7 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: if not self.check_for_valid_data_object(calib_data_object): raise ValueError(f"The 'has_calibration' value ({has_calibration}) in document " - f"({workflow_execution_doc['id']}) is not a valid data object. The data object - does not exist") + f"({workflow_execution_doc['id']}) is not a valid data object. The data object does not exist") else: data_gen_doc = self.adapter.get_document_having_value_in_field( collection_name="data_generation_set", field_name="id", value=workflow_execution_doc["was_informed_by"] From 0e4a84f8a3250fa197db3057fd1543864476df52 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 12:56:43 -0700 Subject: [PATCH 05/18] rename migrator --- .../migrator_from_11_0_3_to_11_1_0_part_1.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename nmdc_schema/migrators/{migrator_has_calibration.py => partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py} (100%) diff --git a/nmdc_schema/migrators/migrator_has_calibration.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py similarity index 100% rename from nmdc_schema/migrators/migrator_has_calibration.py rename to nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py From 18d2493e0940d30a17f20a2ae3940a74e383be0e Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 13:02:32 -0700 Subject: [PATCH 06/18] change has_calibration range --- src/schema/nmdc.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index e2064cca4b..b8dc3b1c16 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -949,8 +949,7 @@ enums: slots: has_calibration: - any_of: - - range: CalibrationInformation + range: CalibrationInformation description: a calibration instance associated with a process notes: >- has_calibration slot will be removed from all WorkflowExecution classes but remain on the From 736d1039d8031d4891d8de74dcb90b19551a974c Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 13:18:40 -0700 Subject: [PATCH 07/18] fix doc test --- .../migrator_from_11_0_3_to_11_1_0_part_1.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py index e5dddedc9a..09de9c9be2 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py @@ -177,17 +177,21 @@ def update_data_gen_calibration(self, data_gen_doc) -> dict: >>> from nmdc_schema.migrators.adapters.dictionary_adapter import DictionaryAdapter >>> db = { ... 'workflow_execution_set': [ - ... {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'} + ... {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1', 'type': 'nmdc:MetabolomicsAnalysis'} ... ], ... 'data_generation_set': [ ... {'id': 'nmdc:dgen1'}, ... {'id': 'nmdc:dgen2'} # doc without corresponding calibration + ... ], + ... 'data_object_set': [ + ... {'id': 'nmdc:dobj-13-abc123'} ... ] ... } >>> a = DictionaryAdapter(database=db) >>> m = Migrator(adapter=a) >>> # First store calibrations - >>> m.store_and_remove_calibrations({'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'}) + >>> doc = {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1', 'type': 'nmdc:MetabolomicsAnalysis'} + >>> m.store_and_remove_calibrations(doc) >>> # Then test update_data_gen_calibration >>> m.update_data_gen_calibration({'id': 'nmdc:dgen1'}) # doc with corresponding calibration {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:dobj-13-abc123'} From f9d7bd3ff4c8b74a6546947074e3351df59ebd2b Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 13:35:48 -0700 Subject: [PATCH 08/18] update doc tests --- .../migrator_from_11_0_3_to_11_1_0_part_1.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py index 09de9c9be2..39fe432b80 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py @@ -119,14 +119,14 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: ... } >>> a = DictionaryAdapter(database=db) >>> m = Migrator(adapter=a) - >>> doc = {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'} - >>> m.store_and_remove_calibrations(doc) + >>> workflow_execution_doc = {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1'} + >>> m.store_and_remove_calibrations(workflow_execution_doc) {'id': 'nmdc:wfx1', 'was_informed_by': 'nmdc:dgen1'} - >>> doc = {'id': 'nmdc:wfx2', 'has_calibration': 'false', 'was_informed_by': 'nmdc:dgen2'} - >>> m.store_and_remove_calibrations(doc) + >>> workflow_execution_doc = {'id': 'nmdc:wfx2', 'has_calibration': 'false', 'was_informed_by': 'nmdc:dgen2'} + >>> m.store_and_remove_calibrations(workflow_execution_doc) {'id': 'nmdc:wfx2', 'was_informed_by': 'nmdc:dgen2'} - >>> doc = {'id': 'nmdc:wfx3', 'has_calibration': 'invalid', 'was_informed_by': 'nmdc:dgen3'} - >>> m.store_and_remove_calibrations(doc) # doctest: +IGNORE_EXCEPTION_DETAIL + >>> workflow_execution_doc = {'id': 'nmdc:wfx3', 'has_calibration': 'invalid', 'was_informed_by': 'nmdc:dgen3'} + >>> m.store_and_remove_calibrations(workflow_execution_doc) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ValueError: The 'has_calibration' value (invalid) in document (nmdc:wfx3) is not recognized """ @@ -190,11 +190,12 @@ def update_data_gen_calibration(self, data_gen_doc) -> dict: >>> a = DictionaryAdapter(database=db) >>> m = Migrator(adapter=a) >>> # First store calibrations - >>> doc = {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1', 'type': 'nmdc:MetabolomicsAnalysis'} - >>> m.store_and_remove_calibrations(doc) + >>> workflow_execution_doc = {'id': 'nmdc:wfx1', 'has_calibration': 'nmdc:dobj-13-abc123', 'was_informed_by': 'nmdc:dgen1', 'type': 'nmdc:MetabolomicsAnalysis'} + >>> _ = m.store_and_remove_calibrations(workflow_execution_doc) # Store the calibrations first >>> # Then test update_data_gen_calibration >>> m.update_data_gen_calibration({'id': 'nmdc:dgen1'}) # doc with corresponding calibration {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:dobj-13-abc123'} + >>> # Test document without calibration >>> m.update_data_gen_calibration({'id': 'nmdc:dgen2'}) # doc without corresponding calibration {'id': 'nmdc:dgen2'} """ From 6a365bdbc3b5d813d67503c444bc40fd0e8be8b6 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 14:43:37 -0700 Subject: [PATCH 09/18] fix migrator and implement partial --- .../migrator_from_11_0_3_to_11_1_0_part_1.py | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) rename nmdc_schema/migrators/partials/{migrator_from_10_2_0_to_11_0_0 => migrator_from_11_0_3_to_11_1_0}/migrator_from_11_0_3_to_11_1_0_part_1.py (86%) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py similarity index 86% rename from nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py rename to nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py index 39fe432b80..0167d7877e 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_11_0_3_to_11_1_0_part_1.py +++ b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py @@ -36,6 +36,9 @@ def upgrade(self) -> None: ... ], ... 'data_object_set': [ ... {'id': 'nmdc:dobj-13-abc123'} + ... ], + ... 'calibration_set': [ + ... {'id': 'nmdc:calib1', 'calibration_object': 'nmdc:dobj-13-abc123'} ... ] ... } >>> a = DictionaryAdapter(database=db) @@ -44,7 +47,7 @@ def upgrade(self) -> None: >>> any('has_calibration' in doc for doc in db['workflow_execution_set']) # Calibrations removed from workflow False >>> db['data_generation_set'][0] # Calibration moved to data generation - {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:dobj-13-abc123'} + {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:calib1'} >>> db['data_generation_set'][1] # No calibration added when value was 'false' {'id': 'nmdc:dgen2'} """ @@ -115,6 +118,9 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: ... ], ... 'data_object_set': [ ... {'id': 'nmdc:dobj-13-abc123'} + ... ], + ... 'calibration_set': [ + ... {'id': 'nmdc:calib1', 'calibration_object': 'nmdc:dobj-13-abc123'} ... ] ... } >>> a = DictionaryAdapter(database=db) @@ -134,32 +140,36 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: calibration_mapping = {} #create dictionary to store mappings if "has_calibration" in workflow_execution_doc: - has_calibration = workflow_execution_doc.get("has_calibration") + has_calibration_data_obj = workflow_execution_doc.get("has_calibration") # If has_calibration has a string value of false, remove the slot altogether from the document - if has_calibration.lower() == 'false': + if has_calibration_data_obj.lower() == 'false': workflow_execution_doc.pop("has_calibration") + self.logger.info(f"calib_data_object is {has_calibration_data_obj}") # If the has_calibration value is not a data object id or does not have a value of "false" # raise an error. - elif not self.check_has_calibration(has_calibration) and has_calibration.lower() != 'false': - raise ValueError(f"The 'has_calibration' value ({has_calibration}) in document " + elif not self.check_has_calibration(has_calibration_data_obj) and has_calibration_data_obj.lower() != 'false': + raise ValueError(f"The 'has_calibration' value ({has_calibration_data_obj}) in document " f"({workflow_execution_doc['id']}) is not recognized") # If has_calibration is a nmdc data object identifier: - elif self.check_has_calibration(has_calibration): - calib_data_object = workflow_execution_doc.get("has_calibration") - - if not self.check_for_valid_data_object(calib_data_object): - raise ValueError(f"The 'has_calibration' value ({has_calibration}) in document " + elif self.check_has_calibration(has_calibration_data_obj): + + if not self.check_for_valid_data_object(has_calibration_data_obj): + raise ValueError(f"The 'has_calibration' value ({has_calibration_data_obj}) in document " f"({workflow_execution_doc['id']}) is not a valid data object. The data object does not exist") else: data_gen_doc = self.adapter.get_document_having_value_in_field( collection_name="data_generation_set", field_name="id", value=workflow_execution_doc["was_informed_by"] ) + + calibration_doc = self.adapter.get_document_having_value_in_field( + collection_name="calibration_set", field_name="calibration_object", value=has_calibration_data_obj + ) # Store has_calibrations in calibration_mapping dictionary - calibration_mapping[data_gen_doc["id"]] = has_calibration + calibration_mapping[data_gen_doc["id"]] = calibration_doc["id"] if not hasattr(self, "calibration_mappings"): self.calibration_mappings = {} @@ -185,6 +195,9 @@ def update_data_gen_calibration(self, data_gen_doc) -> dict: ... ], ... 'data_object_set': [ ... {'id': 'nmdc:dobj-13-abc123'} + ... ], + ... 'calibration_set': [ + ... {'id': 'nmdc:calib1', 'calibration_object': 'nmdc:dobj-13-abc123'} ... ] ... } >>> a = DictionaryAdapter(database=db) @@ -194,7 +207,7 @@ def update_data_gen_calibration(self, data_gen_doc) -> dict: >>> _ = m.store_and_remove_calibrations(workflow_execution_doc) # Store the calibrations first >>> # Then test update_data_gen_calibration >>> m.update_data_gen_calibration({'id': 'nmdc:dgen1'}) # doc with corresponding calibration - {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:dobj-13-abc123'} + {'id': 'nmdc:dgen1', 'has_calibration': 'nmdc:calib1'} >>> # Test document without calibration >>> m.update_data_gen_calibration({'id': 'nmdc:dgen2'}) # doc without corresponding calibration {'id': 'nmdc:dgen2'} From 789b2f08b6a3a2dabc15d140fad0b142d1568be6 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 14:44:57 -0700 Subject: [PATCH 10/18] add no op migrator --- .../migrators/migrator_from_11_0_3_to_11_1_0.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py diff --git a/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py b/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py new file mode 100644 index 0000000000..3350b6fc17 --- /dev/null +++ b/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py @@ -0,0 +1,17 @@ +from nmdc_schema.migrators.migrator_base import MigratorBase + + +class Migrator(MigratorBase): + r""" + Migrates a database between two schemas. + + Note: This is a "no op" migrator. Its existence serves as documentation that no + database migration is necessary between the specified schema versions. + """ + + _from_version = "11.0.3" + _to_version = "11.1.0" + + def upgrade(self) -> None: + r"""Do nothing.""" + pass \ No newline at end of file From 513cf6e03f7f617aa2816daa454b017b872a5060 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 14:48:28 -0700 Subject: [PATCH 11/18] implement partials --- .../migrator_from_11_0_3_to_11_1_0.py | 29 +++++++++++++++---- .../__init__.py | 25 ++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py diff --git a/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py b/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py index 3350b6fc17..cc85c40826 100644 --- a/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py +++ b/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py @@ -1,17 +1,36 @@ from nmdc_schema.migrators.migrator_base import MigratorBase +from nmdc_schema.migrators.partials.migrator_from_11_0_3_to_11_1_0 import ( + get_migrator_classes, +) class Migrator(MigratorBase): r""" Migrates a database between two schemas. - Note: This is a "no op" migrator. Its existence serves as documentation that no - database migration is necessary between the specified schema versions. + Specifically, this migrator migrates a database that conforms to the "pre-Berkeley schema" + into one that conforms to the "Berkeley schema". + + Reference: https://pypi.org/project/nmdc-schema/#history """ _from_version = "11.0.3" - _to_version = "11.1.0" + _to_version = "11.1.0" def upgrade(self) -> None: - r"""Do nothing.""" - pass \ No newline at end of file + r""" + Migrates the database from conforming to the original schema, to conforming to the new schema. + + This migrator uses partial migrators. It runs them in the order in which they were designed to be run. + """ + + migrator_classes = get_migrator_classes() + num_migrators = len(migrator_classes) + for idx, migrator_class in enumerate(migrator_classes): + self.logger.info(f"Running migrator {idx + 1} of {num_migrators}") + self.logger.debug( + f"Migrating from {migrator_class.get_origin_version()} " + f"to {migrator_class.get_destination_version()}" + ) + migrator = migrator_class(adapter=self.adapter, logger=self.logger) + migrator.upgrade() \ No newline at end of file diff --git a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py new file mode 100644 index 0000000000..aa71ebaab1 --- /dev/null +++ b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py @@ -0,0 +1,25 @@ +from typing import List, Type + +from nmdc_schema.migrators.migrator_base import MigratorBase +from nmdc_schema.migrators.partials.migrator_from_11_0_3_to_11_1_0 import ( + migrator_from_11_0_3_to_11_1_0_part_1 +) + +def get_migrator_classes() -> List[Type[MigratorBase]]: + r""" + Returns a list of migrator classes in the order in which they (i.e. their `upgrade` methods) + were designed to be run. + + >>> migrator_classes = get_migrator_classes() + >>> type(migrator_classes) is list and len(migrator_classes) > 0 # the function returns a list + True + >>> from inspect import isclass + >>> all(isclass(c) for c in migrator_classes) # each list item is a classes + True + >>> all(callable(getattr(c, "upgrade")) for c in migrator_classes) # each class has an `upgrade` method + True + """ + + return [ + migrator_from_11_0_3_to_11_1_0_part_1.Migrator, + ] \ No newline at end of file From 138eaad009b8fac5ac70c3e038a9c765bf61af0e Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 14:57:45 -0700 Subject: [PATCH 12/18] fix documentation --- nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py b/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py index cc85c40826..d10f991581 100644 --- a/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py +++ b/nmdc_schema/migrators/migrator_from_11_0_3_to_11_1_0.py @@ -8,9 +8,6 @@ class Migrator(MigratorBase): r""" Migrates a database between two schemas. - Specifically, this migrator migrates a database that conforms to the "pre-Berkeley schema" - into one that conforms to the "Berkeley schema". - Reference: https://pypi.org/project/nmdc-schema/#history """ From 2fb71d96b6c07f64b9d46a92ffe0d1892cfaa727 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 14:59:45 -0700 Subject: [PATCH 13/18] remove extraneous code --- .../migrator_from_11_0_3_to_11_1_0_part_1.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py index 0167d7877e..fb49706203 100644 --- a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py +++ b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py @@ -146,7 +146,6 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: if has_calibration_data_obj.lower() == 'false': workflow_execution_doc.pop("has_calibration") - self.logger.info(f"calib_data_object is {has_calibration_data_obj}") # If the has_calibration value is not a data object id or does not have a value of "false" # raise an error. elif not self.check_has_calibration(has_calibration_data_obj) and has_calibration_data_obj.lower() != 'false': From d894bca82f36ce5c6bef2fb36e118cc52ee274db Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Wed, 30 Oct 2024 15:19:02 -0700 Subject: [PATCH 14/18] remove notes --- src/schema/nmdc.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index b8dc3b1c16..bd0aeb746d 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -951,11 +951,6 @@ slots: has_calibration: range: CalibrationInformation description: a calibration instance associated with a process - notes: >- - has_calibration slot will be removed from all WorkflowExecution classes but remain on the - MassSpectrometry class after an ingest of the appropriate set has occurred. - Once this has occurred, this slot's range can be updated to CalibrationInformation and class/slot definitions can move to nmdc.yaml. - See PR #29 in Berkeley schema. calibration_object: range: DataObject From 2bee85c631c985d05cb52bba6bbff98084bf6c5c Mon Sep 17 00:00:00 2001 From: Brynn Zalmanek Date: Thu, 31 Oct 2024 15:16:53 -0700 Subject: [PATCH 15/18] Update nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com> --- .../partials/migrator_from_11_0_3_to_11_1_0/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py index aa71ebaab1..22649dbaa2 100644 --- a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py +++ b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/__init__.py @@ -14,7 +14,7 @@ def get_migrator_classes() -> List[Type[MigratorBase]]: >>> type(migrator_classes) is list and len(migrator_classes) > 0 # the function returns a list True >>> from inspect import isclass - >>> all(isclass(c) for c in migrator_classes) # each list item is a classes + >>> all(isclass(c) for c in migrator_classes) # each list item is a class True >>> all(callable(getattr(c, "upgrade")) for c in migrator_classes) # each class has an `upgrade` method True From b45329205239ff8739b5fc63ad12ffbe0a23ad7c Mon Sep 17 00:00:00 2001 From: Brynn Zalmanek Date: Thu, 31 Oct 2024 15:17:23 -0700 Subject: [PATCH 16/18] Update nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py Thanks!! Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com> --- .../migrator_from_11_0_3_to_11_1_0_part_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py index fb49706203..1a1f75f89a 100644 --- a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py +++ b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py @@ -148,7 +148,7 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: # If the has_calibration value is not a data object id or does not have a value of "false" # raise an error. - elif not self.check_has_calibration(has_calibration_data_obj) and has_calibration_data_obj.lower() != 'false': + elif not self.check_has_calibration(has_calibration_data_obj): raise ValueError(f"The 'has_calibration' value ({has_calibration_data_obj}) in document " f"({workflow_execution_doc['id']}) is not recognized") From fa23573aba8cbb2e16445c50883daa725edc35e9 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Thu, 31 Oct 2024 15:19:03 -0700 Subject: [PATCH 17/18] change variable name --- .../migrator_from_11_0_3_to_11_1_0_part_1.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py index 1a1f75f89a..a1446f4d5a 100644 --- a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py +++ b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py @@ -140,23 +140,23 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: calibration_mapping = {} #create dictionary to store mappings if "has_calibration" in workflow_execution_doc: - has_calibration_data_obj = workflow_execution_doc.get("has_calibration") + has_calibration_data_obj_id = workflow_execution_doc.get("has_calibration") # If has_calibration has a string value of false, remove the slot altogether from the document - if has_calibration_data_obj.lower() == 'false': + if has_calibration_data_obj_id.lower() == 'false': workflow_execution_doc.pop("has_calibration") # If the has_calibration value is not a data object id or does not have a value of "false" # raise an error. - elif not self.check_has_calibration(has_calibration_data_obj): - raise ValueError(f"The 'has_calibration' value ({has_calibration_data_obj}) in document " + elif not self.check_has_calibration(has_calibration_data_obj_id): + raise ValueError(f"The 'has_calibration' value ({has_calibration_data_obj_id}) in document " f"({workflow_execution_doc['id']}) is not recognized") # If has_calibration is a nmdc data object identifier: - elif self.check_has_calibration(has_calibration_data_obj): + elif self.check_has_calibration(has_calibration_data_obj_id): - if not self.check_for_valid_data_object(has_calibration_data_obj): - raise ValueError(f"The 'has_calibration' value ({has_calibration_data_obj}) in document " + if not self.check_for_valid_data_object(has_calibration_data_obj_id): + raise ValueError(f"The 'has_calibration' value ({has_calibration_data_obj_id}) in document " f"({workflow_execution_doc['id']}) is not a valid data object. The data object does not exist") else: data_gen_doc = self.adapter.get_document_having_value_in_field( @@ -164,7 +164,7 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: ) calibration_doc = self.adapter.get_document_having_value_in_field( - collection_name="calibration_set", field_name="calibration_object", value=has_calibration_data_obj + collection_name="calibration_set", field_name="calibration_object", value=has_calibration_data_obj_id ) # Store has_calibrations in calibration_mapping dictionary From b8e4ecc127790675f14dcd604b615e8b4b6064c4 Mon Sep 17 00:00:00 2001 From: brynnz22 Date: Thu, 31 Oct 2024 15:22:17 -0700 Subject: [PATCH 18/18] reformat and change to name --- .../migrator_from_11_0_3_to_11_1_0_part_1.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py index a1446f4d5a..da75666ce4 100644 --- a/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py +++ b/nmdc_schema/migrators/partials/migrator_from_11_0_3_to_11_1_0/migrator_from_11_0_3_to_11_1_0_part_1.py @@ -16,7 +16,7 @@ class Migrator(MigratorBase): """ _from_version = "11.0.3" - _to_version = "11.1.0" + _to_version = "11.1.0.part_1" def upgrade(self) -> None: r""" @@ -160,12 +160,10 @@ def store_and_remove_calibrations(self, workflow_execution_doc) -> dict: f"({workflow_execution_doc['id']}) is not a valid data object. The data object does not exist") else: data_gen_doc = self.adapter.get_document_having_value_in_field( - collection_name="data_generation_set", field_name="id", value=workflow_execution_doc["was_informed_by"] - ) + collection_name="data_generation_set", field_name="id", value=workflow_execution_doc["was_informed_by"]) calibration_doc = self.adapter.get_document_having_value_in_field( - collection_name="calibration_set", field_name="calibration_object", value=has_calibration_data_obj_id - ) + collection_name="calibration_set", field_name="calibration_object", value=has_calibration_data_obj_id) # Store has_calibrations in calibration_mapping dictionary calibration_mapping[data_gen_doc["id"]] = calibration_doc["id"]