From 18f62b782d5a5c2b73ac6ecd4b0ea37a72331f81 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Fri, 19 Jan 2024 10:20:22 -0500
Subject: [PATCH 1/5] Review entity-api superclass behavior, add support for
 registration on constraints (may need to revise to pass superclass method
 instead) - #263

---
 src/app.py                        |   1 +
 src/lib/constraints/dataset.py    |   1 +
 src/lib/ontology.py               |  11 ++-
 src/schema/provenance_schema.yaml | 127 ------------------------------
 4 files changed, 12 insertions(+), 128 deletions(-)

diff --git a/src/app.py b/src/app.py
index bbec44cb..c1853b80 100644
--- a/src/app.py
+++ b/src/app.py
@@ -110,6 +110,7 @@ app.ubkg = initialize_ubkg(app.config)
 with app.app_context():
     init_ontology()
+    Ontology.modify_entities_cache()

 logger.info("Initialized ubkg module successfully :)")

 # Use a broad catch-all here
diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index e6d7bcfb..b5f02e5d 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,6 +8,7 @@ def build_all_dataset_constraints(entity):
     # Dataset ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
+    # descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
     return [
         build_constraint(ancestor, [descendant])
     ]
diff --git a/src/lib/ontology.py b/src/lib/ontology.py
index 70a50eca..5dc26256 100644
--- a/src/lib/ontology.py
+++ b/src/lib/ontology.py
@@ -1,6 +1,15 @@
 from atlas_consortia_commons.ubkg.ubkg_sdk import UbkgSDK
+from flask import current_app


 # Custom accessors etc. can be added to the Ontology class
 class Ontology(UbkgSDK):
-    pass
+    @staticmethod
+    def modify_entities_cache():
+        cache = current_app.ubkg.get_cache()
+        entities = current_app.ubkg.entities
+        key = f"VALUESET_{entities}"
+        if key in cache:
+            for e in cache[key]:
+                if e['term'] == 'Publication Entity':
+                    e['term'] = 'Publication'
\ No newline at end of file
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index f67e9311..79a5359f 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -558,15 +558,6 @@ ENTITIES:
       <<: *shared_properties
       <<: *shared_entity_properties
       <<: *doi_properties
-      antibodies:
-        type: list
-        description: "A list of antibodies used in the assay that created the dataset"
-      description:
-        type: string
-        description: "Free text description of the dataset"
-      dataset_info:
-        type: string
-        description: "Additional information about the dataset, which can be used to find this dataset, including lab specific (non-PHI) identifiers."
       # The Dataset.data_access_level is based on Dataset.status and Dataset.contains_human_genetic_sequences
       creation_action:
         type: string
         transient: true
         description: "The activity that was performed."
         before_property_create_validators:
           - validate_creation_action
-      data_access_level:
-        type: string
-        generated: true
-        description: "One of the values: public, consortium, protected. Only Dataset may have protected value"
-        before_create_trigger: set_data_access_level
-      # When contains_human_genetic_sequences is true, even if status is 'Published', the data_access_level is still 'protected'
-      contains_human_genetic_sequences:
-        type: boolean
-        required_on_create: true # Only required for create via POST, not update via PUT
-        description: "True if the data contains any human genetic sequence information."
       error_message:
         type: string
         description: "An open text field that holds the last error message that arose from pipeline validation or analysis."
-      status:
-        type: string
-        before_property_update_validators:
-          - validate_application_header_before_property_update
-          - validate_dataset_status_value
-        generated: true
-        description: "One of: New|Processing|QA|Published|Error|Hold|Invalid"
-        before_create_trigger: set_dataset_status_new
-        after_update_trigger: update_dataset_and_ancestors_data_access_level
       title:
         type: string
         description: "The title of the publication."
         required_on_create: true # Only required for create via POST, not update via PUT
-      lab_dataset_id:
-        type: string
-        description: "A name or identifier used by the lab who is uploading the data to cross reference the data locally"
-      dataset_type:
-        type: string
-        description: "The data or assay type contained in this dataset."
-        required_on_create: true # Only required for create via POST, not update via PUT
-      #TODO: Remove data_types
-      # TODO: How do we want to handle publications with the new `dataset_type` property?
-      data_types:
-        before_property_create_validators:
-          - validate_no_duplicates_in_list
-        before_property_update_validators:
-          - validate_no_duplicates_in_list
-        type: list
-        description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
-      collections:
-        type: list
-        transient: true
-        generated: true
-        description: "A list of collections that this dataset belongs to. Will be returned in response"
-        on_read_trigger: get_dataset_collections
-      upload:
-        type: json_string # dict
-        transient: true
-        generated: true
-        description: "The Upload that this dataset is associated with. Will be returned in response"
-        on_read_trigger: get_dataset_upload
-      contributors:
-        type: list
-        description: "A list of people who contributed to the creation of this dataset. Returned as an array of contributor where the structure of a contributor is"
-      direct_ancestor_uuids:
-        required_on_create: true # Only required for create via POST, not update via PUT
-        type: list
-        before_property_create_validators:
-          - validate_no_duplicates_in_list
-        before_property_update_validators:
-          - validate_no_duplicates_in_list
-        transient: true
-        exposed: false
-        description: "The uuids of source entities from which this new entity is derived. Used to pass source entity ids in on POST or PUT calls used to create the linkages."
-        # Note: link_dataset_to_direct_ancestors() will always delete all the old linkages first
-        after_create_trigger: set_was_generated_by
-        after_update_trigger: set_was_generated_by
-      direct_ancestors:
-        type: list
-        description: "A list of direct parent ancensters (one level above) that the Dataset was derived from."
-        generated: true
-        transient: true
-        on_read_trigger: get_dataset_direct_ancestors
-      published_timestamp:
-        type: integer
-        immutable: true
-        generated: true
-        description: "The timestamp of when the dataset was published. The format is an integer representing milliseconds since midnight, Jan 1, 1970. Cannot be set directly must be set with the /datasets//publish method."
-      published_user_displayname:
-        type: string
-        generated: true
-        immutable: true
-        description: "The name of the authenticated user or process that published the data. Cannot be set directly must be set with the /datasets//publish method."
-      published_user_sub:
-        type: string
-        generated: true
-        immutable: true
-        description: "The subject id as provided by the authorization mechanism for the person or process authenticated when the dataset was publised. Cannot be set directly must be set with the /datasets//publish method."
-      published_user_email:
-        type: string
-        generated: true
-        immutable: true
-        description: "The email address provided by the authorization mechanism for the person or process authenticated when published. Cannot be set directly must be set with the /datasets//publish method."
       pipeline_message: #todo: where is this attribute sourced from? Is it stored in the database? <- Not in neo4j
         type: string
       ingest_metadata:
         type: json_string # dict
         description: "The metadata returned from the processing at data submission time."
-      local_directory_rel_path:
-        # Example: protected//
-        type: string
-        generated: true
-        transient: true
-        description: "The path on the local file system, relative to the base data directory, where the data is stored."
-        on_read_trigger: get_local_directory_rel_path
       run_id:
         type: string
       ingest_id:
@@ -694,28 +589,6 @@ ENTITIES:
         immutable: true
         description: "The uuid of globus group which the user who created this entity is a member of. This is required on Create/POST if the user creating the Donor is a member of more than one write group. This property cannot be set via PUT (only on Create/POST)."
         before_create_trigger: set_group_uuid #method that, if group_uuid is not already set looks for membership in a single "data provider" group and sets to that. Otherwise if not set and no single "provider group" membership throws error
-      # Must set in neo4j
-      group_name:
-        # It's not being mapped in the current version, what to do for the existing entities?
-        type: string
-        generated: true
-        immutable: true
-        description: "The displayname of globus group which the user who created this entity is a member of"
-        before_create_trigger: set_group_name #same as group_uuid, except set group_name
-      previous_revision_uuid:
-        type: string
-        transient: true
-        immutable: true
-        description: "The uuid of previous revision dataset"
-        after_create_trigger: link_to_previous_revision
-        on_read_trigger: get_previous_revision_uuid
-      next_revision_uuid:
-        type: string
-        generated: true
-        transient: true
-        immutable: true
-        description: "The uuid of next revision dataset"
-        on_read_trigger: get_next_revision_uuid
       # No like image and metadata files handling for Donor/Sample
       # Dataset has only one thumbnail file
       thumbnail_file:

From fe451b97f8c25056e712a789c4cee8ce05dde434 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Mon, 22 Jan 2024 15:14:27 -0500
Subject: [PATCH 2/5] Remove definition of shared_properties from yaml section of Publication - #263

---
 src/lib/constraints/dataset.py    | 4 ++--
 src/schema/provenance_schema.yaml | 4 ----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index b5f02e5d..51543252 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,9 +8,9 @@ def build_all_dataset_constraints(entity):
     # Dataset ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
-    # descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
+    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
     return [
-        build_constraint(ancestor, [descendant])
+        build_constraint(ancestor, [descendant, descendant2])
     ]
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index 79a5359f..7fd5a50a 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -555,10 +555,6 @@ ENTITIES:
     source: true
     target: true
     properties:
-      <<: *shared_properties
-      <<: *shared_entity_properties
-      <<: *doi_properties
-      # The Dataset.data_access_level is based on Dataset.status and Dataset.contains_human_genetic_sequences
       creation_action:
         type: string
         transient: true

From 5e088461abe105c835c70173de6372967bb21d41 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Mon, 22 Jan 2024 15:31:17 -0500
Subject: [PATCH 3/5] Update comment

---
 src/lib/constraints/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index 51543252..29f6f0fb 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -5,7 +5,7 @@
 # can be the descendant of / --->

 def build_all_dataset_constraints(entity):
-    # Dataset ---> Dataset
+    # Dataset, Publication ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
     descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)

From 1806f793701ffa6feecd19070adde67d22b686e8 Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 30 Jan 2024 12:58:38 -0500
Subject: [PATCH 4/5] Updating constraint to use publication entity

---
 src/lib/constraints/dataset.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index 29f6f0fb..e4bb91c4 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,7 +8,8 @@ def build_all_dataset_constraints(entity):
     # Dataset, Publication ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
-    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
+    # TODO: Need to set this to just be PUBLICATION eventually but UBKG has this as publication_entity now
+    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION_ENTITY)
     return [
         build_constraint(ancestor, [descendant, descendant2])
     ]

From 866c28d3cdaf7274533c4a716507c682b05f125c Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 30 Jan 2024 14:20:37 -0500
Subject: [PATCH 5/5] Reverting ontology change

---
 src/lib/constraints/dataset.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index e4bb91c4..29f6f0fb 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,8 +8,7 @@ def build_all_dataset_constraints(entity):
     # Dataset, Publication ---> Dataset
     ancestor = build_constraint_unit(entity)
    descendant = build_constraint_unit(entity)
-    # TODO: Need to set this to just be PUBLICATION eventually but UBKG has this as publication_entity now
-    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION_ENTITY)
+    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
     return [
         build_constraint(ancestor, [descendant, descendant2])
     ]
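
Reviewer note: for anyone reading the series without applying it, below is a minimal, self-contained Python sketch of the net constraint behavior after PATCH 5/5. It is an illustration only: the real build_constraint_unit / build_constraint helpers live in src/lib/constraints, and the entity names come from Ontology.ops().entities() (backed by the UBKG valueset cache that PATCH 1/5 rewrites from 'Publication Entity' to 'Publication'). The dict layout, type hints, and the "allowed" check here are assumptions made for this sketch, not the project's actual API.

    # Hypothetical sketch of the "Dataset, Publication ---> Dataset" constraint.
    # Helper signatures and field names are assumed for illustration; the real
    # helpers are imported from lib.constraints.base in the repository.

    def build_constraint_unit(entity_type: str) -> dict:
        # One side of an ancestor ---> descendant relationship.
        return {"entity_type": entity_type}

    def build_constraint(ancestor: dict, descendants: list) -> dict:
        # A single rule: this ancestor may have any of these descendant types.
        return {"ancestors": ancestor, "descendants": descendants}

    def build_all_dataset_constraints(entity: str) -> list:
        # Dataset, Publication ---> Dataset (net result of PATCH 2/5 and 5/5)
        ancestor = build_constraint_unit(entity)
        descendant = build_constraint_unit(entity)
        descendant2 = build_constraint_unit("Publication")
        return [build_constraint(ancestor, [descendant, descendant2])]

    if __name__ == "__main__":
        constraints = build_all_dataset_constraints("Dataset")
        allowed = {d["entity_type"] for c in constraints for d in c["descendants"]}
        print("Publication allowed under Dataset:", "Publication" in allowed)  # True
        print("Sample allowed under Dataset:", "Sample" in allowed)            # False

The back-and-forth between PATCH 4/5 and 5/5 is only about naming: PATCH 4 temporarily pointed the unit at PUBLICATION_ENTITY because UBKG returns the term 'Publication Entity', and PATCH 5 reverts to PUBLICATION, which lines up with the cache rename that Ontology.modify_entities_cache() from PATCH 1/5 performs at startup.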