From 18f62b782d5a5c2b73ac6ecd4b0ea37a72331f81 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Fri, 19 Jan 2024 10:20:22 -0500
Subject: [PATCH 1/5] Review entity-api superclass behavior, add support for
 registration on constraints (may need to revise to pass superclass method
 instead) - #263

---
 src/app.py                        |   1 +
 src/lib/constraints/dataset.py    |   1 +
 src/lib/ontology.py               |  11 ++-
 src/schema/provenance_schema.yaml | 127 ------------------------------
 4 files changed, 12 insertions(+), 128 deletions(-)

diff --git a/src/app.py b/src/app.py
index bbec44cb..c1853b80 100644
--- a/src/app.py
+++ b/src/app.py
@@ -110,6 +110,7 @@ app.ubkg = initialize_ubkg(app.config)
 with app.app_context():
     init_ontology()
+    Ontology.modify_entities_cache()

 logger.info("Initialized ubkg module successfully :)")

 # Use a broad catch-all here
diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index e6d7bcfb..b5f02e5d 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,6 +8,7 @@ def build_all_dataset_constraints(entity):
     # Dataset ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
+    # descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
     return [
         build_constraint(ancestor, [descendant])
     ]
diff --git a/src/lib/ontology.py b/src/lib/ontology.py
index 70a50eca..5dc26256 100644
--- a/src/lib/ontology.py
+++ b/src/lib/ontology.py
@@ -1,6 +1,15 @@
 from atlas_consortia_commons.ubkg.ubkg_sdk import UbkgSDK
+from flask import current_app


 # Custom accessors etc. can be added to the Ontology class
 class Ontology(UbkgSDK):
-    pass
+    @staticmethod
+    def modify_entities_cache():
+        cache = current_app.ubkg.get_cache()
+        entities = current_app.ubkg.entities
+        key = f"VALUESET_{entities}"
+        if key in cache:
+            for e in cache[key]:
+                if e['term'] == 'Publication Entity':
+                    e['term'] = 'Publication'
\ No newline at end of file
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index f67e9311..79a5359f 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -558,15 +558,6 @@ ENTITIES:
       <<: *shared_properties
       <<: *shared_entity_properties
       <<: *doi_properties
-      antibodies:
-        type: list
-        description: "A list of antibodies used in the assay that created the dataset"
-      description:
-        type: string
-        description: "Free text description of the dataset"
-      dataset_info:
-        type: string
-        description: "Additional information about the dataset, which can be used to find this dataset, including lab specific (non-PHI) identifiers."
       # The Dataset.data_access_level is based on Dataset.status and Dataset.contains_human_genetic_sequences
       creation_action:
         type: string
         transient: true
         description: "The activity that was performed."
         before_property_create_validators:
           - validate_creation_action
-      data_access_level:
-        type: string
-        generated: true
-        description: "One of the values: public, consortium, protected. Only Dataset may have protected value"
-        before_create_trigger: set_data_access_level
-      # When contains_human_genetic_sequences is true, even if status is 'Published', the data_access_level is still 'protected'
-      contains_human_genetic_sequences:
-        type: boolean
-        required_on_create: true # Only required for create via POST, not update via PUT
-        description: "True if the data contains any human genetic sequence information."
       error_message:
         type: string
         description: "An open text field that holds the last error message that arose from pipeline validation or analysis."
-      status:
-        type: string
-        before_property_update_validators:
-          - validate_application_header_before_property_update
-          - validate_dataset_status_value
-        generated: true
-        description: "One of: New|Processing|QA|Published|Error|Hold|Invalid"
-        before_create_trigger: set_dataset_status_new
-        after_update_trigger: update_dataset_and_ancestors_data_access_level
       title:
         type: string
         description: "The title of the publication."
         required_on_create: true # Only required for create via POST, not update via PUT
-      lab_dataset_id:
-        type: string
-        description: "A name or identifier used by the lab who is uploading the data to cross reference the data locally"
-      dataset_type:
-        type: string
-        description: "The data or assay type contained in this dataset."
-        required_on_create: true # Only required for create via POST, not update via PUT
-      #TODO: Remove data_types
-      # TODO: How do we want to handle publications with the new `dataset_type` property?
-      data_types:
-        before_property_create_validators:
-          - validate_no_duplicates_in_list
-        before_property_update_validators:
-          - validate_no_duplicates_in_list
-        type: list
-        description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
-      collections:
-        type: list
-        transient: true
-        generated: true
-        description: "A list of collections that this dataset belongs to. Will be returned in response"
-        on_read_trigger: get_dataset_collections
-      upload:
-        type: json_string # dict
-        transient: true
-        generated: true
-        description: "The Upload that this dataset is associated with. Will be returned in response"
-        on_read_trigger: get_dataset_upload
-      contributors:
-        type: list
-        description: "A list of people who contributed to the creation of this dataset. Returned as an array of contributor where the structure of a contributor is"
-      direct_ancestor_uuids:
-        required_on_create: true # Only required for create via POST, not update via PUT
-        type: list
-        before_property_create_validators:
-          - validate_no_duplicates_in_list
-        before_property_update_validators:
-          - validate_no_duplicates_in_list
-        transient: true
-        exposed: false
-        description: "The uuids of source entities from which this new entity is derived. Used to pass source entity ids in on POST or PUT calls used to create the linkages."
-        # Note: link_dataset_to_direct_ancestors() will always delete all the old linkages first
-        after_create_trigger: set_was_generated_by
-        after_update_trigger: set_was_generated_by
-      direct_ancestors:
-        type: list
-        description: "A list of direct parent ancensters (one level above) that the Dataset was derived from."
-        generated: true
-        transient: true
-        on_read_trigger: get_dataset_direct_ancestors
-      published_timestamp:
-        type: integer
-        immutable: true
-        generated: true
-        description: "The timestamp of when the dataset was published. The format is an integer representing milliseconds since midnight, Jan 1, 1970. Cannot be set directly must be set with the /datasets//publish method."
-      published_user_displayname:
-        type: string
-        generated: true
-        immutable: true
-        description: "The name of the authenticated user or process that published the data. Cannot be set directly must be set with the /datasets//publish method."
-      published_user_sub:
-        type: string
-        generated: true
-        immutable: true
-        description: "The subject id as provided by the authorization mechanism for the person or process authenticated when the dataset was publised. Cannot be set directly must be set with the /datasets//publish method."
-      published_user_email:
-        type: string
-        generated: true
-        immutable: true
-        description: "The email address provided by the authorization mechanism for the person or process authenticated when published. Cannot be set directly must be set with the /datasets//publish method."
       pipeline_message: #todo: where is this attribute sourced from? Is it stored in the database? <- Not in neo4j
         type: string
       ingest_metadata:
         type: json_string # dict
         description: "The metadata returned from the processing at data submission time."
-      local_directory_rel_path:
-        # Example: protected//
-        type: string
-        generated: true
-        transient: true
-        description: "The path on the local file system, relative to the base data directory, where the data is stored."
-        on_read_trigger: get_local_directory_rel_path
       run_id:
         type: string
       ingest_id:
@@ -694,28 +589,6 @@ ENTITIES:
         immutable: true
         description: "The uuid of globus group which the user who created this entity is a member of. This is required on Create/POST if the user creating the Donor is a member of more than one write group. This property cannot be set via PUT (only on Create/POST)."
         before_create_trigger: set_group_uuid #method that, if group_uuid is not already set looks for membership in a single "data provider" group and sets to that. Otherwise if not set and no single "provider group" membership throws error
-      # Must set in neo4j
-      group_name:
-        # It's not being mapped in the current version, what to do for the existing entities?
-        type: string
-        generated: true
-        immutable: true
-        description: "The displayname of globus group which the user who created this entity is a member of"
-        before_create_trigger: set_group_name #same as group_uuid, except set group_name
-      previous_revision_uuid:
-        type: string
-        transient: true
-        immutable: true
-        description: "The uuid of previous revision dataset"
-        after_create_trigger: link_to_previous_revision
-        on_read_trigger: get_previous_revision_uuid
-      next_revision_uuid:
-        type: string
-        generated: true
-        transient: true
-        immutable: true
-        description: "The uuid of next revision dataset"
-        on_read_trigger: get_next_revision_uuid
       # No like image and metadata files handling for Donor/Sample
       # Dataset has only one thumbnail file
       thumbnail_file:

From fe451b97f8c25056e712a789c4cee8ce05dde434 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Mon, 22 Jan 2024 15:14:27 -0500
Subject: [PATCH 2/5] Remove definition of shared_properties from yaml section of Publication - #263

---
 src/lib/constraints/dataset.py    | 4 ++--
 src/schema/provenance_schema.yaml | 4 ----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index b5f02e5d..51543252 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,9 +8,9 @@ def build_all_dataset_constraints(entity):
     # Dataset ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
-    # descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
+    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
     return [
-        build_constraint(ancestor, [descendant])
+        build_constraint(ancestor, [descendant, descendant2])
     ]
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index 79a5359f..7fd5a50a 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -555,10 +555,6 @@ ENTITIES:
     source: true
     target: true
     properties:
-      <<: *shared_properties
-      <<: *shared_entity_properties
-      <<: *doi_properties
-      # The Dataset.data_access_level is based on Dataset.status and Dataset.contains_human_genetic_sequences
       creation_action:
         type: string
         transient: true

From 5e088461abe105c835c70173de6372967bb21d41 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Mon, 22 Jan 2024 15:31:17 -0500
Subject: [PATCH 3/5] Update comment

---
 src/lib/constraints/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index 51543252..29f6f0fb 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -5,7 +5,7 @@
 # can be the descendant of / --->

 def build_all_dataset_constraints(entity):
-    # Dataset ---> Dataset
+    # Dataset, Publication ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
     descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)

From 1806f793701ffa6feecd19070adde67d22b686e8 Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 30 Jan 2024 12:58:38 -0500
Subject: [PATCH 4/5] Updating constraint to use publication entity

---
 src/lib/constraints/dataset.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index 29f6f0fb..e4bb91c4 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,7 +8,8 @@ def build_all_dataset_constraints(entity):
     # Dataset, Publication ---> Dataset
     ancestor = build_constraint_unit(entity)
     descendant = build_constraint_unit(entity)
-    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
+    # TODO: Need to set this to just be PUBLICATION eventually but UBKG has this as publication_entity now
+    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION_ENTITY)
     return [
         build_constraint(ancestor, [descendant, descendant2])
     ]

From 866c28d3cdaf7274533c4a716507c682b05f125c Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 30 Jan 2024 14:20:37 -0500
Subject: [PATCH 5/5] Reverting ontology change

---
 src/lib/constraints/dataset.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/lib/constraints/dataset.py b/src/lib/constraints/dataset.py
index e4bb91c4..29f6f0fb 100644
--- a/src/lib/constraints/dataset.py
+++ b/src/lib/constraints/dataset.py
@@ -8,8 +8,7 @@ def build_all_dataset_constraints(entity):
     # Dataset, Publication ---> Dataset
     ancestor = build_constraint_unit(entity)
    descendant = build_constraint_unit(entity)
-    # TODO: Need to set this to just be PUBLICATION eventually but UBKG has this as publication_entity now
-    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION_ENTITY)
+    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
     return [
         build_constraint(ancestor, [descendant, descendant2])
     ]
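
Reviewer note: for anyone reading the series without applying it, below is a minimal, self-contained Python sketch of the net constraint behavior after PATCH 5/5. It is an illustration only: the real build_constraint_unit / build_constraint helpers live in src/lib/constraints, and the entity names come from Ontology.ops().entities() (backed by the UBKG valueset cache that PATCH 1/5 rewrites from 'Publication Entity' to 'Publication'). The dict layout, type hints, and the "allowed" check here are assumptions made for this sketch, not the project's actual API.

    # Hypothetical sketch of the "Dataset, Publication ---> Dataset" constraint.
    # Helper signatures and field names are assumed for illustration; the real
    # helpers are imported from lib.constraints.base in the repository.

    def build_constraint_unit(entity_type: str) -> dict:
        # One side of an ancestor ---> descendant relationship.
        return {"entity_type": entity_type}

    def build_constraint(ancestor: dict, descendants: list) -> dict:
        # A single rule: this ancestor may have any of these descendant types.
        return {"ancestors": ancestor, "descendants": descendants}

    def build_all_dataset_constraints(entity: str) -> list:
        # Dataset, Publication ---> Dataset (net result of PATCH 2/5 and 5/5)
        ancestor = build_constraint_unit(entity)
        descendant = build_constraint_unit(entity)
        descendant2 = build_constraint_unit("Publication")
        return [build_constraint(ancestor, [descendant, descendant2])]

    if __name__ == "__main__":
        constraints = build_all_dataset_constraints("Dataset")
        allowed = {d["entity_type"] for c in constraints for d in c["descendants"]}
        print("Publication allowed under Dataset:", "Publication" in allowed)  # True
        print("Sample allowed under Dataset:", "Sample" in allowed)            # False

The back-and-forth between PATCH 4/5 and 5/5 is only about naming: PATCH 4 temporarily pointed the unit at PUBLICATION_ENTITY because UBKG returns the term 'Publication Entity', and PATCH 5 reverts to PUBLICATION, which lines up with the cache rename that Ontology.modify_entities_cache() from PATCH 1/5 performs at startup.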