Skip to content

Commit

Permalink
Merge pull request #266 from sennetconsortium/libpitt/263-superclass
Browse files Browse the repository at this point in the history
Review entity-api superclass behavior, add support for registration o…
  • Loading branch information
maxsibilla authored Jan 23, 2024
2 parents 04347f0 + 5e08846 commit 2b18ae3
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 134 deletions.
1 change: 1 addition & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
app.ubkg = initialize_ubkg(app.config)
with app.app_context():
init_ontology()
Ontology.modify_entities_cache()

logger.info("Initialized ubkg module successfully :)")
# Use a broad catch-all here
Expand Down
5 changes: 3 additions & 2 deletions src/lib/constraints/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
# can be the descendant of / --->
def build_all_dataset_constraints(entity):
    """Build the ancestor/descendant constraints for a dataset entity.

    Reading ``--->`` as "can be the descendant of":

        Dataset, Publication ---> Dataset

    :param entity: the entity type handed to ``build_constraint_unit`` for
        both the ancestor unit and the first descendant unit.
    :return: a one-element list holding the constraint that pairs the
        ancestor unit with its two permitted descendant units.
    """
    ancestor = build_constraint_unit(entity)
    descendant = build_constraint_unit(entity)
    # A Publication may also descend from this entity, so it gets its own
    # descendant unit alongside the entity itself.
    descendant2 = build_constraint_unit(Ontology.ops().entities().PUBLICATION)
    return [
        build_constraint(ancestor, [descendant, descendant2]),
    ]


Expand Down
11 changes: 10 additions & 1 deletion src/lib/ontology.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from atlas_consortia_commons.ubkg.ubkg_sdk import UbkgSDK
from flask import current_app


# Custom accessors etc. can be added to the Ontology class
class Ontology(UbkgSDK):
    """Project-level subclass of ``UbkgSDK`` that hosts custom accessors."""

    @staticmethod
    def modify_entities_cache():
        """Rename the cached 'Publication Entity' term to 'Publication'.

        Reads the UBKG cache from ``current_app.ubkg``, so it has to run
        while a Flask application context is active. The matching cache
        entries are edited in place; when the valueset key is not in the
        cache this is a no-op.
        """
        cache = current_app.ubkg.get_cache()
        entities = current_app.ubkg.entities
        # NOTE(review): presumably the cache stores each valueset under
        # "VALUESET_<code>" -- confirm against the UBKG SDK.
        key = f"VALUESET_{entities}"
        if key in cache:
            for e in cache[key]:
                if e['term'] == 'Publication Entity':
                    e['term'] = 'Publication'
131 changes: 0 additions & 131 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -558,135 +558,26 @@ ENTITIES:
source: true
target: true
properties:
<<: *shared_properties
<<: *shared_entity_properties
<<: *doi_properties
antibodies:
type: list
description: "A list of antibodies used in the assay that created the dataset"
description:
type: string
description: "Free text description of the dataset"
dataset_info:
type: string
description: "Additional information about the dataset, which can be used to find this dataset, including lab specific (non-PHI) identifiers."
# The Dataset.data_access_level is based on Dataset.status and Dataset.contains_human_genetic_sequences
creation_action:
type: string
transient: true
immutable: true
description: "The activity that was performed."
before_property_create_validators:
- validate_creation_action
data_access_level:
type: string
generated: true
description: "One of the values: public, consortium, protected. Only Dataset may have protected value"
before_create_trigger: set_data_access_level
# When contains_human_genetic_sequences is true, even if status is 'Published', the data_access_level is still 'protected'
contains_human_genetic_sequences:
type: boolean
required_on_create: true # Only required for create via POST, not update via PUT
description: "True if the data contains any human genetic sequence information."
error_message:
type: string
description: "An open text field that holds the last error message that arose from pipeline validation or analysis."
status:
type: string
before_property_update_validators:
- validate_application_header_before_property_update
- validate_dataset_status_value
generated: true
description: "One of: New|Processing|QA|Published|Error|Hold|Invalid"
before_create_trigger: set_dataset_status_new
after_update_trigger: update_dataset_and_ancestors_data_access_level
title:
type: string
description: "The title of the publication."
required_on_create: true # Only required for create via POST, not update via PUT
lab_dataset_id:
type: string
description: "A name or identifier used by the lab who is uploading the data to cross reference the data locally"
dataset_type:
type: string
description: "The data or assay type contained in this dataset."
required_on_create: true # Only required for create via POST, not update via PUT
#TODO: Remove data_types
# TODO: How do we want to handle publications with the new `dataset_type` property?
data_types:
before_property_create_validators:
- validate_no_duplicates_in_list
before_property_update_validators:
- validate_no_duplicates_in_list
type: list
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
collections:
type: list
transient: true
generated: true
description: "A list of collections that this dataset belongs to. Will be returned in response"
on_read_trigger: get_dataset_collections
upload:
type: json_string # dict
transient: true
generated: true
description: "The Upload that this dataset is associated with. Will be returned in response"
on_read_trigger: get_dataset_upload
contributors:
type: list
description: "A list of people who contributed to the creation of this dataset. Returned as an array of contributor where the structure of a contributor is"
direct_ancestor_uuids:
required_on_create: true # Only required for create via POST, not update via PUT
type: list
before_property_create_validators:
- validate_no_duplicates_in_list
before_property_update_validators:
- validate_no_duplicates_in_list
transient: true
exposed: false
description: "The uuids of source entities from which this new entity is derived. Used to pass source entity ids in on POST or PUT calls used to create the linkages."
# Note: link_dataset_to_direct_ancestors() will always delete all the old linkages first
after_create_trigger: set_was_generated_by
after_update_trigger: set_was_generated_by
direct_ancestors:
type: list
description: "A list of direct parent ancensters (one level above) that the Dataset was derived from."
generated: true
transient: true
on_read_trigger: get_dataset_direct_ancestors
published_timestamp:
type: integer
immutable: true
generated: true
description: "The timestamp of when the dataset was published. The format is an integer representing milliseconds since midnight, Jan 1, 1970. Cannot be set directly must be set with the /datasets/<id>/publish method."
published_user_displayname:
type: string
generated: true
immutable: true
description: "The name of the authenticated user or process that published the data. Cannot be set directly must be set with the /datasets/<id>/publish method."
published_user_sub:
type: string
generated: true
immutable: true
description: "The subject id as provided by the authorization mechanism for the person or process authenticated when the dataset was publised. Cannot be set directly must be set with the /datasets/<id>/publish method."
published_user_email:
type: string
generated: true
immutable: true
description: "The email address provided by the authorization mechanism for the person or process authenticated when published. Cannot be set directly must be set with the /datasets/<id>/publish method."
pipeline_message:
#todo: where is this attribute sourced from? Is it stored in the database? <- Not in neo4j
type: string
ingest_metadata:
type: json_string # dict
description: "The metadata returned from the processing at data submission time."
local_directory_rel_path:
# Example: protected/<TMC>/<uuid>
type: string
generated: true
transient: true
description: "The path on the local file system, relative to the base data directory, where the data is stored."
on_read_trigger: get_local_directory_rel_path
run_id:
type: string
ingest_id:
Expand All @@ -697,28 +588,6 @@ ENTITIES:
immutable: true
description: "The uuid of globus group which the user who created this entity is a member of. This is required on Create/POST if the user creating the Donor is a member of more than one write group. This property cannot be set via PUT (only on Create/POST)."
before_create_trigger: set_group_uuid #method that, if group_uuid is not already set looks for membership in a single "data provider" group and sets to that. Otherwise if not set and no single "provider group" membership throws error
# Must set in neo4j
group_name:
# It's not being mapped in the current version, what to do for the existing entities?
type: string
generated: true
immutable: true
description: "The displayname of globus group which the user who created this entity is a member of"
before_create_trigger: set_group_name #same as group_uuid, except set group_name
previous_revision_uuid:
type: string
transient: true
immutable: true
description: "The uuid of previous revision dataset"
after_create_trigger: link_to_previous_revision
on_read_trigger: get_previous_revision_uuid
next_revision_uuid:
type: string
generated: true
transient: true
immutable: true
description: "The uuid of next revision dataset"
on_read_trigger: get_next_revision_uuid
# No like image and metadata files handling for Donor/Sample
# Dataset has only one thumbnail file
thumbnail_file:
Expand Down

0 comments on commit 2b18ae3

Please sign in to comment.