Skip to content

Commit

Permalink
Merge pull request #811 from hubmapconsortium/Derek-Furst/fix-duplicate-ancestors
Browse files Browse the repository at this point in the history

Derek furst/fix duplicate ancestors
  • Loading branch information
yuanzhou authored Feb 11, 2025
2 parents 5e8ff19 + e010d85 commit 831cbdb
Showing 1 changed file with 12 additions and 14 deletions.
26 changes: 12 additions & 14 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,8 @@ def get_children(neo4j_driver, uuid, property_key = None):
query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT]->(:Activity)-[:ACTIVITY_OUTPUT]->(child:Entity) "
# The target entity can't be a Lab
f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(child), apoc.map.removeKeys(properties(child), {fields_to_omit})))) AS {record_field_name}")
f"WITH COLLECT(DISTINCT child) AS uniqueChildren "
f"RETURN [a IN uniqueChildren | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")

logger.info("======get_children() query======")
logger.info(query)
Expand Down Expand Up @@ -228,9 +227,8 @@ def get_parents(neo4j_driver, uuid, property_key = None):
query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
# Filter out the Lab entities
f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(parent), apoc.map.removeKeys(properties(parent), {fields_to_omit})))) AS {record_field_name}")
f"WITH COLLECT(DISTINCT parent) AS uniqueParents "
f"RETURN [a IN uniqueParents | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")

logger.info("======get_parents() query======")
logger.info(query)
Expand Down Expand Up @@ -392,9 +390,8 @@ def get_ancestors(neo4j_driver, uuid, property_key = None):
query = (f"MATCH (e:Entity)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(ancestor:Entity) "
# Filter out the Lab entities
f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(ancestor), apoc.map.removeKeys(properties(ancestor), {fields_to_omit})))) AS {record_field_name}")
f"WITH COLLECT(DISTINCT ancestor) AS uniqueAncestors "
f"RETURN [a IN uniqueAncestors | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")

logger.info("======get_ancestors() query======")
logger.info(query)
Expand Down Expand Up @@ -443,9 +440,8 @@ def get_descendants(neo4j_driver, uuid, property_key = None):
query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(descendant:Entity) "
# The target entity can't be a Lab
f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(descendant), apoc.map.removeKeys(properties(descendant), {fields_to_omit})))) AS {record_field_name}")
f"WITH COLLECT(DISTINCT descendant) AS uniqueDescendants "
f"RETURN [a IN uniqueDescendants | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")

logger.info("======get_descendants() query======")
logger.info(query)
Expand Down Expand Up @@ -1188,7 +1184,8 @@ def get_collection_datasets(neo4j_driver, uuid):
fields_to_omit = SchemaConstants.OMITTED_FIELDS
query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) "
f"WHERE c.uuid = '{uuid}' "
f"RETURN COLLECT(apoc.create.vNode(labels(e), apoc.map.removeKeys(properties(e), {fields_to_omit}))) AS {record_field_name}")
f"WITH COLLECT(DISTINCT e) AS uniqueDataset "
f"RETURN [a IN uniqueDataset | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")

logger.info("======get_collection_datasets() query======")
logger.info(query)
Expand Down Expand Up @@ -1401,7 +1398,8 @@ def get_upload_datasets(neo4j_driver, uuid, property_key = None):
else:
query = (f"MATCH (e:Dataset)-[:IN_UPLOAD]->(s:Upload) "
f"WHERE s.uuid = '{uuid}' "
f"RETURN COLLECT(apoc.create.vNode(labels(e), apoc.map.removeKeys(properties(e), {fields_to_omit}))) AS {record_field_name}")
f"WITH COLLECT(DISTINCT e) AS uniqueUploads "
f"RETURN [a IN uniqueUploads | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")

logger.info("======get_upload_datasets() query======")
logger.info(query)
Expand Down

0 comments on commit 831cbdb

Please sign in to comment.