Skip to content

Commit

Permalink
WIP: don't simply toss extensions
Browse files Browse the repository at this point in the history
  • Loading branch information
mikix committed Sep 10, 2024
1 parent d7abeb3 commit 962aa53
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
2 changes: 1 addition & 1 deletion cumulus_etl/deid/ms-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
{"path": "Patient.extension('http://hl7.org/fhir/us/core/StructureDefinition/us-core-sex-for-clinical-use')", "method": "keep"},
{"path": "Patient.extension('http://open.epic.com/FHIR/StructureDefinition/extension/sex-for-clinical-use')", "method": "keep"}, // Epic has used this pre-final-spec URL
{"path": "nodesByName('modifierExtension')", "method": "keep"}, // keep these so we can ignore resources with modifiers we don't understand
{"path": "nodesByType('Extension')", "method": "redact"}, // drop all unknown extensions
{"path": "nodesByType('Extension')", "method": "keep"}, // keep unknown extensions at this stage; scrubber.py strips them down to just their url

// Elements that might be embedded and kept elsewhere -- redact pieces of the whole
{"path": "nodesByType('Attachment').title", "method": "redact"},
Expand Down
24 changes: 23 additions & 1 deletion cumulus_etl/deid/scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def scrub_resource(self, node: dict, scrub_attachments: bool = True) -> bool:
node.get("resourceType"), "root", node, scrub_attachments=scrub_attachments
)
except SkipResource as exc:
logging.warning("Ignoring resource of type %s: %s", node.__class__.__name__, exc)
logging.warning("Ignoring resource: %s", exc)
return False
except ValueError as exc:
logging.warning("Could not parse value: %s", exc)
Expand Down Expand Up @@ -121,6 +121,7 @@ def _scrub_single_value(
"""Examines one single property of a node"""
# For now, just manually run each operation. If this grows further, we can abstract it more.
self._check_ids(node, key, value)
self._check_extensions(resource_type, node, key, value)
self._check_modifier_extensions(key, value)
self._check_security(node_path, node, key, value)
self._check_text(node, key, value)
Expand All @@ -139,11 +140,32 @@ def _scrub_single_value(
#
###############################################################################

def _check_extensions(self, resource_type: str, node: dict, key: str, value: Any) -> None:
    """
    Strip the contents of any unrecognized extension, leaving only its url behind.

    Only acts when `key` is "extension" and `value` is a single extension dict; any other
    input is left untouched. Recognized extensions are not modified. An unrecognized one is
    emptied in place and its original url (if it had one) is put back, so there is still a
    record that the extension existed. Nothing is logged and no exception is raised.

    `resource_type` and `node` are accepted but not currently used.
    """
    if key != "extension" or not isinstance(value, dict):
        return

    # TODO(review): the Patient extension URLs below were carried over (commented out) from
    # "keep" config entries but are NOT in the allow-list yet, so they are currently stripped
    # down to url-only like any other unknown extension -- confirm whether that is intended:
    #   http://hl7.org/fhir/Profile/us-core#ethnicity  (old DSTU1 URL, still out in the wild:
    #   http://hl7.org/fhir/Profile/us-core#race        https://www.hl7.org/fhir/DSTU1/us-core.html)
    #   http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex
    #   http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity
    #   http://hl7.org/fhir/us/core/StructureDefinition/us-core-genderIdentity
    #   http://hl7.org/fhir/us/core/StructureDefinition/us-core-race
    #
    # A tuple (not a set) so that the membership test below cannot raise on an unhashable,
    # malformed url value.
    known_extensions = (
        "http://hl7.org/fhir/us/core/StructureDefinition/us-core-sex-for-clinical-use",
        # Epic has used this pre-final-spec URL
        "http://open.epic.com/FHIR/StructureDefinition/extension/sex-for-clinical-use",
    )

    url = value.get("url")
    if url in known_extensions:
        return

    value.clear()  # get rid of every key, including any nested extensions
    if url is not None:
        # Keep just the url, to track that the extension existed. Don't invent a
        # `"url": None` entry for an extension that never had a url to begin with.
        value["url"] = url

@staticmethod
def _check_modifier_extensions(key: str, value: Any) -> None:
"""If there's any unrecognized modifierExtensions, raise a SkipResource exception"""
if key == "modifierExtension" and isinstance(value, dict):
known_extensions = [
# These NLP extensions are generated by ctakesclient's text2fhir code.
# While we don't anticipate ingesting any resources using these extensions
# (and we don't currently generate them ourselves), we might in the future.
"http://fhir-registry.smarthealthit.org/StructureDefinition/nlp-polarity",
"http://fhir-registry.smarthealthit.org/StructureDefinition/nlp-source",
]
Expand Down

0 comments on commit 962aa53

Please sign in to comment.