Skip to content

Commit

Permalink
Merge pull request #361 from smart-on-fhir/mikix/mask-attachments
Browse files Browse the repository at this point in the history
feat: mark data/url fields inside DocRef attachments as absent
  • Loading branch information
mikix authored Nov 7, 2024
2 parents 352ab34 + 4b782f2 commit 93311d1
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 15 deletions.
27 changes: 26 additions & 1 deletion cumulus_etl/deid/scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class SkipValue(Exception):
pass


class MaskValue(SkipValue):
    """
    Signal that a value should be dropped *and* flagged as masked.

    Because this derives from SkipValue, a handler that wants the masking
    behavior must catch MaskValue before it catches SkipValue.
    """


class Scrubber:
"""
Manages de-identification for FHIR resources.
Expand Down Expand Up @@ -160,6 +164,9 @@ def _scrub_node(
inside_extension=inside_extension,
)
)
except MaskValue:
# TODO: (not needed yet) support masking values inside array fields
self._add_data_absent_extension(node, f"_{key}")
except SkipValue:
pass

Expand Down Expand Up @@ -224,6 +231,24 @@ def _print_extension_table(self, title: str, table: ExtensionCount) -> None:
indented = rich.padding.Padding.indent(tree, 1)
rich.get_console().print(indented)

def _add_data_absent_extension(self, node: dict, parent: str) -> None:
element = node.setdefault(parent, {})
extensions = element.setdefault("extension", [])

# Check if the value is already marked as absent for any reason - leave it in place.
# (though that would be weird, since the field was present or we wouldn't be in this path)
for extension in extensions:
if extension.get("url") == "http://hl7.org/fhir/StructureDefinition/data-absent-reason":
return

# See https://hl7.org/fhir/extensions/StructureDefinition-data-absent-reason.html
extensions.append(
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "masked",
}
)

###############################################################################
#
# Individual checkers
Expand Down Expand Up @@ -402,7 +427,7 @@ def _check_attachments(resource_type: str, node_path: str, key: str, value: Any)
and node_path == "root.content.attachment"
and key in {"data", "url"}
):
raise SkipValue
raise MaskValue

return value

Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"resourceType":"DocumentReference","id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","subject":{"reference":"Patient\/26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb"},"context":{"encounter":[{"reference":"Encounter\/5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f"}],"period":{"start":"2021-06-23","end":"2021-06-24"}},"type":{"coding":[{"code":"NOTE:149798455","system":"http:\/\/cumulus.smarthealthit.org\/i2b2","display":"Admission MD"}]},"status":"current","content":[{"attachment":{"contentType":"text\/plain"}}]}
{"resourceType":"DocumentReference","id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","subject":{"reference":"Patient\/49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7"},"context":{"encounter":[{"reference":"Encounter\/fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687"}],"period":{"start":"2021-06-24","end":"2021-06-25"}},"type":{"coding":[{"code":"NOTE:149798455","system":"http:\/\/cumulus.smarthealthit.org\/i2b2","display":"Admission MD"}]},"status":"current","content":[{"attachment":{"contentType":"text\/plain"}}]}
{"resourceType": "DocumentReference", "id": "228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0", "subject": {"reference": "Patient/26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb"}, "context": {"encounter": [{"reference": "Encounter/5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f"}], "period": {"start": "2021-06-23", "end": "2021-06-24"}}, "type": {"coding": [{"code": "NOTE:149798455", "system": "http://cumulus.smarthealthit.org/i2b2", "display": "Admission MD"}]}, "status": "current", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}]}
{"resourceType": "DocumentReference", "id": "dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588", "subject": {"reference": "Patient/49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7"}, "context": {"encounter": [{"reference": "Encounter/fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687"}], "period": {"start": "2021-06-24", "end": "2021-06-25"}}, "type": {"coding": [{"code": "NOTE:149798455", "system": "http://cumulus.smarthealthit.org/i2b2", "display": "Admission MD"}]}, "status": "current", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}]}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/d30aad4b-4503-8e22-0bc4-621b94398520"}],"period":{"end":"2021-06-24","start":"2021-06-23"}},"status":"current","subject":{"reference":"Patient\/118dc10e-7745-20d7-e98d-7c358a84c15c"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"}
{"resourceType": "DocumentReference", "id": "f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/d30aad4b-4503-8e22-0bc4-621b94398520"}], "period": {"end": "2021-06-24", "start": "2021-06-23"}}, "status": "current", "subject": {"reference": "Patient/118dc10e-7745-20d7-e98d-7c358a84c15c"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}],"period":{"end":"2021-06-25","start":"2021-06-24"}},"status":"current","subject":{"reference":"Patient\/1de9ea66-70d3-da1f-c735-df5ef7697fb9"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"}
{"resourceType": "DocumentReference", "id": "c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}], "period": {"end": "2021-06-25", "start": "2021-06-24"}}, "status": "current", "subject": {"reference": "Patient/1de9ea66-70d3-da1f-c735-df5ef7697fb9"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}}
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/d30aad4b-4503-8e22-0bc4-621b94398520"}],"period":{"end":"2021-06-24","start":"2021-06-23"}},"status":"current","subject":{"reference":"Patient\/118dc10e-7745-20d7-e98d-7c358a84c15c"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}],"period":{"end":"2021-06-25","start":"2021-06-24"}},"status":"current","subject":{"reference":"Patient\/1de9ea66-70d3-da1f-c735-df5ef7697fb9"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"}
{"resourceType": "DocumentReference", "id": "f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/d30aad4b-4503-8e22-0bc4-621b94398520"}], "period": {"end": "2021-06-24", "start": "2021-06-23"}}, "status": "current", "subject": {"reference": "Patient/118dc10e-7745-20d7-e98d-7c358a84c15c"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}}
{"resourceType": "DocumentReference", "id": "c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}], "period": {"end": "2021-06-25", "start": "2021-06-24"}}, "status": "current", "subject": {"reference": "Patient/1de9ea66-70d3-da1f-c735-df5ef7697fb9"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}}
80 changes: 72 additions & 8 deletions tests/deid/test_deid_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,36 @@ def test_condition(self):

def test_documentreference(self):
"""Test DocumentReference, which is interesting because of its list of encounters and attachments"""
docref = i2b2_mock_data.documentreference()
self.assertEqual("345", docref["id"])
self.assertEqual("Patient/12345", docref["subject"]["reference"])
self.assertEqual(1, len(docref["context"]["encounter"]))
self.assertEqual("Encounter/67890", docref["context"]["encounter"][0]["reference"])
self.assertEqual(1, len(docref["content"]))
self.assertIsNotNone(docref["content"][0]["attachment"]["data"])
docref = {
"resourceType": "DocumentReference",
"id": "345",
"subject": {"reference": "Patient/12345"},
"context": {
"encounter": [{"reference": "Encounter/67890"}],
},
"content": [
{
"attachment": {
"data": "aGVsbG8gd29ybGQ=",
"url": "https://example.com/hello-world",
},
},
{
"attachment": {
"data": "xxx",
"_data": {
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "error",
}
],
},
"url": "https://example.com/hello-world",
},
},
],
}

scrubber = Scrubber()
self.assertTrue(scrubber.scrub_resource(docref))
Expand All @@ -77,7 +100,48 @@ def test_documentreference(self):
docref["context"]["encounter"][0]["reference"],
f"Encounter/{scrubber.codebook.fake_id('Encounter', '67890')}",
)
self.assertNotIn("data", docref["content"][0]["attachment"])
self.assertEqual(
docref["content"][0]["attachment"],
{
"_data": {
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "masked",
}
]
},
"_url": {
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "masked",
}
]
},
},
)
self.assertEqual(
docref["content"][1]["attachment"],
{
"_data": {
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "error", # we left this reason in place
}
]
},
"_url": {
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "masked",
}
]
},
},
)

def test_contained_reference(self):
"""Verify that we leave contained references contained but scrubbed"""
Expand Down

0 comments on commit 93311d1

Please sign in to comment.