diff --git a/cumulus_etl/deid/scrubber.py b/cumulus_etl/deid/scrubber.py index fac259f9..0356a8ea 100644 --- a/cumulus_etl/deid/scrubber.py +++ b/cumulus_etl/deid/scrubber.py @@ -23,6 +23,10 @@ class SkipValue(Exception): pass +class MaskValue(SkipValue): + pass + + class Scrubber: """ Manages de-identification for FHIR resources. @@ -160,6 +164,9 @@ def _scrub_node( inside_extension=inside_extension, ) ) + except MaskValue: + # TODO: (not needed yet) support masking values inside array fields + self._add_data_absent_extension(node, f"_{key}") except SkipValue: pass @@ -224,6 +231,24 @@ def _print_extension_table(self, title: str, table: ExtensionCount) -> None: indented = rich.padding.Padding.indent(tree, 1) rich.get_console().print(indented) + def _add_data_absent_extension(self, node: dict, parent: str) -> None: + element = node.setdefault(parent, {}) + extensions = element.setdefault("extension", []) + + # Check if the value is already marked as absent for any reason - leave it in place. + # (though that would be weird, since the field was present or we wouldn't be in this path) + for extension in extensions: + if extension.get("url") == "http://hl7.org/fhir/StructureDefinition/data-absent-reason": + return + + # See https://hl7.org/fhir/extensions/StructureDefinition-data-absent-reason.html + extensions.append( + { + "url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", + "valueCode": "masked", + } + ) + ############################################################################### # # Individual checkers @@ -402,7 +427,7 @@ def _check_attachments(resource_type: str, node_path: str, key: str, value: Any) and node_path == "root.content.attachment" and key in {"data", "url"} ): - raise SkipValue + raise MaskValue return value diff --git a/tests/data/i2b2/output/documentreference/documentreference.000.ndjson b/tests/data/i2b2/output/documentreference/documentreference.000.ndjson index 139b538a..9090bfde 100644 --- a/tests/data/i2b2/output/documentreference/documentreference.000.ndjson +++ b/tests/data/i2b2/output/documentreference/documentreference.000.ndjson @@ -1,2 +1,2 @@ -{"resourceType":"DocumentReference","id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","subject":{"reference":"Patient\/26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb"},"context":{"encounter":[{"reference":"Encounter\/5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f"}],"period":{"start":"2021-06-23","end":"2021-06-24"}},"type":{"coding":[{"code":"NOTE:149798455","system":"http:\/\/cumulus.smarthealthit.org\/i2b2","display":"Admission MD"}]},"status":"current","content":[{"attachment":{"contentType":"text\/plain"}}]} -{"resourceType":"DocumentReference","id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","subject":{"reference":"Patient\/49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7"},"context":{"encounter":[{"reference":"Encounter\/fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687"}],"period":{"start":"2021-06-24","end":"2021-06-25"}},"type":{"coding":[{"code":"NOTE:149798455","system":"http:\/\/cumulus.smarthealthit.org\/i2b2","display":"Admission MD"}]},"status":"current","content":[{"attachment":{"contentType":"text\/plain"}}]} +{"resourceType": "DocumentReference", "id": "228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0", "subject": {"reference": "Patient/26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb"}, "context": {"encounter": [{"reference": "Encounter/5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f"}], "period": {"start": "2021-06-23", "end": "2021-06-24"}}, "type": {"coding": [{"code": "NOTE:149798455", "system": "http://cumulus.smarthealthit.org/i2b2", "display": "Admission MD"}]}, "status": "current", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}]} +{"resourceType": "DocumentReference", "id": "dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588", "subject": {"reference": "Patient/49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7"}, "context": {"encounter": [{"reference": "Encounter/fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687"}], "period": {"start": "2021-06-24", "end": "2021-06-25"}}, "type": {"coding": [{"code": "NOTE:149798455", "system": "http://cumulus.smarthealthit.org/i2b2", "display": "Admission MD"}]}, "status": "current", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}]} diff --git a/tests/data/simple/batched-output/documentreference/documentreference.000.ndjson b/tests/data/simple/batched-output/documentreference/documentreference.000.ndjson index b289375e..d732b323 100644 --- a/tests/data/simple/batched-output/documentreference/documentreference.000.ndjson +++ b/tests/data/simple/batched-output/documentreference/documentreference.000.ndjson @@ -1 +1 @@ -{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/d30aad4b-4503-8e22-0bc4-621b94398520"}],"period":{"end":"2021-06-24","start":"2021-06-23"}},"status":"current","subject":{"reference":"Patient\/118dc10e-7745-20d7-e98d-7c358a84c15c"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"} +{"resourceType": "DocumentReference", "id": "f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/d30aad4b-4503-8e22-0bc4-621b94398520"}], "period": {"end": "2021-06-24", "start": "2021-06-23"}}, "status": "current", "subject": {"reference": "Patient/118dc10e-7745-20d7-e98d-7c358a84c15c"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}} diff --git a/tests/data/simple/batched-output/documentreference/documentreference.001.ndjson b/tests/data/simple/batched-output/documentreference/documentreference.001.ndjson index 5f1a69c2..ec78ea8d 100644 --- a/tests/data/simple/batched-output/documentreference/documentreference.001.ndjson +++ b/tests/data/simple/batched-output/documentreference/documentreference.001.ndjson @@ -1 +1 @@ -{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}],"period":{"end":"2021-06-25","start":"2021-06-24"}},"status":"current","subject":{"reference":"Patient\/1de9ea66-70d3-da1f-c735-df5ef7697fb9"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"} +{"resourceType": "DocumentReference", "id": "c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}], "period": {"end": "2021-06-25", "start": "2021-06-24"}}, "status": "current", "subject": {"reference": "Patient/1de9ea66-70d3-da1f-c735-df5ef7697fb9"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}} diff --git a/tests/data/simple/output/documentreference/documentreference.000.ndjson b/tests/data/simple/output/documentreference/documentreference.000.ndjson index 8fa2914d..2adcb213 100644 --- a/tests/data/simple/output/documentreference/documentreference.000.ndjson +++ b/tests/data/simple/output/documentreference/documentreference.000.ndjson @@ -1,2 +1,2 @@ -{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/d30aad4b-4503-8e22-0bc4-621b94398520"}],"period":{"end":"2021-06-24","start":"2021-06-23"}},"status":"current","subject":{"reference":"Patient\/118dc10e-7745-20d7-e98d-7c358a84c15c"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"} -{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","content":[{"attachment":{"contentType":"text\/plain"}}],"context":{"encounter":[{"reference":"Encounter\/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}],"period":{"end":"2021-06-25","start":"2021-06-24"}},"status":"current","subject":{"reference":"Patient\/1de9ea66-70d3-da1f-c735-df5ef7697fb9"},"type":{"coding":[{"code":"NOTE:149798455","display":"Admission MD","system":"http:\/\/cumulus.smarthealthit.org\/i2b2"}]},"resourceType":"DocumentReference"} +{"resourceType": "DocumentReference", "id": "f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/d30aad4b-4503-8e22-0bc4-621b94398520"}], "period": {"end": "2021-06-24", "start": "2021-06-23"}}, "status": "current", "subject": {"reference": "Patient/118dc10e-7745-20d7-e98d-7c358a84c15c"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}} +{"resourceType": "DocumentReference", "id": "c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971", "content": [{"attachment": {"contentType": "text/plain", "_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}], "context": {"encounter": [{"reference": "Encounter/af1e6186-3f9a-1fa9-3c73-cfa56c84a056"}], "period": {"end": "2021-06-25", "start": "2021-06-24"}}, "status": "current", "subject": {"reference": "Patient/1de9ea66-70d3-da1f-c735-df5ef7697fb9"}, "type": {"coding": [{"code": "NOTE:149798455", "display": "Admission MD", "system": "http://cumulus.smarthealthit.org/i2b2"}]}} diff --git a/tests/deid/test_deid_scrubber.py b/tests/deid/test_deid_scrubber.py index dcecfaf3..5ab881ae 100644 --- a/tests/deid/test_deid_scrubber.py +++ b/tests/deid/test_deid_scrubber.py @@ -58,13 +58,36 @@ def test_condition(self): def test_documentreference(self): """Test DocumentReference, which is interesting because of its list of encounters and attachments""" - docref = i2b2_mock_data.documentreference() - self.assertEqual("345", docref["id"]) - self.assertEqual("Patient/12345", docref["subject"]["reference"]) - self.assertEqual(1, len(docref["context"]["encounter"])) - self.assertEqual("Encounter/67890", docref["context"]["encounter"][0]["reference"]) - self.assertEqual(1, len(docref["content"])) - self.assertIsNotNone(docref["content"][0]["attachment"]["data"]) + docref = { + "resourceType": "DocumentReference", + "id": "345", + "subject": {"reference": "Patient/12345"}, + "context": { + "encounter": [{"reference": "Encounter/67890"}], + }, + "content": [ + { + "attachment": { + "data": "aGVsbG8gd29ybGQ=", + "url": "https://example.com/hello-world", + }, + }, + { + "attachment": { + "data": "xxx", + "_data": { + "extension": [ + { + "url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", + "valueCode": "error", + } + ], + }, + "url": "https://example.com/hello-world", + }, + }, + ], + } scrubber = Scrubber() self.assertTrue(scrubber.scrub_resource(docref)) @@ -77,7 +100,48 @@ def test_documentreference(self): docref["context"]["encounter"][0]["reference"], f"Encounter/{scrubber.codebook.fake_id('Encounter', '67890')}", ) - self.assertNotIn("data", docref["content"][0]["attachment"]) + self.assertEqual( + docref["content"][0]["attachment"], + { + "_data": { + "extension": [ + { + "url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", + "valueCode": "masked", + } + ] + }, + "_url": { + "extension": [ + { + "url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", + "valueCode": "masked", + } + ] + }, + }, + ) + self.assertEqual( + docref["content"][1]["attachment"], + { + "_data": { + "extension": [ + { + "url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", + "valueCode": "error", # we left this reason in place + } + ] + }, + "_url": { + "extension": [ + { + "url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", + "valueCode": "masked", + } + ] + }, + }, + ) def test_contained_reference(self): """Verify that we leave contained references contained but scrubbed"""