Skip to content

Commit

Permalink
test FHIR load
Browse files Browse the repository at this point in the history
  • Loading branch information
bwalsh committed Aug 28, 2024
1 parent 0b16fbe commit 89b6391
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 0 deletions.
4 changes: 4 additions & 0 deletions test/pygrip_test/fhir/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
This test depends on the third-party `nested_lookup` package. Install it before running the test:
```commandline
pip install nested_lookup
```
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"resourceType":"DocumentReference","id":"9ae7e542-767f-4b03-a854-7ceed17152cb","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"9ae7e542-767f-4b03-a854-7ceed17152cb"}],"status":"current","docStatus":"final","subject":{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"},"date":"2024-08-21T10:53:00+00:00","content":[{"attachment":{"extension":[{"url":"http://aced-idp.org/fhir/StructureDefinition/md5","valueString":"227f0a5379362d42eaa1814cfc0101b8"},{"url":"http://aced-idp.org/fhir/StructureDefinition/source_path","valueUrl":"file:///home/LabA/specimen_1234_labA.fq.gz"}],"contentType":"text/fastq","url":"file:///home/LabA/specimen_1234_labA.fq.gz","size":5595609484,"title":"specimen_1234_labA.fq.gz","creation":"2024-08-21T10:53:00+00:00"}}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"resourceType":"Observation","id":"cec32723-9ede-5f24-ba63-63cb8c6a02cf","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234-9ae7e542-767f-4b03-a854-7ceed17152cb-sequencer"}], "status":"final","category":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/observation-category","code":"laboratory","display":"Laboratory"}]}],"code":{"coding":[{"system":"https://my_demo.org/labA","code":"Gen3 Sequencing Metadata","display":"Gen3 Sequencing Metadata"}]},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"focus":[{"reference":"DocumentReference/9ae7e542-767f-4b03-a854-7ceed17152cb"}],"specimen":{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"},"component":[{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"sequencer","display":"sequencer"}],"text":"sequencer"},"valueString":"Illumina Seq 1000"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"index","display":"index"}],"text":"index"},"valueString":"100bp Single index"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"type","display":"type"}],"text":"type"},"valueString":"Exome"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"project_id","display":"project_id"}],"text":"project_id"},"valueString":"labA_projectXYZ"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"read_length","display":"read_length"}],"text":"read_length"},"valueString":"100"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"instrument_run_id","display":"instrument_run_id"}],"text":"instrument_run_id"},"valueString":"234_ABC_1_8899"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"capture_bait_set","display":"capture_bait_set"}],"text":"capture_bait_set"},"valueString":"Human Exom 
2X"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"end_type","display":"end_type"}],"text":"end_type"},"valueString":"Paired-End"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"capture","display":"capture"}],"text":"capture"},"valueString":"emitter XT"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"sequencing_site","display":"sequencing_site"}],"text":"sequencing_site"},"valueString":"AdvancedGeneExom"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"construction","display":"construction"}],"text":"construction"},"valueString":"library_construction"}]}
{"resourceType":"Observation","id":"4e3c6b59-b1fd-5c26-a611-da4cde9fd061","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234-specimen_1234_labA-sample_type"}],"status":"final","category":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/observation-category","code":"laboratory","display":"Laboratory"}],"text":"Laboratory"}],"code":{"coding":[{"system":"https://my_demo.org/labA","code":"labA specimen metadata","display":"labA specimen metadata"}],"text":"sample type abc"},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"focus":[{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"}],"component":[{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"sample_type","display":"sample_type"}],"text":"sample_type"},"valueString":"Primary Solid Tumor"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"library_id","display":"library_id"}],"text":"library_id"},"valueString":"12345"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"tissue_type","display":"tissue_type"}],"text":"tissue_type"},"valueString":"Tumor"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"treatments","display":"treatments"}],"text":"treatments"},"valueString":"Trastuzumab"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"allocated_for_site","display":"allocated_for_site"}],"text":"allocated_for_site"},"valueString":"TEST Clinical 
Research"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"pathology_data","display":"pathology_data"}],"text":"pathology_data"}},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"clinical_event","display":"clinical_event"}],"text":"clinical_event"}},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"indexed_collection_date","display":"indexed_collection_date"}],"text":"indexed_collection_date"},"valueInteger":365},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"biopsy_specimens_bems_id","display":"biopsy_specimens_bems_id"}],"text":"biopsy_specimens"},"valueString":"specimenA, specimenB, specimenC"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"biopsy_procedure_type","display":"biopsy_procedure_type"}],"text":"biopsy_procedure_type"},"valueString":"Biopsy - Core"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"biopsy_anatomical_location","display":"biopsy_anatomical_location"}],"text":"biopsy_anatomical_location"},"valueString":"top axillary lymph node"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"percent_tumor","display":"percent_tumor"}],"text":"percent_tumor"},"valueString":"30"}]}
{"resourceType":"Observation","id":"21f3411d-89a4-4bcc-9ce7-b76edb1c745f","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234-9ae7e542-767f-4b03-a854-7ceed17152cb-Gene"}], "status":"final","category":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/observation-category","code":"laboratory","display":"Laboratory"}]}],"code":{"coding":[{"system":"https://loinc.org","code":"81247-9","display":"Genomic structural variant copy number"}]},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"focus":[{"reference":"DocumentReference/9ae7e542-767f-4b03-a854-7ceed17152cb"}],"specimen":{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"},"component":[{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"Gene","display":"Gene"}],"text":"Gene"},"valueString":"TP53"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"Chromosome","display":"Chromosome"}],"text":"Chromosome"},"valueString":"chr17"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"result","display":"result"}],"text":"result"},"valueString":"gain of function (GOF)"}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"resourceType":"Organization","id":"89c8dc4c-2d9c-48c7-8862-241a49a78f14","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"LabA_ORGANIZATION"}],"type":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/organization-type","code":"prov","display":"Healthcare Provider"}],"text":"An organization that provides healthcare services."},{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/organization-type","code":"edu","display":"Educational Institute"}],"text":"An educational institution that provides education or research facilities."}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"resourceType":"Patient","id":"bc4e1aa6-cb52-40e9-8f20-594d9c84f920","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234"}],"active":true}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"resourceType":"ResearchStudy","id":"7dacd4d0-3c8e-470b-bf61-103891627d45","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"labA"}],"name":"LabA","status":"active","description":"LabA Clinical Trial Study: FHIR Schema Chorot Integration"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"resourceType":"ResearchSubject","id":"2fc448d6-a23b-4b94-974b-c66110164851","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"subjectX_1234"}],"status":"active","study":{"reference":"ResearchStudy/7dacd4d0-3c8e-470b-bf61-103891627d45"},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"resourceType":"Specimen","id":"60c67a06-ea2d-4d24-9249-418dc77a16a9","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"specimen_1234_labA"}],"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"collection":{"collector":{"reference":"Organization/89c8dc4c-2d9c-48c7-8862-241a49a78f14"},"bodySite":{"concept":{"coding":[{"system":"http://snomed.info/sct","code":"76752008","display":"Breast"}],"text":"Breast"}}},"processing":[{"method":{"coding":[{"system":"http://snomed.info/sct","code":"117032008","display":"Spun specimen (procedure)"},{"system":"https://my_demo.org/labA","code":"Double-Spun","display":"Double-Spun"}],"text":"Spun specimen (procedure)"}}]}
11 changes: 11 additions & 0 deletions test/pygrip_test/fhir/fixtures/fhir-compbio-examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
##### META folder test-data:

```
>>>> resources={'summary': {'DocumentReference': 1, 'Specimen': 1, 'Observation': 3, 'ResearchStudy': 1, 'ResearchSubject': 1, 'Organization': 1, 'Patient': 1}}
```

There are three Observations, each carrying user-defined metadata in its `component` list:
1. Focus - reference -> Specimen (one Observation)
2. Focus - reference -> DocumentReference (two Observations)
    1. The first Observation contains the file's sequencing metadata.
    2. The second Observation includes a simple summary of a CNV analysis result computed from this file.
103 changes: 103 additions & 0 deletions test/pygrip_test/fhir/test_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import json
import pathlib

import pytest

import pygrip
from nested_lookup import nested_lookup
from typing import Generator, Dict, Any


def resources() -> Generator[Dict[str, Any], None, None]:
    """Yield every FHIR resource stored in the local META fixture directory.

    Each ``*.ndjson`` file under ``fixtures/fhir-compbio-examples/META``
    (resolved relative to this module) is read line by line, and every
    non-blank line is parsed as one JSON object.

    Files are visited in sorted path order so that the sequence of yielded
    resources does not depend on filesystem enumeration order.

    Yields:
        One parsed JSON object (a dict) per non-blank line.

    Raises:
        AssertionError: if the fixture directory does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    base = pathlib.Path(__file__).parent.absolute()
    fixture_path = base / 'fixtures' / 'fhir-compbio-examples' / 'META'
    assert fixture_path.exists(), f"Fixture path {fixture_path.absolute()} does not exist."
    # glob() yields paths in arbitrary order; sort for reproducible loading.
    for file in sorted(fixture_path.glob('*.ndjson')):
        with open(file, encoding='utf-8') as fp:
            # Iterate the file lazily instead of materializing every line.
            for line in fp:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                if not line.strip():
                    continue
                yield json.loads(line)


@pytest.fixture
def expected_edges() -> list[tuple]:
    """Return the edges expected from the fhir-compbio-examples fixtures.

    Each entry is a ``(source id, destination id, label)`` tuple, where the
    label is the resource type of the destination. Entries are grouped by
    source id in ascending order.
    """
    # Resource ids as they appear in the fixture files.
    gene_observation = '21f3411d-89a4-4bcc-9ce7-b76edb1c745f'
    research_subject = '2fc448d6-a23b-4b94-974b-c66110164851'
    specimen_observation = '4e3c6b59-b1fd-5c26-a611-da4cde9fd061'
    specimen = '60c67a06-ea2d-4d24-9249-418dc77a16a9'
    research_study = '7dacd4d0-3c8e-470b-bf61-103891627d45'
    organization = '89c8dc4c-2d9c-48c7-8862-241a49a78f14'
    document_reference = '9ae7e542-767f-4b03-a854-7ceed17152cb'
    patient = 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920'
    sequencing_observation = 'cec32723-9ede-5f24-ba63-63cb8c6a02cf'

    return [
        (gene_observation, document_reference, 'DocumentReference'),
        (gene_observation, specimen, 'Specimen'),
        (gene_observation, patient, 'Patient'),
        (research_subject, research_study, 'ResearchStudy'),
        (research_subject, patient, 'Patient'),
        (specimen_observation, specimen, 'Specimen'),
        (specimen_observation, patient, 'Patient'),
        (specimen, organization, 'Organization'),
        (specimen, patient, 'Patient'),
        (document_reference, specimen, 'Specimen'),
        (sequencing_observation, document_reference, 'DocumentReference'),
        (sequencing_observation, specimen, 'Specimen'),
        (sequencing_observation, patient, 'Patient'),
    ]


def test_load(expected_edges):
    """Load the FHIR fixtures into an in-memory graph and verify its contents.

    Steps:
      1. Add every fixture resource as a vertex and confirm the graph returns
         the same number of vertices.
      2. Derive an edge from every ``reference`` string found in each stored
         resource, add it to the graph, and compare the collected edges with
         ``expected_edges``.
      3. Traverse Specimen -> Patient and confirm exactly one row is returned.
    """
    graph = pygrip.NewMemServer()

    # load the resources into the graph
    expected_count = 0
    for resource in resources():
        graph.addVertex(resource['id'], resource['resourceType'], resource)
        expected_count += 1

    # read them back in, check the count
    actual_count = sum(1 for _ in graph.V())
    assert expected_count == actual_count, f"Expected {expected_count} but got {actual_count}."

    # load the edges into the graph (we could have done this when we loaded vertices, but do it separately for clarity)
    actual_edges = []
    for row in graph.V():
        assert 'vertex' in row, f"Expected 'vertex' in {row}"
        # The message must only touch keys known to exist; otherwise building
        # the message raises KeyError and hides the real assertion failure.
        assert 'data' in row['vertex'], f"Expected 'data' in {row['vertex']}"
        resource = row['vertex']['data']
        # https://www.hl7.org/fhir/medicationrequest-definitions.html#MedicationRequest.medication
        # is a reference to a Medication resource https://www.hl7.org/fhir/references.html#CodeableReference
        # so it has a reference.reference form, strip it out
        references = [
            value
            for value in nested_lookup('reference', resource)
            if isinstance(value, str)
        ]
        for reference in references:
            label, dst_id = reference.split('/')
            graph.addEdge(resource['id'], dst_id, label)
            actual_edges.append((resource['id'], dst_id, label))  # the edge label is the dst label

    # Compare as sorted lists: the set of edges is what matters, not the order
    # in which the backend happens to return vertices or serialize keys.
    assert sorted(actual_edges) == sorted(expected_edges), f"Expected {expected_edges} but got {actual_edges}."

    # specimen -> patient
    q = graph.V().hasLabel("Specimen").as_("d").out("Patient")
    actual_patient_count = sum(1 for _ in q)
    assert actual_patient_count == 1, f"Expected 1 but got {actual_patient_count}."

0 comments on commit 89b6391

Please sign in to comment.