diff --git a/README.md b/README.md index 82f63ff..2a544e6 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,13 @@ data= { patient = Patient(**data) ``` +or in bulk from a FHIR export as an .ndjson file. +``` +from fhirflat.resources.patient import Patient + +patients = Patient.fhir_bulk_import("patient_export.ndjson") +``` + ### To FHIRflat Once initialised, FHIR resources can be transformed to FHIRflat files using the `to_flat()` function like this ``` @@ -30,9 +37,170 @@ which will produce a [parquet file](https://towardsdatascience.com/demystifying- |--------------|------|--------|------------|-----------------| | Patient | f001 | male | 1996-05-30 | False | +or a FHIRflat file can be generated directly from a FHIR .ndjson export file. +``` +from fhirflat.resources.patient import Patient + +Patient.fhir_file_to_flat("patient_export.ndjson") +``` +will create a FHIRflat parquet file, named after the resource type by default; pass an output name as the second argument (e.g. "patient_export.parquet") to choose the file name. +This first initialises a Patient data class for each row to make use of the Pydantic +data validation, then creates a FHIRflat file. + ### From FHIRflat FHIR resources can also be created directly from FHIRflat files ``` Patient.from_flat("patient_flat.parquet") ``` -which will return either a single Patient resource, or a list of Patient resources. \ No newline at end of file +which will return either a single Patient resource, or a list of Patient resources if +the Parquet file contains multiple rows of data. + +### Specification + +The FHIRflat structure closely follows that of FHIR, and simply flattens nested columns +in a manner similar to `pd.json_normalize()`. Some fields are excluded either because they are simply used for convenience within a FHIR server, because they contain information not relevant within ISARIC clinical data, or because they would contain personally identifiable information (PII). These fields can be accessed and edited for each resource using the `flat_exclusions` property. 
There are a few specifics to FHIRflat that differ from simply normalising a FHIR structure, noted below. + +1. **codeableConcepts** + + CodeableConcepts are converted into 2 lists, one of codes and one of the corresponding text. The coding is compressed into a single string with the format `system|code`. The ‘|’ symbol was chosen as it is the standard way to query codes in FHIR servers [(example)](https://www.hl7.org/fhir/search.html#3.2.1.5.5.1.3). Thus a JSON snippet containing a CodeableConcept: + ``` + "code": { + "coding": [ + [ + { + "system": "http://loinc.org", + "code": "3141-9", + "display": "Body weight Measured", + }, + { + "system": "http://snomed.info/sct", + "code": "27113001", + "display": "Body weight", + }, + ] + ] + } + ``` + is coded as two fields + | code.code | code.text | + |------------------------------------------------------------------|-----------------------------------------| + | ["http://loinc.org\|3141-9", "http://snomed.info/sct\|27113001"] | ["Body weight Measured", "Body weight"] | + + Note that the external `coding` label is removed. + +2. **References** + + References are stored as a string containing the resource name and the resource ID, separated by a forward slash. + ``` + "subject": { + "reference": "Patient/f001", + "display": "Donald Duck" + } + ``` + becomes + | subject.reference | + |-------------------| + |"Patient/f001" | + + The display text will not be converted due to the risk of revealing identifying information (e.g., a patient's name). + +3. **Extensions** + + The base FHIR schema can be extended to meet the needs of individual implementations using extension fields. FHIRflat displays these with the extension `url` as part of the column name. 
For example + + ``` + "extension": [ + { + "url": "timingPhase", + "valueCodeableConcept": { + "coding": [ + { + "system": "http://snomed.info/sct", + "code": 278307001, + "display": "on admission", + } + ] + }, + }, + { + "url": "relativePeriod", + "extension": [ + {"url": "relativeStart", "valueInteger": 2}, + {"url": "relativeEnd", "valueInteger": 5}, + ], + }, + ] + ``` + becomes + | extension.timingPhase.code | extension.timingPhase.text | extension.relativePeriod.relativeStart | extension.relativePeriod.relativeEnd | + |-------------------------------------|----------------------------|----------------------------------------|--------------------------------------| + | "http://snomed.info/sct\|278307001" | "on admission" | 2 | 5 | + + Complex (nested) extensions such as relativePeriod also omit the internal `extension` labels. + + +4. **0..\* cardinality fields** + + Fields which can contain an unspecified number of repeated entries are dealt with according to the number of entries present. Lists of length 1 are expanded out as above, while any longer lists are kept in a single column with the data in its original nested structure and `_dense` appended to the end of the field name. These fields are not expected to be queried regularly in standard analyses. + + For example, the `diagnosis` field of the [Encounter](https://hl7.org/fhir/encounter.html) resource has 0..* cardinality. 
If a single diagnosis is present, the field is expanded out: + ``` + "diagnosis": [ + { + "condition": [{"reference": {"reference": "Condition/stroke"}}], + "use": [ + { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/diagnosis-role", + "code": "AD", + "display": "Admission diagnosis", + } + ] + } + ], + } + ] + ``` + becomes + | diagnosis.condition.reference | diagnosis.use.code | diagnosis.use.text | + |-------------------------------|------------------------------------------------------------|---------------------| + | Condition/stroke | "http://terminology.hl7.org/CodeSystem/diagnosis-role\|AD" | Admission diagnosis | + + whereas if 2 different diagnoses are present + ``` + "diagnosis": [ + { + "condition": [{"reference": {"reference": "Condition/stroke"}}], + "use": [ + { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/diagnosis-role", + "code": "AD", + "display": "Admission diagnosis", + } + ] + } + ], + }, + { + "condition": [{"reference": {"reference": "Condition/f201"}}], + "use": [ + { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/diagnosis-role", + "code": "DD", + "display": "Discharge diagnosis", + } + ] + } + ], + }, + ] + ``` + becomes + | encounter.diagnosis_dense | + |--------------------------------------| + |"[{"condition": [{"reference"...}]}]" | \ No newline at end of file diff --git a/fhirflat/resources/base.py b/fhirflat/resources/base.py index 3adbc00..5437c78 100644 --- a/fhirflat/resources/base.py +++ b/fhirflat/resources/base.py @@ -98,6 +98,40 @@ def fhir_bulk_import(cls, file: str) -> list[FHIRFlatBase]: else: return resources + @classmethod + def fhir_file_to_flat(cls, source_file: str, output_name: str | None = None): + """ + Converts a .ndjson file of exported FHIR resources to a FHIRflat parquet file. + + source_file: str + Path to the FHIR resource file. + + output_name: str + Name of the parquet file to be generated. 
+ + Returns + ------- + parquet file + FHIRflat file containing condition data + """ + + if not output_name: + output_name = f"{cls.resource_type}.parquet" + + # identify attributes that are lists of FHIR types and not excluded + list_resources = [x for x in cls.attr_lists() if x not in cls.flat_exclusions] + + fhir_data = cls.fhir_bulk_import(source_file) + + flat_rows = [] + for resource in fhir_data: + for field in cls.flat_exclusions: + setattr(resource, field, None) + flat_rows.append(fhir2flat(resource, lists=list_resources)) + + df = pd.concat(flat_rows) + return df.to_parquet(output_name) + def to_flat(self, filename: str) -> None: """ Generates a FHIRflat parquet file from the resource. @@ -111,17 +145,11 @@ def to_flat(self, filename: str) -> None: FHIRflat file containing condition data """ - # TODO: add support for lists of fhir resources, most likely from a fhir bundle - # or single file json output. - # Most likely the input format from FHIR bulk export or for import into FHIR - # server will be ndjson as referenced in - # https://build.fhir.org/ig/HL7/bulk-data/export.html. 
- # identify attributes that are lists of FHIR types list_resources = self.attr_lists() # clear data from attributes not used in FHIRflat - for field in [x for x in self.elements_sequence() if x in self.flat_exclusions]: + for field in self.flat_exclusions: setattr(self, field, None) list_resources.remove(field) if field in list_resources else None diff --git a/tests/data/patient.ndjson b/tests/data/patient.ndjson index a734463..d030f0c 100644 --- a/tests/data/patient.ndjson +++ b/tests/data/patient.ndjson @@ -1,3 +1,3 @@ -{"resourceType":"Patient","id":"ewnMwMK-UNvVvM.bakFSlkw3","extension":[{"extension":[{"valueCoding":{"system":"http://terminology.hl7.org/CodeSystem/v3-NullFlavor","code":"UNK","display":"Unknown"},"url":"ombCategory"},{"valueString":"Unknown","url":"text"}],"url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race"},{"extension":[{"valueString":"Unknown","url":"text"}],"url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity"},{"valueCodeableConcept":{"coding":[{"system":"urn:oid:1.2.840.114350.1.13.520.3.7.10.698084.130.657370.334258","code":"female"}]},"url":"http://open.epic.com/FHIR/StructureDefinition/extension/legal-sex"},{"valueCodeableConcept":{"coding":[{"system":"urn:oid:1.2.840.114350.1.13.520.3.7.10.698084.130.657370.334258","code":"female"}]},"url":"http://open.epic.com/FHIR/StructureDefinition/extension/sex-for-clinical-use"},{"valueCode":"F","url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex"}],"identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9254"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11363"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"TYO3ktvhYAUhbae7JuBwDdpyIbUZc8kZG.bMW2ZwVnwgB"},{"use":"usual","type":{"text":"FHIR 
STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"ewnMwMK-UNvVvM.bakFSlkw3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005294"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11363"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"Test ADVANCEPREPTWO","family":"ADVANCEPREPTWO","given":["Test"],"_family":{"extension":[{"valueString":"Advancepreptwo","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"Test ADVANCEPREPTWO","family":"ADVANCEPREPTWO","given":["Test"],"_family":{"extension":[{"valueString":"Advancepreptwo","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"gender":"female","birthDate":"2006-10-07","deceasedBoolean":false,"maritalStatus":{"text":"Single"}} 
-{"resourceType":"Patient","id":"exU8JSL0p8npSw5g1QYAyOw3","extension":[{"extension":[{"valueCoding":{"system":"http://terminology.hl7.org/CodeSystem/v3-NullFlavor","code":"UNK","display":"Unknown"},"url":"ombCategory"},{"valueString":"Unknown","url":"text"}],"url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race"},{"extension":[{"valueString":"Unknown","url":"text"}],"url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity"},{"valueCodeableConcept":{"coding":[{"system":"urn:oid:1.2.840.114350.1.13.520.3.7.10.698084.130.657370.334258","code":"female"}]},"url":"http://open.epic.com/FHIR/StructureDefinition/extension/legal-sex"},{"valueCodeableConcept":{"coding":[{"system":"urn:oid:1.2.840.114350.1.13.520.3.7.10.698084.130.657370.334258","code":"female"}]},"url":"http://open.epic.com/FHIR/StructureDefinition/extension/sex-for-clinical-use"},{"valueCode":"F","url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex"}],"identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9340"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11449"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"TwzgMHMPt5OhnHkFA2H2DudL3FU8qcRZkZyn0F3extkAB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"exU8JSL0p8npSw5g1QYAyOw3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005380"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11449"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"BcnScTenAugTwentyOne 
UPGRADETEST","family":"UPGRADETEST","given":["BcnScTenAugTwentyOne"],"_family":{"extension":[{"valueString":"UpgradeTest","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"BcnScTenAugTwentyOne UPGRADETEST","family":"UPGRADETEST","given":["BcnScTenAugTwentyOne"],"_family":{"extension":[{"valueString":"UpgradeTest","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"gender":"female","birthDate":"2019-09-21","deceasedBoolean":false,"maritalStatus":{"text":"Single"}} -{"resourceType":"Patient","id":"ezER-U3fAMP-WvI-Fc8V9wQ3","extension":[{"extension":[{"valueCoding":{"system":"http://terminology.hl7.org/CodeSystem/v3-NullFlavor","code":"UNK","display":"Unknown"},"url":"ombCategory"},{"valueString":"Unknown","url":"text"}],"url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race"},{"extension":[{"valueString":"Unknown","url":"text"}],"url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity"},{"valueCodeableConcept":{"coding":[{"system":"urn:oid:1.2.840.114350.1.13.520.3.7.10.698084.130.657370.334258","code":"male"}]},"url":"http://open.epic.com/FHIR/StructureDefinition/extension/legal-sex"},{"valueCodeableConcept":{"coding":[{"system":"urn:oid:1.2.840.114350.1.13.520.3.7.10.698084.130.657370.334258","code":"male"}]},"url":"http://open.epic.com/FHIR/StructureDefinition/extension/sex-for-clinical-use"},{"valueCode":"M","url":"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex"}],"identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9411"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11522"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"T2oDrcOUw0w.1ZUdthu24c7
V95lTOTEOUPRqBqGn99KEB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"ezER-U3fAMP-WvI-Fc8V9wQ3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005452"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11522"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"telecom":[{"system":"phone","value":"07594 832748","use":"mobile"},{"system":"email","value":"fred@email.com"}],"gender":"male","birthDate":"1967-01-19","deceasedBoolean":false,"address":[{"use":"old","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 6JY","country":"ENG"},{"use":"home","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 6JY","country":"ENG","period":{"start":"2022-01-19"}}],"maritalStatus":{"text":"Single"},"communication":[{"language":{"coding":[{"system":"urn:ietf:bcp:47","code":"en","display":"English"}],"text":"English"},"preferred":true}]} \ No newline at end of file 
+{"resourceType":"Patient","id":"ewnMwMK-UNvVvM.bakFSlkw3","identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9254"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11363"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"TYO3ktvhYAUhbae7JuBwDdpyIbUZc8kZG.bMW2ZwVnwgB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"ewnMwMK-UNvVvM.bakFSlkw3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005294"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11363"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"Test ADVANCEPREPTWO","family":"ADVANCEPREPTWO","given":["Test"],"_family":{"extension":[{"valueString":"Advancepreptwo","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"Test ADVANCEPREPTWO","family":"ADVANCEPREPTWO","given":["Test"],"_family":{"extension":[{"valueString":"Advancepreptwo","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"gender":"female","birthDate":"2006-10-07","deceasedBoolean":false,"maritalStatus":{"text":"Single"}} 
+{"resourceType":"Patient","id":"exU8JSL0p8npSw5g1QYAyOw3","identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9340"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11449"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"TwzgMHMPt5OhnHkFA2H2DudL3FU8qcRZkZyn0F3extkAB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"exU8JSL0p8npSw5g1QYAyOw3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005380"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11449"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"BcnScTenAugTwentyOne UPGRADETEST","family":"UPGRADETEST","given":["BcnScTenAugTwentyOne"],"_family":{"extension":[{"valueString":"UpgradeTest","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"BcnScTenAugTwentyOne UPGRADETEST","family":"UPGRADETEST","given":["BcnScTenAugTwentyOne"],"_family":{"extension":[{"valueString":"UpgradeTest","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"gender":"female","birthDate":"2019-09-21","deceasedBoolean":false,"maritalStatus":{"text":"Single"}} 
+{"resourceType":"Patient","id":"ezER-U3fAMP-WvI-Fc8V9wQ3","identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9411"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11522"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"T2oDrcOUw0w.1ZUdthu24c7V95lTOTEOUPRqBqGn99KEB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"ezER-U3fAMP-WvI-Fc8V9wQ3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005452"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11522"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"telecom":[{"system":"phone","value":"07594 832748","use":"mobile"},{"system":"email","value":"fred@email.com"}],"gender":"male","birthDate":"1967-01-19","deceasedBoolean":false,"address":[{"use":"old","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 6JY","country":"ENG"},{"use":"home","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 
6JY","country":"ENG","period":{"start":"2022-01-19"}}],"maritalStatus":{"text":"Single"},"communication":[{"language":{"coding":[{"system":"urn:ietf:bcp:47","code":"en","display":"English"}],"text":"English"},"preferred":true}]} \ No newline at end of file diff --git a/tests/test_patient_resource.py b/tests/test_patient_resource.py index 561cb5e..b704168 100644 --- a/tests/test_patient_resource.py +++ b/tests/test_patient_resource.py @@ -1,6 +1,7 @@ import pandas as pd from pandas.testing import assert_frame_equal import os +import datetime from fhirflat.resources.patient import Patient PATIENT_DICT_INPUT = { @@ -53,3 +54,33 @@ def test_bulk_fhir_import_patient(): patients = Patient.fhir_bulk_import("tests/data/patient.ndjson") assert len(patients) == 3 + + +patient_ndjson_out = { + # "index": [0, 0, 0], + "resourceType": ["Patient", "Patient", "Patient"], + "id": [ + "ewnMwMK-UNvVvM.bakFSlkw3", + "exU8JSL0p8npSw5g1QYAyOw3", + "ezER-U3fAMP-WvI-Fc8V9wQ3", + ], + "gender": ["female", "female", "male"], + "birthDate": [ + datetime.date(2006, 10, 7), + datetime.date(2019, 9, 21), + datetime.date(1967, 1, 19), + ], + "deceasedBoolean": [False, False, False], + "maritalStatus.text": ["Single", "Single", "Single"], +} + + +def test_bulk_fhir_to_flat_patient(): + Patient.fhir_file_to_flat( + "tests/data/patient.ndjson", "multi_patient_output.parquet" + ) + + df = pd.read_parquet("multi_patient_output.parquet") + df.reset_index(inplace=True, drop=True) + assert_frame_equal(pd.DataFrame(patient_ndjson_out), df) + os.remove("multi_patient_output.parquet")