-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: initial completeness-tracking work
This is early support for completeness-tracking (the ability to mark which groups & resources have been loaded by the ETL and are thus ready for studies to use).
- Adds some new (secret for now) CLI arguments:
  - --export-group
  - --export-timestamp
  - --write-completion
- Adds a new `etl__completion` table which holds:
  - table
  - group
  - export_time
- Adds a new `etl__completion_encounters` table which holds:
  - group
  - encounter_id
  - export_time
  - This table is automatically written to, using the CLI values
- Currently, those arguments are optional. A future change will make them required (though hopefully they will usually be inferred automatically from export logs).
- The export args will be automatically provided internally, if we are handling the bulk export ourselves (i.e. Loaders can provide group name and export timestamp).
- When using the ndjson output format, you can no longer have any files in the output folder. This is to safeguard against accidents (and to make some code paths simpler).
- Loading branch information
Showing
61 changed files
with
909 additions
and
200 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
""" | ||
Helpers for implementing completion-tracking. | ||
Completion tracking allows downstream consumers to know when ETL runs are | ||
"complete enough" for their purposes. | ||
For example, the `core` study may want to not expose Encounters whose | ||
Conditions have not yet been loaded. These metadata tables allow that. | ||
Although these metadata tables aren't themselves tasks, they need a | ||
lot of the same information that tasks need. This module provides that. | ||
""" | ||
|
||
from .schema import ( | ||
COMPLETION_TABLE, | ||
COMPLETION_ENCOUNTERS_TABLE, | ||
completion_encounters_output_args, | ||
completion_encounters_schema, | ||
completion_format_args, | ||
completion_schema, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
"""Schemas and Format helpers for writing completion tables.""" | ||
|
||
import pyarrow | ||
|
||
|
||
# Names of the two completion-tracking metadata tables described by the
# helpers in this module.
COMPLETION_TABLE = "etl__completion" | ||
COMPLETION_ENCOUNTERS_TABLE = "etl__completion_encounters" | ||
|
||
|
||
# FORMATTERS | ||
|
||
|
||
def completion_format_args() -> dict:
    """
    Build the keyword arguments for a Format that targets the completion table.

    The returned dict is meant to be splatted into the initializer of
    whichever Format class the caller chooses.
    """
    unique_columns = {"table_name", "group_name"}
    return dict(dbname=COMPLETION_TABLE, uniqueness_fields=unique_columns)
|
||
|
||
# OUTPUT TABLES | ||
|
||
|
||
def completion_encounters_output_args() -> dict:
    """Build the output table keyword arguments for etl__completion_encounters."""
    table_args = dict(
        name=COMPLETION_ENCOUNTERS_TABLE,
        uniqueness_fields={"encounter_id", "group_name"},
    )
    # Existing rows are left alone: the first export time recorded for a
    # group is the one we want to keep.
    table_args["update_existing"] = False
    table_args["resource_type"] = None
    table_args["visible"] = False
    return table_args
|
||
|
||
# SCHEMAS | ||
|
||
|
||
def completion_schema() -> pyarrow.Schema:
    """Build the pyarrow schema describing the etl__completion table."""
    # Every column is a string, including export_time. A pyarrow.timestamp()
    # might look like the natural choice for it, but ndjson output formats
    # can't natively represent a datetime and would need conversion in both
    # directions, so timestamps are kept as strings (mirroring the FHIR tables).
    column_names = ("table_name", "group_name", "export_time")
    return pyarrow.schema(
        [pyarrow.field(column, pyarrow.string()) for column in column_names]
    )
|
||
|
||
def completion_encounters_schema() -> pyarrow.Schema:
    """Build the pyarrow schema describing the etl__completion_encounters table."""
    encounter_id = pyarrow.field("encounter_id", pyarrow.string())
    group_name = pyarrow.field("group_name", pyarrow.string())
    # Stored as a string rather than a pyarrow.timestamp(): ndjson output
    # formats can't natively represent a datetime, so a string avoids
    # converting back and forth.
    export_time = pyarrow.field("export_time", pyarrow.string())
    return pyarrow.schema([encounter_id, group_name, export_time])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.