diff --git a/.gitignore b/.gitignore index b9990c4..4e1d468 100644 --- a/.gitignore +++ b/.gitignore @@ -106,6 +106,5 @@ dist downloads/* config/* -!config/example-config.ts .DS_Store volume/ \ No newline at end of file diff --git a/config/example-config.ts b/config/example-config.ts deleted file mode 100644 index 02071f5..0000000 --- a/config/example-config.ts +++ /dev/null @@ -1,145 +0,0 @@ -import { Config } from "../src/types" - -/** - * FHIR Date parameter to be used in the query strings below. - */ -const SINCE = "2018-01-01T05:00:00.000Z" - -/** - * Client settings for Bulk Data export - */ -const bulkClient = { - baseUrl : "BULK DATA SERVER BASE URL", - clientId : "BULK DATA CLIENT ID", - tokenEndpoint : "BULK DATA AUTH SERVER TOKEN URL", // Can be found in the CapabilityStatement - privateJWKorSecret: { /* PRIVATE KEY AS JWK */ } // OR client secret string for basic auth -} - -/** - * Client settings for FHIR API calls. - * NOTE: In Cerner this must be a separate client. In Epic it can be the same - * (although it can also be separate). To reuse the same client you can repeat - * the same settings or just do: - * ``` - * const fhirClient = bulkClient; - * ``` - */ -const fhirClient = { - baseUrl : "FHIR SERVER BASE URL", - clientId : "FHIR CLIENT ID", - tokenEndpoint : "AUTH SERVER TOKEN URL", // Can be found in the CapabilityStatement - privateJWKorSecret: { /* PRIVATE KEY AS JWK */ } // OR client secret string for basic auth -} - -const config: Config = { - - /** - * BulkData Group ID - */ - groupId: "BULK GROUP ID", - - /** - * Path to destination folder for ndjson downloads and logs. Can be absolute - * or relative to CWD - */ - destination : "PATH TO DOWNLOADS FOLDER", - - /** - * Delay in milliseconds between HTTP requests (in case you need to reduce - * the load on the server) - */ - throttle: 0, - - /** - * While we are waiting for the bulk export the server might send back a - * "retry-after" header. 
If so, we will try to respect that within a - * reasonable boundaries. Otherwise, the `poolInterval` option will be used - * to suggest after what delay to check again. - * NOTE: The value is in milliseconds. - */ - poolInterval: 5 * 60 * 1000, // 5 min - - /** - * Don't allow the bulk status pool interval to be smaller than this. This - * can be useful when you want to "correct" the retry-after delay - * recommended by the server. - * NOTE: The value is in milliseconds and must be <= `poolInterval`. - */ - minPoolInterval: 100, // 100 ms - - /** - * Don't allow the bulk status pool interval to be bigger than this. This - * can be useful when you want to "correct" the retry-after delay - * recommended by the server. - * NOTE: The value is in milliseconds and must be >= `poolInterval`. - */ - maxPoolInterval: 1000 * 60 * 60, // 1 hour - - /** - * Downloaded files are named as `..ndjson` where - * start from `1`. While the file size is less then this, new lines - * will be appended to it. Once that size is reached another fille will be - * created with incremented and the lines will be appending to it. - */ - maxFileSize : 1e9, // ~ 1 GB - - /** - * Retried failed requests if they returned one of these status codes. - * NOTE: Only failed requests are retried. - */ - retryStatusCodes: [408, 413, 429, 500, 502, 503, 504, 521, 522, 524], - - /** - * Wait this many milliseconds before retrying a failed request - */ - retryDelay: 1000, - - /** - * How many times to retry failed requests. Set to 0 to disable retrying. - */ - retryLimit: 5, - - /** - * Client settings for Bulk Data export - */ - bulkClient, - - /** - * Client settings for FHIR API calls - */ - fhirClient, - - /** - * Request timeout in milliseconds - */ - requestTimeout: 60000, - - /** - * - `1` (or less) means serial downloads - * - `>1` means that there is one download process for each resourceType - * other than Patient, but not more than this number. - * For example (if this is set to 10): - * 1. 
If you are downloading 5 resource types, setting this to 10 is the - * same as setting it to 5. - * 2. If you are downloading 50 resource types the first 10 will be started - * immediately and work in parallel and the rest will start whenever a - * worker becomes available. - */ - parallel: 10, - - /** - * Map of resource types we want to download and their corresponding query - * string. This does not include `Patient` resources which are downloaded - * via bulk data export. - * NOTE: #{patientId} will be replaced with the current patient ID - */ - resources: { - Encounter : `?patient=#{patientId}&date=gt${SINCE}`, - Condition : `?patient=#{patientId}`, - DocumentReference: `?patient=#{patientId}&date=gt${SINCE}&category=clinical-note`, - MedicationRequest: `?patient=#{patientId}`, - Observation : `?patient=#{patientId}&date=gt${SINCE}&category=laboratory,vital-signs,social-history`, - } -} - -export default config diff --git a/src/index.ts b/src/index.ts index 551be3b..1efcfda 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,6 +13,7 @@ import humanizeDuration from "humanize-duration" import { ndjsonEntries, print, + readLine, sweep } from "./utils" @@ -23,10 +24,11 @@ program.version(pkg.version) program.option("-p, --path [path]", "Path to directory containing the config file") program.option( "--patients [paths...]", - "Path to ndjson file with patients. If passed, the bulk data part of the export " + - "will be skipped and these patients will be used instead. Can be specified " + - "multiple times for multiple patient files. Paths should be relative to the " + - "input directory.", + "Path to an NDJSON file with Patients or a text file with Patient IDs. " + + "If passed, the bulk data part of the export will be skipped and " + + "these patients will be used instead. " + + "Can be specified multiple times for multiple patient files. 
" + + "Paths should be relative to the input directory.", [] ) @@ -79,6 +81,31 @@ async function loadConfig(configDir: string): Promise { return config } +/** + * Parses either an ndjson/jsonl file or a basic text file and yields the Patient IDs found (for a text file, one ID per non-empty line). + * @param path The path to the file to read + */ +function *readPatientIdsFromFile(path: string): IterableIterator { + + if (path.endsWith(".ndjson") || path.endsWith(".jsonl")) { + for (const patient of ndjsonEntries(path)) { + if (!patient || typeof patient !== "object" || patient.resourceType !== "Patient") { + // istanbul ignore next + throw new Error(format(`A non-patient entry found in the Patient ndjson file: %o`, patient)) + } + yield patient.id; + } + } + else { + const lines = readLine(path); + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed) + yield trimmed; + } + } +} + async function main(args: Record) { const inputDir = getInputDirectory(args.path) @@ -156,15 +183,11 @@ async function main(args: Record) { // next available url (if any) whenever we are ready to download it const downloadUrls = (function*() { for (const loc of files) { - for (const patient of ndjsonEntries(loc)) { - if (!patient || typeof patient !== "object" || patient.resourceType !== "Patient") { - // istanbul ignore next - throw new Error(format(`A non-patient entry found in the Patient ndjson file: %o`, patient)) - } + for (const patientId of readPatientIdsFromFile(loc)) { counts.Patient++ counts["Total FHIR Resources"]++ for (const resourceType of Object.keys(config.resources)) { - const query = config.resources[resourceType].replace("#{patientId}", patient.id) + const query = config.resources[resourceType].replace("#{patientId}", patientId) yield `${resourceType}${query}` } }