diff --git a/README.md b/README.md index 47cc36c6..4ca75b6a 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ A Node.js framework for extracting mCODE FHIR resources. All resources are profi - [Extraction Date Range](#extraction-date-range) - [CLI From-Date and To-Date (NOT recommended use)](#cli-from-date-and-to-date-not-recommended-use) - [Troubleshooting](#troubleshooting) + - [NULL/NIL values found and replaced with empty-strings](#nullnil-values-found-and-replaced-with-empty-strings) - [Byte Order Markers in CSV Files](#byte-order-markers-in-csv-files) - [Terminology and Architecture](#terminology-and-architecture) - [Glossary](#glossary) @@ -165,6 +166,10 @@ npm start -- --entries-filter --from-date --to-date -- ### Troubleshooting +#### NULL/NIL values found and replaced with empty-strings + +When CSV files are provided containing NULL/NIL values, those values are treated as empty values and are translated into ''. Each Extractor, however, defines a set of `unalterableColumns` which will be immune from this NULL/NIL correction. All values that are corrected will produce a `debug`-level message, and can be seen by running the extractor with the debug flag set. + #### Byte Order Markers in CSV Files The extraction client has built-in handling of byte order markers for CSV files in UTF-8 and UTF-16LE encodings. When using CSV files in other encodings, if you experience unexpected errors be sure to check for a byte order marker at the beginning of the file. 
One way to check is to run the following command from the command line: diff --git a/package-lock.json b/package-lock.json index aec9ad45..0d28857c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "mcode-extraction-framework", - "version": "1.0.0", + "version": "1.0.1", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -4416,9 +4416,9 @@ } }, "hosted-git-info": { - "version": "2.8.8", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.8.tgz", - "integrity": "sha512-f/wzC2QaWBs7t9IYqB4T3sR1xviIViXJRJTWBlx2Gf3g0Xi5vI7Yy4koXQ1c9OYDGHN9sBy1DQ2AB8fqZBWhUg==", + "version": "2.8.9", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", + "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==", "dev": true }, "html-encoding-sniffer": { @@ -8417,9 +8417,9 @@ } }, "lodash": { - "version": "4.17.19", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.19.tgz", - "integrity": "sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ==" + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" }, "lodash.get": { "version": "4.4.2", @@ -8756,9 +8756,9 @@ "dev": true }, "nodemailer": { - "version": "6.4.14", - "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.4.14.tgz", - "integrity": "sha512-0AQHOOT+nRAOK6QnksNaK7+5vjviVvEBzmZytKU7XSA+Vze2NLykTx/05ti1uJgXFTWrMq08u3j3x4r4OE6PAA==" + "version": "6.4.16", + "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.4.16.tgz", + "integrity": "sha512-68K0LgZ6hmZ7PVmwL78gzNdjpj5viqBdFqKrTtr9bZbJYj6BRj5W6WGkxXrEnUl3Co3CBXi3CZBUlpV/foGnOQ==" }, "normalize-package-data": { "version": "2.5.0", diff --git a/package.json b/package.json index 9373ecd1..84961513 
100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "mcode-extraction-framework", - "version": "1.0.0", + "version": "1.0.1", "description": "", "contributors": [ "Julia Afeltra ", @@ -8,6 +8,7 @@ "Matthew Gramigna ", "Daniel Lee ", "Dylan Mahalingam ", + "Dylan Mendelowitz ", "Dylan Phelan " ], "main": "src/", @@ -26,9 +27,9 @@ "csv-parse": "^4.8.8", "fhir-crud-client": "^1.2.2", "fhirpath": "2.1.5", - "lodash": "^4.17.19", + "lodash": "^4.17.21", "moment": "^2.26.0", - "nodemailer": "^6.4.14", + "nodemailer": "^6.4.16", "sha.js": "^2.4.9", "winston": "^3.2.1" }, diff --git a/src/cli/app.js b/src/cli/app.js index 6dc82903..057cb7a5 100644 --- a/src/cli/app.js +++ b/src/cli/app.js @@ -84,7 +84,7 @@ async function mcodeApp(Client, fromDate, toDate, pathToConfig, pathToRunLogs, d // Parse CSV for list of patient mrns const patientIdsCsvPath = path.resolve(config.patientIdCsvPath); - const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true }).map((row) => row.mrn); + const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true, bom: true }).map((row) => row.mrn); // Get RunInstanceLogger for recording new runs and inferring dates from previous runs const runLogger = allEntries ? 
/**
 * The standard string normalizer, used so that column names and cell values
 * can be compared case-insensitively.
 *
 * @param {string} str - The string to normalize.
 * @returns {string} `str` converted to lower case.
 */
function stringNormalizer(str) {
  return str.toLowerCase();
}

/**
 * Translates null/nil-like cell values (any casing of "null" or "nil") into
 * empty strings. The input rows are never mutated; a shallow copy of each row
 * is returned instead.
 *
 * Every replacement is reported with a `debug`-level log message, and a single
 * `warn`-level message is logged if at least one value was replaced.
 *
 * @param {Object[]} data - Rows to normalize, each mapping a column name to its value.
 * @param {string[]} [unalterableColumns=[]] - Column names (matched
 *   case-insensitively) whose values must never be altered.
 * @returns {Object[]} New rows with NULL/NIL values replaced by `''`.
 */
function normalizeEmptyValues(data, unalterableColumns = []) {
  const EMPTY_VALUES = ['null', 'nil'].map(stringNormalizer);
  // `|| []` additionally covers an explicit `null`, which a default parameter does not;
  // a Set gives constant-time membership checks for every key of every row.
  const protectedColumns = new Set((unalterableColumns || []).map(stringNormalizer));
  // Flag tracking if empty values were normalized or not.
  let wasEmptyNormalized = false;

  const newData = data.map((row, i) => {
    const newRow = { ...row };
    Object.keys(row).forEach((col) => {
      // Unalterable columns are left exactly as they were provided
      if (protectedColumns.has(stringNormalizer(col))) return;
      const value = newRow[col];
      // Only strings can spell NULL/NIL; skip anything else instead of crashing in toLowerCase
      if (typeof value !== 'string') return;
      // If the value for this row-col combo is a value that should be empty, replace it
      if (EMPTY_VALUES.includes(stringNormalizer(value))) {
        logger.debug(`NULL/NIL values '${value}' found in row-${i}, col-${col}`);
        wasEmptyNormalized = true;
        newRow[col] = '';
      }
    });
    return newRow;
  });

  if (wasEmptyNormalized) {
    logger.warn('NULL/NIL values found and replaced with empty-strings');
  }
  return newData;
}
const path = require('path');
const rewire = require('rewire');
const { CSVModule } = require('../../src/modules');
const exampleResponse = require('./fixtures/csv-response.json');

// rewire exposes the module-private normalizeEmptyValues helper for direct testing
const CSVModuleRewired = rewire('../../src/modules/CSVModule.js');
const normalizeEmptyValues = CSVModuleRewired.__get__('normalizeEmptyValues');

const INVALID_MRN = 'INVALID MRN';
const csvModule = new CSVModule(path.join(__dirname, './fixtures/example-csv.csv'));
const csvModuleWithBOMs = new CSVModule(path.join(__dirname, './fixtures/example-csv-bom.csv'));

describe('CSVModule', () => {
  describe('get', () => {
    test('Reads data from CSV', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-1');
      expect(data).toEqual(exampleResponse);
    });

    test('Reads data from CSV with a Byte Order Mark', async () => {
      const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
      expect(data).toEqual(exampleResponse);
    });

    test('Returns multiple rows', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-2');
      expect(data).toHaveLength(2);
    });

    test('Returns all rows when both key and value are undefined', async () => {
      const data = await csvModule.get();
      expect(data).toHaveLength(csvModule.data.length);
      expect(data).toEqual(csvModule.data);
    });

    test('Returns data with recordedDate after specified from date', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
      expect(data).toHaveLength(1);
    });

    test('Returns data with recordedDate before specified to date', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
      expect(data).toHaveLength(1);
    });

    test('Should return an empty array when key-value pair does not exist', async () => {
      const data = await csvModule.get('mrn', INVALID_MRN);
      expect(data).toEqual([]);
    });

    test('Should return proper value regardless of key casing', async () => {
      const data = await csvModule.get('mRN', 'example-mrn-1');
      expect(data).toEqual(exampleResponse);
    });
  });

  describe('normalizeEmptyValues', () => {
    it('Should turn "null" values into empty strings, regardless of case', () => {
      const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }];
      const normalizedData = normalizeEmptyValues(data);
      normalizedData.forEach((d) => {
        expect(d.key).toBe('');
      });
    });

    it('Should turn "nil" values into empty strings, regardless of case', () => {
      const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
      const normalizedData = normalizeEmptyValues(data);
      normalizedData.forEach((d) => {
        expect(d.key).toBe('');
      });
    });

    it('Should not modify unalterableColumns, regardless of their value', () => {
      const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
      const normalizedData = normalizeEmptyValues(data, ['key']);
      // Compare against the input so that ANY alteration fails, not just replacement with ''
      expect(normalizedData).toEqual(data);
    });

    it('Should match unalterableColumns regardless of the casing of the column name', () => {
      const data = [{ key: 'null' }, { key: 'NIL' }];
      const normalizedData = normalizeEmptyValues(data, ['KEY']);
      expect(normalizedData).toEqual(data);
    });

    it('Should leave all other values unaffected, regardless of case', () => {
      const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }];
      const normalizedData = normalizeEmptyValues(data);
      // Compare against the input so that ANY alteration fails, not just replacement with ''
      expect(normalizedData).toEqual(data);
    });
  });
});