Merge pull request #122 from mcode/develop

Merge for v1.0.1
mcode · May 20, 2021 · 0aedf4b · 0aedf4b
2 parents 9258212 + 8f79a1b
commit 0aedf4b
Show file tree

Hide file tree

Showing 8 changed files with 142 additions and 52 deletions.
diff --git a/README.md b/README.md
@@ -20,6 +20,7 @@ A Node.js framework for extracting mCODE FHIR resources. All resources are profi
     - [Extraction Date Range](#extraction-date-range)
       - [CLI From-Date and To-Date (NOT recommended use)](#cli-from-date-and-to-date-not-recommended-use)
     - [Troubleshooting](#troubleshooting)
+      - [NULL/NIL values found and replaced with empty-strings](#nullnil-values-found-and-replaced-with-empty-strings)
       - [Byte Order Markers in CSV Files](#byte-order-markers-in-csv-files)
   - [Terminology and Architecture](#terminology-and-architecture)
     - [Glossary](#glossary)
@@ -165,6 +166,10 @@ npm start -- --entries-filter --from-date <YYYY-MM-DD> --to-date <YYYY-MM-DD> --
 
 ### Troubleshooting
 
+#### NULL/NIL values found and replaced with empty-strings
+
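+When a CSV file contains cells whose entire value is `NULL` or `NIL` (matched case-insensitively), those cells are treated as empty and are replaced with an empty string (`''`). Each Extractor, however, can define a set of `unalterableColumns` which will be immune from this NULL/NIL correction; the patient extractor, for example, protects `familyName` and `givenName`. When at least one value in a file is corrected, a `warn`-level message with the text of this heading is logged. All values that are corrected will also produce a `debug`-level message, which can be seen by running the extractor with the debug flag set.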
+
 #### Byte Order Markers in CSV Files
 
 The extraction client has built-in handling of byte order markers for CSV files in UTF-8 and UTF-16LE encodings. When using CSV files in other encodings, if you experience unexpected errors be sure to check for a byte order marker at the beginning of the file. One way to check is to run the following command from the command line:

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,13 +1,14 @@
 {
   "name": "mcode-extraction-framework",
-  "version": "1.0.0",
+  "version": "1.0.1",
   "description": "",
   "contributors": [
     "Julia Afeltra <[email protected]>",
     "Julian Carter <[email protected]>",
     "Matthew Gramigna <[email protected]>",
     "Daniel Lee <[email protected]>",
     "Dylan Mahalingam <[email protected]>",
+    "Dylan Mendelowitz <[email protected]>",
     "Dylan Phelan <[email protected]>"
   ],
   "main": "src/",
@@ -26,9 +27,9 @@
     "csv-parse": "^4.8.8",
     "fhir-crud-client": "^1.2.2",
     "fhirpath": "2.1.5",
-    "lodash": "^4.17.19",
+    "lodash": "^4.17.21",
     "moment": "^2.26.0",
-    "nodemailer": "^6.4.14",
+    "nodemailer": "^6.4.16",
     "sha.js": "^2.4.9",
     "winston": "^3.2.1"
   },

diff --git a/src/cli/app.js b/src/cli/app.js
@@ -84,7 +84,7 @@ async function mcodeApp(Client, fromDate, toDate, pathToConfig, pathToRunLogs, d
 
     // Parse CSV for list of patient mrns
     const patientIdsCsvPath = path.resolve(config.patientIdCsvPath);
-    const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true }).map((row) => row.mrn);
+    const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true, bom: true }).map((row) => row.mrn);
 
     // Get RunInstanceLogger for recording new runs and inferring dates from previous runs
     const runLogger = allEntries ? null : new RunInstanceLogger(pathToRunLogs);

diff --git a/src/extractors/BaseCSVExtractor.js b/src/extractors/BaseCSVExtractor.js
@@ -5,11 +5,12 @@ const { validateCSV } = require('../helpers/csvValidator');
 const logger = require('../helpers/logger');
 
 class BaseCSVExtractor extends Extractor {
-  constructor({ filePath, csvSchema }) {
+  constructor({ filePath, csvSchema, unalterableColumns }) {
     super();
+    this.unalterableColumns = unalterableColumns || [];
     this.csvSchema = csvSchema;
     this.filePath = path.resolve(filePath);
-    this.csvModule = new CSVModule(this.filePath);
+    this.csvModule = new CSVModule(this.filePath, this.unalterableColumns);
   }
 
   validate() {

diff --git a/src/extractors/CSVPatientExtractor.js b/src/extractors/CSVPatientExtractor.js
@@ -55,7 +55,9 @@ function joinAndReformatData(patientData) {
 
 class CSVPatientExtractor extends BaseCSVExtractor {
   constructor({ filePath, mask = [] }) {
-    super({ filePath, csvSchema: CSVPatientSchema });
+    // Define CSV Columns whose values should never be altered
+    const unalterableColumns = ['familyName', 'givenName'];
+    super({ filePath, csvSchema: CSVPatientSchema, unalterableColumns });
     this.mask = mask;
   }
 

diff --git a/src/modules/CSVModule.js b/src/modules/CSVModule.js
@@ -3,16 +3,54 @@ const moment = require('moment');
 const parse = require('csv-parse/lib/sync');
 const logger = require('../helpers/logger');
 
+// The standard string normalizer: lower-cases `str` so CSV headers, lookup keys, and null/nil markers all compare case-insensitively
+function stringNormalizer(str) {
+  return str.toLowerCase(); // throws a TypeError when `str` is not a string (e.g. null or undefined)
+}
+
+// For translating null/nil-like values into empty strings
+function normalizeEmptyValues(data, unalterableColumns = []) {
+  const EMPTY_VALUES = ['null', 'nil'].map(stringNormalizer);
+  const normalizedUnalterableColumns = unalterableColumns.map(stringNormalizer);
+  // Flag tracking if empty values were normalized or not.
+  let wasEmptyNormalized = false;
+  const newData = data.map((row, i) => {
+    const newRow = { ...row };
+    // Filter out unalterable columns
+    const columnsToNormalize = Object.keys(row).filter((col) => !normalizedUnalterableColumns.includes(stringNormalizer(col)));
+    columnsToNormalize.forEach((col) => {
+      const value = newRow[col];
+      // If the value for this row-col combo is a value that should be empty, replace it
+      if (EMPTY_VALUES.includes(stringNormalizer(value))) {
+        logger.debug(`NULL/NIL values '${value}' found in row-${i}, col-${col}`);
+        wasEmptyNormalized = true;
+        newRow[col] = '';
+      }
+    });
+    return newRow;
+  });
+
+  if (wasEmptyNormalized) {
+    logger.warn('NULL/NIL values found and replaced with empty-strings');
+  }
+  return newData;
+}
+
 class CSVModule {
-  constructor(csvFilePath) {
-    this.data = parse(fs.readFileSync(csvFilePath), { columns: (header) => header.map((column) => column.toLowerCase()), bom: true });
+  constructor(csvFilePath, unalterableColumns) {
+    // Parse the file (header names lower-cased, `bom: true` passed to csv-parse), then blank out null/nil cells outside unalterableColumns
+    const parsedData = parse(fs.readFileSync(csvFilePath), {
+      columns: (header) => header.map((column) => stringNormalizer(column)),
+      bom: true,
+    });
+    this.data = normalizeEmptyValues(parsedData, unalterableColumns); // an undefined list falls back to normalizeEmptyValues' default of []
+  }
 
   async get(key, value, fromDate, toDate) {
     logger.debug(`Get csvModule info by key '${key}'`);
     // return all rows if key and value aren't provided
     if (!key && !value) return this.data;
-    let result = this.data.filter((d) => d[key.toLowerCase()] === value);
+    let result = this.data.filter((d) => d[stringNormalizer(key)] === value);
     if (result.length === 0) {
       logger.warn(`CSV Record with provided key '${key}' and value was not found`);
       return result;

diff --git a/test/modules/CSVModule.test.js b/test/modules/CSVModule.test.js
@@ -1,48 +1,91 @@
 const path = require('path');
+const rewire = require('rewire');
 const { CSVModule } = require('../../src/modules');
 const exampleResponse = require('./fixtures/csv-response.json');
 
+const CSVModuleRewired = rewire('../../src/modules/CSVModule.js');
+const normalizeEmptyValues = CSVModuleRewired.__get__('normalizeEmptyValues');
+
 const INVALID_MRN = 'INVALID MRN';
 const csvModule = new CSVModule(path.join(__dirname, './fixtures/example-csv.csv'));
 const csvModuleWithBOMs = new CSVModule(path.join(__dirname, './fixtures/example-csv-bom.csv'));
 
-test('Reads data from CSV', async () => {
-  const data = await csvModule.get('mrn', 'example-mrn-1');
-  expect(data).toEqual(exampleResponse);
-});
 
-test('Reads data from CSV with a Byte Order Mark', async () => {
-  const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
-  expect(data).toEqual(exampleResponse);
-});
+describe('CSVModule', () => {
+  describe('get', () => {
+    test('Reads data from CSV', async () => {
+      const data = await csvModule.get('mrn', 'example-mrn-1');
+      expect(data).toEqual(exampleResponse);
+    });
 
-test('Returns multiple rows', async () => {
-  const data = await csvModule.get('mrn', 'example-mrn-2');
-  expect(data).toHaveLength(2);
-});
+    test('Reads data from CSV with a Byte Order Mark', async () => {
+      const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
+      expect(data).toEqual(exampleResponse);
+    });
 
-test('Returns all rows when both key and value are undefined', async () => {
-  const data = await csvModule.get();
-  expect(data).toHaveLength(csvModule.data.length);
-  expect(data).toEqual(csvModule.data);
-});
+    test('Returns multiple rows', async () => {
+      const data = await csvModule.get('mrn', 'example-mrn-2');
+      expect(data).toHaveLength(2);
+    });
 
-test('Returns data with recordedDate after specified from date', async () => {
-  const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
-  expect(data).toHaveLength(1);
-});
+    test('Returns all rows when both key and value are undefined', async () => {
+      const data = await csvModule.get();
+      expect(data).toHaveLength(csvModule.data.length);
+      expect(data).toEqual(csvModule.data);
+    });
 
-test('Returns data with recordedDate before specified to date', async () => {
-  const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
-  expect(data).toHaveLength(1);
-});
+    test('Returns data with recordedDate after specified from date', async () => {
+      const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
+      expect(data).toHaveLength(1);
+    });
 
-test('Should return an empty array when key-value pair does not exist', async () => {
-  const data = await csvModule.get('mrn', INVALID_MRN);
-  expect(data).toEqual([]);
-});
+    test('Returns data with recordedDate before specified to date', async () => {
+      const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
+      expect(data).toHaveLength(1);
+    });
+
+    test('Should return an empty array when key-value pair does not exist', async () => {
+      const data = await csvModule.get('mrn', INVALID_MRN);
+      expect(data).toEqual([]);
+    });
+
+    test('Should return proper value regardless of key casing', async () => {
+      const data = await csvModule.get('mRN', 'example-mrn-1');
+      expect(data).toEqual(exampleResponse);
+    });
+  });
+
+  describe('normalizeEmptyValues', () => {
+    it('Should turn "null" values into empty strings, regardless of case', () => {
+      const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }];
+      const normalizedData = normalizeEmptyValues(data);
+      normalizedData.forEach((d) => {
+        expect(d.key).toBe('');
+      });
+    });
+
+    it('Should turn "nil" values into empty strings, regardless of case', () => {
+      const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
+      const normalizedData = normalizeEmptyValues(data);
+      normalizedData.forEach((d) => {
+        expect(d.key).toBe('');
+      });
+    });
+
+    it('Should not modify unalterableColumns, regardless of their value', () => {
+      const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
+      const normalizedData = normalizeEmptyValues(data, ['key']);
+      normalizedData.forEach((d) => {
+        expect(d.key).not.toBe('');
+      });
+    });
 
-test('Should return proper value regardless of key casing', async () => {
-  const data = await csvModule.get('mRN', 'example-mrn-1');
-  expect(data).toEqual(exampleResponse);
+    it('Should leave all other values uneffected, regardless of case', () => {
+      const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }];
+      const normalizedData = normalizeEmptyValues(data);
+      normalizedData.forEach((d) => {
+        expect(d.key).not.toBe('');
+      });
+    });
+  });
 });