Skip to content

Commit

Permalink
Merge pull request #122 from mcode/develop
Browse files Browse the repository at this point in the history
Merge for v1.0.1
  • Loading branch information
Dtphelan1 authored May 20, 2021
2 parents 9258212 + 8f79a1b commit 0aedf4b
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 52 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ A Node.js framework for extracting mCODE FHIR resources. All resources are profi
- [Extraction Date Range](#extraction-date-range)
- [CLI From-Date and To-Date (NOT recommended use)](#cli-from-date-and-to-date-not-recommended-use)
- [Troubleshooting](#troubleshooting)
- [NULL/NIL values found and replaced with empty-strings](#nullnil-values-found-and-replaced-with-empty-strings)
- [Byte Order Markers in CSV Files](#byte-order-markers-in-csv-files)
- [Terminology and Architecture](#terminology-and-architecture)
- [Glossary](#glossary)
Expand Down Expand Up @@ -165,6 +166,10 @@ npm start -- --entries-filter --from-date <YYYY-MM-DD> --to-date <YYYY-MM-DD> --

### Troubleshooting

#### NULL/NIL values found and replaced with empty-strings

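When a CSV file contains NULL/NIL values (matched case-insensitively, e.g. `null`, `NULL`, or `nil`), those values are treated as empty and are replaced with empty strings (`''`). An Extractor can, however, define a set of `unalterableColumns` that are exempt from this NULL/NIL correction; for example, the CSV patient extractor exempts `familyName` and `givenName`. Each corrected value produces a `debug`-level message, which can be seen by running the extractor with the debug flag set, and a `warn`-level message is logged when any value in a file has been replaced.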

#### Byte Order Markers in CSV Files

The extraction client has built-in handling of byte order markers for CSV files in UTF-8 and UTF-16LE encodings. If you use CSV files in other encodings and experience unexpected errors, be sure to check for a byte order marker at the beginning of the file. One way to check is to run the following command from the command line:
Expand Down
20 changes: 10 additions & 10 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
{
"name": "mcode-extraction-framework",
"version": "1.0.0",
"version": "1.0.1",
"description": "",
"contributors": [
"Julia Afeltra <[email protected]>",
"Julian Carter <[email protected]>",
"Matthew Gramigna <[email protected]>",
"Daniel Lee <[email protected]>",
"Dylan Mahalingam <[email protected]>",
"Dylan Mendelowitz <[email protected]>",
"Dylan Phelan <[email protected]>"
],
"main": "src/",
Expand All @@ -26,9 +27,9 @@
"csv-parse": "^4.8.8",
"fhir-crud-client": "^1.2.2",
"fhirpath": "2.1.5",
"lodash": "^4.17.19",
"lodash": "^4.17.21",
"moment": "^2.26.0",
"nodemailer": "^6.4.14",
"nodemailer": "^6.4.16",
"sha.js": "^2.4.9",
"winston": "^3.2.1"
},
Expand Down
2 changes: 1 addition & 1 deletion src/cli/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ async function mcodeApp(Client, fromDate, toDate, pathToConfig, pathToRunLogs, d

// Parse CSV for list of patient mrns
const patientIdsCsvPath = path.resolve(config.patientIdCsvPath);
const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true }).map((row) => row.mrn);
const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true, bom: true }).map((row) => row.mrn);

// Get RunInstanceLogger for recording new runs and inferring dates from previous runs
const runLogger = allEntries ? null : new RunInstanceLogger(pathToRunLogs);
Expand Down
5 changes: 3 additions & 2 deletions src/extractors/BaseCSVExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ const { validateCSV } = require('../helpers/csvValidator');
const logger = require('../helpers/logger');

class BaseCSVExtractor extends Extractor {
constructor({ filePath, csvSchema }) {
constructor({ filePath, csvSchema, unalterableColumns }) {
super();
this.unalterableColumns = unalterableColumns || [];
this.csvSchema = csvSchema;
this.filePath = path.resolve(filePath);
this.csvModule = new CSVModule(this.filePath);
this.csvModule = new CSVModule(this.filePath, this.unalterableColumns);
}

validate() {
Expand Down
4 changes: 3 additions & 1 deletion src/extractors/CSVPatientExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ function joinAndReformatData(patientData) {

class CSVPatientExtractor extends BaseCSVExtractor {
constructor({ filePath, mask = [] }) {
super({ filePath, csvSchema: CSVPatientSchema });
// Define CSV Columns whose values should never be altered
const unalterableColumns = ['familyName', 'givenName'];
super({ filePath, csvSchema: CSVPatientSchema, unalterableColumns });
this.mask = mask;
}

Expand Down
44 changes: 41 additions & 3 deletions src/modules/CSVModule.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,54 @@ const moment = require('moment');
const parse = require('csv-parse/lib/sync');
const logger = require('../helpers/logger');

/**
 * Canonical normalizer applied to strings before they are compared.
 *
 * Header names, lookup keys, and candidate null/nil cell values are all passed
 * through this function so that comparisons between them ignore letter case.
 *
 * @param {string} str - The string to normalize.
 * @returns {string} The lower-cased form of `str`.
 */
function stringNormalizer(str) {
  const lowered = str.toLowerCase();
  return lowered;
}

/**
 * Translate null/nil-like cell values into empty strings.
 *
 * Every row is shallow-copied, so the rows passed in are never mutated. A cell
 * is replaced with '' when its normalized value is 'null' or 'nil' and its
 * column is not listed in `unalterableColumns`. Column names and cell values
 * are both compared after normalization, so matching ignores letter case.
 *
 * Each replacement is logged at debug level; a single warning is logged if at
 * least one replacement happened.
 *
 * @param {Object[]} data - Parsed CSV rows, keyed by column name.
 * @param {string[]} [unalterableColumns=[]] - Columns whose values must never
 *   be altered. A null/undefined value is treated as an empty list.
 * @returns {Object[]} New array of new row objects with empty values normalized.
 */
function normalizeEmptyValues(data, unalterableColumns = []) {
  const emptyValues = new Set(['null', 'nil'].map(stringNormalizer));
  // `|| []` also covers an explicit null, which a default parameter does not catch
  const protectedColumns = new Set((unalterableColumns || []).map(stringNormalizer));
  // Flag tracking whether any value was replaced, so we warn only once
  let wasEmptyNormalized = false;

  const newData = data.map((row, i) => {
    const newRow = { ...row };
    Object.keys(row).forEach((col) => {
      // Unalterable columns keep their value no matter what it is
      if (protectedColumns.has(stringNormalizer(col))) return;

      const value = newRow[col];
      // Only a string can spell null/nil; skipping anything else avoids a
      // TypeError from normalizing undefined, null, or numeric cells
      if (typeof value !== 'string') return;
      if (!emptyValues.has(stringNormalizer(value))) return;

      logger.debug(`NULL/NIL values '${value}' found in row-${i}, col-${col}`);
      wasEmptyNormalized = true;
      newRow[col] = '';
    });
    return newRow;
  });

  if (wasEmptyNormalized) {
    logger.warn('NULL/NIL values found and replaced with empty-strings');
  }
  return newData;
}

class CSVModule {
constructor(csvFilePath) {
this.data = parse(fs.readFileSync(csvFilePath), { columns: (header) => header.map((column) => column.toLowerCase()), bom: true });
constructor(csvFilePath, unalterableColumns) {
// Parse then normalize the data
const parsedData = parse(fs.readFileSync(csvFilePath), {
columns: (header) => header.map((column) => stringNormalizer(column)),
bom: true,
});
this.data = normalizeEmptyValues(parsedData, unalterableColumns);
}

async get(key, value, fromDate, toDate) {
logger.debug(`Get csvModule info by key '${key}'`);
// return all rows if key and value aren't provided
if (!key && !value) return this.data;
let result = this.data.filter((d) => d[key.toLowerCase()] === value);
let result = this.data.filter((d) => d[stringNormalizer(key)] === value);
if (result.length === 0) {
logger.warn(`CSV Record with provided key '${key}' and value was not found`);
return result;
Expand Down
107 changes: 75 additions & 32 deletions test/modules/CSVModule.test.js
Original file line number Diff line number Diff line change
@@ -1,48 +1,91 @@
const path = require('path');
const rewire = require('rewire');
const { CSVModule } = require('../../src/modules');
const exampleResponse = require('./fixtures/csv-response.json');

const CSVModuleRewired = rewire('../../src/modules/CSVModule.js');
const normalizeEmptyValues = CSVModuleRewired.__get__('normalizeEmptyValues');

const INVALID_MRN = 'INVALID MRN';
const csvModule = new CSVModule(path.join(__dirname, './fixtures/example-csv.csv'));
const csvModuleWithBOMs = new CSVModule(path.join(__dirname, './fixtures/example-csv-bom.csv'));

test('Reads data from CSV', async () => {
const data = await csvModule.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});

test('Reads data from CSV with a Byte Order Mark', async () => {
const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});
describe('CSVModule', () => {
describe('get', () => {
test('Reads data from CSV', async () => {
const data = await csvModule.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});

test('Returns multiple rows', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2');
expect(data).toHaveLength(2);
});
test('Reads data from CSV with a Byte Order Mark', async () => {
const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});

test('Returns all rows when both key and value are undefined', async () => {
const data = await csvModule.get();
expect(data).toHaveLength(csvModule.data.length);
expect(data).toEqual(csvModule.data);
});
test('Returns multiple rows', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2');
expect(data).toHaveLength(2);
});

test('Returns data with recordedDate after specified from date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
expect(data).toHaveLength(1);
});
test('Returns all rows when both key and value are undefined', async () => {
const data = await csvModule.get();
expect(data).toHaveLength(csvModule.data.length);
expect(data).toEqual(csvModule.data);
});

test('Returns data with recordedDate before specified to date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
expect(data).toHaveLength(1);
});
test('Returns data with recordedDate after specified from date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
expect(data).toHaveLength(1);
});

test('Should return an empty array when key-value pair does not exist', async () => {
const data = await csvModule.get('mrn', INVALID_MRN);
expect(data).toEqual([]);
});
test('Returns data with recordedDate before specified to date', async () => {
const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
expect(data).toHaveLength(1);
});

test('Should return an empty array when key-value pair does not exist', async () => {
const data = await csvModule.get('mrn', INVALID_MRN);
expect(data).toEqual([]);
});

test('Should return proper value regardless of key casing', async () => {
const data = await csvModule.get('mRN', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
});
});

describe('normalizeEmptyValues', () => {
it('Should turn "null" values into empty strings, regardless of case', () => {
const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }];
const normalizedData = normalizeEmptyValues(data);
normalizedData.forEach((d) => {
expect(d.key).toBe('');
});
});

it('Should turn "nil" values into empty strings, regardless of case', () => {
const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
const normalizedData = normalizeEmptyValues(data);
normalizedData.forEach((d) => {
expect(d.key).toBe('');
});
});

it('Should not modify unalterableColumns, regardless of their value', () => {
const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
const normalizedData = normalizeEmptyValues(data, ['key']);
normalizedData.forEach((d) => {
expect(d.key).not.toBe('');
});
});

test('Should return proper value regardless of key casing', async () => {
const data = await csvModule.get('mRN', 'example-mrn-1');
expect(data).toEqual(exampleResponse);
it('Should leave all other values uneffected, regardless of case', () => {
const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }];
const normalizedData = normalizeEmptyValues(data);
normalizedData.forEach((d) => {
expect(d.key).not.toBe('');
});
});
});
});

0 comments on commit 0aedf4b

Please sign in to comment.