diff --git a/documentation/docs/parsing/examples.mdx b/documentation/docs/parsing/examples.mdx index 68025e92..3ad968c2 100644 --- a/documentation/docs/parsing/examples.mdx +++ b/documentation/docs/parsing/examples.mdx @@ -64,7 +64,7 @@ Parsed 1 rows ## Alternate Delimiter -You can provide a `delimiter` option to change the delimiter from a `,` character. +You can provide a `delimiter` option to change the delimiter from a `,` character. If you specify an array, it will treat all of the characters as delimiters. { it('should escape a custom delimiter', () => { expect(createOptions({ delimiter: '\\' }).delimiter).toBe('\\'); - expect(createOptions({ delimiter: '\\' }).escapedDelimiter).toBe('\\\\'); + expect(createOptions({ delimiter: '\\' }).escapedDelimiter).toEqual(['\\\\']); }); }); diff --git a/packages/parse/__tests__/parser/Parser.spec.ts b/packages/parse/__tests__/parser/Parser.spec.ts index e3d5be8d..17df09c7 100644 --- a/packages/parse/__tests__/parser/Parser.spec.ts +++ b/packages/parse/__tests__/parser/Parser.spec.ts @@ -19,6 +19,15 @@ describe('Parser', () => { }); }); + it('should should support multiple potential field delimeters', () => { + const data = 'first_name,last_name|email_address\tphone'; + const myParser = createParser({ delimiter: [',', '|', '\t'] }); + expect(parse(data, false, myParser)).toEqual({ + line: '', + rows: [['first_name', 'last_name', 'email_address', 'phone']], + }); + }); + describe('with \\n', () => { describe('unescaped data', () => { it('should parse a block of CSV text', () => { diff --git a/packages/parse/src/ParserOptions.ts b/packages/parse/src/ParserOptions.ts index b50e1649..dcdb39ff 100644 --- a/packages/parse/src/ParserOptions.ts +++ b/packages/parse/src/ParserOptions.ts @@ -4,7 +4,7 @@ import { HeaderArray, HeaderTransformFunction } from './types'; export interface ParserOptionsArgs { objectMode?: boolean; - delimiter?: string; + delimiter?: string | string[]; quote?: string | null; escape?: string; headers?: boolean | HeaderTransformFunction | HeaderArray; @@ -23,7 +23,7 @@ export interface ParserOptionsArgs { } export class ParserOptions { - public readonly escapedDelimiter: string; + public readonly escapedDelimiter: string[]; public readonly objectMode: boolean = true; @@ -71,13 +71,20 @@ export class ParserOptions { public constructor(opts?: ParserOptionsArgs) { Object.assign(this, opts || {}); - if (this.delimiter.length > 1) { + const delimiters = Array.isArray(this.delimiter) ? this.delimiter : [this.delimiter]; + if ( + delimiters.some((d: string) => { + return d.length > 1; + }) + ) { throw new Error('delimiter option must be one character long'); } - this.escapedDelimiter = escapeRegExp(this.delimiter); + this.escapedDelimiter = delimiters.map((d) => { + return escapeRegExp(d); + }); this.escapeChar = this.escape ?? this.quote; this.supportsComments = !isNil(this.comment); - this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter})`); + this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter.join('|')})`); if (this.maxRows > 0) { this.limitRows = true; diff --git a/packages/parse/src/parser/Token.ts b/packages/parse/src/parser/Token.ts index 3ddfdb27..c5a3ed0a 100644 --- a/packages/parse/src/parser/Token.ts +++ b/packages/parse/src/parser/Token.ts @@ -31,7 +31,8 @@ export class Token { } public static isTokenDelimiter(token: Token, parserOptions: ParserOptions): boolean { - return token.token === parserOptions.delimiter; + const delimiter = Array.isArray(parserOptions.delimiter) ? parserOptions.delimiter : [parserOptions.delimiter]; + return delimiter.includes(token.token); } public readonly token: string; diff --git a/packages/parse/src/parser/column/QuotedColumnParser.ts b/packages/parse/src/parser/column/QuotedColumnParser.ts index 23c3a6e6..6c6158cb 100644 --- a/packages/parse/src/parser/column/QuotedColumnParser.ts +++ b/packages/parse/src/parser/column/QuotedColumnParser.ts @@ -98,7 +98,7 @@ export class QuotedColumnParser { // tldr: only part of the column was quoted const linePreview = scanner.lineFromCursor.substr(0, 10).replace(/[\r\n]/g, "\\n'"); throw new Error( - `Parse Error: expected: '${parserOptions.escapedDelimiter}' OR new line got: '${nextNonSpaceToken.token}'. at '${linePreview}'`, + `Parse Error: expected: '${parserOptions.escapedDelimiter.join(',')}' OR new line got: '${nextNonSpaceToken.token}'. at '${linePreview}'`, ); } scanner.advanceToToken(nextNonSpaceToken);