Skip to content

Commit

Permalink
Add support for multiple delimiters
Browse files Browse the repository at this point in the history
  • Loading branch information
Will Munn authored and willm committed Dec 5, 2024
1 parent 33c90bf commit cd680f6
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 11 deletions.
2 changes: 1 addition & 1 deletion documentation/docs/parsing/examples.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Parsed 1 rows

## Alternate Delimiter

You can provide a `delimiter` option to change the delimiter from a `,` character.
You can provide a `delimiter` option to change the delimiter from the default `,` character. If you pass an array, every character in it is treated as a delimiter.

<Tabs
defaultValue="ts"
Expand Down
6 changes: 4 additions & 2 deletions documentation/docs/parsing/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ If set to `false` all data will be a JSON version of the row.
## delimiter
**Type**: `string` **Default**: `','`

The delimiter that will separate columns.
**Type**: `string|string[]` **Default**: `','`

The delimiter(s) that will separate columns.

Set this option if your file uses an alternate delimiter such as `;` or `\t`. [Example](./examples.mdx#alternate-delimiter)

:::note
When specifying an alternate delimiter you may only pass in a single character!
When specifying an alternate delimiter you may only pass in a single character or a list of single characters!
:::


Expand Down
2 changes: 1 addition & 1 deletion packages/parse/__tests__/ParserOptions.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ describe('ParserOptions', () => {

it('should escape a custom delimiter', () => {
expect(createOptions({ delimiter: '\\' }).delimiter).toBe('\\');
expect(createOptions({ delimiter: '\\' }).escapedDelimiter).toBe('\\\\');
expect(createOptions({ delimiter: '\\' }).escapedDelimiter).toEqual(['\\\\']);
});
});

Expand Down
9 changes: 9 additions & 0 deletions packages/parse/__tests__/parser/Parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ describe('Parser', () => {
});
});

// Each entry of a `delimiter` array must split fields, even when several
// different delimiters appear within the same line of input.
it('should support multiple potential field delimiters', () => {
    const data = 'first_name,last_name|email_address\tphone';
    const myParser = createParser({ delimiter: [',', '|', '\t'] });
    expect(parse(data, false, myParser)).toEqual({
        line: '',
        rows: [['first_name', 'last_name', 'email_address', 'phone']],
    });
});

describe('with \\n', () => {
describe('unescaped data', () => {
it('should parse a block of CSV text', () => {
Expand Down
17 changes: 12 additions & 5 deletions packages/parse/src/ParserOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { HeaderArray, HeaderTransformFunction } from './types';

export interface ParserOptionsArgs {
objectMode?: boolean;
delimiter?: string;
delimiter?: string | string[];
quote?: string | null;
escape?: string;
headers?: boolean | HeaderTransformFunction | HeaderArray;
Expand All @@ -23,7 +23,7 @@ export interface ParserOptionsArgs {
}

export class ParserOptions {
public readonly escapedDelimiter: string;
public readonly escapedDelimiter: string[];

public readonly objectMode: boolean = true;

Expand Down Expand Up @@ -71,13 +71,20 @@ export class ParserOptions {

public constructor(opts?: ParserOptionsArgs) {
Object.assign(this, opts || {});
if (this.delimiter.length > 1) {
const delimiters = Array.isArray(this.delimiter) ? this.delimiter : [this.delimiter];
if (
delimiters.some((d: string) => {
return d.length > 1;
})
) {
throw new Error('delimiter option must be one character long');
}
this.escapedDelimiter = escapeRegExp(this.delimiter);
this.escapedDelimiter = delimiters.map((d) => {
return escapeRegExp(d);
});
this.escapeChar = this.escape ?? this.quote;
this.supportsComments = !isNil(this.comment);
this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter})`);
this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter.join('|')})`);

if (this.maxRows > 0) {
this.limitRows = true;
Expand Down
3 changes: 2 additions & 1 deletion packages/parse/src/parser/Token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ export class Token {
}

public static isTokenDelimiter(token: Token, parserOptions: ParserOptions): boolean {
return token.token === parserOptions.delimiter;
const delimiter = Array.isArray(parserOptions.delimiter) ? parserOptions.delimiter : [parserOptions.delimiter];
return delimiter.includes(token.token);
}

public readonly token: string;
Expand Down
2 changes: 1 addition & 1 deletion packages/parse/src/parser/column/QuotedColumnParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ export class QuotedColumnParser {
// tldr: only part of the column was quoted
const linePreview = scanner.lineFromCursor.substr(0, 10).replace(/[\r\n]/g, "\\n'");
throw new Error(
`Parse Error: expected: '${parserOptions.escapedDelimiter}' OR new line got: '${nextNonSpaceToken.token}'. at '${linePreview}'`,
`Parse Error: expected: '${parserOptions.escapedDelimiter.join(',')}' OR new line got: '${nextNonSpaceToken.token}'. at '${linePreview}'`,
);
}
scanner.advanceToToken(nextNonSpaceToken);
Expand Down

0 comments on commit cd680f6

Please sign in to comment.