Skip to content

Commit

Permalink
Merge pull request #647 from jvalue/641-leading-whitespace
Browse files Browse the repository at this point in the history
[BUG] Enable `TableInterpreter` to trim leading and trailing whitespace
  • Loading branch information
TungstnBallon authored Feb 5, 2025
2 parents 1f4dc40 + ffc7d73 commit 233bafe
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,27 @@ import {
ValueTypeVisitor,
} from '@jvalue/jayvee-language-server';

export interface ParseOpts {
skipLeadingWhitespace: boolean;
skipTrailingWhitespace: boolean;
}

const DEFAULT_PARSE_OPTS: ParseOpts = {
skipLeadingWhitespace: true,
skipTrailingWhitespace: true,
};

export function parseValueToInternalRepresentation<
I extends InternalValueRepresentation,
>(value: string, valueType: ValueType<I>): I | undefined {
const visitor = new InternalRepresentationParserVisitor(value);
>(
value: string,
valueType: ValueType<I>,
parseOpts?: Partial<ParseOpts>,
): I | undefined {
const visitor = new InternalRepresentationParserVisitor(value, {
...DEFAULT_PARSE_OPTS,
...parseOpts,
});
const result = valueType.acceptVisitor(visitor);
if (!valueType.isInternalValueRepresentation(result)) {
return undefined;
Expand All @@ -30,20 +47,33 @@ export function parseValueToInternalRepresentation<
class InternalRepresentationParserVisitor extends ValueTypeVisitor<
InternalValueRepresentation | undefined
> {
constructor(private value: string) {
constructor(private value: string, private parseOpts: ParseOpts) {
super();
}

private applyTrimOptions(value: string): string {
// BUG: https://github.com/jvalue/jayvee/issues/646
if (typeof this.value === 'string') {
if (this.parseOpts.skipLeadingWhitespace) {
value = value.trimStart();
}
if (this.parseOpts.skipTrailingWhitespace) {
value = value.trimEnd();
}
}
return value;
}

visitBoolean(vt: BooleanValuetype): boolean | undefined {
return vt.fromString(this.value);
return vt.fromString(this.applyTrimOptions(this.value));
}

visitDecimal(vt: DecimalValuetype): number | undefined {
return vt.fromString(this.value);
return vt.fromString(this.applyTrimOptions(this.value));
}

visitInteger(vt: IntegerValuetype): number | undefined {
return vt.fromString(this.value);
return vt.fromString(this.applyTrimOptions(this.value));
}

visitText(vt: TextValuetype): string {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,5 +299,57 @@ describe('Validation of TableInterpreterExecutor', () => {
expect(result.right.getNumberOfRows()).toEqual(0);
}
});

it('should skip leading and trailing whitespace on numeric columns but not text columns', async () => {
const text = readJvTestAsset('valid-without-header.jv');

const testWorkbook = await readTestWorkbook('test-with-whitespace.xlsx');
const result = await parseAndExecuteExecutor(
text,
testWorkbook.getSheetByName('Sheet1') as R.Sheet,
);

expect(R.isErr(result)).toEqual(false);
assert(R.isOk(result));

expect(result.right.ioType).toEqual(IOType.TABLE);
expect(result.right.getNumberOfColumns()).toEqual(3);
expect(result.right.getNumberOfRows()).toEqual(3);

expect([...result.right.getColumns().keys()]).toStrictEqual([
'index',
'name',
'flag',
]);

const row = result.right.getRow(0);
const index = row.get('index');
expect(index).toBe(0);
const name = row.get('name');
expect(name).toBe(' text with leading whitespace');

for (let rowIdx = 1; rowIdx < result.right.getNumberOfRows(); rowIdx++) {
const row = result.right.getRow(rowIdx);
const index = row.get('index');
expect(index).toBe(rowIdx);
}
});

it('should not skip leading or trailing whitespace if the relevant block properties are false', async () => {
const text = readJvTestAsset('valid-without-header-without-trim.jv');

const testWorkbook = await readTestWorkbook('test-with-whitespace.xlsx');
const result = await parseAndExecuteExecutor(
text,
testWorkbook.getSheetByName('Sheet1') as R.Sheet,
);

expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TABLE);
expect(result.right.getNumberOfColumns()).toEqual(3);
expect(result.right.getNumberOfRows()).toEqual(0);
}
});
});
});
31 changes: 29 additions & 2 deletions libs/extensions/tabular/exec/src/lib/table-interpreter-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
context.valueTypeProvider.Primitives.ValuetypeAssignment,
),
);
const skipLeadingWhitespace = context.getPropertyValue(
'skipLeadingWhitespace',
context.valueTypeProvider.Primitives.Boolean,
);
const skipTrailingWhitespace = context.getPropertyValue(
'skipTrailingWhitespace',
context.valueTypeProvider.Primitives.Boolean,
);

let columnEntries: ColumnDefinitionEntry[];

Expand Down Expand Up @@ -107,6 +115,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
inputSheet,
header,
columnEntries,
skipLeadingWhitespace,
skipTrailingWhitespace,
context,
);
context.logger.logDebug(
Expand All @@ -119,6 +129,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
sheet: Sheet,
header: boolean,
columnEntries: ColumnDefinitionEntry[],
skipLeadingWhitespace: boolean,
skipTrailingWhitespace: boolean,
context: ExecutionContext,
): Table {
const table = new Table();
Expand All @@ -141,6 +153,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
sheetRow,
sheetRowIndex,
columnEntries,
skipLeadingWhitespace,
skipTrailingWhitespace,
context,
);
if (tableRow === undefined) {
Expand All @@ -158,6 +172,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
sheetRow: string[],
sheetRowIndex: number,
columnEntries: ColumnDefinitionEntry[],
skipLeadingWhitespace: boolean,
skipTrailingWhitespace: boolean,
context: ExecutionContext,
): R.TableRow | undefined {
let invalidRow = false;
Expand All @@ -168,7 +184,13 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
const value = sheetRow[sheetColumnIndex]!;
const valueType = columnEntry.valueType;

const parsedValue = this.parseAndValidateValue(value, valueType, context);
const parsedValue = this.parseAndValidateValue(
value,
valueType,
skipLeadingWhitespace,
skipTrailingWhitespace,
context,
);
if (parsedValue === undefined) {
const currentCellIndex = new CellIndex(sheetColumnIndex, sheetRowIndex);
context.logger.logDebug(
Expand All @@ -192,9 +214,14 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
private parseAndValidateValue(
value: string,
valueType: ValueType,
skipLeadingWhitespace: boolean,
skipTrailingWhitespace: boolean,
context: ExecutionContext,
): InternalValueRepresentation | undefined {
const parsedValue = parseValueToInternalRepresentation(value, valueType);
const parsedValue = parseValueToInternalRepresentation(value, valueType, {
skipLeadingWhitespace,
skipTrailingWhitespace,
});
if (parsedValue === undefined) {
return undefined;
}
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg

SPDX-License-Identifier: AGPL-3.0-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

pipeline TestPipeline {

block TestExtractor oftype TestSheetExtractor { }

block TestBlock oftype TableInterpreter {
header: false;
columns: [
"index" oftype integer,
"name" oftype text,
"flag" oftype boolean
];
skipLeadingWhitespace: false;
skipTrailingWhitespace: false;
}

block TestLoader oftype TestTableLoader { }

TestExtractor
-> TestBlock
-> TestLoader;
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

/**
* Interprets a `Sheet` as a `Table`. In case a header row is present in the sheet, its names can be matched with the provided column names. Otherwise, the provided column names are assigned in order.
*
*
* @example Interprets a `Sheet` about cars with a topmost header row and interprets it as a `Table` by assigning a primitive value type to each column. The column names are matched to the header, so the order of the type assignments does not matter.
* block CarsTableInterpreter oftype TableInterpreter {
* header: true;
Expand All @@ -14,7 +14,7 @@
* "cyl" oftype integer,
* ];
* }
*
*
* @example Interprets a `Sheet` about cars without a topmost header row and interprets it as a `Table` by sequentially assigning a name and a primitive value type to each column of the sheet. Note that the order of columns matters here. The first column (column `A`) will be named "name", the second column (column `B`) will be named "mpg" etc.
* block CarsTableInterpreter oftype TableInterpreter {
* header: false;
Expand All @@ -28,14 +28,24 @@
publish builtin blocktype TableInterpreter {
input default oftype Sheet;
output default oftype Table;

/**
* Whether the first row should be interpreted as header row.

/**
* Whether the first row should be interpreted as header row.
*/
property header oftype boolean: true;

/**
* Collection of value type assignments. Uses column names (potentially matched with the header or by sequence depending on the `header` property) to assign a primitive value type to each column.
*/
property header oftype boolean: true;

/**
* Collection of value type assignments. Uses column names (potentially matched with the header or by sequence depending on the `header` property) to assign a primitive value type to each column.
property columns oftype Collection<ValuetypeAssignment>;

/**
* Whether to ignore whitespace before values. Does not apply to `text` cells
*/
property skipLeadingWhitespace oftype boolean: true;

/**
* Whether to ignore whitespace after values. Does not apply to `text` cells
*/
property columns oftype Collection<ValuetypeAssignment>;
}
property skipTrailingWhitespace oftype boolean: true;
}

0 comments on commit 233bafe

Please sign in to comment.