From cbd767b5c226057f02b29bb162120ad097651cab Mon Sep 17 00:00:00 2001 From: Naraen Date: Wed, 5 Apr 2023 21:33:03 -0700 Subject: [PATCH 1/2] Concatenate partial token from subsequent chunk before passing to lexer --- lib/stream.js | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/stream.js b/lib/stream.js index ced9232c..0ec4ad1f 100644 --- a/lib/stream.js +++ b/lib/stream.js @@ -10,8 +10,22 @@ function StreamWrapper(parser) { util.inherits(StreamWrapper, Writable); + +var tailPrevChunk=''; StreamWrapper.prototype._write = function write(chunk, encoding, callback) { - this._parser.feed(chunk.toString()); + //https://github.com/kach/nearley/issues/632 + //Token might be span multiple chunks. + var chunkString = chunk.toString(); + var idxLastNewline = chunkString.lastIndexOf('\n'); + if (idxLastNewline === -1 ){ + tailPrevChunk += chunkString; + return; + } + + var alteredChunk = tailPrevChunk + chunkString.substring(0,idxLastNewline); + tailPrevChunk= chunkString.substring(idxLastNewline); + + this._parser.feed(alteredChunk); callback(); }; From c6ce94ca855e7768b8d2565b78c27e586058c930 Mon Sep 17 00:00:00 2001 From: Naraen Date: Thu, 6 Apr 2023 00:21:50 -0700 Subject: [PATCH 2/2] Flush buffer when EOF not preceded by line break --- lib/stream.js | 16 +++++++++++----- test/nearleyc.test.js | 6 ++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/stream.js b/lib/stream.js index 0ec4ad1f..9c8070a8 100644 --- a/lib/stream.js +++ b/lib/stream.js @@ -6,27 +6,33 @@ var util = require('util'); function StreamWrapper(parser) { Writable.call(this); this._parser = parser; + this._buffer=''; } util.inherits(StreamWrapper, Writable); -var tailPrevChunk=''; StreamWrapper.prototype._write = function write(chunk, encoding, callback) { //https://github.com/kach/nearley/issues/632 //Token might be span multiple chunks. 
var chunkString = chunk.toString(); var idxLastNewline = chunkString.lastIndexOf('\n'); - if (idxLastNewline === -1 ){ - tailPrevChunk += chunkString; + if (idxLastNewline === -1 ) { + this._buffer += chunkString; return; } - var alteredChunk = tailPrevChunk + chunkString.substring(0,idxLastNewline); - tailPrevChunk= chunkString.substring(idxLastNewline); + var alteredChunk = this._buffer + chunkString.substring(0,idxLastNewline); + this._buffer= chunkString.substring(idxLastNewline); this._parser.feed(alteredChunk); callback(); }; +StreamWrapper.prototype._final = function final(callback) { + this._parser.feed(this._buffer); + this._buffer = ''; + callback(); +} + module.exports = StreamWrapper; diff --git a/test/nearleyc.test.js b/test/nearleyc.test.js index 2c304380..886c1df3 100644 --- a/test/nearleyc.test.js +++ b/test/nearleyc.test.js @@ -110,6 +110,12 @@ describe("bin/nearleyc", function() { expect(stderr).toBe(""); }); + it('correctly handles long tokens that span multiple chunks', function() { + const {outPath, stdout, stderr} = externalNearleyc("grammars/long-tokens-split-over-multiple-chunks.ne", '.js'); + expect(stderr).toBe(""); + expect(stdout).toBe(""); + const grammar = nearley.Grammar.fromCompiled(require(`./${outPath}.js`)); + }); })