diff --git a/History.md b/History.md index b3c68ddf..3453ce7a 100644 --- a/History.md +++ b/History.md @@ -1,6 +1,10 @@ +# v0.4.4 + +* Added support for comments. [#56](https://github.com/C2FO/fast-csv/issues/56) + # v0.4.3 -* Added ability to include a `rowDelimiter` at the end of a csv with the `includeEndRowDelimiter` optioin [#54](https://github.com/C2FO/fast-csv/issues/54) +* Added ability to include a `rowDelimiter` at the end of a csv with the `includeEndRowDelimiter` option [#54](https://github.com/C2FO/fast-csv/issues/54) * Added escaping for values that include a row delimiter * Added more tests for new feature and escaping row delimiter values. diff --git a/README.md b/README.md index 19e52b47..6e473928 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ All methods accept the following `options` * `trim=false`: If you want to trim all values parsed set to true. * `rtrim=false`: If you want to right trim all values parsed set to true. * `ltrim=false`: If you want to left trim all values parsed set to true. + * `comment=null`: If your CSV contains comments you can use this option to ignore lines that begin with the specified character (e.g. `#`). **events** diff --git a/docs/History.html b/docs/History.html index 229e0017..7ad6b955 100644 --- a/docs/History.html +++ b/docs/History.html @@ -176,9 +176,13 @@ +

v0.4.4

+

v0.4.3

diff --git a/docs/index.html b/docs/index.html index e866f551..0e46c998 100644 --- a/docs/index.html +++ b/docs/index.html @@ -204,6 +204,7 @@

Parsing

  • trim=false: If you want to trim all values parsed set to true.
  • rtrim=false: If you want to right trim all values parsed set to true.
  • ltrim=false: If you want to left trim all values parsed set to true.
  • +
  • comment=null: If your CSV contains comments you can use this option to ignore lines that begin with the specified character (e.g. #).
  • diff --git a/lib/parser.js b/lib/parser.js index 735cbcbf..203f4ec2 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,4 +1,5 @@ var extended = require("./extended"), + has = extended.has, isUndefinedOrNull = extended.isUndefinedOrNull, trim = extended.trim, trimLeft = extended.trimLeft, @@ -15,7 +16,12 @@ function createParser(options) { SEARCH_REGEXP = new RegExp("(?:\\n|\\r|" + delimiter + ")"), ESCAPE_CHAR = options.escape || '"', NEXT_TOKEN_REGEXP = new RegExp("([^\\s]|\\r\\n|\\n|\\r|" + delimiter + ")"), - LINE_BREAK = /(\r\n|\n|\r)/; + ROW_DELIMITER = /(\r\n|\n|\r)/, + COMMENT, hasComments; + if (has(options, "comment")) { + COMMENT = options.comment; + hasComments = true; + } function formatItem(item) { if (doTrim) { @@ -71,7 +77,7 @@ function createParser(options) { } } else if ((!depth && nextToken && nextToken.search(SEARCH_REGEXP) === -1)) { throw new Error("Parse Error: expected: '" + ESCAPE + "' got: '" + nextToken + "'. at '" + str.substr(cursor, 10).replace(/[\r\n]/g, "\\n" + "'")); - } else if (hasMoreData && (!nextToken || !LINE_BREAK.test(nextToken))) { + } else if (hasMoreData && (!nextToken || !ROW_DELIMITER.test(nextToken))) { cursor = null; } if (cursor !== null) { @@ -80,6 +86,20 @@ function createParser(options) { return cursor; } + function parseCommentLine(line, cursor, hasMoreData) { + var nextIndex = line.substr(cursor).search(ROW_DELIMITER); + if (nextIndex === -1) { + if (hasMoreData) { + nextIndex = null; + } else { + nextIndex = line.length + 1; + } + } else { + nextIndex = (cursor + nextIndex) + 1; //go past the next line break + } + return nextIndex; + } + function parseItem(line, items, cursor, hasMoreData) { var searchStr = line.substr(cursor), nextIndex = searchStr.search(SEARCH_REGEXP); @@ -98,7 +118,7 @@ function createParser(options) { items.push(formatItem(searchStr.substr(0, nextIndex))); cursor += nextIndex + 1; } - } else if (LINE_BREAK.test(nextChar)) { + } else if (ROW_DELIMITER.test(nextChar)) { 
items.push(formatItem(searchStr.substr(0, nextIndex))); cursor += nextIndex; } else if (!hasMoreData) { @@ -128,7 +148,7 @@ function createParser(options) { if (isUndefinedOrNull(token)) { i = lastLineI; break; - } else if (LINE_BREAK.test(token)) { + } else if (ROW_DELIMITER.test(token)) { i = nextToken.cursor + 1; if (i < l) { rows.push(items); @@ -137,6 +157,18 @@ function createParser(options) { } else { break; } + } else if (hasComments && token === COMMENT) { + cursor = parseCommentLine(line, i, hasMoreData); + if (cursor === null) { + i = lastLineI; + break; + } else if (cursor < l) { + lastLineI = i = cursor; + } else { + i = cursor; + cursor = null; + break; + } } else { if (token === ESCAPE) { cursor = parseEscapedItem(line, items, nextToken.cursor, hasMoreData); diff --git a/package.json b/package.json index eff26c28..bba9b2ef 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fast-csv", - "version": "0.4.3", + "version": "0.4.4", "description": "CSV parser and writer", "main": "index.js", "scripts": { diff --git a/test/assets/test24.csv b/test/assets/test24.csv new file mode 100644 index 00000000..12a60c30 --- /dev/null +++ b/test/assets/test24.csv @@ -0,0 +1,23 @@ +#This is a test CSV +#It contains a bunch of comments!!!! +#The fist row contains headers. 
+first_name,last_name,email_address,address +#Line 1 +First1,Last1,email1@email.com,"1 Street St, State ST, 88888" +#Line 2 +First2,Last2,email2@email.com,"2 Street St, State ST, 88888" +#Line 3 +First3,Last3,email3@email.com,"3 Street St, State ST, 88888" +#Line 4 +First4,Last4,email4@email.com,"4 Street St, State ST, 88888" +#Line 5 +First5,Last5,email5@email.com,"5 Street St, State ST, 88888" +#Line 6 +First6,Last6,email6@email.com,"6 Street St, State ST, 88888" +#Line 7 +First7,Last7,email7@email.com,"7 Street St, State ST, 88888" +#Line 8 +First8,Last8,email8@email.com,"8 Street St, State ST, 88888" +#Line 9 +First9,Last9,email9@email.com,"9 Street St, State ST, 88888" +#End of CSV \ No newline at end of file diff --git a/test/fast-csv.test.js b/test/fast-csv.test.js index 8436e6f4..34f97c24 100644 --- a/test/fast-csv.test.js +++ b/test/fast-csv.test.js @@ -825,6 +825,20 @@ it.describe("fast-csv", function (it) { }); }); + it.should("handle CSVs with comments", function (next) { + var actual = []; + csv + .fromPath(path.resolve(__dirname, "./assets/test24.csv"), {headers: true, comment: "#"}) + .on("record", function (data, index) { + actual[index] = data; + }). 
+ on("end", function (count) { + assert.deepEqual(actual, expected1); + assert.equal(count, actual.length); + next(); + }); + }); + it.describe("pause/resume", function () { it.should("support pausing a stream", function (next) { diff --git a/test/parser.test.js b/test/parser.test.js index f97fc409..802af41a 100644 --- a/test/parser.test.js +++ b/test/parser.test.js @@ -11,10 +11,12 @@ it.describe("fast-csv parser", function (it) { it.should("parse a block of CSV text", function () { var data = "first_name,last_name,email_address\nFirst1,Last1,email1@email.com"; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First1", "Last1", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First1", "Last1", "email1@email.com"] + ] + }); }); it.should("return the rest of the line if there is more data", function () { @@ -38,10 +40,12 @@ it.describe("fast-csv parser", function (it) { ["first_name", "last_name", "email_address"] ] }); - assert.deepEqual(myParser(parsedData.line + "\nFirst2,Last2,email2@email.com", false), {"line": "", "rows": [ - ["First1", "Last1", "email1@email.com"], - ["First2", "Last2", "email2@email.com"] - ]}); + assert.deepEqual(myParser(parsedData.line + "\nFirst2,Last2,email2@email.com", false), { + "line": "", "rows": [ + ["First1", "Last1", "email1@email.com"], + ["First2", "Last2", "email2@email.com"] + ] + }); }); it.should("not parse a row if a new line is not found and there is more data", function () { @@ -83,28 +87,34 @@ it.describe("fast-csv parser", function (it) { it.should("parse a block of CSV text", function () { var data = 'first_name,last_name,email_address\n"First,1","Last,1","email1@email.com"'; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", 
"email_address"], - ["First,1", "Last,1", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,1", "Last,1", "email1@email.com"] + ] + }); }); it.should("parse a block of CSV text with escaped escaped char", function () { var data = 'first_name,last_name,email_address\n"First,""1""","Last,""1""","email1@email.com"'; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,\"1\"", "Last,\"1\"", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ] + }); }); it.should("parse a block of CSV text with alternate escape char", function () { var data = 'first_name,last_name,email_address\n"First,\\"1\\"","Last,\\"1\\"","email1@email.com"'; var myParser = parser({delimiter: ",", escape: "\\"}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,\"1\"", "Last,\"1\"", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ] + }); }); it.should("return the rest of the line if a complete value is not found", function () { @@ -162,10 +172,12 @@ it.describe("fast-csv parser", function (it) { var data = '"","",""\n,Last4,email4@email.com'; var myParser = parser({delimiter: ","}), parsedData = myParser(data, false); - assert.deepEqual(parsedData, {"line": "", "rows": [ - ["", "", ""], - ["", "Last4", "email4@email.com"] - ]}); + assert.deepEqual(parsedData, { + "line": "", "rows": [ + ["", "", ""], + ["", "Last4", "email4@email.com"] + ] + }); }); it.should("not parse a row if a new line is not found and there is more data", 
function () { @@ -209,10 +221,12 @@ it.describe("fast-csv parser", function (it) { it.should("parse a block of CSV text", function () { var data = "first_name,last_name,email_address\rFirst1,Last1,email1@email.com"; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First1", "Last1", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First1", "Last1", "email1@email.com"] + ] + }); }); it.should("return the rest of the line if there is more data", function () { @@ -236,10 +250,12 @@ it.describe("fast-csv parser", function (it) { ["first_name", "last_name", "email_address"] ] }); - assert.deepEqual(myParser(parsedData.line + "\rFirst2,Last2,email2@email.com", false), {"line": "", "rows": [ - ["First1", "Last1", "email1@email.com"], - ["First2", "Last2", "email2@email.com"] - ]}); + assert.deepEqual(myParser(parsedData.line + "\rFirst2,Last2,email2@email.com", false), { + "line": "", "rows": [ + ["First1", "Last1", "email1@email.com"], + ["First2", "Last2", "email2@email.com"] + ] + }); }); it.should("not parse a row if a new line is not found and there is more data", function () { @@ -281,28 +297,34 @@ it.describe("fast-csv parser", function (it) { it.should("parse a block of CSV text", function () { var data = 'first_name,last_name,email_address\r"First,1","Last,1","email1@email.com"'; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,1", "Last,1", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,1", "Last,1", "email1@email.com"] + ] + }); }); it.should("parse a block of CSV text with escaped escaped char", function () { var data = 
'first_name,last_name,email_address\r"First,""1""","Last,""1""","email1@email.com"'; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,\"1\"", "Last,\"1\"", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ] + }); }); it.should("parse a block of CSV text with alternate escape char", function () { var data = 'first_name,last_name,email_address\r"First,\\"1\\"","Last,\\"1\\"","email1@email.com"'; var myParser = parser({delimiter: ",", escape: "\\"}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,\"1\"", "Last,\"1\"", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ] + }); }); it.should("return the rest of the line if a complete value is not found", function () { @@ -360,10 +382,12 @@ it.describe("fast-csv parser", function (it) { var data = '"","",""\r,Last4,email4@email.com'; var myParser = parser({delimiter: ","}), parsedData = myParser(data, false); - assert.deepEqual(parsedData, {"line": "", "rows": [ - ["", "", ""], - ["", "Last4", "email4@email.com"] - ]}); + assert.deepEqual(parsedData, { + "line": "", "rows": [ + ["", "", ""], + ["", "Last4", "email4@email.com"] + ] + }); }); it.should("not parse a row if a new line is not found and there is more data", function () { @@ -408,10 +432,12 @@ it.describe("fast-csv parser", function (it) { it.should("parse a block of CSV text", function () { var data = "first_name,last_name,email_address\r\nFirst1,Last1,email1@email.com"; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - 
["first_name", "last_name", "email_address"], - ["First1", "Last1", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First1", "Last1", "email1@email.com"] + ] + }); }); it.should("return the rest of the line if there is more data", function () { @@ -435,10 +461,12 @@ it.describe("fast-csv parser", function (it) { ["first_name", "last_name", "email_address"] ] }); - assert.deepEqual(myParser(parsedData.line + "\r\nFirst2,Last2,email2@email.com", false), {"line": "", "rows": [ - ["First1", "Last1", "email1@email.com"], - ["First2", "Last2", "email2@email.com"] - ]}); + assert.deepEqual(myParser(parsedData.line + "\r\nFirst2,Last2,email2@email.com", false), { + "line": "", "rows": [ + ["First1", "Last1", "email1@email.com"], + ["First2", "Last2", "email2@email.com"] + ] + }); }); it.should("not parse a row if a new line is not found and there is more data", function () { @@ -480,28 +508,34 @@ it.describe("fast-csv parser", function (it) { it.should("parse a block of CSV text", function () { var data = 'first_name,last_name,email_address\r\n"First,1","Last,1","email1@email.com"'; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,1", "Last,1", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,1", "Last,1", "email1@email.com"] + ] + }); }); it.should("parse a block of CSV text with escaped escaped char", function () { var data = 'first_name,last_name,email_address\r\n"First,""1""","Last,""1""","email1@email.com"'; var myParser = parser({delimiter: ","}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,\"1\"", "Last,\"1\"", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, 
false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ] + }); }); it.should("parse a block of CSV text with alternate escape char", function () { var data = 'first_name,last_name,email_address\r\n"First,\\"1\\"","Last,\\"1\\"","email1@email.com"'; var myParser = parser({delimiter: ",", escape: "\\"}); - assert.deepEqual(myParser(data, false), {"line": "", "rows": [ - ["first_name", "last_name", "email_address"], - ["First,\"1\"", "Last,\"1\"", "email1@email.com"] - ]}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First,\"1\"", "Last,\"1\"", "email1@email.com"] + ] + }); }); it.should("return the rest of the line if a complete value is not found", function () { @@ -559,10 +593,12 @@ it.describe("fast-csv parser", function (it) { var data = '"","",""\r\n,Last4,email4@email.com'; var myParser = parser({delimiter: ","}), parsedData = myParser(data, false); - assert.deepEqual(parsedData, {"line": "", "rows": [ - ["", "", ""], - ["", "Last4", "email4@email.com"] - ]}); + assert.deepEqual(parsedData, { + "line": "", "rows": [ + ["", "", ""], + ["", "Last4", "email4@email.com"] + ] + }); }); it.should("not parse a row if a new line is not found and there is more data", function () { @@ -600,4 +636,96 @@ it.describe("fast-csv parser", function (it) { }); + it.describe("with comments", function (it) { + it.should("parse a block of CSV text", function () { + var data = "first_name,last_name,email_address\n#The first row of data\nFirst1,Last1,email1@email.com"; + var myParser = parser({delimiter: ",", comment: "#"}); + assert.deepEqual(myParser(data, false), { + "line": "", "rows": [ + ["first_name", "last_name", "email_address"], + ["First1", "Last1", "email1@email.com"] + ] + }); + }); + + it.should("return the rest of the line if there is more data", function () { + var data = 
"first_name,last_name,email_address\n#First1,Last1,email1@email.com"; + var myParser = parser({delimiter: ",", comment: "#"}); + assert.deepEqual(myParser(data, true), { + "line": "#First1,Last1,email1@email.com", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + }); + + it.should("accept new data and return the result", function () { + var data = "first_name,last_name,email_address\n#This is a comment"; + var myParser = parser({delimiter: ",", comment: "#"}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "#This is a comment", + "rows": [ + ["first_name", "last_name", "email_address"] + ] + }); + assert.deepEqual(myParser(parsedData.line + "\nFirst1,Last1,email1@email.com\nFirst2,Last2,email2@email.com", false), { + "line": "", "rows": [ + ["First1", "Last1", "email1@email.com"], + ["First2", "Last2", "email2@email.com"] + ] + }); + }); + + it.should("not parse a row if a new line is not found and there is more data", function () { + var data = "#first_name,last_name,email_address"; + var myParser = parser({delimiter: ",", comment: "#"}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "#first_name,last_name,email_address", + "rows": [] + }); + }); + + it.should("not parse data as a comment if it is contained in a line", function () { + var data = "f#irst_name,last_name,email_address"; + var myParser = parser({delimiter: ",", comment: "#"}); + var parsedData = myParser(data, false); + assert.deepEqual(parsedData, { + "line": "", + "rows": [["f#irst_name", "last_name", "email_address"]] + }); + }); + + it.should("not parse data as a comment if it at the beginning but escaped", function () { + var data = '"#first_name",last_name,email_address'; + var myParser = parser({delimiter: ",", comment: "#"}); + var parsedData = myParser(data, false); + assert.deepEqual(parsedData, { + "line": "", + "rows": [["#first_name", "last_name", "email_address"]] + }); + }); + + 
it.should("return empty rows if it is all comments as there is no more data and there is not a final row delimiter", function () { + var data = '#Comment1\n#Comment2'; + var myParser = parser({delimiter: ",", comment: "#"}); + var parsedData = myParser(data, false); + assert.deepEqual(parsedData, { + "line": "", + "rows": [] + }); + }); + + it.should("return empty rows if it is all comments as there is no more data and there is a final row delimiter", function () { + var data = '#Comment1\n#Comment2\n'; + var myParser = parser({delimiter: ",", comment: "#"}); + var parsedData = myParser(data, false); + assert.deepEqual(parsedData, { + "line": "", + "rows": [] + }); + }); + }); + }); \ No newline at end of file