diff --git a/History.md b/History.md
index b3c68ddf..3453ce7a 100644
--- a/History.md
+++ b/History.md
@@ -1,6 +1,10 @@
+# v0.4.4
+
+* Added support for comments. [#56](https://github.com/C2FO/fast-csv/issues/56)
+
# v0.4.3
-* Added ability to include a `rowDelimiter` at the end of a csv with the `includeEndRowDelimiter` optioin [#54](https://github.com/C2FO/fast-csv/issues/54)
+* Added ability to include a `rowDelimiter` at the end of a csv with the `includeEndRowDelimiter` option [#54](https://github.com/C2FO/fast-csv/issues/54)
* Added escaping for values that include a row delimiter
* Added more tests for new feature and escaping row delimiter values.
diff --git a/README.md b/README.md
index 19e52b47..6e473928 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ All methods accept the following `options`
* `trim=false`: If you want to trim all values parsed set to true.
* `rtrim=false`: If you want to right trim all values parsed set to true.
* `ltrim=false`: If you want to left trim all values parsed set to true.
+ * `comment=null`: If your CSV contains comments you can use this option to ignore lines that begin with the specified character (e.g. `#`).
**events**
diff --git a/docs/History.html b/docs/History.html
index 229e0017..7ad6b955 100644
--- a/docs/History.html
+++ b/docs/History.html
@@ -176,9 +176,13 @@
+
0.4.4
+
+- Added support for comments. #56
+
v0.4.3
-- Added ability to include a
rowDelimiter
at the end of a csv with the includeEndRowDelimiter
optioin #54
+- Added ability to include a
rowDelimiter
at the end of a csv with the includeEndRowDelimiter
option #54
- Added escaping for values that include a row delimiter
- Added more tests for new feature and escaping row delimiter values.
diff --git a/docs/index.html b/docs/index.html
index e866f551..0e46c998 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -204,6 +204,7 @@ Parsing
trim=false
: If you want to trim all values parsed set to true.
rtrim=false
: If you want to right trim all values parsed set to true.
ltrim=false
: If you want to left trim all values parsed set to true.
+comment=null
: If your CSV contains comments you can use this option to ignore lines that begin with the specified character (e.g. #
).
diff --git a/lib/parser.js b/lib/parser.js
index 735cbcbf..203f4ec2 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -1,4 +1,5 @@
var extended = require("./extended"),
+ has = extended.has,
isUndefinedOrNull = extended.isUndefinedOrNull,
trim = extended.trim,
trimLeft = extended.trimLeft,
@@ -15,7 +16,12 @@ function createParser(options) {
SEARCH_REGEXP = new RegExp("(?:\\n|\\r|" + delimiter + ")"),
ESCAPE_CHAR = options.escape || '"',
NEXT_TOKEN_REGEXP = new RegExp("([^\\s]|\\r\\n|\\n|\\r|" + delimiter + ")"),
- LINE_BREAK = /(\r\n|\n|\r)/;
+ ROW_DELIMITER = /(\r\n|\n|\r)/,
+ COMMENT, hasComments;
+ if (has(options, "comment")) {
+ COMMENT = options.comment;
+ hasComments = true;
+ }
function formatItem(item) {
if (doTrim) {
@@ -71,7 +77,7 @@ function createParser(options) {
}
} else if ((!depth && nextToken && nextToken.search(SEARCH_REGEXP) === -1)) {
throw new Error("Parse Error: expected: '" + ESCAPE + "' got: '" + nextToken + "'. at '" + str.substr(cursor, 10).replace(/[\r\n]/g, "\\n" + "'"));
- } else if (hasMoreData && (!nextToken || !LINE_BREAK.test(nextToken))) {
+ } else if (hasMoreData && (!nextToken || !ROW_DELIMITER.test(nextToken))) {
cursor = null;
}
if (cursor !== null) {
@@ -80,6 +86,20 @@ function createParser(options) {
return cursor;
}
+ function parseCommentLine(line, cursor, hasMoreData) { //returns the index at which to resume parsing after the comment starting at cursor, or null if the comment is incomplete
+ var nextIndex = line.substr(cursor).search(ROW_DELIMITER); //offset (relative to cursor) of the first row delimiter, or -1 if none
+ if (nextIndex === -1) {
+ if (hasMoreData) { //no row delimiter found, but more input may still arrive
+ nextIndex = null; //tell the caller the comment line is not complete yet
+ } else {
+ nextIndex = line.length + 1; //comment runs to the end of the input; return an index past the end of line
+ }
+ } else {
+ nextIndex = (cursor + nextIndex) + 1; //absolute index one character past the start of the row delimiter. NOTE(review): for "\r\n" this lands on the "\n" - confirm the caller copes
+ }
+ return nextIndex;
+ }
+
function parseItem(line, items, cursor, hasMoreData) {
var searchStr = line.substr(cursor),
nextIndex = searchStr.search(SEARCH_REGEXP);
@@ -98,7 +118,7 @@ function createParser(options) {
items.push(formatItem(searchStr.substr(0, nextIndex)));
cursor += nextIndex + 1;
}
- } else if (LINE_BREAK.test(nextChar)) {
+ } else if (ROW_DELIMITER.test(nextChar)) {
items.push(formatItem(searchStr.substr(0, nextIndex)));
cursor += nextIndex;
} else if (!hasMoreData) {
@@ -128,7 +148,7 @@ function createParser(options) {
if (isUndefinedOrNull(token)) {
i = lastLineI;
break;
- } else if (LINE_BREAK.test(token)) {
+ } else if (ROW_DELIMITER.test(token)) {
i = nextToken.cursor + 1;
if (i < l) {
rows.push(items);
@@ -137,6 +157,18 @@ function createParser(options) {
} else {
break;
}
+ } else if (hasComments && token === COMMENT) {
+ cursor = parseCommentLine(line, i, hasMoreData);
+ if (cursor === null) {
+ i = lastLineI;
+ break;
+ } else if (cursor < l) {
+ lastLineI = i = cursor;
+ } else {
+ i = cursor;
+ cursor = null;
+ break;
+ }
} else {
if (token === ESCAPE) {
cursor = parseEscapedItem(line, items, nextToken.cursor, hasMoreData);
diff --git a/package.json b/package.json
index eff26c28..bba9b2ef 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "fast-csv",
- "version": "0.4.3",
+ "version": "0.4.4",
"description": "CSV parser and writer",
"main": "index.js",
"scripts": {
diff --git a/test/assets/test24.csv b/test/assets/test24.csv
new file mode 100644
index 00000000..12a60c30
--- /dev/null
+++ b/test/assets/test24.csv
@@ -0,0 +1,23 @@
+#This is a test CSV
+#It contains a bunch of comments!!!!
+#The first row contains headers.
+first_name,last_name,email_address,address
+#Line 1
+First1,Last1,email1@email.com,"1 Street St, State ST, 88888"
+#Line 2
+First2,Last2,email2@email.com,"2 Street St, State ST, 88888"
+#Line 3
+First3,Last3,email3@email.com,"3 Street St, State ST, 88888"
+#Line 4
+First4,Last4,email4@email.com,"4 Street St, State ST, 88888"
+#Line 5
+First5,Last5,email5@email.com,"5 Street St, State ST, 88888"
+#Line 6
+First6,Last6,email6@email.com,"6 Street St, State ST, 88888"
+#Line 7
+First7,Last7,email7@email.com,"7 Street St, State ST, 88888"
+#Line 8
+First8,Last8,email8@email.com,"8 Street St, State ST, 88888"
+#Line 9
+First9,Last9,email9@email.com,"9 Street St, State ST, 88888"
+#End of CSV
\ No newline at end of file
diff --git a/test/fast-csv.test.js b/test/fast-csv.test.js
index 8436e6f4..34f97c24 100644
--- a/test/fast-csv.test.js
+++ b/test/fast-csv.test.js
@@ -825,6 +825,20 @@ it.describe("fast-csv", function (it) {
});
});
+ it.should("handle CSVs with comments", function (next) {
+ var actual = [];
+ csv
+ .fromPath(path.resolve(__dirname, "./assets/test24.csv"), {headers: true, comment: "#"})
+ .on("record", function (data, index) {
+ actual[index] = data;
+ }).
+ on("end", function (count) {
+ assert.deepEqual(actual, expected1);
+ assert.equal(count, actual.length);
+ next();
+ });
+ });
+
it.describe("pause/resume", function () {
it.should("support pausing a stream", function (next) {
diff --git a/test/parser.test.js b/test/parser.test.js
index f97fc409..802af41a 100644
--- a/test/parser.test.js
+++ b/test/parser.test.js
@@ -11,10 +11,12 @@ it.describe("fast-csv parser", function (it) {
it.should("parse a block of CSV text", function () {
var data = "first_name,last_name,email_address\nFirst1,Last1,email1@email.com";
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First1", "Last1", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First1", "Last1", "email1@email.com"]
+ ]
+ });
});
it.should("return the rest of the line if there is more data", function () {
@@ -38,10 +40,12 @@ it.describe("fast-csv parser", function (it) {
["first_name", "last_name", "email_address"]
]
});
- assert.deepEqual(myParser(parsedData.line + "\nFirst2,Last2,email2@email.com", false), {"line": "", "rows": [
- ["First1", "Last1", "email1@email.com"],
- ["First2", "Last2", "email2@email.com"]
- ]});
+ assert.deepEqual(myParser(parsedData.line + "\nFirst2,Last2,email2@email.com", false), {
+ "line": "", "rows": [
+ ["First1", "Last1", "email1@email.com"],
+ ["First2", "Last2", "email2@email.com"]
+ ]
+ });
});
it.should("not parse a row if a new line is not found and there is more data", function () {
@@ -83,28 +87,34 @@ it.describe("fast-csv parser", function (it) {
it.should("parse a block of CSV text", function () {
var data = 'first_name,last_name,email_address\n"First,1","Last,1","email1@email.com"';
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,1", "Last,1", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,1", "Last,1", "email1@email.com"]
+ ]
+ });
});
it.should("parse a block of CSV text with escaped escaped char", function () {
var data = 'first_name,last_name,email_address\n"First,""1""","Last,""1""","email1@email.com"';
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
+ ]
+ });
});
it.should("parse a block of CSV text with alternate escape char", function () {
var data = 'first_name,last_name,email_address\n"First,\\"1\\"","Last,\\"1\\"","email1@email.com"';
var myParser = parser({delimiter: ",", escape: "\\"});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
+ ]
+ });
});
it.should("return the rest of the line if a complete value is not found", function () {
@@ -162,10 +172,12 @@ it.describe("fast-csv parser", function (it) {
var data = '"","",""\n,Last4,email4@email.com';
var myParser = parser({delimiter: ","}),
parsedData = myParser(data, false);
- assert.deepEqual(parsedData, {"line": "", "rows": [
- ["", "", ""],
- ["", "Last4", "email4@email.com"]
- ]});
+ assert.deepEqual(parsedData, {
+ "line": "", "rows": [
+ ["", "", ""],
+ ["", "Last4", "email4@email.com"]
+ ]
+ });
});
it.should("not parse a row if a new line is not found and there is more data", function () {
@@ -209,10 +221,12 @@ it.describe("fast-csv parser", function (it) {
it.should("parse a block of CSV text", function () {
var data = "first_name,last_name,email_address\rFirst1,Last1,email1@email.com";
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First1", "Last1", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First1", "Last1", "email1@email.com"]
+ ]
+ });
});
it.should("return the rest of the line if there is more data", function () {
@@ -236,10 +250,12 @@ it.describe("fast-csv parser", function (it) {
["first_name", "last_name", "email_address"]
]
});
- assert.deepEqual(myParser(parsedData.line + "\rFirst2,Last2,email2@email.com", false), {"line": "", "rows": [
- ["First1", "Last1", "email1@email.com"],
- ["First2", "Last2", "email2@email.com"]
- ]});
+ assert.deepEqual(myParser(parsedData.line + "\rFirst2,Last2,email2@email.com", false), {
+ "line": "", "rows": [
+ ["First1", "Last1", "email1@email.com"],
+ ["First2", "Last2", "email2@email.com"]
+ ]
+ });
});
it.should("not parse a row if a new line is not found and there is more data", function () {
@@ -281,28 +297,34 @@ it.describe("fast-csv parser", function (it) {
it.should("parse a block of CSV text", function () {
var data = 'first_name,last_name,email_address\r"First,1","Last,1","email1@email.com"';
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,1", "Last,1", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,1", "Last,1", "email1@email.com"]
+ ]
+ });
});
it.should("parse a block of CSV text with escaped escaped char", function () {
var data = 'first_name,last_name,email_address\r"First,""1""","Last,""1""","email1@email.com"';
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
+ ]
+ });
});
it.should("parse a block of CSV text with alternate escape char", function () {
var data = 'first_name,last_name,email_address\r"First,\\"1\\"","Last,\\"1\\"","email1@email.com"';
var myParser = parser({delimiter: ",", escape: "\\"});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
+ ]
+ });
});
it.should("return the rest of the line if a complete value is not found", function () {
@@ -360,10 +382,12 @@ it.describe("fast-csv parser", function (it) {
var data = '"","",""\r,Last4,email4@email.com';
var myParser = parser({delimiter: ","}),
parsedData = myParser(data, false);
- assert.deepEqual(parsedData, {"line": "", "rows": [
- ["", "", ""],
- ["", "Last4", "email4@email.com"]
- ]});
+ assert.deepEqual(parsedData, {
+ "line": "", "rows": [
+ ["", "", ""],
+ ["", "Last4", "email4@email.com"]
+ ]
+ });
});
it.should("not parse a row if a new line is not found and there is more data", function () {
@@ -408,10 +432,12 @@ it.describe("fast-csv parser", function (it) {
it.should("parse a block of CSV text", function () {
var data = "first_name,last_name,email_address\r\nFirst1,Last1,email1@email.com";
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First1", "Last1", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First1", "Last1", "email1@email.com"]
+ ]
+ });
});
it.should("return the rest of the line if there is more data", function () {
@@ -435,10 +461,12 @@ it.describe("fast-csv parser", function (it) {
["first_name", "last_name", "email_address"]
]
});
- assert.deepEqual(myParser(parsedData.line + "\r\nFirst2,Last2,email2@email.com", false), {"line": "", "rows": [
- ["First1", "Last1", "email1@email.com"],
- ["First2", "Last2", "email2@email.com"]
- ]});
+ assert.deepEqual(myParser(parsedData.line + "\r\nFirst2,Last2,email2@email.com", false), {
+ "line": "", "rows": [
+ ["First1", "Last1", "email1@email.com"],
+ ["First2", "Last2", "email2@email.com"]
+ ]
+ });
});
it.should("not parse a row if a new line is not found and there is more data", function () {
@@ -480,28 +508,34 @@ it.describe("fast-csv parser", function (it) {
it.should("parse a block of CSV text", function () {
var data = 'first_name,last_name,email_address\r\n"First,1","Last,1","email1@email.com"';
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,1", "Last,1", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,1", "Last,1", "email1@email.com"]
+ ]
+ });
});
it.should("parse a block of CSV text with escaped escaped char", function () {
var data = 'first_name,last_name,email_address\r\n"First,""1""","Last,""1""","email1@email.com"';
var myParser = parser({delimiter: ","});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
+ ]
+ });
});
it.should("parse a block of CSV text with alternate escape char", function () {
var data = 'first_name,last_name,email_address\r\n"First,\\"1\\"","Last,\\"1\\"","email1@email.com"';
var myParser = parser({delimiter: ",", escape: "\\"});
- assert.deepEqual(myParser(data, false), {"line": "", "rows": [
- ["first_name", "last_name", "email_address"],
- ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
- ]});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First,\"1\"", "Last,\"1\"", "email1@email.com"]
+ ]
+ });
});
it.should("return the rest of the line if a complete value is not found", function () {
@@ -559,10 +593,12 @@ it.describe("fast-csv parser", function (it) {
var data = '"","",""\r\n,Last4,email4@email.com';
var myParser = parser({delimiter: ","}),
parsedData = myParser(data, false);
- assert.deepEqual(parsedData, {"line": "", "rows": [
- ["", "", ""],
- ["", "Last4", "email4@email.com"]
- ]});
+ assert.deepEqual(parsedData, {
+ "line": "", "rows": [
+ ["", "", ""],
+ ["", "Last4", "email4@email.com"]
+ ]
+ });
});
it.should("not parse a row if a new line is not found and there is more data", function () {
@@ -600,4 +636,96 @@ it.describe("fast-csv parser", function (it) {
});
+ it.describe("with comments", function (it) {
+ it.should("parse a block of CSV text", function () {
+ var data = "first_name,last_name,email_address\n#The first row of data\nFirst1,Last1,email1@email.com";
+ var myParser = parser({delimiter: ",", comment: "#"});
+ assert.deepEqual(myParser(data, false), {
+ "line": "", "rows": [
+ ["first_name", "last_name", "email_address"],
+ ["First1", "Last1", "email1@email.com"]
+ ]
+ });
+ });
+
+ it.should("return the rest of the line if there is more data", function () {
+ var data = "first_name,last_name,email_address\n#First1,Last1,email1@email.com";
+ var myParser = parser({delimiter: ",", comment: "#"});
+ assert.deepEqual(myParser(data, true), {
+ "line": "#First1,Last1,email1@email.com",
+ "rows": [
+ ["first_name", "last_name", "email_address"]
+ ]
+ });
+ });
+
+ it.should("accept new data and return the result", function () {
+ var data = "first_name,last_name,email_address\n#This is a comment";
+ var myParser = parser({delimiter: ",", comment: "#"});
+ var parsedData = myParser(data, true);
+ assert.deepEqual(parsedData, {
+ "line": "#This is a comment",
+ "rows": [
+ ["first_name", "last_name", "email_address"]
+ ]
+ });
+ assert.deepEqual(myParser(parsedData.line + "\nFirst1,Last1,email1@email.com\nFirst2,Last2,email2@email.com", false), {
+ "line": "", "rows": [
+ ["First1", "Last1", "email1@email.com"],
+ ["First2", "Last2", "email2@email.com"]
+ ]
+ });
+ });
+
+ it.should("not parse a row if a new line is not found and there is more data", function () {
+ var data = "#first_name,last_name,email_address";
+ var myParser = parser({delimiter: ",", comment: "#"});
+ var parsedData = myParser(data, true);
+ assert.deepEqual(parsedData, {
+ "line": "#first_name,last_name,email_address",
+ "rows": []
+ });
+ });
+
+ it.should("not parse data as a comment if it is contained in a line", function () {
+ var data = "f#irst_name,last_name,email_address";
+ var myParser = parser({delimiter: ",", comment: "#"});
+ var parsedData = myParser(data, false);
+ assert.deepEqual(parsedData, {
+ "line": "",
+ "rows": [["f#irst_name", "last_name", "email_address"]]
+ });
+ });
+
+ it.should("not parse data as a comment if it at the beginning but escaped", function () {
+ var data = '"#first_name",last_name,email_address';
+ var myParser = parser({delimiter: ",", comment: "#"});
+ var parsedData = myParser(data, false);
+ assert.deepEqual(parsedData, {
+ "line": "",
+ "rows": [["#first_name", "last_name", "email_address"]]
+ });
+ });
+
+ it.should("return empty rows if it is all comments as there is no more data and there is not a final row delimiter", function () {
+ var data = '#Comment1\n#Comment2';
+ var myParser = parser({delimiter: ",", comment: "#"});
+ var parsedData = myParser(data, false);
+ assert.deepEqual(parsedData, {
+ "line": "",
+ "rows": []
+ });
+ });
+
+ it.should("return empty rows if it is all comments as there is no more data and there is a final row delimiter", function () {
+ var data = '#Comment1\n#Comment2\n';
+ var myParser = parser({delimiter: ",", comment: "#"});
+ var parsedData = myParser(data, false);
+ assert.deepEqual(parsedData, {
+ "line": "",
+ "rows": []
+ });
+ });
+ });
+
});
\ No newline at end of file