diff --git a/README.md b/README.md index 8305938..2003ce9 100644 --- a/README.md +++ b/README.md @@ -97,13 +97,22 @@ You can also check out this nice [working implementation](https://github.com/scr behavior. If you want total control over handling these errors and optionally aborting parsing the feed, use this option. +- `strip_html` - Set to `true` to override Feedparser's default behavior, which is + to pass through all substrings that look like HTML. In older versions, we always + stripped these HTML-like substrings to help users avoid inadvertently creating + XSS vulnerabilities by reflecting the value of these elements without properly + escaping them. We decided that wasn't particularly helpful because the simple + sanitization we were performing didn't address all cases and did a poor job. However, + if you were relying on the legacy behavior, you can set this option to `true`. + ## Examples See the [`examples`](examples/) directory. ## Changes in v3 -- dropped support for Node 4 +- Dropped support for Node 4 +- Changed default behavior to no longer strip HTML [#264](https://github.com/danmactough/node-feedparser/pull/264) ## API diff --git a/lib/feedparser/index.js b/lib/feedparser/index.js index 7915725..5574519 100644 --- a/lib/feedparser/index.js +++ b/lib/feedparser/index.js @@ -72,6 +72,7 @@ function FeedParser (options) { if (!('normalize' in this.options)) this.options.normalize = true; if (!('addmeta' in this.options)) this.options.addmeta = true; if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true; + if (!('strip_html' in this.options)) this.options.strip_html = false; if ('MAX_BUFFER_LENGTH' in this.options) { sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers } else { @@ -430,6 +431,7 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { var meta = {} , normalize = !options || (options 
&& options.strip_html) ; if (normalize) { @@ -765,8 +767,10 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { if (!meta.xmlurl && this.options.feedurl) { meta.xmlurl = meta.xmlUrl = this.options.feedurl; } - meta.title = meta.title && _.stripHtml(meta.title); - meta.description = meta.description && _.stripHtml(meta.description); + if (stripHtml) { + meta.title = meta.title && _.stripHtml(meta.title); + meta.description = meta.description && _.stripHtml(meta.description); + } } return meta; @@ -777,6 +781,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ var item = {} , normalize = !options || (options && options.normalize) + , stripHtml = !options || (options && options.strip_html) ; if (normalize) { @@ -1106,7 +1111,9 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ item.link = item.guid; } } - item.title = item.title && _.stripHtml(item.title); + if (stripHtml) { + item.title = item.title && _.stripHtml(item.title); + } } return item; }; diff --git a/lib/utils.js b/lib/utils.js index a396ea8..a97cd09 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -169,7 +169,7 @@ exports.reresolve = reresolve; * @private */ function stripHtml (str) { - return str.replace(/<.*?>/g, ''); + return str.replace(/<+[^>]+?>+/g, ''); } exports.stripHtml = stripHtml; diff --git a/test/feeds/title-with-angle-brackets.xml b/test/feeds/title-with-angle-brackets.xml new file mode 100644 index 0000000..a6ccd67 --- /dev/null +++ b/test/feeds/title-with-angle-brackets.xml @@ -0,0 +1,11 @@ + + + + Channel title + http://example.com/ + Channel + + RSS <<< Title >>> + + + diff --git a/test/strip-html.js b/test/strip-html.js new file mode 100644 index 0000000..21ba623 --- /dev/null +++ b/test/strip-html.js @@ -0,0 +1,31 @@ +describe('strip html', function () { + + var feed = __dirname + '/feeds/title-with-angle-brackets.xml'; + + it('should NOT aggressively strip html by default', function (done) { + 
fs.createReadStream(feed).pipe(new FeedParser()) + .once('readable', function () { + var stream = this; + assert.equal(stream.read().title, 'RSS <<< Title >>>'); + done(); + }) + .on('error', function (err) { + assert.ifError(err); + done(err); + }); + }); + + it('should aggressively strip html with option `strip_html`', function (done) { + fs.createReadStream(feed).pipe(new FeedParser({ strip_html: true })) + .once('readable', function () { + var stream = this; + assert.equal(stream.read().title, 'RSS '); + done(); + }) + .on('error', function (err) { + assert.ifError(err); + done(err); + }); + }); + +});