Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not strip html by default #264

Merged
merged 3 commits into from
Jul 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,22 @@ You can also check out this nice [working implementation](https://github.com/scr
behavior. If you want total control over handling these errors and optionally
aborting parsing the feed, use this option.

- `strip_html` - Set to `true` to override Feedparser's default behavior, which is
to pass through all substrings that look like html. In older versions, we always
stripped these html-like substrings to help users avoid inadvertently creating
XSS vulnerabilities by reflecting the value of these elements without properly
escaping them. We decided that wasn't particularly helpful because the simple
sanitation we were performing didn't address all cases and did a poor job. However,
if you were relying on the legacy behavior, you can set this option to `true`.

## Examples

See the [`examples`](examples/) directory.

## Changes in v3

- dropped support for Node 4
- Dropped support for Node 4
- Change default behavior to not strip html by default [#264](https://github.com/danmactough/node-feedparser/pull/264)

## API

Expand Down
13 changes: 10 additions & 3 deletions lib/feedparser/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ function FeedParser (options) {
if (!('normalize' in this.options)) this.options.normalize = true;
if (!('addmeta' in this.options)) this.options.addmeta = true;
if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true;
if (!('strip_html' in this.options)) this.options.strip_html = false;
if ('MAX_BUFFER_LENGTH' in this.options) {
sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers
} else {
Expand Down Expand Up @@ -430,6 +431,7 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {

var meta = {}
, normalize = !options || (options && options.normalize)
, stripHtml = !options || (options && options.strip_html)
;

if (normalize) {
Expand Down Expand Up @@ -765,8 +767,10 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
if (!meta.xmlurl && this.options.feedurl) {
meta.xmlurl = meta.xmlUrl = this.options.feedurl;
}
meta.title = meta.title && _.stripHtml(meta.title);
meta.description = meta.description && _.stripHtml(meta.description);
if (stripHtml) {
meta.title = meta.title && _.stripHtml(meta.title);
meta.description = meta.description && _.stripHtml(meta.description);
}
}

return meta;
Expand All @@ -777,6 +781,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){

var item = {}
, normalize = !options || (options && options.normalize)
, stripHtml = !options || (options && options.strip_html)
;

if (normalize) {
Expand Down Expand Up @@ -1106,7 +1111,9 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
item.link = item.guid;
}
}
item.title = item.title && _.stripHtml(item.title);
if (stripHtml) {
item.title = item.title && _.stripHtml(item.title);
}
}
return item;
};
Expand Down
2 changes: 1 addition & 1 deletion lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ exports.reresolve = reresolve;
* @private
*/
function stripHtml (str) {
return str.replace(/<.*?>/g, '');
return str.replace(/<+[^>]+?>+/g, '');
}

exports.stripHtml = stripHtml;
11 changes: 11 additions & 0 deletions test/feeds/title-with-angle-brackets.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Channel title</title>
<link>http://example.com/</link>
<description>Channel</description>
<item>
<title>RSS &lt;&lt;&lt; Title &gt;&gt;&gt;</title>
</item>
</channel>
</rss>
31 changes: 31 additions & 0 deletions test/strip-html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
describe('strip html', function () {

var feed = __dirname + '/feeds/title-with-angle-brackets.xml';

it('should NOT aggressively strip html by default', function (done) {
fs.createReadStream(feed).pipe(new FeedParser())
.once('readable', function () {
var stream = this;
assert.equal(stream.read().title, 'RSS <<< Title >>>');
done();
})
.on('error', function (err) {
assert.ifError(err);
done(err);
});
});

it('should aggressively strip html with option `strip_html`', function (done) {
fs.createReadStream(feed).pipe(new FeedParser({ strip_html: true }))
.once('readable', function () {
var stream = this;
assert.equal(stream.read().title, 'RSS ');
done();
})
.on('error', function (err) {
assert.ifError(err);
done(err);
});
});

});