Skip to content

Make attribute value and text content escaping more conforming #304

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cjs/html/title-element.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
'use strict';
const {registerHTMLClass} = require('../shared/register-html-class.js');
const {escapeHtmlTextContent} = require('../shared/text-escaper.js');

const {TextElement} = require('./text-element.js');
const {HTMLElement} = require('./element.js');

const tagName = 'title';

/**
* @implements globalThis.HTMLTitleElement
*/
class HTMLTitleElement extends TextElement {
class HTMLTitleElement extends HTMLElement {
constructor(ownerDocument, localName = tagName) {
super(ownerDocument, localName);
}

get innerHTML() { return super.innerHTML; }
set innerHTML(html) { super.innerHTML = escapeHtmlTextContent(html); }
}

registerHTMLClass(tagName, HTMLTitleElement);
Expand Down
6 changes: 2 additions & 4 deletions cjs/interface/attr.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ const {CHANGED, VALUE} = require('../shared/symbols.js');
const {String, ignoreCase} = require('../shared/utils.js');
const {attrAsJSON} = require('../shared/jsdon.js');
const {emptyAttributes} = require('../shared/attributes.js');
const {escapeHtmlAttributeValue, escapeXmlAttributeValue} = require('../shared/text-escaper.js');

const {attributeChangedCallback: moAttributes} = require('./mutation-observer.js');
const {attributeChangedCallback: ceAttributes} = require('./custom-element-registry.js');

const {Node} = require('./node.js');
const {escape} = require('../shared/text-escaper.js');

const QUOTE = /"/g;

/**
* @implements globalThis.Attr
Expand Down Expand Up @@ -46,7 +44,7 @@ class Attr extends Node {
if (emptyAttributes.has(name) && !value) {
return ignoreCase(this) ? name : `${name}=""`;
}
const escapedValue = (ignoreCase(this) ? value : escape(value)).replace(QUOTE, '&quot;');
const escapedValue = ignoreCase(this) ? escapeHtmlAttributeValue(value) : escapeXmlAttributeValue(value);
return `${name}="${escapedValue}"`;
}

Expand Down
3 changes: 1 addition & 2 deletions cjs/interface/element.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ const {ShadowRoot} = require('./shadow-root.js');
const {NodeList} = require('./node-list.js');
const {Attr} = require('./attr.js');
const {Text} = require('./text.js');
const {escape} = require('../shared/text-escaper.js');

// <utils>
const attributesHandler = {
Expand Down Expand Up @@ -228,7 +227,7 @@ class Element extends ParentNode {
if (name === 'class')
return this.className;
const attribute = this.getAttributeNode(name);
return attribute && (ignoreCase(this) ? attribute.value : escape(attribute.value));
return attribute && attribute.value;
}

getAttributeNode(name) {
Expand Down
5 changes: 3 additions & 2 deletions cjs/interface/text.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
'use strict';
const {TEXT_NODE} = require('../shared/constants.js');
const {VALUE} = require('../shared/symbols.js');
const {escape} = require('../shared/text-escaper.js');
const {escapeHtmlTextContent, escapeXmlTextContent} = require('../shared/text-escaper.js');
const {ignoreCase} = require('../shared/utils.js');

const {CharacterData} = require('./character-data.js');

Expand Down Expand Up @@ -39,6 +40,6 @@ class Text extends CharacterData {
return new Text(ownerDocument, data);
}

toString() { return escape(this[VALUE]); }
toString() { return ignoreCase(this) ? escapeHtmlTextContent(this[VALUE]) : escapeXmlTextContent(this[VALUE]); }
}
exports.Text = Text
57 changes: 47 additions & 10 deletions cjs/shared/text-escaper.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,61 @@
'use strict';
const {replace} = '';

// escape
const ca = /[<>&\xA0]/g;
const htmlAttributeValueCharacters = /["&<>\xA0]/g;
const xmlAttributeValueCharacters = /[\t\n\r"&<>]/g;

const esca = {
'\xA0': '&#160;',
const htmlTextContentCharacters = /[&<>\xA0]/g;
const xmlTextContentCharacters = /[&<>]/g;

const characterEntities = {
'\t': '&#x9;',
'\n': '&#xA;',
'\r': '&#xD;',
'"': '&quot;',
'&': '&amp;',
'<': '&lt;',
'>': '&gt;'
'>': '&gt;',
'\xA0': '&nbsp;'
};

const pe = m => esca[m];
/**
 * Replacer callback handed to `String.prototype.replace`: looks up the
 * entity registered in `characterEntities` for one matched character.
 * @param {string} match the single character matched by the escaping pattern
 * @returns {string} the entity mapped to that character
 */
const replaceCharacterByEntity = match => characterEntities[match];

/**
* Safely escape HTML entities such as `"`, `&`, `<`, `>` and U+00A0 NO-BREAK SPACE only.
* @param {string} value the input to safely escape
* @returns {string} the escaped input, and it **throws** an error if
* the input type is unexpected, except for boolean and numbers,
* converted as string.
*/
const escapeHtmlAttributeValue = value => replace.call(value, htmlAttributeValueCharacters, replaceCharacterByEntity);
exports.escapeHtmlAttributeValue = escapeHtmlAttributeValue;

/**
* Safely escape XML entities such as `\t`, `\n`, `\r`, `"`, `&`, `<` and `>` only.
* @param {string} value the input to safely escape
* @returns {string} the escaped input, and it **throws** an error if
* the input type is unexpected, except for boolean and numbers,
* converted as string.
*/
const escapeXmlAttributeValue = value => replace.call(value, xmlAttributeValueCharacters, replaceCharacterByEntity);
exports.escapeXmlAttributeValue = escapeXmlAttributeValue;

/**
* Safely escape HTML entities such as `&`, `<`, `>` and U+00A0 NO-BREAK SPACE only.
* @param {string} content the input to safely escape
* @returns {string} the escaped input, and it **throws** an error if
* the input type is unexpected, except for boolean and numbers,
* converted as string.
*/
const escapeHtmlTextContent = content => replace.call(content, htmlTextContentCharacters, replaceCharacterByEntity);
exports.escapeHtmlTextContent = escapeHtmlTextContent;

/**
* Safely escape HTML entities such as `&`, `<`, `>` only.
* @param {string} es the input to safely escape
* Safely escape XML entities such as `&`, `<` and `>` only.
* @param {string} content the input to safely escape
* @returns {string} the escaped input, and it **throws** an error if
* the input type is unexpected, except for boolean and numbers,
* converted as string.
*/
const escape = es => replace.call(es, ca, pe);
exports.escape = escape;
const escapeXmlTextContent = content => replace.call(content, xmlTextContentCharacters, replaceCharacterByEntity);
exports.escapeXmlTextContent = escapeXmlTextContent;
8 changes: 6 additions & 2 deletions esm/html/title-element.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
import {registerHTMLClass} from '../shared/register-html-class.js';
import {escapeHtmlTextContent} from '../shared/text-escaper.js';

import {TextElement} from './text-element.js';
import {HTMLElement} from './element.js';

const tagName = 'title';

/**
* @implements globalThis.HTMLTitleElement
*/
class HTMLTitleElement extends TextElement {
class HTMLTitleElement extends HTMLElement {
constructor(ownerDocument, localName = tagName) {
super(ownerDocument, localName);
}

get innerHTML() { return super.innerHTML; }
set innerHTML(html) { super.innerHTML = escapeHtmlTextContent(html); }
}

registerHTMLClass(tagName, HTMLTitleElement);
Expand Down
6 changes: 2 additions & 4 deletions esm/interface/attr.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@ import {CHANGED, VALUE} from '../shared/symbols.js';
import {String, ignoreCase} from '../shared/utils.js';
import {attrAsJSON} from '../shared/jsdon.js';
import {emptyAttributes} from '../shared/attributes.js';
import {escapeHtmlAttributeValue, escapeXmlAttributeValue} from '../shared/text-escaper.js';

import {attributeChangedCallback as moAttributes} from './mutation-observer.js';
import {attributeChangedCallback as ceAttributes} from './custom-element-registry.js';

import {Node} from './node.js';
import {escape} from '../shared/text-escaper.js';

const QUOTE = /"/g;

/**
* @implements globalThis.Attr
Expand Down Expand Up @@ -45,7 +43,7 @@ export class Attr extends Node {
if (emptyAttributes.has(name) && !value) {
return ignoreCase(this) ? name : `${name}=""`;
}
const escapedValue = (ignoreCase(this) ? value : escape(value)).replace(QUOTE, '&quot;');
const escapedValue = ignoreCase(this) ? escapeHtmlAttributeValue(value) : escapeXmlAttributeValue(value);
return `${name}="${escapedValue}"`;
}

Expand Down
3 changes: 1 addition & 2 deletions esm/interface/element.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ import {ShadowRoot} from './shadow-root.js';
import {NodeList} from './node-list.js';
import {Attr} from './attr.js';
import {Text} from './text.js';
import {escape} from '../shared/text-escaper.js';

// <utils>
const attributesHandler = {
Expand Down Expand Up @@ -230,7 +229,7 @@ export class Element extends ParentNode {
if (name === 'class')
return this.className;
const attribute = this.getAttributeNode(name);
return attribute && (ignoreCase(this) ? attribute.value : escape(attribute.value));
return attribute && attribute.value;
}

getAttributeNode(name) {
Expand Down
5 changes: 3 additions & 2 deletions esm/interface/text.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import {TEXT_NODE} from '../shared/constants.js';
import {VALUE} from '../shared/symbols.js';
import {escape} from '../shared/text-escaper.js';
import {escapeHtmlTextContent, escapeXmlTextContent} from '../shared/text-escaper.js';
import {ignoreCase} from '../shared/utils.js';

import {CharacterData} from './character-data.js';

Expand Down Expand Up @@ -38,5 +39,5 @@ export class Text extends CharacterData {
return new Text(ownerDocument, data);
}

toString() { return escape(this[VALUE]); }
toString() { return ignoreCase(this) ? escapeHtmlTextContent(this[VALUE]) : escapeXmlTextContent(this[VALUE]); }
}
52 changes: 43 additions & 9 deletions esm/shared/text-escaper.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,56 @@
const {replace} = '';

// escape
const ca = /[<>&\xA0]/g;
const htmlAttributeValueCharacters = /["&<>\xA0]/g;
const xmlAttributeValueCharacters = /[\t\n\r"&<>]/g;

const esca = {
'\xA0': '&#160;',
const htmlTextContentCharacters = /[&<>\xA0]/g;
const xmlTextContentCharacters = /[&<>]/g;

const characterEntities = {
'\t': '&#x9;',
'\n': '&#xA;',
'\r': '&#xD;',
'"': '&quot;',
'&': '&amp;',
'<': '&lt;',
'>': '&gt;'
'>': '&gt;',
'\xA0': '&nbsp;'
};

const pe = m => esca[m];
/**
 * Replacer callback handed to `String.prototype.replace`: looks up the
 * entity registered in `characterEntities` for one matched character.
 * @param {string} match the single character matched by the escaping pattern
 * @returns {string} the entity mapped to that character
 */
const replaceCharacterByEntity = match => characterEntities[match];

/**
 * Escape a value for use inside a double-quoted HTML attribute.
 * Only the characters matched by `htmlAttributeValueCharacters`
 * (`"`, `&`, `<`, `>` and U+00A0 NO-BREAK SPACE) are replaced by entities.
 * @param {string} value the attribute value to escape
 * @returns {string} the escaped value; non-string input is coerced to a
 * string by `String.prototype.replace`, and input that cannot be coerced
 * (such as `null` or `undefined`) makes the call **throw**.
 */
export const escapeHtmlAttributeValue = value => (
  replace.call(value, htmlAttributeValueCharacters, replaceCharacterByEntity)
);

/**
 * Escape a value for use inside a double-quoted XML attribute.
 * Only the characters matched by `xmlAttributeValueCharacters`
 * (`\t`, `\n`, `\r`, `"`, `&`, `<` and `>`) are replaced by entities.
 * @param {string} value the attribute value to escape
 * @returns {string} the escaped value; non-string input is coerced to a
 * string by `String.prototype.replace`, and input that cannot be coerced
 * (such as `null` or `undefined`) makes the call **throw**.
 */
export const escapeXmlAttributeValue = value => (
  replace.call(value, xmlAttributeValueCharacters, replaceCharacterByEntity)
);

/**
 * Escape text for use as HTML text content.
 * Only the characters matched by `htmlTextContentCharacters`
 * (`&`, `<`, `>` and U+00A0 NO-BREAK SPACE) are replaced by entities.
 * @param {string} content the text content to escape
 * @returns {string} the escaped content; non-string input is coerced to a
 * string by `String.prototype.replace`, and input that cannot be coerced
 * (such as `null` or `undefined`) makes the call **throw**.
 */
export const escapeHtmlTextContent = content => (
  replace.call(content, htmlTextContentCharacters, replaceCharacterByEntity)
);

/**
* Safely escape HTML entities such as `&`, `<`, `>` only.
* @param {string} es the input to safely escape
* Safely escape XML entities such as `&`, `<` and `>` only.
* @param {string} content the input to safely escape
* @returns {string} the escaped input, and it **throws** an error if
* the input type is unexpected, except for boolean and numbers,
* converted as string.
*/
export const escape = es => replace.call(es, ca, pe);
export const escapeXmlTextContent = content => replace.call(content, xmlTextContentCharacters, replaceCharacterByEntity);
4 changes: 2 additions & 2 deletions test/html/anchor-element.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ const {document} = parseHTML('<a href="https://google.com/?q=1&page=2">click me<

const {lastElementChild: a} = document;

assert(a.toString(), '<a href="https://google.com/?q=1&page=2">click me</a>');
assert(a.toString(), '<a href="https://google.com/?q=1&amp;page=2">click me</a>');
a.setAttribute('href', 'https://google.com/?q=1&page=2&test="');
assert(a.toString(), '<a href="https://google.com/?q=1&page=2&test=&quot;">click me</a>');
assert(a.toString(), '<a href="https://google.com/?q=1&amp;page=2&amp;test=&quot;">click me</a>');
a.setAttribute('href', 'https://google.com/?q=asd&lol=<2>"');
assert(a.href, 'https://google.com/?q=asd&lol=%3C2%3E%22');
a.setAttribute('href', 'https://google.com/path%20to%20some%20file.pdf');
Expand Down
2 changes: 1 addition & 1 deletion test/html/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ document.title = 'I';
assert(document.title + document.title + document.title, 'III', 'side-effects detected when inspecting the title');

document.title = '&';
assert(document.toString(), '<!DOCTYPE html><html><head><title>&</title></head><body></body></html>');
assert(document.toString(), '<!DOCTYPE html><html><head><title>&amp;</title></head><body></body></html>');

assert(document.all.length, 4);
assert(document.all[0], document.querySelector('html'));
Expand Down
6 changes: 3 additions & 3 deletions test/html/i-frame-element.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ assert(iframe.src, './test.html', 'Issue #82 - <iframe>.src');
iframe.srcdoc = `<html><span style="color: red">Test</span></html>`;
assert(
document.body.innerHTML,
`<iframe srcdoc="<html><span style=&quot;color: red&quot;>Test</span></html>"></iframe>`
`<iframe srcdoc="&lt;html&gt;&lt;span style=&quot;color: red&quot;&gt;Test&lt;/span&gt;&lt;/html&gt;"></iframe>`
);
}

Expand Down Expand Up @@ -50,12 +50,12 @@ assert(iframe.src, './test.html', 'Issue #82 - <iframe>.src');
{
const { document } = parseHTML(
`<html><body><iframe loading="lazy" referrerpolicy="no-referrer" name="iframe-name" allow="geolocation"></iframe></body></html>`
);
);

const iframe = document.body.querySelector("iframe");
assert(iframe.allowFullscreen, false);
assert(iframe.loading, 'lazy');
assert(iframe.referrerPolicy, "no-referrer");
assert(iframe.name, "iframe-name");
assert(iframe.allow, "geolocation");
}
}
2 changes: 1 addition & 1 deletion test/html/meta-element.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ assert(b.charset, 'utf-8');
const {document: httpEquivRefresh} = parseHTML('<meta http-equiv="refresh" content="0; url=https://google.com/?q=1&page=2">');
const {lastElementChild: c} = httpEquivRefresh;
// assert toString
assert(c.toString(), '<meta http-equiv="refresh" content="0; url=https://google.com/?q=1&page=2">');
assert(c.toString(), '<meta http-equiv="refresh" content="0; url=https://google.com/?q=1&amp;page=2">');
// assert httpEquiv & content attribute
assert(c.httpEquiv, 'refresh');
assert(c.content, '0; url=https://google.com/?q=1&page=2');
Expand Down
39 changes: 39 additions & 0 deletions test/html/title-element.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
const assert = require('../assert.js').for('HTMLTitleElement');

const {DOMParser, parseHTML} = global[Symbol.for('linkedom')];

const {document: htmlDoc} = parseHTML('<title>abc&<>"\t\n\r\xA0</title>');
assert(
htmlDoc.toString(),
'<title>abc&amp;&lt;&gt;"\t\n\r&nbsp;</title>'
);

const htmlTitle = htmlDoc.querySelector('title');
htmlTitle.innerHTML = '<a>sub element</a>';
assert(
htmlTitle.innerHTML,
'&lt;a&gt;sub element&lt;/a&gt;'
);
assert(
htmlDoc.toString(),
'<title>&lt;a&gt;sub element&lt;/a&gt;</title>'
);
assert(htmlDoc.querySelectorAll('a').length, 0);

const xhtmlDoc = (new DOMParser).parseFromString('<title xmlns="http://www.w3.org/1999/xhtml">abc&<>"\t\n\r\xA0</title>', 'application/xhtml+xml');
assert(
xhtmlDoc.toString(),
'<?xml version="1.0" encoding="utf-8"?><title xmlns="http://www.w3.org/1999/xhtml">abc&amp;&lt;&gt;"\t\n\r\xA0</title>'
);

const xhtmlTitle = xhtmlDoc.querySelector('title');
xhtmlTitle.innerHTML = '<a>sub element</a>';
assert(
xhtmlTitle.innerHTML,
'<a>sub element</a>'
);
assert(
xhtmlDoc.toString(),
'<?xml version="1.0" encoding="utf-8"?><title xmlns="http://www.w3.org/1999/xhtml"><a>sub element</a></title>'
);
assert(xhtmlDoc.querySelectorAll('a').length, 1);
Loading