Skip to content

Commit 26b8442

Browse files
committed
Escape attribute values and text contents differently when serializing
To match `XMLSerializer` for XML documents and `element.outerHTML` for HTML documents.
1 parent b6d1859 commit 26b8442

File tree

16 files changed

+177
-76
lines changed

16 files changed

+177
-76
lines changed

cjs/interface/attr.js

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,12 @@ const {CHANGED, VALUE} = require('../shared/symbols.js');
44
const {String, ignoreCase} = require('../shared/utils.js');
55
const {attrAsJSON} = require('../shared/jsdon.js');
66
const {emptyAttributes} = require('../shared/attributes.js');
7+
const {escapeHtmlAttributeValue, escapeXmlAttributeValue} = require('../shared/text-escaper.js');
78

89
const {attributeChangedCallback: moAttributes} = require('./mutation-observer.js');
910
const {attributeChangedCallback: ceAttributes} = require('./custom-element-registry.js');
1011

1112
const {Node} = require('./node.js');
12-
const {escape} = require('../shared/text-escaper.js');
13-
14-
const QUOTE = /"/g;
1513

1614
/**
1715
* @implements globalThis.Attr
@@ -46,7 +44,7 @@ class Attr extends Node {
4644
if (emptyAttributes.has(name) && !value) {
4745
return ignoreCase(this) ? name : `${name}=""`;
4846
}
49-
const escapedValue = (ignoreCase(this) ? value : escape(value)).replace(QUOTE, '&quot;');
47+
const escapedValue = ignoreCase(this) ? escapeHtmlAttributeValue(value) : escapeXmlAttributeValue(value);
5048
return `${name}="${escapedValue}"`;
5149
}
5250

cjs/interface/element.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ const {ShadowRoot} = require('./shadow-root.js');
4848
const {NodeList} = require('./node-list.js');
4949
const {Attr} = require('./attr.js');
5050
const {Text} = require('./text.js');
51-
const {escape} = require('../shared/text-escaper.js');
5251

5352
// <utils>
5453
const attributesHandler = {
@@ -228,7 +227,7 @@ class Element extends ParentNode {
228227
if (name === 'class')
229228
return this.className;
230229
const attribute = this.getAttributeNode(name);
231-
return attribute && (ignoreCase(this) ? attribute.value : escape(attribute.value));
230+
return attribute && attribute.value;
232231
}
233232

234233
getAttributeNode(name) {

cjs/interface/text.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
'use strict';
22
const {TEXT_NODE} = require('../shared/constants.js');
33
const {VALUE} = require('../shared/symbols.js');
4-
const {escape} = require('../shared/text-escaper.js');
4+
const {escapeHtmlTextContent, escapeXmlTextContent} = require('../shared/text-escaper.js');
5+
const {ignoreCase} = require('../shared/utils.js');
56

67
const {CharacterData} = require('./character-data.js');
78

@@ -39,6 +40,6 @@ class Text extends CharacterData {
3940
return new Text(ownerDocument, data);
4041
}
4142

42-
toString() { return escape(this[VALUE]); }
43+
toString() { return ignoreCase(this) ? escapeHtmlTextContent(this[VALUE]) : escapeXmlTextContent(this[VALUE]); }
4344
}
4445
exports.Text = Text

cjs/shared/text-escaper.js

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,61 @@
11
'use strict';
22
const {replace} = '';
33

4-
// escape
5-
const ca = /[<>&\xA0]/g;
4+
const htmlAttributeValueCharacters = /["&<>\xA0]/g;
5+
const xmlAttributeValueCharacters = /[\t\n\r"&<>]/g;
66

7-
const esca = {
8-
'\xA0': '&#160;',
7+
const htmlTextContentCharacters = /[&<>\xA0]/g;
8+
const xmlTextContentCharacters = /[&<>]/g;
9+
10+
const characterEntities = {
11+
'\t': '&#x9;',
12+
'\n': '&#xA;',
13+
'\r': '&#xD;',
14+
'"': '&quot;',
915
'&': '&amp;',
1016
'<': '&lt;',
11-
'>': '&gt;'
17+
'>': '&gt;',
18+
'\xA0': '&nbsp;'
1219
};
1320

14-
const pe = m => esca[m];
21+
const replaceCharacterByEntity = character => characterEntities[character];
22+
23+
/**
24+
* Safely escape HTML entities such as `"`, `&`, `<`, `>` and U+00A0 NO-BREAK SPACE only.
25+
* @param {string} value the input to safely escape
26+
* @returns {string} the escaped input, and it **throws** an error if
27+
* the input type is unexpected, except for boolean and numbers,
28+
* converted as string.
29+
*/
30+
const escapeHtmlAttributeValue = value => replace.call(value, htmlAttributeValueCharacters, replaceCharacterByEntity);
31+
exports.escapeHtmlAttributeValue = escapeHtmlAttributeValue;
32+
33+
/**
34+
* Safely escape XML entities such as `\t`, `\n`, `\r`, `"`, `&`, `<` and `>` only.
35+
* @param {string} value the input to safely escape
36+
* @returns {string} the escaped input, and it **throws** an error if
37+
* the input type is unexpected, except for boolean and numbers,
38+
* converted as string.
39+
*/
40+
const escapeXmlAttributeValue = value => replace.call(value, xmlAttributeValueCharacters, replaceCharacterByEntity);
41+
exports.escapeXmlAttributeValue = escapeXmlAttributeValue;
42+
43+
/**
44+
* Safely escape HTML entities such as `&`, `<`, `>` and U+00A0 NO-BREAK SPACE only.
45+
* @param {string} content the input to safely escape
46+
* @returns {string} the escaped input, and it **throws** an error if
47+
* the input type is unexpected, except for boolean and numbers,
48+
* converted as string.
49+
*/
50+
const escapeHtmlTextContent = content => replace.call(content, htmlTextContentCharacters, replaceCharacterByEntity);
51+
exports.escapeHtmlTextContent = escapeHtmlTextContent;
1552

1653
/**
17-
* Safely escape HTML entities such as `&`, `<`, `>` only.
18-
* @param {string} es the input to safely escape
54+
* Safely escape XML entities such as `&`, `<` and `>` only.
55+
* @param {string} content the input to safely escape
1956
* @returns {string} the escaped input, and it **throws** an error if
2057
* the input type is unexpected, except for boolean and numbers,
2158
* converted as string.
2259
*/
23-
const escape = es => replace.call(es, ca, pe);
24-
exports.escape = escape;
60+
const escapeXmlTextContent = content => replace.call(content, xmlTextContentCharacters, replaceCharacterByEntity);
61+
exports.escapeXmlTextContent = escapeXmlTextContent;

esm/interface/attr.js

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,12 @@ import {CHANGED, VALUE} from '../shared/symbols.js';
33
import {String, ignoreCase} from '../shared/utils.js';
44
import {attrAsJSON} from '../shared/jsdon.js';
55
import {emptyAttributes} from '../shared/attributes.js';
6+
import {escapeHtmlAttributeValue, escapeXmlAttributeValue} from '../shared/text-escaper.js';
67

78
import {attributeChangedCallback as moAttributes} from './mutation-observer.js';
89
import {attributeChangedCallback as ceAttributes} from './custom-element-registry.js';
910

1011
import {Node} from './node.js';
11-
import {escape} from '../shared/text-escaper.js';
12-
13-
const QUOTE = /"/g;
1412

1513
/**
1614
* @implements globalThis.Attr
@@ -45,7 +43,7 @@ export class Attr extends Node {
4543
if (emptyAttributes.has(name) && !value) {
4644
return ignoreCase(this) ? name : `${name}=""`;
4745
}
48-
const escapedValue = (ignoreCase(this) ? value : escape(value)).replace(QUOTE, '&quot;');
46+
const escapedValue = ignoreCase(this) ? escapeHtmlAttributeValue(value) : escapeXmlAttributeValue(value);
4947
return `${name}="${escapedValue}"`;
5048
}
5149

esm/interface/element.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ import {ShadowRoot} from './shadow-root.js';
5050
import {NodeList} from './node-list.js';
5151
import {Attr} from './attr.js';
5252
import {Text} from './text.js';
53-
import {escape} from '../shared/text-escaper.js';
5453

5554
// <utils>
5655
const attributesHandler = {
@@ -230,7 +229,7 @@ export class Element extends ParentNode {
230229
if (name === 'class')
231230
return this.className;
232231
const attribute = this.getAttributeNode(name);
233-
return attribute && (ignoreCase(this) ? attribute.value : escape(attribute.value));
232+
return attribute && attribute.value;
234233
}
235234

236235
getAttributeNode(name) {

esm/interface/text.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import {TEXT_NODE} from '../shared/constants.js';
22
import {VALUE} from '../shared/symbols.js';
3-
import {escape} from '../shared/text-escaper.js';
3+
import {escapeHtmlTextContent, escapeXmlTextContent} from '../shared/text-escaper.js';
4+
import {ignoreCase} from '../shared/utils.js';
45

56
import {CharacterData} from './character-data.js';
67

@@ -38,5 +39,5 @@ export class Text extends CharacterData {
3839
return new Text(ownerDocument, data);
3940
}
4041

41-
toString() { return escape(this[VALUE]); }
42+
toString() { return ignoreCase(this) ? escapeHtmlTextContent(this[VALUE]) : escapeXmlTextContent(this[VALUE]); }
4243
}

esm/shared/text-escaper.js

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,56 @@
11
const {replace} = '';
22

3-
// escape
4-
const ca = /[<>&\xA0]/g;
3+
const htmlAttributeValueCharacters = /["&<>\xA0]/g;
4+
const xmlAttributeValueCharacters = /[\t\n\r"&<>]/g;
55

6-
const esca = {
7-
'\xA0': '&#160;',
6+
const htmlTextContentCharacters = /[&<>\xA0]/g;
7+
const xmlTextContentCharacters = /[&<>]/g;
8+
9+
const characterEntities = {
10+
'\t': '&#x9;',
11+
'\n': '&#xA;',
12+
'\r': '&#xD;',
13+
'"': '&quot;',
814
'&': '&amp;',
915
'<': '&lt;',
10-
'>': '&gt;'
16+
'>': '&gt;',
17+
'\xA0': '&nbsp;'
1118
};
1219

13-
const pe = m => esca[m];
20+
const replaceCharacterByEntity = character => characterEntities[character];
21+
22+
/**
23+
* Safely escape HTML entities such as `"`, `&`, `<`, `>` and U+00A0 NO-BREAK SPACE only.
24+
* @param {string} value the input to safely escape
25+
* @returns {string} the escaped input, and it **throws** an error if
26+
* the input type is unexpected, except for boolean and numbers,
27+
* converted as string.
28+
*/
29+
export const escapeHtmlAttributeValue = value => replace.call(value, htmlAttributeValueCharacters, replaceCharacterByEntity);
30+
31+
/**
32+
* Safely escape XML entities such as `\t`, `\n`, `\r`, `"`, `&`, `<` and `>` only.
33+
* @param {string} value the input to safely escape
34+
* @returns {string} the escaped input, and it **throws** an error if
35+
* the input type is unexpected, except for boolean and numbers,
36+
* converted as string.
37+
*/
38+
export const escapeXmlAttributeValue = value => replace.call(value, xmlAttributeValueCharacters, replaceCharacterByEntity);
39+
40+
/**
41+
* Safely escape HTML entities such as `&`, `<`, `>` and U+00A0 NO-BREAK SPACE only.
42+
* @param {string} content the input to safely escape
43+
* @returns {string} the escaped input, and it **throws** an error if
44+
* the input type is unexpected, except for boolean and numbers,
45+
* converted as string.
46+
*/
47+
export const escapeHtmlTextContent = content => replace.call(content, htmlTextContentCharacters, replaceCharacterByEntity);
1448

1549
/**
16-
* Safely escape HTML entities such as `&`, `<`, `>` only.
17-
* @param {string} es the input to safely escape
50+
* Safely escape XML entities such as `&`, `<` and `>` only.
51+
* @param {string} content the input to safely escape
1852
* @returns {string} the escaped input, and it **throws** an error if
1953
* the input type is unexpected, except for boolean and numbers,
2054
* converted as string.
2155
*/
22-
export const escape = es => replace.call(es, ca, pe);
56+
export const escapeXmlTextContent = content => replace.call(content, xmlTextContentCharacters, replaceCharacterByEntity);

test/html/anchor-element.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ const {document} = parseHTML('<a href="https://google.com/?q=1&page=2">click me<
66

77
const {lastElementChild: a} = document;
88

9-
assert(a.toString(), '<a href="https://google.com/?q=1&page=2">click me</a>');
9+
assert(a.toString(), '<a href="https://google.com/?q=1&amp;page=2">click me</a>');
1010
a.setAttribute('href', 'https://google.com/?q=1&page=2&test="');
11-
assert(a.toString(), '<a href="https://google.com/?q=1&page=2&test=&quot;">click me</a>');
11+
assert(a.toString(), '<a href="https://google.com/?q=1&amp;page=2&amp;test=&quot;">click me</a>');
1212
a.setAttribute('href', 'https://google.com/?q=asd&lol=<2>"');
1313
assert(a.href, 'https://google.com/?q=asd&lol=%3C2%3E%22');
1414
a.setAttribute('href', 'https://google.com/path%20to%20some%20file.pdf');

test/html/document.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ document.title = 'I';
4545
assert(document.title + document.title + document.title, 'III', 'side-effects detected when inspecting the title');
4646

4747
document.title = '&';
48-
assert(document.toString(), '<!DOCTYPE html><html><head><title>&</title></head><body></body></html>');
48+
assert(document.toString(), '<!DOCTYPE html><html><head><title>&amp;</title></head><body></body></html>');
4949

5050
assert(document.all.length, 4);
5151
assert(document.all[0], document.querySelector('html'));

test/html/i-frame-element.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ assert(iframe.src, './test.html', 'Issue #82 - <iframe>.src');
1616
iframe.srcdoc = `<html><span style="color: red">Test</span></html>`;
1717
assert(
1818
document.body.innerHTML,
19-
`<iframe srcdoc="<html><span style=&quot;color: red&quot;>Test</span></html>"></iframe>`
19+
`<iframe srcdoc="&lt;html&gt;&lt;span style=&quot;color: red&quot;&gt;Test&lt;/span&gt;&lt;/html&gt;"></iframe>`
2020
);
2121
}
2222

@@ -50,12 +50,12 @@ assert(iframe.src, './test.html', 'Issue #82 - <iframe>.src');
5050
{
5151
const { document } = parseHTML(
5252
`<html><body><iframe loading="lazy" referrerpolicy="no-referrer" name="iframe-name" allow="geolocation"></iframe></body></html>`
53-
);
53+
);
5454

5555
const iframe = document.body.querySelector("iframe");
5656
assert(iframe.allowFullscreen, false);
5757
assert(iframe.loading, 'lazy');
5858
assert(iframe.referrerPolicy, "no-referrer");
5959
assert(iframe.name, "iframe-name");
6060
assert(iframe.allow, "geolocation");
61-
}
61+
}

test/html/meta-element.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ assert(b.charset, 'utf-8');
2424
const {document: httpEquivRefresh} = parseHTML('<meta http-equiv="refresh" content="0; url=https://google.com/?q=1&page=2">');
2525
const {lastElementChild: c} = httpEquivRefresh;
2626
// assert toString
27-
assert(c.toString(), '<meta http-equiv="refresh" content="0; url=https://google.com/?q=1&page=2">');
27+
assert(c.toString(), '<meta http-equiv="refresh" content="0; url=https://google.com/?q=1&amp;page=2">');
2828
// assert httpEquiv & content attribute
2929
assert(c.httpEquiv, 'refresh');
3030
assert(c.content, '0; url=https://google.com/?q=1&page=2');

test/interface/element.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ const parser = new DOMParser();
2323
const htmlDoc = parser.parseFromString(`<div><span content-desc="text3&amp;more"/></div>`, 'text/html').documentElement;
2424

2525
assert(htmlDoc.firstChild.getAttribute('content-desc'), 'text3&more');
26-
assert(htmlDoc.firstChild.outerHTML, '<span content-desc="text3&more"></span>');
27-
assert(htmlDoc.innerHTML, '<span content-desc="text3&more"></span>');
26+
assert(htmlDoc.firstChild.outerHTML, '<span content-desc="text3&amp;more"></span>');
27+
assert(htmlDoc.innerHTML, '<span content-desc="text3&amp;more"></span>');
2828

2929
htmlDoc.firstChild.setAttribute('content-desc', ''); // the attribute is not in the emptyAttributes set, and its value is empty
3030
assert(htmlDoc.firstChild.getAttribute('content-desc'), '');
@@ -39,7 +39,7 @@ assert(htmlDocWithEmptyAttrFromSet.innerHTML, '<span></span>');
3939

4040
const xmlDoc = parser.parseFromString(`<hierarchy><android.view.View content-desc="text3&amp;more"/></hierarchy>`, 'text/xml').documentElement;
4141

42-
assert(xmlDoc.firstChild.getAttribute('content-desc'), 'text3&amp;more');
42+
assert(xmlDoc.firstChild.getAttribute('content-desc'), 'text3&more');
4343
assert(xmlDoc.firstChild.outerHTML, '<android.view.View content-desc="text3&amp;more" />');
4444
assert(xmlDoc.innerHTML, '<android.view.View content-desc="text3&amp;more" />');
4545

test/shared/text-escaper.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
const BODY = '<body>Foo&#160;&quot;&#160;&quot;&#160;Bar</body>';
2-
const REBODY = BODY.replace(/&quot;/g, '"');
2+
const REBODY = '<body>Foo&nbsp;"&nbsp;"&nbsp;Bar</body>';
33
const HTML = `<html id="html" class="live">${BODY}</html>`;
44
const REHTML = `<html id="html" class="live">${REBODY}</html>`;
55

@@ -17,4 +17,3 @@ assert(document.documentElement.toString(), REHTML);
1717

1818
document.documentElement.innerHTML = '<body>&amp;amp;</body>';
1919
assert(document.documentElement.toString(), `<html id="html" class="live"><body>&amp;amp;</body></html>`);
20-

types/esm/shared/text-escaper.d.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
export function escape(es: string): string;
1+
export function escapeHtmlAttributeValue(value: string): string;
2+
export function escapeXmlAttributeValue(value: string): string;
3+
export function escapeHtmlTextContent(content: string): string;
4+
export function escapeXmlTextContent(content: string): string;

0 commit comments

Comments
 (0)