From b94c7a05b302f0eed76e54ef69dba98888c3acab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Wed, 11 Oct 2023 02:17:52 -0400 Subject: [PATCH] update to Unicode 15.1 (#3424) --- CHANGELOG.md | 6 ++++++ internal/js_ast/unicode.go | 3 +++ scripts/gen-unicode-table.js | 14 +++++++++++--- scripts/package-lock.json | 18 +++++++++--------- scripts/package.json | 2 +- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc743d93e6a..0c2ad8117f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +* Update to Unicode 15.1.0 + + The character tables that determine which characters form valid JavaScript identifiers have been updated from Unicode version 15.0.0 to the newly released Unicode version 15.1.0. I'm not putting an example in the release notes because all of the new characters will likely just show up as little squares since fonts haven't been updated yet. But you can read https://www.unicode.org/versions/Unicode15.1.0/#Summary for more information about the changes. 
+ ## 0.19.4 * Fix printing of JavaScript decorators in tricky cases ([#3396](https://github.com/evanw/esbuild/issues/3396)) diff --git a/internal/js_ast/unicode.go b/internal/js_ast/unicode.go index 73df7b31e83..f1d67206aa4 100644 --- a/internal/js_ast/unicode.go +++ b/internal/js_ast/unicode.go @@ -1280,6 +1280,7 @@ var idStartES5OrESNext = &unicode.RangeTable{ {Lo: 0x2b740, Hi: 0x2b81d, Stride: 1}, {Lo: 0x2b820, Hi: 0x2cea1, Stride: 1}, {Lo: 0x2ceb0, Hi: 0x2ebe0, Stride: 1}, + {Lo: 0x2ebf0, Hi: 0x2ee5d, Stride: 1}, {Lo: 0x2f800, Hi: 0x2fa1d, Stride: 1}, {Lo: 0x30000, Hi: 0x3134a, Stride: 1}, {Lo: 0x31350, Hi: 0x323af, Stride: 1}, @@ -1591,6 +1592,7 @@ var idContinueES5OrESNext = &unicode.RangeTable{ {Lo: 0x1fe0, Hi: 0x1fec, Stride: 1}, {Lo: 0x1ff2, Hi: 0x1ff4, Stride: 1}, {Lo: 0x1ff6, Hi: 0x1ffc, Stride: 1}, + {Lo: 0x200c, Hi: 0x200d, Stride: 1}, {Lo: 0x203f, Hi: 0x2040, Stride: 1}, {Lo: 0x2054, Hi: 0x2054, Stride: 1}, {Lo: 0x2071, Hi: 0x2071, Stride: 1}, @@ -2054,6 +2056,7 @@ var idContinueES5OrESNext = &unicode.RangeTable{ {Lo: 0x2b740, Hi: 0x2b81d, Stride: 1}, {Lo: 0x2b820, Hi: 0x2cea1, Stride: 1}, {Lo: 0x2ceb0, Hi: 0x2ebe0, Stride: 1}, + {Lo: 0x2ebf0, Hi: 0x2ee5d, Stride: 1}, {Lo: 0x2f800, Hi: 0x2fa1d, Stride: 1}, {Lo: 0x30000, Hi: 0x3134a, Stride: 1}, {Lo: 0x31350, Hi: 0x323af, Stride: 1}, diff --git a/scripts/gen-unicode-table.js b/scripts/gen-unicode-table.js index 8f968e6f0d7..b720f507f28 100644 --- a/scripts/gen-unicode-table.js +++ b/scripts/gen-unicode-table.js @@ -45,16 +45,24 @@ const idContinueES5 = idStartES5.concat( // is presumed to be the Unicode set, collection 10646. 
// // UnicodeIDStart: any Unicode code point with the Unicode property “ID_Start” -const idStartESNext = require('@unicode/unicode-15.0.0/Binary_Property/ID_Start/code-points') +const idStartESNext = require('@unicode/unicode-15.1.0/Binary_Property/ID_Start/code-points') const idStartESNextSet = new Set(idStartESNext) +// Unicode 4.1 through Unicode 15 omitted these two characters from ID_Continue +// by accident. However, this accident was corrected in Unicode 15.1. Any JS VM +// that supports ES6+ but that uses a version of Unicode earlier than 15.1 will +// consider these to be a syntax error, so we deliberately omit these characters +// from the set of identifiers that are valid in both ES5 and ES6+. For more info +// see 2.2 in https://www.unicode.org/L2/L2023/23160-utc176-properties-recs.pdf +const ID_Continue_mistake = new Set([0x30FB, 0xFF65]) + // UnicodeIDContinue: any Unicode code point with the Unicode property “ID_Continue” -const idContinueESNext = require('@unicode/unicode-15.0.0/Binary_Property/ID_Continue/code-points') +const idContinueESNext = require('@unicode/unicode-15.1.0/Binary_Property/ID_Continue/code-points') const idContinueESNextSet = new Set(idContinueESNext) // These identifiers are valid in both ES5 and ES6+ (i.e. an intersection of both) const idStartES5AndESNext = idStartES5.filter(n => idStartESNextSet.has(n)) -const idContinueES5AndESNext = idContinueES5.filter(n => idContinueESNextSet.has(n)) +const idContinueES5AndESNext = idContinueES5.filter(n => idContinueESNextSet.has(n) && !ID_Continue_mistake.has(n)) // These identifiers are valid in either ES5 or ES6+ (i.e. 
a union of both) const idStartES5OrESNext = [...new Set(idStartES5.concat(idStartESNext))].sort((a, b) => a - b) diff --git a/scripts/package-lock.json b/scripts/package-lock.json index cf6b989ce67..317b61d933d 100644 --- a/scripts/package-lock.json +++ b/scripts/package-lock.json @@ -6,7 +6,7 @@ "": { "dependencies": { "@types/node": "14.14.6", - "@unicode/unicode-15.0.0": "1.3.1", + "@unicode/unicode-15.1.0": "1.5.2", "@unicode/unicode-3.0.0": "1.0.6", "fuse.js": "3.2.0", "js-yaml": "3.14.0", @@ -20,10 +20,10 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-14.14.6.tgz", "integrity": "sha512-6QlRuqsQ/Ox/aJEQWBEJG7A9+u7oSYl3mem/K8IzxXG/kAGbV1YPD9Bg9Zw3vyxC/YP+zONKwy8hGkSt1jxFMw==" }, - "node_modules/@unicode/unicode-15.0.0": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/@unicode/unicode-15.0.0/-/unicode-15.0.0-1.3.1.tgz", - "integrity": "sha512-zxm5Cx0v9vGxFOM8tVuArWHxxJTk+stiLA+ZHKt2mJO3HHmM6uN8OFcDGuvcix3MqguQ75am0XvpUgEz4P4vFw==" + "node_modules/@unicode/unicode-15.1.0": { + "version": "1.5.2", + "resolved": "https://registry.npmjs.org/@unicode/unicode-15.1.0/-/unicode-15.1.0-1.5.2.tgz", + "integrity": "sha512-7PAgnShDr8ziK6XeHB/TUVFboDFEhaQKKyrw55/Kx9o6AQDy1s7dJ9KRpRerW9nrR5qMGUQvOqTXOAek6ZIXkg==" }, "node_modules/@unicode/unicode-3.0.0": { "version": "1.0.6", @@ -135,10 +135,10 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-14.14.6.tgz", "integrity": "sha512-6QlRuqsQ/Ox/aJEQWBEJG7A9+u7oSYl3mem/K8IzxXG/kAGbV1YPD9Bg9Zw3vyxC/YP+zONKwy8hGkSt1jxFMw==" }, - "@unicode/unicode-15.0.0": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/@unicode/unicode-15.0.0/-/unicode-15.0.0-1.3.1.tgz", - "integrity": "sha512-zxm5Cx0v9vGxFOM8tVuArWHxxJTk+stiLA+ZHKt2mJO3HHmM6uN8OFcDGuvcix3MqguQ75am0XvpUgEz4P4vFw==" + "@unicode/unicode-15.1.0": { + "version": "1.5.2", + "resolved": "https://registry.npmjs.org/@unicode/unicode-15.1.0/-/unicode-15.1.0-1.5.2.tgz", + "integrity": 
"sha512-7PAgnShDr8ziK6XeHB/TUVFboDFEhaQKKyrw55/Kx9o6AQDy1s7dJ9KRpRerW9nrR5qMGUQvOqTXOAek6ZIXkg==" }, "@unicode/unicode-3.0.0": { "version": "1.0.6", diff --git a/scripts/package.json b/scripts/package.json index 48a8f570de3..39f802ad92f 100644 --- a/scripts/package.json +++ b/scripts/package.json @@ -1,7 +1,7 @@ { "dependencies": { "@types/node": "14.14.6", - "@unicode/unicode-15.0.0": "1.3.1", + "@unicode/unicode-15.1.0": "1.5.2", "@unicode/unicode-3.0.0": "1.0.6", "fuse.js": "3.2.0", "js-yaml": "3.14.0",