diff --git a/runtime/mlBytes.js b/runtime/mlBytes.js index fe7e7f9c3d..61ca734ba3 100644 --- a/runtime/mlBytes.js +++ b/runtime/mlBytes.js @@ -82,115 +82,6 @@ function caml_subarray_to_jsbytes(a, i, len) { return s; } -//Provides: caml_utf8_of_utf16 -function caml_utf8_of_utf16(s) { - for (var b = "", t = b, c, d, i = 0, l = s.length; i < l; i++) { - c = s.charCodeAt(i); - if (c < 0x80) { - for (var j = i + 1; j < l && (c = s.charCodeAt(j)) < 0x80; j++); - if (j - i > 512) { - t.slice(0, 1); - b += t; - t = ""; - b += s.slice(i, j); - } else t += s.slice(i, j); - if (j === l) break; - i = j; - } - if (c < 0x800) { - t += String.fromCharCode(0xc0 | (c >> 6)); - t += String.fromCharCode(0x80 | (c & 0x3f)); - } else if (c < 0xd800 || c >= 0xdfff) { - t += String.fromCharCode( - 0xe0 | (c >> 12), - 0x80 | ((c >> 6) & 0x3f), - 0x80 | (c & 0x3f), - ); - } else if ( - c >= 0xdbff || - i + 1 === l || - (d = s.charCodeAt(i + 1)) < 0xdc00 || - d > 0xdfff - ) { - // Unmatched surrogate pair, replaced by \ufffd (replacement character) - t += "\xef\xbf\xbd"; - } else { - i++; - c = (c << 10) + d - 0x35fdc00; - t += String.fromCharCode( - 0xf0 | (c >> 18), - 0x80 | ((c >> 12) & 0x3f), - 0x80 | ((c >> 6) & 0x3f), - 0x80 | (c & 0x3f), - ); - } - if (t.length > 1024) { - t.slice(0, 1); - b += t; - t = ""; - } - } - return b + t; -} - -//Provides: caml_utf16_of_utf8 -function caml_utf16_of_utf8(s) { - for (var b = "", t = "", c, c1, c2, v, i = 0, l = s.length; i < l; i++) { - c1 = s.charCodeAt(i); - if (c1 < 0x80) { - for (var j = i + 1; j < l && (c1 = s.charCodeAt(j)) < 0x80; j++); - if (j - i > 512) { - t.slice(0, 1); - b += t; - t = ""; - b += s.slice(i, j); - } else t += s.slice(i, j); - if (j === l) break; - i = j; - } - v = 1; - if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) { - c = c2 + (c1 << 6); - if (c1 < 0xe0) { - v = c - 0x3080; - if (v < 0x80) v = 1; - } else { - v = 2; - if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) { - c = c2 + (c << 6); - if (c1 < 0xf0) { - v = c - 0xe2080; - if (v < 0x800 || (v >= 0xd7ff && v < 0xe000)) v = 2; - } else { - v = 3; - if ( - ++i < l && - ((c2 = s.charCodeAt(i)) & -64) === 128 && - c1 < 0xf5 - ) { - v = c2 - 0x3c82080 + (c << 6); - if (v < 0x10000 || v > 0x10ffff) v = 3; - } - } - } - } - } - if (v < 4) { - // Invalid sequence - i -= v; - t += "\ufffd"; - } else if (v > 0xffff) - t += String.fromCharCode(0xd7c0 + (v >> 10), 0xdc00 + (v & 0x3ff)); - else t += String.fromCharCode(v); - if (t.length > 1024) { - t.slice(0, 1); - b += t; - t = ""; - } - } - return b + t; -} - //Provides: jsoo_is_ascii function jsoo_is_ascii(s) { // The regular expression gets better at around this point for all browsers @@ -429,17 +320,29 @@ function caml_bytes_set(s, i, c) { return caml_bytes_unsafe_set(s, i, c); } +//Provides: jsoo_text_encoder +var jsoo_text_encoder = new TextEncoder(); + +//Provides: jsoo_text_decoder +var jsoo_text_decoder = new TextDecoder(); + //Provides: caml_bytes_of_utf16_jsstring -//Requires: jsoo_is_ascii, caml_utf8_of_utf16, MlBytes +//Requires: MlBytes, jsoo_text_encoder +//Requires: jsoo_is_ascii function caml_bytes_of_utf16_jsstring(s) { - var tag = 9 /* BYTES | ASCII */; - if (!jsoo_is_ascii(s)) - (tag = 8) /* BYTES | NOT_ASCII */, (s = caml_utf8_of_utf16(s)); - return new MlBytes(tag, s, s.length); + if(jsoo_is_ascii(s)){ + return new MlBytes(9, s, s.length) + } + else { + var a = jsoo_text_encoder.encode(s); + return new MlBytes(4, a, a.length); + } } //Provides: MlBytes -//Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8 +//Requires: caml_convert_string_to_bytes, jsoo_is_ascii +//Requires: caml_uint8_array_of_bytes +//Requires: jsoo_text_decoder function MlBytes(tag, contents, length) { this.t = tag; this.c = contents; @@ -462,9 +365,9 @@ MlBytes.prototype.toString = function () { } }; MlBytes.prototype.toUtf16 = function () { - var r = this.toString(); - if (this.t === 9) return r; - return caml_utf16_of_utf8(r); + if (this.t === 9) return this.c; + var a = caml_uint8_array_of_bytes(this); + return jsoo_text_decoder.decode(a); }; MlBytes.prototype.slice = function () { var content = this.t === 4 ? this.c.slice() : this.c; @@ -777,20 +680,35 @@ function caml_jsbytes_of_string(x) { return x; } +//Provides: jsoo_text_decoder_buff +var jsoo_text_decoder_buff = new ArrayBuffer(1024); + //Provides: caml_jsstring_of_string const -//Requires: jsoo_is_ascii, caml_utf16_of_utf8 +//Requires: jsoo_is_ascii +//Requires: jsoo_text_decoder +//Requires: jsoo_text_decoder_buff //If: js-string function caml_jsstring_of_string(s) { if (jsoo_is_ascii(s)) return s; - return caml_utf16_of_utf8(s); + var a = + s.length <= jsoo_text_decoder_buff.length + ? new Uint8Array(jsoo_text_decoder_buff, 0, s.length) + : new Uint8Array(s.length); + for (var i = 0; i < s.length; i++) { + a[i] = s.charCodeAt(i); + } + return jsoo_text_decoder.decode(a); } //Provides: caml_string_of_jsstring const -//Requires: jsoo_is_ascii, caml_utf8_of_utf16, caml_string_of_jsbytes +//Requires: caml_string_of_array +//Requires: jsoo_text_encoder +//Requires: jsoo_is_ascii, caml_string_of_jsbytes //If: js-string function caml_string_of_jsstring(s) { - if (jsoo_is_ascii(s)) return caml_string_of_jsbytes(s); - else return caml_string_of_jsbytes(caml_utf8_of_utf16(s)); + if(jsoo_is_ascii(s)) return caml_string_of_jsbytes(s); + var a = jsoo_text_encoder.encode(s); + return caml_string_of_array(a); } //Provides: caml_bytes_of_jsbytes const @@ -910,7 +828,6 @@ function caml_ml_bytes_content(s) { } //Provides: caml_is_ml_string -//Requires: jsoo_is_ascii //If: js-string function caml_is_ml_string(s) { // biome-ignore lint/suspicious/noControlCharactersInRegex: expected