Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Text encode decode #1645

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/lib/reserved.ml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ let provided =
; "Int32Array"
; "Int8Array"
; "TextDecoder"
; "TextEncoder"
; "Uint16Array"
; "Uint32Array"
; "Uint8Array"
Expand Down
136 changes: 35 additions & 101 deletions runtime/mlBytes.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,88 +79,6 @@ function caml_subarray_to_jsbytes (a, i, len) {
return s;
}

//Provides: caml_utf8_of_utf16
// Encode a JavaScript (UTF-16) string as UTF-8.
// The result is a "byte string": a JS string in which every char code is
// one UTF-8 byte (0x00-0xff). Unpaired surrogates are replaced by U+FFFD
// (bytes EF BF BD).
function caml_utf8_of_utf16(s) {
  // b: output accumulated so far; t: small pending chunk that is moved into
  // b whenever it grows past 1024 chars.
  for (var b = "", t = b, c, d, i = 0, l = s.length; i < l; i++) {
    c = s.charCodeAt(i);
    if (c < 0x80) {
      // Fast path: copy a whole run of ASCII chars with a single slice.
      // When the scan stops before the end, c holds the first non-ASCII unit.
      for (var j = i + 1; (j < l) && (c = s.charCodeAt(j)) < 0x80; j++);
      // NOTE(review): the discarded t.substr(0, 1) presumably nudges the
      // engine into flattening the concatenated string -- confirm.
      if (j - i > 512) { t.substr(0, 1); b += t; t = ""; b += s.slice(i, j) }
      else t += s.slice(i, j);
      if (j == l) break;
      i = j;
    }
    if (c < 0x800) {
      // Two-byte sequence (U+0080..U+07FF).
      t += String.fromCharCode(0xc0 | (c >> 6));
      t += String.fromCharCode(0x80 | (c & 0x3f));
    } else if (c < 0xd800 || c > 0xdfff) {
      // Three-byte sequence: any BMP unit outside the surrogate range
      // 0xD800..0xDFFF (both bounds are surrogates, hence the strict `>`).
      t += String.fromCharCode(0xe0 | (c >> 12),
                               0x80 | ((c >> 6) & 0x3f),
                               0x80 | (c & 0x3f));
    } else if (c > 0xdbff || i + 1 == l ||
               (d = s.charCodeAt(i + 1)) < 0xdc00 || d > 0xdfff) {
      // Unmatched surrogate: a low surrogate (0xDC00..0xDFFF) with no
      // preceding high one, or a high surrogate (0xD800..0xDBFF) that is not
      // followed by a low one. Replaced by \ufffd (replacement character).
      t += "\xef\xbf\xbd";
    } else {
      // Valid surrogate pair: combine into a code point U+10000..U+10FFFF
      // and emit a four-byte sequence.
      i++;
      c = (c << 10) + d - 0x35fdc00;
      t += String.fromCharCode(0xf0 | (c >> 18),
                               0x80 | ((c >> 12) & 0x3f),
                               0x80 | ((c >> 6) & 0x3f),
                               0x80 | (c & 0x3f));
    }
    if (t.length > 1024) {t.substr(0, 1); b += t; t = "";}
  }
  return b+t;
}

//Provides: caml_utf16_of_utf8
// Decode a UTF-8 "byte string" (a JS string whose char codes are bytes)
// into a regular JavaScript (UTF-16) string.
// Every invalid, truncated or overlong sequence yields one U+FFFD for its
// lead byte; decoding then resumes at the very next byte.
function caml_utf16_of_utf8(s) {
  // b: output accumulated so far; t: small pending chunk moved into b once it
  // grows past 1024 chars. c1 is the lead byte, c2 the current continuation
  // byte, c the raw accumulated bits.
  // v is either the decoded code point (always >= 0x80 here) or, when it is
  // 1, 2 or 3, an error marker equal to the number of positions i has
  // advanced past the lead byte.
  for (var b = "", t = "", c, c1, c2, v, i = 0, l = s.length; i < l; i++) {
    c1 = s.charCodeAt(i);
    if (c1 < 0x80) {
      // Fast path: copy a whole run of ASCII bytes with a single slice.
      // When the scan stops before the end, c1 holds the first non-ASCII byte.
      for (var j = i + 1; (j < l) && (c1 = s.charCodeAt(j)) < 0x80; j++);
      // NOTE(review): the discarded t.substr(0, 1) presumably nudges the
      // engine into flattening the concatenated string -- confirm.
      if (j - i > 512) { t.substr(0, 1); b += t; t = ""; b += s.slice(i, j) }
      else t += s.slice(i, j);
      if (j == l) break;
      i = j;
    }
    v = 1;
    // (x & -64) == 128 tests for a continuation byte 10xxxxxx.
    if ((++i < l) && (((c2 = s.charCodeAt(i)) & -64) == 128)) {
      c = c2 + (c1 << 6);
      if (c1 < 0xe0) {
        // Two-byte sequence; 0x3080 removes the 0xC0 and 0x80 marker bits.
        v = c - 0x3080;
        if (v < 0x80) v = 1; // overlong encoding, or a stray continuation lead
      } else {
        v = 2;
        if ((++i < l) && (((c2 = s.charCodeAt(i)) & -64) == 128)) {
          c = c2 + (c << 6);
          if (c1 < 0xf0) {
            // Three-byte sequence; 0xe2080 removes the marker bits.
            v = c - 0xe2080;
            // Reject overlong forms and the surrogate range U+D800..U+DFFF.
            // U+D7FF is a valid scalar value and must be accepted.
            if ((v < 0x800) || ((v >= 0xd800) && (v < 0xe000))) v = 2;
          } else {
            v = 3;
            if ((++i < l) && (((c2 = s.charCodeAt(i)) & -64) == 128) &&
                (c1 < 0xf5)) {
              // Four-byte sequence; 0x3c82080 removes the marker bits.
              v = c2 - 0x3c82080 + (c << 6);
              // Reject overlong forms and values beyond U+10FFFF.
              if (v < 0x10000 || v > 0x10ffff) v = 3;
            }
          }
        }
      }
    }
    if (v < 4) { // Invalid sequence
      // Rewind to the lead byte so the loop's i++ resumes right after it.
      i -= v;
      t += "\ufffd";
    } else if (v > 0xffff)
      // Supplementary plane: emit a surrogate pair
      // (0xd7c0 is 0xd800 - (0x10000 >> 10)).
      t += String.fromCharCode(0xd7c0 + (v >> 10), 0xdc00 + (v & 0x3FF))
    else
      t += String.fromCharCode(v);
    if (t.length > 1024) {t.substr(0, 1); b += t; t = "";}
  }
  return b+t;
}

//Provides: jsoo_is_ascii
function jsoo_is_ascii (s) {
// The regular expression gets better at around this point for all browsers
Expand Down Expand Up @@ -397,18 +315,23 @@ function caml_bytes_set (s, i, c) {
return caml_bytes_unsafe_set (s, i, c);
}

//Provides: jsoo_text_encoder
// Single TextEncoder instance, created once when this file is evaluated.
// TextEncoder always produces UTF-8 bytes (as a Uint8Array).
var jsoo_text_encoder = new TextEncoder ();

//Provides: jsoo_text_decoder
// Single TextDecoder instance, created once when this file is evaluated.
// No label or options are passed, so it decodes with the constructor's
// defaults (UTF-8).
var jsoo_text_decoder = new TextDecoder ();

//Provides: caml_bytes_of_utf16_jsstring
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, MlBytes
//Requires: MlBytes, jsoo_text_encoder
// Build an MlBytes value from a JavaScript (UTF-16) string.
// As written, the first `return` always fires: an ASCII-only string is
// wrapped unchanged with tag 9, and any other string is first converted by
// caml_utf8_of_utf16 and wrapped with tag 8.
function caml_bytes_of_utf16_jsstring (s) {
var tag = 9 /* BYTES | ASCII */;
if (!jsoo_is_ascii(s))
// Comma expression: switch the tag and replace s by its UTF-8 byte string.
tag = 8 /* BYTES | NOT_ASCII */, s = caml_utf8_of_utf16(s);
return new MlBytes(tag, s, s.length);
// NOTE(review): the two statements below are unreachable because they follow
// an unconditional return. They look like a TextEncoder-based replacement
// for the lines above (tag 4 with the encoder's output array) -- confirm
// which version is meant to remain.
var a = jsoo_text_encoder.encode(s);
return new MlBytes(4, a, a.length);
}


//Provides: MlBytes
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii
//Requires: caml_uint8_array_of_bytes
//Requires: jsoo_text_decoder
// Constructor for the runtime's byte-sequence object.
//   t: representation tag (callers in this file pass 9 for "BYTES | ASCII",
//      8 for "BYTES | NOT_ASCII" and 4 together with an array of bytes)
//   c: the contents in the representation named by t
//   l: the length
function MlBytes (tag, contents, length) {
  this.t = tag;
  this.c = contents;
  this.l = length;
}
Expand All @@ -429,9 +352,9 @@ MlBytes.prototype.toString = function(){
}
};
// Return the contents as a regular JavaScript (UTF-16) string.
// As written, the result always comes from the first three statements: the
// toString() value is returned unchanged when the tag is 9, and is passed
// through caml_utf16_of_utf8 otherwise.
MlBytes.prototype.toUtf16 = function (){
var r = this.toString();
if(this.t == 9) return r
return caml_utf16_of_utf8(r);
// NOTE(review): everything below is unreachable because it follows an
// unconditional return. It looks like a TextDecoder-based replacement for the
// lines above -- confirm which version is meant to remain.
if(this.t == 9) return this.c;
var a = caml_uint8_array_of_bytes(this);
return jsoo_text_decoder.decode(a);
}
MlBytes.prototype.slice = function (){
var content = this.t == 4 ? this.c.slice() : this.c;
Expand Down Expand Up @@ -726,21 +649,33 @@ function caml_string_of_jsbytes(x) { return x }
//If: js-string
// Identity conversion: in this variant (guarded by the js-string directive
// above) the argument is handed back unchanged.
function caml_jsbytes_of_string(x) {
  return x;
}

//Provides: jsoo_text_decoder_buff
// 1024-byte ArrayBuffer allocated once when this file is evaluated.
// caml_jsstring_of_string references it as backing storage for short inputs.
// Note that an ArrayBuffer exposes its size as `byteLength`, not `length`.
var jsoo_text_decoder_buff = new ArrayBuffer(1024);

//Provides: caml_jsstring_of_string const
//Requires: jsoo_is_ascii, caml_utf16_of_utf8
//Requires: jsoo_is_ascii
//Requires: jsoo_text_decoder
//Requires: jsoo_text_decoder_buff
//If: js-string
// Convert an OCaml string (a JS string whose char codes are UTF-8 bytes)
// into a regular JavaScript (UTF-16) string.
// NOTE(review): decoding goes through the shared jsoo_text_decoder, so
// malformed input and a leading byte-order mark follow TextDecoder's rules
// rather than those of caml_utf16_of_utf8 -- confirm this is acceptable.
function caml_jsstring_of_string(s) {
  // Pure ASCII reads the same in both encodings.
  if (jsoo_is_ascii(s)) return s;
  var len = s.length;
  // Short inputs reuse the shared scratch buffer through a view of exactly
  // len bytes, so stale bytes from earlier calls are never decoded; longer
  // inputs get a dedicated array. decode() is synchronous, so the scratch
  // buffer is free again once this function returns.
  var a =
    (len <= jsoo_text_decoder_buff.byteLength)
    ? new Uint8Array(jsoo_text_decoder_buff, 0, len)
    : new Uint8Array(len);
  for (var i = 0; i < len; i++) {
    a[i] = s.charCodeAt(i);
  }
  return jsoo_text_decoder.decode(a);
}

//Provides: caml_string_of_jsstring const
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, caml_string_of_jsbytes
//Requires: caml_string_of_array
//Requires: jsoo_text_encoder
//If: js-string
// Convert a JavaScript (UTF-16) string into an OCaml string.
// As written, the if/else below always returns: an ASCII-only string is
// passed straight to caml_string_of_jsbytes, and any other string is first
// converted by caml_utf8_of_utf16.
function caml_string_of_jsstring (s) {
if (jsoo_is_ascii(s))
return caml_string_of_jsbytes(s)
else return caml_string_of_jsbytes(caml_utf8_of_utf16(s));
// NOTE(review): the two statements below are unreachable because both
// branches above return. They look like a TextEncoder-based replacement for
// the lines above -- confirm which version is meant to remain.
var a = jsoo_text_encoder.encode(s);
return caml_string_of_array(a);
}

//Provides: caml_bytes_of_jsbytes const
Expand Down Expand Up @@ -852,7 +787,6 @@ function caml_ml_bytes_content(s) {
}

//Provides: caml_is_ml_string
//Requires: jsoo_is_ascii
//If: js-string
// Tell whether s is a JS string made only of char codes 0x00-0xff.
// Anything that is not a primitive string yields false.
function caml_is_ml_string(s) {
  if (typeof s !== "string") {
    return false;
  }
  // A match means at least one char lies outside the byte range.
  var hasWideChar = /[^\x00-\xff]/.test(s);
  return !hasWideChar;
}
Loading