Skip to content

Commit b56a82d

Browse files
authored
Normalize inputs to String.prototype.localeCompare (#97)
NFC-normalize the inputs; otherwise canonically equivalent strings such as "Å" (precomposed) and "A\u030A" (Latin capital letter A followed by U+030A, the combining ring above) do not compare equal.
1 parent a721bda commit b56a82d

File tree

2 files changed

+74
-36
lines changed

2 files changed

+74
-36
lines changed

quickjs.c

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,11 @@ static const JSClassExoticMethods js_proxy_exotic_methods;
11511151
static const JSClassExoticMethods js_module_ns_exotic_methods;
11521152
static JSClassID js_class_id_alloc = JS_CLASS_INIT_COUNT;
11531153

1154+
static int compare_u32(uint32_t a, uint32_t b)
1155+
{
1156+
return -(a < b) + (b < a); // -1, 0 or 1
1157+
}
1158+
11541159
static JSValue js_int32(int32_t v)
11551160
{
11561161
return JS_MKVAL(JS_TAG_INT, v);
@@ -3930,14 +3935,8 @@ static int js_string_compare(JSContext *ctx,
39303935
int res, len;
39313936
len = min_int(p1->len, p2->len);
39323937
res = js_string_memcmp(p1, p2, len);
3933-
if (res == 0) {
3934-
if (p1->len == p2->len)
3935-
res = 0;
3936-
else if (p1->len < p2->len)
3937-
res = -1;
3938-
else
3939-
res = 1;
3940-
}
3938+
if (res == 0)
3939+
res = compare_u32(p1->len, p2->len);
39413940
return res;
39423941
}
39433942

@@ -39138,24 +39137,80 @@ static BOOL test_final_sigma(JSString *p, int sigma_pos)
3913839137
return !lre_is_cased(c1);
3913939138
}
3914039139

39140+
static int to_utf32_buf(JSContext *ctx, JSString *p, uint32_t **pbuf)
39141+
{
39142+
uint32_t *b;
39143+
int i, j, n;
39144+
39145+
j = -1;
39146+
n = p->len;
39147+
b = js_malloc(ctx, max_int(1, n) * sizeof(*b));
39148+
if (b)
39149+
for (i = j = 0; i < n;)
39150+
b[j++] = string_getc(p, &i);
39151+
*pbuf = b;
39152+
return j;
39153+
}
39154+
3914139155
static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
3914239156
int argc, JSValueConst *argv)
3914339157
{
39144-
JSValue a, b;
39145-
int cmp;
39158+
int i, n, an, bn, cmp;
39159+
uint32_t *as, *bs, *ts;
39160+
JSValue a, b, ret;
39161+
39162+
ret = JS_EXCEPTION;
39163+
as = NULL;
39164+
bs = NULL;
3914639165

3914739166
a = JS_ToStringCheckObject(ctx, this_val);
3914839167
if (JS_IsException(a))
3914939168
return JS_EXCEPTION;
39169+
3915039170
b = JS_ToString(ctx, argv[0]);
39151-
if (JS_IsException(b)) {
39152-
JS_FreeValue(ctx, a);
39153-
return JS_EXCEPTION;
39154-
}
39155-
cmp = js_string_compare(ctx, JS_VALUE_GET_STRING(a), JS_VALUE_GET_STRING(b));
39171+
if (JS_IsException(b))
39172+
goto exception;
39173+
39174+
an = to_utf32_buf(ctx, JS_VALUE_GET_STRING(a), &as);
39175+
if (an == -1)
39176+
goto exception;
39177+
39178+
bn = to_utf32_buf(ctx, JS_VALUE_GET_STRING(b), &bs);
39179+
if (bn == -1)
39180+
goto exception;
39181+
39182+
// TODO(bnoordhuis) skip normalization when input is latin1
39183+
an = unicode_normalize(&ts, as, an, UNICODE_NFC, ctx,
39184+
(DynBufReallocFunc *)js_realloc);
39185+
if (an == -1)
39186+
goto exception;
39187+
js_free(ctx, as);
39188+
as = ts;
39189+
39190+
// TODO(bnoordhuis) skip normalization when input is latin1
39191+
bn = unicode_normalize(&ts, bs, bn, UNICODE_NFC, ctx,
39192+
(DynBufReallocFunc *)js_realloc);
39193+
if (bn == -1)
39194+
goto exception;
39195+
js_free(ctx, bs);
39196+
bs = ts;
39197+
39198+
n = min_int(an, bn);
39199+
for (i = 0; i < n; i++)
39200+
if (as[i] != bs[i])
39201+
break;
39202+
if (i < n)
39203+
cmp = compare_u32(as[i], bs[i]);
39204+
else
39205+
cmp = compare_u32(an, bn);
39206+
ret = js_int32(cmp);
39207+
39208+
exception:
3915639209
JS_FreeValue(ctx, a);
3915739210
JS_FreeValue(ctx, b);
39158-
return JS_NewInt32(ctx, cmp);
39211+
js_free(ctx, as);
39212+
js_free(ctx, bs);
39213+
return ret;
3915939214
}
3916039215

3916139216
static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
@@ -39200,29 +39255,14 @@ static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
3920039255
static int JS_ToUTF32String(JSContext *ctx, uint32_t **pbuf, JSValueConst val1)
3920139256
{
3920239257
JSValue val;
39203-
JSString *p;
39204-
uint32_t *buf;
39205-
int i, j, len;
39258+
int len;
3920639259

3920739260
val = JS_ToString(ctx, val1);
3920839261
if (JS_IsException(val))
3920939262
return -1;
39210-
p = JS_VALUE_GET_STRING(val);
39211-
len = p->len;
39212-
/* UTF32 buffer length is len minus the number of correct surrogates pairs */
39213-
buf = js_malloc(ctx, sizeof(buf[0]) * max_int(len, 1));
39214-
if (!buf) {
39215-
JS_FreeValue(ctx, val);
39216-
goto fail;
39217-
}
39218-
for(i = j = 0; i < len;)
39219-
buf[j++] = string_getc(p, &i);
39263+
len = to_utf32_buf(ctx, JS_VALUE_GET_STRING(val), pbuf);
3922039264
JS_FreeValue(ctx, val);
39221-
*pbuf = buf;
39222-
return j;
39223-
fail:
39224-
*pbuf = NULL;
39225-
return -1;
39265+
return len;
3922639266
}
3922739267

3922839268
static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len)

test262_errors.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-r
3737
test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-realm.js:20: strict mode: Test262Error: Expected a ReferenceError but got a different error constructor with the same name
3838
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
3939
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
40-
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
41-
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: strict mode: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
4240
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: TypeError: out-of-bound numeric index (Testing with Float64Array.)
4341
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: strict mode: TypeError: out-of-bound numeric index (Testing with Float64Array.)
4442
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.)

0 commit comments

Comments
 (0)