Skip to content

Commit

Permalink
Normalize inputs to String.prototype.localeCompare (#97)
Browse files Browse the repository at this point in the history
NFC-normalize the inputs, otherwise strings like "Å" and "A\u030A"
(Latin capital A followed by a combining ring above) don't compare equal.
  • Loading branch information
bnoordhuis authored Nov 20, 2023
1 parent a721bda commit b56a82d
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 36 deletions.
108 changes: 74 additions & 34 deletions quickjs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,11 @@ static const JSClassExoticMethods js_proxy_exotic_methods;
static const JSClassExoticMethods js_module_ns_exotic_methods;
static JSClassID js_class_id_alloc = JS_CLASS_INIT_COUNT;

/* Three-way comparison of two unsigned 32-bit values.
 * Returns -1 when a is smaller than b, 1 when a is larger than b,
 * and 0 when both are equal. */
static int compare_u32(uint32_t a, uint32_t b)
{
    if (a < b)
        return -1;
    if (b < a)
        return 1;
    return 0;
}

static JSValue js_int32(int32_t v)
{
return JS_MKVAL(JS_TAG_INT, v);
Expand Down Expand Up @@ -3930,14 +3935,8 @@ static int js_string_compare(JSContext *ctx,
int res, len;
len = min_int(p1->len, p2->len);
res = js_string_memcmp(p1, p2, len);
if (res == 0) {
if (p1->len == p2->len)
res = 0;
else if (p1->len < p2->len)
res = -1;
else
res = 1;
}
if (res == 0)
res = compare_u32(p1->len, p2->len);
return res;
}

Expand Down Expand Up @@ -39138,24 +39137,80 @@ static BOOL test_final_sigma(JSString *p, int sigma_pos)
return !lre_is_cased(c1);
}

/* Decode the contents of `p` into a newly allocated uint32_t array,
 * one element per value returned by string_getc().
 *
 * On success the array is stored in *pbuf and the number of elements
 * written is returned. If the allocation fails, *pbuf is set to NULL and
 * -1 is returned. At least one element is always allocated, so an empty
 * string still yields a non-NULL buffer.
 *
 * The buffer comes from js_malloc(); callers in this file release it
 * with js_free(). */
static int to_utf32_buf(JSContext *ctx, JSString *p, uint32_t **pbuf)
{
    uint32_t *out;
    int src_len, pos, count;

    src_len = p->len;
    out = js_malloc(ctx, max_int(1, src_len) * sizeof(*out));
    if (!out) {
        *pbuf = NULL;
        return -1;
    }

    pos = 0;
    count = 0;
    /* string_getc() receives &pos and is expected to advance it past the
     * units it consumed; the loop has no increment of its own. */
    while (pos < src_len)
        out[count++] = string_getc(p, &pos);

    *pbuf = out;
    return count;
}

/*
 * String.prototype.localeCompare: compares the string value of `this_val`
 * with the string value of argv[0] and returns the result as a JS value.
 *
 * NOTE(review): this listing comes from a rendered diff that has lost its
 * +/- markers, so statements removed by the commit and statements added by
 * it appear side by side (for example the two sets of local declarations,
 * and both an early `return JS_EXCEPTION` and a `goto exception` after the
 * second string conversion). The comments below describe each statement as
 * written; check the real quickjs.c before relying on the combined text.
 */
static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
JSValue a, b;
int cmp;
int i, n, an, bn, cmp;
uint32_t *as, *bs, *ts;
JSValue a, b, ret;

/* Default result; only overwritten once the comparison has completed. */
ret = JS_EXCEPTION;
/* The cleanup code at the `exception` label calls js_free() on both
 * buffers, including on paths where they were never allocated. */
as = NULL;
bs = NULL;

/* Convert the receiver to a string; nothing else is held yet, so a plain
 * return is sufficient on failure. */
a = JS_ToStringCheckObject(ctx, this_val);
if (JS_IsException(a))
return JS_EXCEPTION;

/* Convert the first argument to a string. */
b = JS_ToString(ctx, argv[0]);
if (JS_IsException(b)) {
JS_FreeValue(ctx, a);
return JS_EXCEPTION;
}
/* Plain comparison of the two strings without normalization. */
cmp = js_string_compare(ctx, JS_VALUE_GET_STRING(a), JS_VALUE_GET_STRING(b));
if (JS_IsException(b))
goto exception;

/* Expand both strings into uint32_t buffers; to_utf32_buf() returns -1
 * when its allocation fails. */
an = to_utf32_buf(ctx, JS_VALUE_GET_STRING(a), &as);
if (an == -1)
goto exception;

bn = to_utf32_buf(ctx, JS_VALUE_GET_STRING(b), &bs);
if (bn == -1)
goto exception;

/* NFC-normalize the first buffer into `ts`. A return of -1 is treated as
 * failure; otherwise the un-normalized buffer is freed and replaced. */
// TODO(bnoordhuis) skip normalization when input is latin1
an = unicode_normalize(&ts, as, an, UNICODE_NFC, ctx,
(DynBufReallocFunc *)js_realloc);
if (an == -1)
goto exception;
js_free(ctx, as);
as = ts;

/* Same normalization step for the second buffer. */
// TODO(bnoordhuis) skip normalization when input is latin1
bn = unicode_normalize(&ts, bs, bn, UNICODE_NFC, ctx,
(DynBufReallocFunc *)js_realloc);
if (bn == -1)
goto exception;
js_free(ctx, bs);
bs = ts;

/* Walk the common prefix element by element. The first differing element
 * decides the result; if none differs, the lengths are compared instead. */
n = min_int(an, bn);
for (i = 0; i < n; i++)
if (as[i] != bs[i])
break;
if (i < n)
cmp = compare_u32(as[i], bs[i]);
else
cmp = compare_u32(an, bn);
ret = js_int32(cmp);

/* Cleanup code reached both by falling through after the comparison and
 * by the `goto exception` jumps above. */
exception:
JS_FreeValue(ctx, a);
JS_FreeValue(ctx, b);
return JS_NewInt32(ctx, cmp);
js_free(ctx, as);
js_free(ctx, bs);
return ret;
}

static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
Expand Down Expand Up @@ -39200,29 +39255,14 @@ static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
/*
 * Convert `val1` to a string and expand it into a newly allocated
 * uint32_t buffer stored in *pbuf. Returns the number of elements written,
 * or -1 on failure.
 *
 * NOTE(review): this listing comes from a rendered diff that has lost its
 * +/- markers. It shows both a hand-written allocate-and-decode loop and a
 * call to to_utf32_buf() that does the same work, along with two
 * declarations of `len` and several return statements. Check the real
 * quickjs.c to see which statements are current.
 */
static int JS_ToUTF32String(JSContext *ctx, uint32_t **pbuf, JSValueConst val1)
{
JSValue val;
JSString *p;
uint32_t *buf;
int i, j, len;
int len;

/* Coerce the input to a string value; -1 is returned if that fails. */
val = JS_ToString(ctx, val1);
if (JS_IsException(val))
return -1;
p = JS_VALUE_GET_STRING(val);
len = p->len;
/* UTF32 buffer length is len minus the number of correct surrogates pairs */
/* At least one element is allocated even when len is 0. */
buf = js_malloc(ctx, sizeof(buf[0]) * max_int(len, 1));
if (!buf) {
JS_FreeValue(ctx, val);
goto fail;
}
/* string_getc() is passed &i and is expected to advance it; the loop has
 * no increment of its own. */
for(i = j = 0; i < len;)
buf[j++] = string_getc(p, &i);
/* Helper-based form of the same conversion: writes the buffer to *pbuf
 * and returns the element count, or -1 on allocation failure. */
len = to_utf32_buf(ctx, JS_VALUE_GET_STRING(val), pbuf);
/* The string value is released once its contents have been copied. */
JS_FreeValue(ctx, val);
*pbuf = buf;
return j;
fail:
*pbuf = NULL;
return -1;
return len;
}

static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len)
Expand Down
2 changes: 0 additions & 2 deletions test262_errors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-r
test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-realm.js:20: strict mode: Test262Error: Expected a ReferenceError but got a different error constructor with the same name
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: strict mode: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: TypeError: out-of-bound numeric index (Testing with Float64Array.)
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: strict mode: TypeError: out-of-bound numeric index (Testing with Float64Array.)
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
Expand Down

0 comments on commit b56a82d

Please sign in to comment.