Skip to content

Commit d89658a

Browse files
committed
Add ref-counted string slices
Instead of copying long substring slices, store a reference to the parent string together with an offset into it. This becomes more profitable as strings get longer: for 64k string slices, it is about 6.5x faster than copying. The downside of ref-counted string slices is that they can keep the parent string alive for longer than it otherwise would have been, leading to higher memory usage than without string slices. That is why this optimization is only applied to long-ish slices, currently slices > 1,024 bytes. Possible future enhancements are slicing only when the substring is > x% of the parent string, or copying lazily when the slice string is the only thing referencing the parent string.
1 parent 0721419 commit d89658a

File tree

4 files changed

+147
-22
lines changed

4 files changed

+147
-22
lines changed

quickjs.c

Lines changed: 89 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,12 @@ typedef enum {
545545
JS_ATOM_KIND_PRIVATE,
546546
} JSAtomKindEnum;
547547

548-
#define JS_ATOM_HASH_MASK ((1 << 30) - 1)
548+
/* How the characters of a JSString are stored.  The value is kept in the
   1-bit `kind` field of JSString, so only these two kinds can exist. */
typedef enum {
    JS_STRING_KIND_NORMAL = 0, /* characters follow the JSString header */
    JS_STRING_KIND_SLICE = 1,  /* a JSStringSlice follows the header */
} JSStringKind;
552+
553+
#define JS_ATOM_HASH_MASK ((1 << 29) - 1)
549554

550555
struct JSString {
551556
JSRefCountHeader header; /* must come first, 32-bit */
@@ -554,7 +559,8 @@ struct JSString {
554559
/* for JS_ATOM_TYPE_SYMBOL: hash = 0, atom_type = 3,
555560
for JS_ATOM_TYPE_PRIVATE: hash = 1, atom_type = 3
556561
XXX: could change encoding to have one more bit in hash */
557-
uint32_t hash : 30;
562+
uint32_t hash : 29;
563+
uint8_t kind : 1;
558564
uint8_t atom_type : 2; /* != 0 if atom, JS_ATOM_TYPE_x */
559565
uint32_t hash_next; /* atom_index for JS_ATOM_TYPE_SYMBOL */
560566
JSWeakRefRecord *first_weak_ref;
@@ -563,14 +569,39 @@ struct JSString {
563569
#endif
564570
};
565571

572+
typedef struct JSStringSlice {
573+
JSString *parent;
574+
uint32_t start; // in characters, not bytes
575+
} JSStringSlice;
576+
566577
/* Return a pointer to the first 8-bit character of `p`.
   For a normal string the characters live right behind the header; for a
   slice they live inside the parent, `start` characters in. */
static inline uint8_t *str8(JSString *p)
{
    const JSStringSlice *slice;

    if (p->kind == JS_STRING_KIND_NORMAL)
        return (void *)(p + 1);

    if (p->kind != JS_STRING_KIND_SLICE) {
        /* unknown string kind */
        abort();
        return NULL;
    }

    slice = (void *)(p + 1);
    return str8(slice->parent) + slice->start;
}
570591

571592
/* Return a pointer to the first 16-bit character of `p`.
   For a normal string the characters live right behind the header; for a
   slice they live inside the parent, `start` characters in. */
static inline uint16_t *str16(JSString *p)
{
    const JSStringSlice *slice;

    if (p->kind == JS_STRING_KIND_NORMAL)
        return (void *)(p + 1);

    if (p->kind != JS_STRING_KIND_SLICE) {
        /* unknown string kind */
        abort();
        return NULL;
    }

    slice = (void *)(p + 1);
    return str16(slice->parent) + slice->start;
}
575606

576607
typedef struct JSClosureVar {
@@ -2049,6 +2080,7 @@ static JSString *js_alloc_string_rt(JSRuntime *rt, int max_len, int is_wide_char
20492080
str->header.ref_count = 1;
20502081
str->is_wide_char = is_wide_char;
20512082
str->len = max_len;
2083+
str->kind = JS_STRING_KIND_NORMAL;
20522084
str->atom_type = 0;
20532085
str->hash = 0; /* optional but costless */
20542086
str->hash_next = 0; /* optional */
@@ -2069,18 +2101,28 @@ static JSString *js_alloc_string(JSContext *ctx, int max_len, int is_wide_char)
20692101
return p;
20702102
}
20712103

2104+
static inline void js_free_string0(JSRuntime *rt, JSString *str);

/* same as JS_FreeValueRT() but faster: drop one reference to `str` and
   release it through js_free_string0() once no reference is left */
static inline void js_free_string(JSRuntime *rt, JSString *str)
{
    str->header.ref_count -= 1;
    if (str->header.ref_count > 0)
        return;
    js_free_string0(rt, str);
}
2112+
2113+
/* Release a string whose reference count has already dropped to zero.
   Atoms are handed to JS_FreeAtomStruct(); any other string is freed here,
   after giving back the reference a slice holds on its parent. */
static inline void js_free_string0(JSRuntime *rt, JSString *str)
{
    if (str->atom_type) {
        /* NOTE(review): this path does not release a slice's parent here;
           presumably an atom is never a slice -- confirm. */
        JS_FreeAtomStruct(rt, str);
        return;
    }
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
    list_del(&str->link);
#endif
    if (str->kind == JS_STRING_KIND_SLICE) {
        JSStringSlice *slice = (void *)(str + 1);

        // safe, recurses only 1 level
        js_free_string(rt, slice->parent);
    }
    js_free_rt(rt, str);
}
20862128

@@ -2962,6 +3004,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type)
29623004
p->header.ref_count = 1;
29633005
p->is_wide_char = str->is_wide_char;
29643006
p->len = str->len;
3007+
p->kind = JS_STRING_KIND_NORMAL;
29653008
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
29663009
list_add_tail(&p->link, &rt->string_list);
29673010
#endif
@@ -2976,6 +3019,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type)
29763019
p->header.ref_count = 1;
29773020
p->is_wide_char = 1; /* Hack to represent NULL as a JSString */
29783021
p->len = 0;
3022+
p->kind = JS_STRING_KIND_NORMAL;
29793023
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
29803024
list_add_tail(&p->link, &rt->string_list);
29813025
#endif
@@ -3680,13 +3724,39 @@ static JSValue js_new_string_char(JSContext *ctx, uint16_t c)
36803724

36813725
static JSValue js_sub_string(JSContext *ctx, JSString *p, int start, int end)
36823726
{
3683-
int len = end - start;
3727+
JSStringSlice *slice;
3728+
JSString *q;
3729+
int len;
3730+
3731+
len = end - start;
36843732
if (start == 0 && end == p->len) {
36853733
return js_dup(JS_MKPTR(JS_TAG_STRING, p));
36863734
}
36873735
if (len <= 0) {
36883736
return js_empty_string(ctx->rt);
36893737
}
3738+
// 1024 is about the cutoff point where it starts getting more profitable
3739+
// to ref slice than to copy
3740+
if (len > (1024 >> p->is_wide_char)) {
3741+
if (p->kind == JS_STRING_KIND_SLICE) {
3742+
slice = (void *)&p[1];
3743+
p = slice->parent;
3744+
start += slice->start;
3745+
}
3746+
// allocate as 16 bit wide string to avoid wastage;
3747+
// js_alloc_string allocates 1 byte extra for 8 bit strings;
3748+
q = js_alloc_string(ctx, sizeof(*slice)/2, /*is_wide_char*/true);
3749+
if (!q)
3750+
return JS_EXCEPTION;
3751+
q->is_wide_char = p->is_wide_char;
3752+
q->kind = JS_STRING_KIND_SLICE;
3753+
q->len = len;
3754+
slice = (void *)&q[1];
3755+
slice->parent = p;
3756+
slice->start = start;
3757+
p->header.ref_count++;
3758+
return JS_MKPTR(JS_TAG_STRING, q);
3759+
}
36903760
if (p->is_wide_char) {
36913761
JSString *str;
36923762
int i;
@@ -5750,17 +5820,7 @@ static void js_free_value_rt(JSRuntime *rt, JSValue v)
57505820

57515821
switch(tag) {
57525822
case JS_TAG_STRING:
5753-
{
5754-
JSString *p = JS_VALUE_GET_STRING(v);
5755-
if (p->atom_type) {
5756-
JS_FreeAtomStruct(rt, p);
5757-
} else {
5758-
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
5759-
list_del(&p->link);
5760-
#endif
5761-
js_free_rt(rt, p);
5762-
}
5763-
}
5823+
js_free_string0(rt, JS_VALUE_GET_STRING(v));
57645824
break;
57655825
case JS_TAG_OBJECT:
57665826
case JS_TAG_FUNCTION_BYTECODE:
@@ -58038,6 +58098,13 @@ uintptr_t js_std_cmd(int cmd, ...) {
5803858098
*pv = ctx->error_back_trace;
5803958099
ctx->error_back_trace = JS_UNDEFINED;
5804058100
break;
58101+
case 3: // GetStringKind
58102+
ctx = va_arg(ap, JSContext *);
58103+
pv = va_arg(ap, JSValue *);
58104+
rv = -1;
58105+
if (JS_IsString(*pv))
58106+
rv = JS_VALUE_GET_STRING(*pv)->kind;
58107+
break;
5804158108
default:
5804258109
rv = -1;
5804358110
}

run-test262.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,15 +1700,32 @@ void update_stats(JSRuntime *rt, const char *filename) {
17001700
js_mutex_unlock(&stats_mutex);
17011701
}
17021702

1703+
/* Shared native entry point for the `qjs` helper functions: `magic` is
   the js_std_cmd() command number and the first argument is passed to it
   by address.  The command's result is returned as a 32-bit integer.
   NOTE(review): argv[0] is passed regardless of argc; presumably missing
   arguments are padded up to the declared length -- confirm. */
static JSValue qjs_black_box(JSContext *ctx, JSValueConst this_val,
                             int argc, JSValueConst argv[], int magic)
{
    uintptr_t result;

    result = js_std_cmd(magic, ctx, argv);
    return JS_NewInt32(ctx, result);
}
1708+
1709+
static const JSCFunctionListEntry qjs_methods[] = {
1710+
JS_CFUNC_MAGIC_DEF("getStringKind", 1, qjs_black_box, /*GetStringKind*/3),
1711+
};
1712+
1713+
static const JSCFunctionListEntry qjs_object =
1714+
JS_OBJECT_DEF("qjs", qjs_methods, countof(qjs_methods), JS_PROP_C_W_E);
1715+
17031716
/* Create a context for `rt`.  When the context was created and `local`
   is set, also register the qjs:std, qjs:os and qjs:bjson modules and
   install the `qjs` helper object on the global object.
   Returns the new context, or whatever JS_NewContext() returned on
   failure. */
JSContext *JS_NewCustomContext(JSRuntime *rt)
{
    JSContext *ctx;
    JSValue global;

    ctx = JS_NewContext(rt);
    if (!ctx || !local)
        return ctx;

    js_init_module_std(ctx, "qjs:std");
    js_init_module_os(ctx, "qjs:os");
    js_init_module_bjson(ctx, "qjs:bjson");

    global = JS_GetGlobalObject(ctx);
    JS_SetPropertyFunctionList(ctx, global, &qjs_object, 1);
    JS_FreeValue(ctx, global);

    return ctx;
}

tests/microbench.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,39 @@ function string_build4(n)
797797
return n * 100;
798798
}
799799

800+
/* Benchmark: take a 1-character tail slice of a 64 KiB string,
   1000 times per iteration.  Returns the number of slices taken. */
function string_slice1(n)
{
    var big = "x".repeat(1 << 16);
    var round = 0;
    var left;

    while (round < n) {
        left = 1000;
        while (left-- > 0)
            big.slice(-1); // too short for JSStringSlice
        round++;
    }
    return n * 1000;
}
810+
811+
/* Benchmark: take a 1024-character tail slice of a 64 KiB string,
   1000 times per iteration.  Returns the number of slices taken. */
function string_slice2(n)
{
    var big = "x".repeat(1 << 16);
    var round = 0;
    var left;

    while (round < n) {
        left = 1000;
        while (left-- > 0)
            big.slice(-1024);
        round++;
    }
    return n * 1000;
}
821+
822+
/* Benchmark: slice everything but the first character off a 64 KiB
   string, 1000 times per iteration.  Returns the number of slices taken. */
function string_slice3(n)
{
    var big = "x".repeat(1 << 16);
    var round = 0;
    var left;

    while (round < n) {
        left = 1000;
        while (left-- > 0)
            big.slice(1);
        round++;
    }
    return n * 1000;
}
832+
800833
/* sort bench */
801834

802835
function sort_bench(text) {
@@ -1114,6 +1147,9 @@ function main(argc, argv, g)
11141147
string_build2,
11151148
//string_build3,
11161149
//string_build4,
1150+
string_slice1,
1151+
string_slice2,
1152+
string_slice3,
11171153
sort_bench,
11181154
int_to_string,
11191155
int_toString,

tests/test_builtin.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,11 @@ function test_string()
378378
assert(eval('"\0"'), "\0");
379379

380380
assert("abc".padStart(Infinity, ""), "abc");
381+
382+
assert(qjs.getStringKind("xyzzy".slice(1)),
383+
/*JS_STRING_KIND_NORMAL*/0);
384+
assert(qjs.getStringKind("xyzzy".repeat(512).slice(1)),
385+
/*JS_STRING_KIND_SLICE*/1);
381386
}
382387

383388
function test_math()

0 commit comments

Comments
 (0)