Skip to content

Commit 08db51a

Browse files
authored
Add ref-counted string slices (#1175)
Instead of copying long substring slices, store a reference to, and an offset into, the parent string. This gets more profitable as strings get longer: for 64k string slices, it's about 6.5x faster than copying. The downside of ref-counted string slices is that they can keep the parent string alive for longer than it otherwise would have been, leading to higher memory usage than without string slices. That's why this optimization is only applied to long-ish slices, currently slices > 1,024 bytes. Possible future enhancements are slicing only when the substring is > x% of the parent string, or copying lazily when the slice string is the only thing referencing the parent string.
1 parent 0721419 commit 08db51a

File tree

4 files changed

+148
-22
lines changed

4 files changed

+148
-22
lines changed

quickjs.c

Lines changed: 90 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,9 @@ typedef enum JSErrorEnum {
211211
#define JS_MAX_LOCAL_VARS 65535
212212
#define JS_STACK_SIZE_MAX 65534
213213
#define JS_STRING_LEN_MAX ((1 << 30) - 1)
214+
// 1,024 bytes is about the cutoff point where it starts getting
215+
// more profitable to ref slice than to copy
216+
#define JS_STRING_SLICE_LEN_MAX 1024 // in bytes
214217

215218
#define __exception __attribute__((warn_unused_result))
216219

@@ -545,7 +548,12 @@ typedef enum {
545548
JS_ATOM_KIND_PRIVATE,
546549
} JSAtomKindEnum;
547550

548-
#define JS_ATOM_HASH_MASK ((1 << 30) - 1)
551+
/* Storage layout of a JSString. The value is kept in the 1-bit `kind`
   bit-field of struct JSString, so only these two values fit. */
typedef enum {
552+
JS_STRING_KIND_NORMAL, /* character data follows the JSString header */
553+
JS_STRING_KIND_SLICE, /* a JSStringSlice follows the JSString header */
554+
} JSStringKind;
555+
556+
#define JS_ATOM_HASH_MASK ((1 << 29) - 1)
549557

550558
struct JSString {
551559
JSRefCountHeader header; /* must come first, 32-bit */
@@ -554,7 +562,8 @@ struct JSString {
554562
/* for JS_ATOM_TYPE_SYMBOL: hash = 0, atom_type = 3,
555563
for JS_ATOM_TYPE_PRIVATE: hash = 1, atom_type = 3
556564
XXX: could change encoding to have one more bit in hash */
557-
uint32_t hash : 30;
565+
uint32_t hash : 29;
566+
uint8_t kind : 1;
558567
uint8_t atom_type : 2; /* != 0 if atom, JS_ATOM_TYPE_x */
559568
uint32_t hash_next; /* atom_index for JS_ATOM_TYPE_SYMBOL */
560569
JSWeakRefRecord *first_weak_ref;
@@ -563,14 +572,39 @@ struct JSString {
563572
#endif
564573
};
565574

575+
/* Payload of a JS_STRING_KIND_SLICE string. It is read from the memory
   directly after the JSString header (&p[1]), where a normal string keeps
   its character data, and identifies the string that holds the characters. */
typedef struct JSStringSlice {
576+
JSString *parent; // string whose character data is shared
577+
uint32_t start; // in characters, not bytes
578+
} JSStringSlice;
579+
566580
/* Return a pointer to the character data of p, viewed as 8-bit units.
   Normal strings keep their characters right after the header. Slices keep
   a JSStringSlice there instead, so the pointer is resolved through the
   parent and advanced by the slice's start offset.
   This function does not look at p->is_wide_char. Calls abort() if p->kind
   is neither known value. */
static inline uint8_t *str8(JSString *p)
567581
{
568-
return (void *)(p + 1);
582+
JSStringSlice *slice;
583+
584+
switch (p->kind) {
585+
case JS_STRING_KIND_NORMAL:
586+
return (void *)&p[1];
587+
case JS_STRING_KIND_SLICE:
588+
slice = (void *)&p[1];
589+
// NOTE(review): js_sub_string re-parents a slice of a slice, so this
// recursion is presumably at most one level deep - confirm no other
// code creates slices.
return str8(slice->parent) + slice->start;
590+
}
591+
abort();
592+
return NULL;
569593
}
570594

571595
/* Return a pointer to the character data of p, viewed as 16-bit units.
   Normal strings keep their characters right after the header. Slices keep
   a JSStringSlice there instead, so the pointer is resolved through the
   parent and advanced by the slice's start offset (counted in characters,
   so the uint16_t pointer arithmetic applies the 2-byte scaling).
   This function does not look at p->is_wide_char. Calls abort() if p->kind
   is neither known value. */
static inline uint16_t *str16(JSString *p)
572596
{
573-
return (void *)(p + 1);
597+
JSStringSlice *slice;
598+
599+
switch (p->kind) {
600+
case JS_STRING_KIND_NORMAL:
601+
return (void *)&p[1];
602+
case JS_STRING_KIND_SLICE:
603+
slice = (void *)&p[1];
604+
// NOTE(review): js_sub_string re-parents a slice of a slice, so this
// recursion is presumably at most one level deep - confirm no other
// code creates slices.
return str16(slice->parent) + slice->start;
605+
}
606+
abort();
607+
return NULL;
574608
}
575609

576610
typedef struct JSClosureVar {
@@ -2049,6 +2083,7 @@ static JSString *js_alloc_string_rt(JSRuntime *rt, int max_len, int is_wide_char
20492083
str->header.ref_count = 1;
20502084
str->is_wide_char = is_wide_char;
20512085
str->len = max_len;
2086+
str->kind = JS_STRING_KIND_NORMAL;
20522087
str->atom_type = 0;
20532088
str->hash = 0; /* optional but costless */
20542089
str->hash_next = 0; /* optional */
@@ -2069,18 +2104,28 @@ static JSString *js_alloc_string(JSContext *ctx, int max_len, int is_wide_char)
20692104
return p;
20702105
}
20712106

2107+
static inline void js_free_string0(JSRuntime *rt, JSString *str);
2108+
20722109
/* same as JS_FreeValueRT() but faster */
/* Drops one reference to str; once the count is zero or below, the actual
   release is delegated to js_free_string0(). */
20732110
static inline void js_free_string(JSRuntime *rt, JSString *str)
20742111
{
2075-
if (--str->header.ref_count <= 0) {
2076-
if (str->atom_type) {
2077-
JS_FreeAtomStruct(rt, str);
2078-
} else {
2112+
if (--str->header.ref_count <= 0)
2113+
js_free_string0(rt, str);
2114+
}
2115+
2116+
/* Release a string without touching its own reference count; the caller
   js_free_string() only gets here after the count has dropped to zero or
   below. Atoms are handed to JS_FreeAtomStruct(). Any other string is
   unlinked from the leak-tracking list (ENABLE_DUMPS builds only), gives up
   the reference it holds on its parent if it is a slice, and is then freed.
   NOTE(review): the atom branch never releases slice->parent; presumably an
   atom is never a JS_STRING_KIND_SLICE string - confirm in __JS_NewAtom. */
static inline void js_free_string0(JSRuntime *rt, JSString *str)
2117+
{
2118+
if (str->atom_type) {
2119+
JS_FreeAtomStruct(rt, str);
2120+
} else {
20792121
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
2080-
list_del(&str->link);
2122+
list_del(&str->link);
20812123
#endif
2082-
js_free_rt(rt, str);
2124+
if (str->kind == JS_STRING_KIND_SLICE) {
2125+
JSStringSlice *slice = (void *)&str[1];
2126+
js_free_string(rt, slice->parent); // safe, recurses only 1 level
20832127
}
2128+
js_free_rt(rt, str);
20842129
}
20852130
}
20862131

@@ -2962,6 +3007,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type)
29623007
p->header.ref_count = 1;
29633008
p->is_wide_char = str->is_wide_char;
29643009
p->len = str->len;
3010+
p->kind = JS_STRING_KIND_NORMAL;
29653011
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
29663012
list_add_tail(&p->link, &rt->string_list);
29673013
#endif
@@ -2976,6 +3022,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type)
29763022
p->header.ref_count = 1;
29773023
p->is_wide_char = 1; /* Hack to represent NULL as a JSString */
29783024
p->len = 0;
3025+
p->kind = JS_STRING_KIND_NORMAL;
29793026
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
29803027
list_add_tail(&p->link, &rt->string_list);
29813028
#endif
@@ -3680,13 +3727,37 @@ static JSValue js_new_string_char(JSContext *ctx, uint16_t c)
36803727

36813728
static JSValue js_sub_string(JSContext *ctx, JSString *p, int start, int end)
36823729
{
3683-
int len = end - start;
3730+
JSStringSlice *slice;
3731+
JSString *q;
3732+
int len;
3733+
3734+
len = end - start;
36843735
if (start == 0 && end == p->len) {
36853736
return js_dup(JS_MKPTR(JS_TAG_STRING, p));
36863737
}
36873738
if (len <= 0) {
36883739
return js_empty_string(ctx->rt);
36893740
}
3741+
if (len > (JS_STRING_SLICE_LEN_MAX >> p->is_wide_char)) {
3742+
if (p->kind == JS_STRING_KIND_SLICE) {
3743+
slice = (void *)&p[1];
3744+
p = slice->parent;
3745+
start += slice->start;
3746+
}
3747+
// allocate as 16 bit wide string to avoid wastage;
3748+
// js_alloc_string allocates 1 byte extra for 8 bit strings;
3749+
q = js_alloc_string(ctx, sizeof(*slice)/2, /*is_wide_char*/true);
3750+
if (!q)
3751+
return JS_EXCEPTION;
3752+
q->is_wide_char = p->is_wide_char;
3753+
q->kind = JS_STRING_KIND_SLICE;
3754+
q->len = len;
3755+
slice = (void *)&q[1];
3756+
slice->parent = p;
3757+
slice->start = start;
3758+
p->header.ref_count++;
3759+
return JS_MKPTR(JS_TAG_STRING, q);
3760+
}
36903761
if (p->is_wide_char) {
36913762
JSString *str;
36923763
int i;
@@ -5750,17 +5821,7 @@ static void js_free_value_rt(JSRuntime *rt, JSValue v)
57505821

57515822
switch(tag) {
57525823
case JS_TAG_STRING:
5753-
{
5754-
JSString *p = JS_VALUE_GET_STRING(v);
5755-
if (p->atom_type) {
5756-
JS_FreeAtomStruct(rt, p);
5757-
} else {
5758-
#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS
5759-
list_del(&p->link);
5760-
#endif
5761-
js_free_rt(rt, p);
5762-
}
5763-
}
5824+
js_free_string0(rt, JS_VALUE_GET_STRING(v));
57645825
break;
57655826
case JS_TAG_OBJECT:
57665827
case JS_TAG_FUNCTION_BYTECODE:
@@ -58038,6 +58099,13 @@ uintptr_t js_std_cmd(int cmd, ...) {
5803858099
*pv = ctx->error_back_trace;
5803958100
ctx->error_back_trace = JS_UNDEFINED;
5804058101
break;
58102+
case 3: // GetStringKind
58103+
ctx = va_arg(ap, JSContext *);
58104+
pv = va_arg(ap, JSValue *);
58105+
rv = -1;
58106+
if (JS_IsString(*pv))
58107+
rv = JS_VALUE_GET_STRING(*pv)->kind;
58108+
break;
5804158109
default:
5804258110
rv = -1;
5804358111
}

run-test262.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,15 +1700,32 @@ void update_stats(JSRuntime *rt, const char *filename) {
17001700
js_mutex_unlock(&stats_mutex);
17011701
}
17021702

1703+
/* Native function shared by the methods of the `qjs` test object: `magic`
   is passed to js_std_cmd() as the command number, together with the
   context and a pointer to the first argument; the result is returned as
   an int32 value.
   NOTE(review): argv[0] is used without checking argc; presumably the
   engine pads missing arguments up to the declared length of 1 - confirm. */
static JSValue qjs_black_box(JSContext *ctx, JSValueConst this_val,
1704+
int argc, JSValueConst argv[], int magic)
1705+
{
1706+
return JS_NewInt32(ctx, js_std_cmd(magic, ctx, &argv[0]));
1707+
}
1708+
1709+
/* Methods of the `qjs` object. The magic value is the js_std_cmd() command
   number; 3 is handled there as GetStringKind. */
static const JSCFunctionListEntry qjs_methods[] = {
1710+
JS_CFUNC_MAGIC_DEF("getStringKind", 1, qjs_black_box, /*GetStringKind*/3),
1711+
};
1712+
1713+
/* Property list entry that defines an object named "qjs" carrying the
   methods above; JS_NewCustomContext() installs it on the global object. */
static const JSCFunctionListEntry qjs_object =
1714+
JS_OBJECT_DEF("qjs", qjs_methods, countof(qjs_methods), JS_PROP_C_W_E);
1715+
17031716
/* Create a context on rt. When creation succeeds and `local` is set, also
   register the qjs:std, qjs:os and qjs:bjson modules and install the `qjs`
   object on the global object. Returns the context, or whatever
   JS_NewContext() returned if it failed. */
JSContext *JS_NewCustomContext(JSRuntime *rt)
17041717
{
17051718
JSContext *ctx;
1719+
JSValue obj;
17061720

17071721
ctx = JS_NewContext(rt);
17081722
if (ctx && local) {
17091723
js_init_module_std(ctx, "qjs:std");
17101724
js_init_module_os(ctx, "qjs:os");
17111725
js_init_module_bjson(ctx, "qjs:bjson");
1726+
obj = JS_GetGlobalObject(ctx);
1727+
// the result of JS_SetPropertyFunctionList() is not checked here
JS_SetPropertyFunctionList(ctx, obj, &qjs_object, 1);
1728+
// drop the reference obtained from JS_GetGlobalObject()
JS_FreeValue(ctx, obj);
17121729
}
17131730
return ctx;
17141731
}

tests/microbench.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,39 @@ function string_build4(n)
797797
return n * 100;
798798
}
799799

800+
// Benchmark: take a 1-character slice from the end of a 65,536-character
// string, 1000 times per outer iteration. Returns the number of slice()
// calls performed (n * 1000).
function string_slice1(n)
801+
{
802+
var i, j, s;
803+
s = "x".repeat(1<<16);
804+
for (i = 0; i < n; i++) {
805+
for (j = 0; j < 1000; j++)
806+
s.slice(-1); // too short for JSStringSlice
807+
}
808+
return n * 1000;
809+
}
810+
811+
// Benchmark: take the last 1,024 characters of a 65,536-character string,
// 1000 times per outer iteration. Returns the number of slice() calls
// performed (n * 1000).
function string_slice2(n)
812+
{
813+
var i, j, s;
814+
s = "x".repeat(1<<16);
815+
for (i = 0; i < n; i++) {
816+
for (j = 0; j < 1000; j++)
817+
// NOTE(review): the result is exactly 1,024 characters long, and
// js_sub_string only ref-slices when len > JS_STRING_SLICE_LEN_MAX, so
// this presumably still takes the copying path - confirm that the
// threshold case is what this benchmark is meant to measure.
s.slice(-1024);
818+
}
819+
return n * 1000;
820+
}
821+
822+
// Benchmark: slice off everything but the first character of a
// 65,536-character string (a 65,535-character result), 1000 times per outer
// iteration. Returns the number of slice() calls performed (n * 1000).
function string_slice3(n)
823+
{
824+
var i, j, s;
825+
s = "x".repeat(1<<16);
826+
for (i = 0; i < n; i++) {
827+
for (j = 0; j < 1000; j++)
828+
s.slice(1);
829+
}
830+
return n * 1000;
831+
}
832+
800833
/* sort bench */
801834

802835
function sort_bench(text) {
@@ -1114,6 +1147,9 @@ function main(argc, argv, g)
11141147
string_build2,
11151148
//string_build3,
11161149
//string_build4,
1150+
string_slice1,
1151+
string_slice2,
1152+
string_slice3,
11171153
sort_bench,
11181154
int_to_string,
11191155
int_toString,

tests/test_builtin.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,11 @@ function test_string()
378378
assert(eval('"\0"'), "\0");
379379

380380
assert("abc".padStart(Infinity, ""), "abc");
381+
382+
assert(qjs.getStringKind("xyzzy".slice(1)),
383+
/*JS_STRING_KIND_NORMAL*/0);
384+
assert(qjs.getStringKind("xyzzy".repeat(512).slice(1)),
385+
/*JS_STRING_KIND_SLICE*/1);
381386
}
382387

383388
function test_math()

0 commit comments

Comments
 (0)