Skip to content

Commit

Permalink
src: detect whether the string is one byte representation or not
Browse files Browse the repository at this point in the history
References: #56090
  • Loading branch information
theweipeng committed Dec 28, 2024
1 parent c4aa34a commit 65e06d9
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 0 deletions.
109 changes: 109 additions & 0 deletions doc/api/v8.md
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,115 @@ setTimeout(() => {
}, 1000);
```

## `v8.isStringOneByteRepresentation(content)`

<!-- YAML
added: REPLACEME
-->

* `content` {string}
* Returns: {boolean}

V8 only supports `Latin-1/ISO-8859-1` and `UTF16` as the underlying representation of a string.
If the `content` uses `Latin-1/ISO-8859-1` as the underlying representation, this function returns `true`;
otherwise, it returns `false`.

If this method returns `false`, that does not mean that the string contains characters outside of `Latin-1/ISO-8859-1`.
V8 may also represent a string that contains only `Latin-1` characters as `UTF16`.

```js
const { isStringOneByteRepresentation } = require('node:v8');
const assert = require('node:assert');

/**
 * Encoding flags written as the first byte of every serialized string.
 * Frozen so that the flag values cannot be modified by accident.
 */
const Encoding = Object.freeze({
  latin1: 1,
  utf16le: 2,
});

/**
 * Sequentially decodes strings from a buffer in which every record is laid
 * out as: one encoding flag byte, a 32-bit little-endian byte length, and
 * then the encoded string bytes.
 *
 * Note that this example ignores flag checks and boundary checks.
 */
class Deserializer {
  buffer;
  cursor = 0;

  constructor(buffer) {
    this.buffer = buffer;
  }

  /**
   * Decode the record at the current cursor and move the cursor past it.
   * Any flag other than `Encoding.latin1` is decoded as `utf16le`.
   * @returns {string}
   */
  readString() {
    const source = this.buffer;
    const flag = source.readUint8(this.cursor++);
    const byteLength = source.readUint32LE(this.cursor);
    this.cursor += 4;

    const start = this.cursor;
    const end = start + byteLength;
    const charset = flag === Encoding.latin1 ? 'latin1' : 'utf16le';
    const text = source.toString(charset, start, end);
    this.cursor = end;
    return text;
  }
}

/**
 * Thanks to `isStringOneByteRepresentation`, a string can be written into
 * the buffer using the encoding that matches its underlying representation,
 * so writing it only takes the time of a memcopy.
 *
 * Note that this example ignores boundary checks.
 */
class Serializer {
  buffer = Buffer.alloc(100);
  cursor = 0;

  /**
   * Append one record to the buffer:
   * 1. the encoding flag (one byte),
   * 2. the byte length of the string (four bytes, little-endian),
   * 3. the string bytes themselves.
   * @param {string} input
   */
  writeString(input) {
    const oneByte = isStringOneByteRepresentation(input);
    const flag = oneByte ? Encoding.latin1 : Encoding.utf16le;
    const charset = oneByte ? 'latin1' : 'utf16le';
    // One byte per code unit for latin1, two bytes per code unit for utf16le.
    const byteLength = oneByte ? input.length : input.length * 2;

    this.buffer.writeUint8(flag, this.cursor++);
    this.buffer.writeUint32LE(byteLength, this.cursor);
    this.cursor += 4;
    this.buffer.write(input, this.cursor, charset);
    this.cursor += byteLength;
  }

  /**
   * @returns {Buffer} A view over the bytes written so far.
   */
  finish() {
    return this.buffer.subarray(0, this.cursor);
  }
}

// Write strings to the buffer.
const serializer = new Serializer();
serializer.writeString('hello');
serializer.writeString('你好');
const data = serializer.finish();

// Read strings from the buffer. `assert.strictEqual` is used so that a
// mismatch reports both the actual and the expected value.
const deserializer = new Deserializer(data);
assert.strictEqual(deserializer.readString(), 'hello');
assert.strictEqual(deserializer.readString(), '你好');
```

[HTML structured clone algorithm]: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm
[Hook Callbacks]: #hook-callbacks
[V8]: https://developers.google.com/v8/
Expand Down
13 changes: 13 additions & 0 deletions lib/v8.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ const binding = internalBinding('v8');
const {
cachedDataVersionTag,
setFlagsFromString: _setFlagsFromString,
isStringOneByteRepresentation: _isStringOneByteRepresentation,
updateHeapStatisticsBuffer,
updateHeapSpaceStatisticsBuffer,
updateHeapCodeStatisticsBuffer,
Expand Down Expand Up @@ -155,6 +156,17 @@ function setFlagsFromString(flags) {
_setFlagsFromString(flags);
}

/**
 * Report whether V8 stores `content` with a one-byte underlying
 * representation. The argument is validated as a string before the native
 * binding is invoked.
 * @param {string} content
 * @returns {boolean}
 */
function isStringOneByteRepresentation(content) {
  validateString(content, 'content');
  const isOneByte = _isStringOneByteRepresentation(content);
  return isOneByte;
}


/**
* Gets the current V8 heap statistics.
* @returns {{
Expand Down Expand Up @@ -439,4 +451,5 @@ module.exports = {
startupSnapshot,
setHeapSnapshotNearHeapLimit,
GCProfiler,
isStringOneByteRepresentation,
};
4 changes: 4 additions & 0 deletions src/node_external_reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ namespace node {

using CFunctionCallbackWithOneByteString =
uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);

// Function pointer type for a callback that takes two v8::Local<v8::Value>
// arguments and returns bool.
using CFunctionCallbackReturnBool = bool (*)(v8::Local<v8::Value> unused,
v8::Local<v8::Value> receiver);
using CFunctionCallback = void (*)(v8::Local<v8::Value> unused,
v8::Local<v8::Value> receiver);
using CFunctionCallbackReturnDouble =
Expand Down Expand Up @@ -90,6 +93,7 @@ class ExternalReferenceRegistry {
#define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \
V(CFunctionCallback) \
V(CFunctionCallbackWithOneByteString) \
V(CFunctionCallbackReturnBool) \
V(CFunctionCallbackReturnDouble) \
V(CFunctionCallbackReturnInt32) \
V(CFunctionCallbackValueReturnDouble) \
Expand Down
28 changes: 28 additions & 0 deletions src/node_v8.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
namespace node {
namespace v8_utils {
using v8::Array;
using v8::CFunction;
using v8::Context;
using v8::FunctionCallbackInfo;
using v8::FunctionTemplate;
Expand Down Expand Up @@ -238,6 +239,23 @@ void SetFlagsFromString(const FunctionCallbackInfo<Value>& args) {
V8::SetFlagsFromString(*flags, static_cast<size_t>(flags.length()));
}

static void IsStringOneByteRepresentation(
const FunctionCallbackInfo<Value>& args) {
CHECK_EQ(args.Length(), 1);
CHECK(args[0]->IsString());
bool is_one_byte = args[0].As<String>()->IsOneByte();
args.GetReturnValue().Set(is_one_byte);
}

// Counterpart of IsStringOneByteRepresentation that returns the bool directly.
// `receiver` is not used; `target` must be a string.
static bool FastIsStringOneByteRepresentation(Local<Value> receiver,
                                              const Local<Value> target) {
  CHECK(target->IsString());
  Local<String> content = target.As<String>();
  return content->IsOneByte();
}

// v8::CFunction wrapper around FastIsStringOneByteRepresentation.
CFunction fast_is_string_one_byte_representation_(
CFunction::Make(FastIsStringOneByteRepresentation));

static const char* GetGCTypeName(v8::GCType gc_type) {
switch (gc_type) {
case v8::GCType::kGCTypeScavenge:
Expand Down Expand Up @@ -479,6 +497,13 @@ void Initialize(Local<Object> target,
// Export symbols used by v8.setFlagsFromString()
SetMethod(context, target, "setFlagsFromString", SetFlagsFromString);

// Export symbols used by v8.isStringOneByteRepresentation()
SetFastMethodNoSideEffect(context,
target,
"isStringOneByteRepresentation",
IsStringOneByteRepresentation,
&fast_is_string_one_byte_representation_);

// GCProfiler
Local<FunctionTemplate> t =
NewFunctionTemplate(env->isolate(), GCProfiler::New);
Expand All @@ -498,6 +523,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(GCProfiler::New);
registry->Register(GCProfiler::Start);
registry->Register(GCProfiler::Stop);
registry->Register(IsStringOneByteRepresentation);
registry->Register(FastIsStringOneByteRepresentation);
registry->Register(fast_is_string_one_byte_representation_.GetTypeInfo());
}

} // namespace v8_utils
Expand Down
37 changes: 37 additions & 0 deletions test/parallel/test-v8-string-is-one-byte-representation.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Flags: --expose-internals
'use strict';
require('../common');
const assert = require('assert');
const { isStringOneByteRepresentation } = require('v8');

[
undefined,
null,
false,
5n,
5,
Symbol(),
() => {},
{},
].forEach((value) => {
assert.throws(
() => { isStringOneByteRepresentation(value); },
/The "content" argument must be of type string/
);
});

{
const latin1String = 'hello world!';
// Run this inside a for loop to trigger the fast API
for (let i = 0; i < 10_000; i++) {
assert.strictEqual(isStringOneByteRepresentation(latin1String), true);
}
}

{
const utf16String = '你好😀😃';
// Run this inside a for loop to trigger the fast API
for (let i = 0; i < 10_000; i++) {
assert.strictEqual(isStringOneByteRepresentation(utf16String), false);
}
}

0 comments on commit 65e06d9

Please sign in to comment.