From ec064e3930e03f74df50359dc89836e778a48b2d Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Wed, 18 Sep 2024 16:20:25 +0200 Subject: [PATCH] JavaScript: make bytes literals use `Uint8Array` (not `number[]`) Until now, byte array literals translated for JavaScript were of type `number[]`, as TypeScript would call this type (in pure JS terminology, it's an `Array` object with numbers). For example, a value instance defined as `instances/v/value: '[65, 126]'` would be translated as `this._m_v = [65, 126];` in the generated JS code. However, byte arrays created in any other way would use the [`Uint8Array`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array) type, which is the standard type for byte arrays in JavaScript. Namely any byte array parsed from stream would be a `Uint8Array` object and also a *non-literal* byte array like `[123, 246 + 0].as` would be generated as `new Uint8Array([123, 246 + 0])`. I believe that using a different type for byte array literals than for byte arrays created in any other way was never intentional. It seems we simply forgot to override `doByteArrayLiteral` in `JavaScriptTranslator` (but `doByteArrayNonLiteral` is overridden correctly, which is weird), so we ended up using the default `BaseTranslator.doByteArrayLiteral` implementation, which produces the `[65, 126]` syntax. JavaScript is dynamically typed, so you can treat `number[]` the same as `Uint8Array` in most cases (e.g. indexing will work the same), but this discrepancy now causes problems when porting our JavaScript runtime library to TypeScript (see https://github.com/kaitai-io/kaitai_struct_javascript_runtime/pull/25), because you would have to annotate a lot of parameter types as `number[] | Uint8Array` (which doesn't really make sense). Moreover, I discovered that generating byte array literals as `number[]` creates a bug in this case: ```ksy meta: id: bytes_to_s instances: literal: value: '[65, 126].to_s("UTF-8")' ``` Before this commit, `[65, 126].to_s("UTF-8")` would be translated as `KaitaiStream.bytesToStr([65, 126], "UTF-8")` for JavaScript. But our `bytesToStr` implementation won't actually accept `number[]` for non-ASCII encodings. See [`KaitaiStream.js:650-657`](https://github.com/kaitai-io/kaitai_struct_javascript_runtime/blob/2acb0d8b09fde9c95fc77ee3019f6d6b1c73f040/KaitaiStream.js#L650-L657): ```js KaitaiStream.bytesToStr = function(arr, encoding) { if (encoding == null || encoding.toLowerCase() === "ascii") { return KaitaiStream.createStringFromArray(arr); } else { if (typeof TextDecoder === 'function') { // we're in the browser that supports TextDecoder return (new TextDecoder(encoding)).decode(arr); } else { ``` If `TextDecoder` is available (which is the case in a browser or since Node.js 11), the error when trying to access the `literal` instance above will look like this in the browser (Google Chrome 129): ``` TypeError: Failed to execute 'decode' on 'TextDecoder': parameter 1 is not of type 'ArrayBuffer'. at KaitaiStream.bytesToStr (https://ide.kaitai.io/devel/lib/_npm/kaitai-struct/KaitaiStream.js:656:42) at BytesToS.get (eval at initCode (https://ide.kaitai.io/devel/js/v1/kaitaiWorker.js:156:9), :27:38) at fetchInstance (https://ide.kaitai.io/devel/js/v1/kaitaiWorker.js:123:20) ... ``` ... and like this in Node.js 12: ``` TypeError [ERR_INVALID_ARG_TYPE]: The "input" argument must be an instance of ArrayBuffer or ArrayBufferView. Received an instance of Array at TextDecoder.decode (internal/encoding.js:412:15) at Function.KaitaiStream.bytesToStr (/runtime/javascript/KaitaiStream.js:656:42) at BytesToS.get (compiled/javascript/BytesToS.js:26:38) at /tests/spec/javascript/test_bytes_to_s.js:8:24 ... ``` So using `Uint8Array` for all byte arrays will not only be more consistent, but will also fix this bug. --- .../io/kaitai/struct/translators/TranslatorSpec.scala | 8 ++++---- .../kaitai/struct/translators/JavaScriptTranslator.scala | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala index 8db8d279a..353e36418 100644 --- a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala +++ b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala @@ -406,7 +406,7 @@ class TranslatorSpec extends AnyFunSpec { CSharpCompiler -> "new byte[] { 34, 0, 10, 64, 65, 66, 92 }", GoCompiler -> "[]uint8{34, 0, 10, 64, 65, 66, 92}", JavaCompiler -> "new byte[] { 34, 0, 10, 64, 65, 66, 92 }", - JavaScriptCompiler -> "[34, 0, 10, 64, 65, 66, 92]", + JavaScriptCompiler -> "new Uint8Array([34, 0, 10, 64, 65, 66, 92])", LuaCompiler -> "\"\\034\\000\\010\\064\\065\\066\\092\"", PerlCompiler -> "pack('C*', (34, 0, 10, 64, 65, 66, 92))", PHPCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"", @@ -419,7 +419,7 @@ class TranslatorSpec extends AnyFunSpec { CSharpCompiler -> "new byte[] { 255, 0, 255 }", GoCompiler -> "[]uint8{255, 0, 255}", JavaCompiler -> "new byte[] { -1, 0, -1 }", - JavaScriptCompiler -> "[255, 0, 255]", + JavaScriptCompiler -> "new Uint8Array([255, 0, 255])", LuaCompiler -> "\"\\255\\000\\255\"", PerlCompiler -> "pack('C*', (255, 0, 255))", PHPCompiler -> "\"\\xFF\\x00\\xFF\"", @@ -434,7 +434,7 @@ class TranslatorSpec extends AnyFunSpec { CSharpCompiler -> "new byte[] { 0, 1, 2 }.Length", GoCompiler -> "len([]uint8{0, 1, 2})", JavaCompiler -> "new byte[] { 0, 1, 2 }.length", - JavaScriptCompiler -> "[0, 1, 2].length", + JavaScriptCompiler -> "new Uint8Array([0, 1, 2]).length", LuaCompiler -> "#\"\\000\\001\\002\"", PerlCompiler -> "length(pack('C*', (0, 1, 2)))", PHPCompiler -> "strlen(\"\\x00\\x01\\x02\")", @@ -816,7 +816,7 @@ class TranslatorSpec extends AnyFunSpec { CSharpCompiler -> "new byte[] { }", GoCompiler -> "[]uint8{}", JavaCompiler -> "new byte[] { }", - JavaScriptCompiler -> "[]", + JavaScriptCompiler -> "new Uint8Array([])", LuaCompiler -> "\"\"", PerlCompiler -> "pack('C*', ())", PHPCompiler -> "\"\"", diff --git a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala index 867282002..d05440f8d 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala @@ -8,6 +8,8 @@ import io.kaitai.struct.format.{EnumSpec, Identifier} import io.kaitai.struct.languages.JavaScriptCompiler class JavaScriptTranslator(provider: TypeProvider, importList: ImportList) extends BaseTranslator(provider) { + override def doByteArrayLiteral(arr: Seq[Byte]): String = + s"new Uint8Array([${arr.map(_ & 0xff).mkString(", ")}])" override def doByteArrayNonLiteral(elts: Seq[Ast.expr]): String = s"new Uint8Array([${elts.map(translate).mkString(", ")}])"