From 9a627bd6f9911b0b040833dd9667b3d409281d93 Mon Sep 17 00:00:00 2001 From: Mingun <alexander_sergey@mail.ru> Date: Thu, 28 Mar 2024 21:14:20 +0500 Subject: [PATCH] Use hex escape sequences instead of octal escape sequences. Octal escape sequences are the least used form of escape sequences, while hex escapes are supported almost everywhere. The only exceptions are Java, C++ and Rust. --- .../struct/translators/TranslatorSpec.scala | 18 +++++++++--------- .../src/main/scala/io/kaitai/struct/JSON.scala | 1 - .../struct/translators/CommonLiterals.scala | 10 +++------- .../struct/translators/CppTranslator.scala | 15 +++++++++++++++ .../translators/JavaScriptTranslator.scala | 13 ------------- .../struct/translators/JavaTranslator.scala | 15 +++++++++++++++ .../struct/translators/LuaTranslator.scala | 13 +------------ .../struct/translators/RustTranslator.scala | 7 +++++++ 8 files changed, 50 insertions(+), 42 deletions(-) diff --git a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala index 157c473f6..8b155ca86 100644 --- a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala +++ b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala @@ -407,7 +407,7 @@ class TranslatorSpec extends AnyFunSpec { GoCompiler -> "[]uint8{34, 0, 10, 64, 65, 66, 92}", JavaCompiler -> "new byte[] { 34, 0, 10, 64, 65, 66, 92 }", JavaScriptCompiler -> "new Uint8Array([34, 0, 10, 64, 65, 66, 92])", - LuaCompiler -> "\"\\034\\000\\010\\064\\065\\066\\092\"", + LuaCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"", PerlCompiler -> "pack('C*', (34, 0, 10, 64, 65, 66, 92))", PHPCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"", PythonCompiler -> "b\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"", @@ -420,7 +420,7 @@ class TranslatorSpec extends AnyFunSpec { GoCompiler -> "[]uint8{255, 0, 255}", JavaCompiler -> "new byte[] { -1, 0, -1 }", JavaScriptCompiler -> "new Uint8Array([255, 0, 255])", - LuaCompiler 
-> "\"\\255\\000\\255\"", + LuaCompiler -> "\"\\xFF\\x00\\xFF\"", PerlCompiler -> "pack('C*', (255, 0, 255))", PHPCompiler -> "\"\\xFF\\x00\\xFF\"", PythonCompiler -> "b\"\\xFF\\x00\\xFF\"", @@ -435,7 +435,7 @@ class TranslatorSpec extends AnyFunSpec { GoCompiler -> "len([]uint8{0, 1, 2})", JavaCompiler -> "new byte[] { 0, 1, 2 }.length", JavaScriptCompiler -> "new Uint8Array([0, 1, 2]).length", - LuaCompiler -> "#\"\\000\\001\\002\"", + LuaCompiler -> "#\"\\x00\\x01\\x02\"", PerlCompiler -> "length(pack('C*', (0, 1, 2)))", PHPCompiler -> "strlen(\"\\x00\\x01\\x02\")", PythonCompiler -> "len(b\"\\x00\\x01\\x02\")", @@ -555,14 +555,14 @@ class TranslatorSpec extends AnyFunSpec { full("\"str\\0next\"", CalcIntType, CalcStrType, ResultMap( CppCompiler -> "std::string(\"str\\000next\", 8)", CSharpCompiler -> "\"str\\0next\"", - GoCompiler -> "\"str\\000next\"", + GoCompiler -> "\"str\\x00next\"", JavaCompiler -> "\"str\\000next\"", JavaScriptCompiler -> "\"str\\x00next\"", - LuaCompiler -> "\"str\\000next\"", - PerlCompiler -> "\"str\\000next\"", - PHPCompiler -> "\"str\\000next\"", - PythonCompiler -> "u\"str\\000next\"", - RubyCompiler -> "\"str\\000next\"" + LuaCompiler -> "\"str\\x00next\"", + PerlCompiler -> "\"str\\x00next\"", + PHPCompiler -> "\"str\\x00next\"", + PythonCompiler -> "u\"str\\x00next\"", + RubyCompiler -> "\"str\\x00next\"" )) } diff --git a/shared/src/main/scala/io/kaitai/struct/JSON.scala b/shared/src/main/scala/io/kaitai/struct/JSON.scala index b86b3a25f..f30906d1e 100644 --- a/shared/src/main/scala/io/kaitai/struct/JSON.scala +++ b/shared/src/main/scala/io/kaitai/struct/JSON.scala @@ -28,7 +28,6 @@ object JSON extends CommonLiterals { } } - /** octal escapes (which [[translators.CommonLiterals.strLiteralGenericCC]] uses by default) are not allowed in JSON */ override def strLiteralGenericCC(code: Char): String = strLiteralUnicode(code) def stringToJson(str: String): String = diff --git 
a/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala b/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala index 11e87e7cb..8189b33bf 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala @@ -34,7 +34,7 @@ trait CommonLiterals { /** * Handle ASCII character conversion for inlining into string literals. * Default implementation consults [[asciiCharQuoteMap]] first, then - * just dumps it as is if it's a printable ASCII charcter, or calls + * just dumps it as is if it's a printable ASCII character, or calls * [[strLiteralGenericCC]] if it's a control character. * @param code character code to convert into string for inclusion in * a string literal @@ -53,18 +53,14 @@ trait CommonLiterals { /** * Converts generic control character code into something that's allowed - * inside a string literal. Default implementation uses octal encoding, + * inside a string literal. Default implementation uses hex encoding, * which is ok for most C-derived languages. * - * Note that we use strictly 3 octal digits to work around potential - * problems with following decimal digits, i.e. "\0" + "2" that would be - * parsed as single character "\02" = "\x02", instead of two characters - * "\x00\x32". 
* @param code character code to represent * @return string literal representation of given code */ def strLiteralGenericCC(code: Char): String = - "\\%03o".format(code.toInt) + "\\x%02X".format(code.toInt) /** * Converts Unicode (typically, non-ASCII) character code into something diff --git a/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala index 6d29c800e..ac82844c9 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala @@ -131,6 +131,21 @@ class CppTranslator(provider: TypeProvider, importListSrc: CppImportList, import } } + /** + * Hex escapes in C++ are not limited in length (they consume every following hex digit), so we use octal escapes, which take at most 3 digits. + * + * Note that we use strictly 3 octal digits to work around potential + * problems with following decimal digits, i.e. "\0" + "2" that would be + * parsed as single character "\02" = "\x02", instead of two characters + * "\x00\x32". 
+ * + * @see https://en.cppreference.com/w/cpp/language/escape + * @param code character code to represent + * @return string literal representation of given code + */ + override def strLiteralGenericCC(code: Char): String = + "\\%03o".format(code.toInt) + override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = { (detectType(left), detectType(right), op) match { case (_: IntType, _: IntType, Ast.operator.Mod) => diff --git a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala index d05440f8d..8ca399358 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala @@ -13,19 +13,6 @@ class JavaScriptTranslator(provider: TypeProvider, importList: ImportList) exten override def doByteArrayNonLiteral(elts: Seq[Ast.expr]): String = s"new Uint8Array([${elts.map(translate).mkString(", ")}])" - /** - * JavaScript rendition of common control character that would use hex form, - * not octal. "Octal" control character string literals might be accepted - * in non-strict JS mode, but in strict mode only hex or unicode are ok. - * Here we'll use hex, as they are shorter. 
- * - * @see https://github.com/kaitai-io/kaitai_struct/issues/279 - * @param code character code to represent - * @return string literal representation of given code - */ - override def strLiteralGenericCC(code: Char): String = - "\\x%02x".format(code.toInt) - override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = { (detectType(left), detectType(right), op) match { case (_: IntType, _: IntType, Ast.operator.Div) => diff --git a/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala index 18eb78dda..9a1b8e941 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala @@ -45,6 +45,21 @@ class JavaTranslator(provider: TypeProvider, importList: ImportList) extends Bas override def doByteArrayNonLiteral(elts: Seq[expr]): String = s"new byte[] { ${elts.map(translate).mkString(", ")} }" + /** + * Java does not support two-digit hex escape sequences, so use octal, as they are shorter. + * + * Note that we use strictly 3 octal digits to work around potential + * problems with following decimal digits, i.e. "\0" + "2" that would be + * parsed as single character "\02" = "\x02", instead of two characters + * "\x00\x32". 
+ * + * @see https://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.6 + * @param code character code to represent + * @return string literal representation of given code + */ + override def strLiteralGenericCC(code: Char): String = + "\\%03o".format(code.toInt) + override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = { (detectType(left), detectType(right), op) match { case (_: IntType, _: IntType, Ast.operator.Mod) => diff --git a/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala index f2dcad1ec..c25b7d6e7 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala @@ -39,7 +39,6 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base '\b' -> "\\b", '\u000b' -> "\\v", '\f' -> "\\f", - '\u001b' -> "\\027" ) override def strLiteralUnicode(code: Char): String = @@ -71,7 +70,7 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base override def doArrayLiteral(t: DataType, value: Seq[Ast.expr]): String = "{" + value.map((v) => translate(v)).mkString(", ") + "}" override def doByteArrayLiteral(arr: Seq[Byte]): String = - "\"" + decEscapeByteArray(arr) + "\"" + "\"" + Utils.hexEscapeByteArray(arr) + "\"" override def doByteArrayNonLiteral(values: Seq[Ast.expr]): String = // It is assumed that every expression produces integer in the range [0; 255] "string.char(" + values.map(translate).mkString(", ") + ")" @@ -189,14 +188,4 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base case Ast.unaryop.Not => "not" case _ => super.unaryOp(op) } - - /** - * Converts byte array (Seq[Byte]) into decimal-escaped Lua-style literal - * characters (i.e. like \255). 
- * - * @param arr byte array to escape - * @return array contents decimal-escaped as string - */ - private def decEscapeByteArray(arr: Seq[Byte]): String = - arr.map((x) => "\\%03d".format(x & 0xff)).mkString } diff --git a/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala index 788d49182..97aaad0c4 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala @@ -38,6 +38,13 @@ class RustTranslator(provider: TypeProvider, config: RuntimeConfig) override def strLiteralGenericCC(code: Char): String = strLiteralUnicode(code) + /** + * Hex escapes of the form `\xHH` in Rust allow only codes in the range 0x00 - 0x7f. + * + * @see https://doc.rust-lang.org/reference/tokens.html#examples + * @param code character code to represent + * @return string literal representation of given code + */ override def strLiteralUnicode(code: Char): String = "\\u{%x}".format(code.toInt)