From 9a627bd6f9911b0b040833dd9667b3d409281d93 Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Thu, 28 Mar 2024 21:14:20 +0500
Subject: [PATCH] Use hex escape sequences instead of octal escape sequences.

Octal escape sequences are the least used form of escape sequences, while hex escapes are supported almost everywhere.
The only exceptions are Java, C++ and Rust.
---
 .../struct/translators/TranslatorSpec.scala    | 18 +++++++++---------
 .../src/main/scala/io/kaitai/struct/JSON.scala |  1 -
 .../struct/translators/CommonLiterals.scala    | 10 +++-------
 .../struct/translators/CppTranslator.scala     | 15 +++++++++++++++
 .../translators/JavaScriptTranslator.scala     | 13 -------------
 .../struct/translators/JavaTranslator.scala    | 15 +++++++++++++++
 .../struct/translators/LuaTranslator.scala     | 13 +------------
 .../struct/translators/RustTranslator.scala    |  7 +++++++
 8 files changed, 50 insertions(+), 42 deletions(-)

diff --git a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala
index 157c473f6..8b155ca86 100644
--- a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala
+++ b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala
@@ -407,7 +407,7 @@ class TranslatorSpec extends AnyFunSpec {
         GoCompiler -> "[]uint8{34, 0, 10, 64, 65, 66, 92}",
         JavaCompiler -> "new byte[] { 34, 0, 10, 64, 65, 66, 92 }",
         JavaScriptCompiler -> "new Uint8Array([34, 0, 10, 64, 65, 66, 92])",
-        LuaCompiler -> "\"\\034\\000\\010\\064\\065\\066\\092\"",
+        LuaCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
         PerlCompiler -> "pack('C*', (34, 0, 10, 64, 65, 66, 92))",
         PHPCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
         PythonCompiler -> "b\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
@@ -420,7 +420,7 @@ class TranslatorSpec extends AnyFunSpec {
         GoCompiler -> "[]uint8{255, 0, 255}",
         JavaCompiler -> "new byte[] { -1, 0, -1 }",
         JavaScriptCompiler -> "new Uint8Array([255, 0, 255])",
-        LuaCompiler -> "\"\\255\\000\\255\"",
+        LuaCompiler -> "\"\\xFF\\x00\\xFF\"",
         PerlCompiler -> "pack('C*', (255, 0, 255))",
         PHPCompiler -> "\"\\xFF\\x00\\xFF\"",
         PythonCompiler -> "b\"\\xFF\\x00\\xFF\"",
@@ -435,7 +435,7 @@ class TranslatorSpec extends AnyFunSpec {
         GoCompiler -> "len([]uint8{0, 1, 2})",
         JavaCompiler -> "new byte[] { 0, 1, 2 }.length",
         JavaScriptCompiler -> "new Uint8Array([0, 1, 2]).length",
-        LuaCompiler -> "#\"\\000\\001\\002\"",
+        LuaCompiler -> "#\"\\x00\\x01\\x02\"",
         PerlCompiler -> "length(pack('C*', (0, 1, 2)))",
         PHPCompiler -> "strlen(\"\\x00\\x01\\x02\")",
         PythonCompiler -> "len(b\"\\x00\\x01\\x02\")",
@@ -555,14 +555,14 @@ class TranslatorSpec extends AnyFunSpec {
       full("\"str\\0next\"", CalcIntType, CalcStrType, ResultMap(
         CppCompiler -> "std::string(\"str\\000next\", 8)",
         CSharpCompiler -> "\"str\\0next\"",
-        GoCompiler -> "\"str\\000next\"",
+        GoCompiler -> "\"str\\x00next\"",
         JavaCompiler -> "\"str\\000next\"",
         JavaScriptCompiler -> "\"str\\x00next\"",
-        LuaCompiler -> "\"str\\000next\"",
-        PerlCompiler -> "\"str\\000next\"",
-        PHPCompiler -> "\"str\\000next\"",
-        PythonCompiler -> "u\"str\\000next\"",
-        RubyCompiler -> "\"str\\000next\""
+        LuaCompiler -> "\"str\\x00next\"",
+        PerlCompiler -> "\"str\\x00next\"",
+        PHPCompiler -> "\"str\\x00next\"",
+        PythonCompiler -> "u\"str\\x00next\"",
+        RubyCompiler -> "\"str\\x00next\""
       ))
     }
 
diff --git a/shared/src/main/scala/io/kaitai/struct/JSON.scala b/shared/src/main/scala/io/kaitai/struct/JSON.scala
index b86b3a25f..f30906d1e 100644
--- a/shared/src/main/scala/io/kaitai/struct/JSON.scala
+++ b/shared/src/main/scala/io/kaitai/struct/JSON.scala
@@ -28,7 +28,6 @@ object JSON extends CommonLiterals {
     }
   }
 
-  /** octal escapes (which [[translators.CommonLiterals.strLiteralGenericCC]] uses by default) are not allowed in JSON */
   override def strLiteralGenericCC(code: Char): String = strLiteralUnicode(code)
 
   def stringToJson(str: String): String =
diff --git a/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala b/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala
index 11e87e7cb..8189b33bf 100644
--- a/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala
+++ b/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala
@@ -34,7 +34,7 @@ trait CommonLiterals {
   /**
     * Handle ASCII character conversion for inlining into string literals.
     * Default implementation consults [[asciiCharQuoteMap]] first, then
-    * just dumps it as is if it's a printable ASCII charcter, or calls
+    * just dumps it as is if it's a printable ASCII character, or calls
     * [[strLiteralGenericCC]] if it's a control character.
     * @param code character code to convert into string for inclusion in
     *             a string literal
@@ -53,18 +53,14 @@ trait CommonLiterals {
 
   /**
     * Converts generic control character code into something that's allowed
-    * inside a string literal. Default implementation uses octal encoding,
+    * inside a string literal. Default implementation uses hex encoding,
     * which is ok for most C-derived languages.
     *
-    * Note that we use strictly 3 octal digits to work around potential
-    * problems with following decimal digits, i.e. "\0" + "2" that would be
-    * parsed as single character "\02" = "\x02", instead of two characters
-    * "\x00\x32".
     * @param code character code to represent
     * @return string literal representation of given code
     */
   def strLiteralGenericCC(code: Char): String =
-    "\\%03o".format(code.toInt)
+    "\\x%02X".format(code.toInt)
 
   /**
     * Converts Unicode (typically, non-ASCII) character code into something
diff --git a/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala
index 6d29c800e..ac82844c9 100644
--- a/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala
+++ b/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala
@@ -131,6 +131,21 @@ class CppTranslator(provider: TypeProvider, importListSrc: CppImportList, import
     }
   }
 
+  /**
+    * Hex escapes in C++ are not limited in length, so we use octal escapes, which take at most 3 digits.
+    *
+    * Note that we use strictly 3 octal digits to work around potential
+    * problems with following decimal digits, i.e. "\0" + "2" that would be
+    * parsed as single character "\02" = "\x02", instead of two characters
+    * "\x00\x32".
+    *
+    * @see https://en.cppreference.com/w/cpp/language/escape
+    * @param code character code to represent
+    * @return string literal representation of given code
+    */
+  override def strLiteralGenericCC(code: Char): String =
+    "\\%03o".format(code.toInt)
+
   override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
     (detectType(left), detectType(right), op) match {
       case (_: IntType, _: IntType, Ast.operator.Mod) =>
diff --git a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala
index d05440f8d..8ca399358 100644
--- a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala
+++ b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala
@@ -13,19 +13,6 @@ class JavaScriptTranslator(provider: TypeProvider, importList: ImportList) exten
   override def doByteArrayNonLiteral(elts: Seq[Ast.expr]): String =
     s"new Uint8Array([${elts.map(translate).mkString(", ")}])"
 
-  /**
-    * JavaScript rendition of common control character that would use hex form,
-    * not octal. "Octal" control character string literals might be accepted
-    * in non-strict JS mode, but in strict mode only hex or unicode are ok.
-    * Here we'll use hex, as they are shorter.
-    *
-    * @see https://github.com/kaitai-io/kaitai_struct/issues/279
-    * @param code character code to represent
-    * @return string literal representation of given code
-    */
-  override def strLiteralGenericCC(code: Char): String =
-    "\\x%02x".format(code.toInt)
-
   override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
     (detectType(left), detectType(right), op) match {
       case (_: IntType, _: IntType, Ast.operator.Div) =>
diff --git a/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala
index 18eb78dda..9a1b8e941 100644
--- a/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala
+++ b/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala
@@ -45,6 +45,21 @@ class JavaTranslator(provider: TypeProvider, importList: ImportList) extends Bas
   override def doByteArrayNonLiteral(elts: Seq[expr]): String =
     s"new byte[] { ${elts.map(translate).mkString(", ")} }"
 
+  /**
+    * Java does not support two-digit hex escape sequences, so we use octal escapes, as they are shorter than Unicode escapes.
+    *
+    * Note that we use strictly 3 octal digits to work around potential
+    * problems with following decimal digits, i.e. "\0" + "2" that would be
+    * parsed as single character "\02" = "\x02", instead of two characters
+    * "\x00\x32".
+    *
+    * @see https://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.6
+    * @param code character code to represent
+    * @return string literal representation of given code
+    */
+  override def strLiteralGenericCC(code: Char): String =
+    "\\%03o".format(code.toInt)
+
   override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
     (detectType(left), detectType(right), op) match {
       case (_: IntType, _: IntType, Ast.operator.Mod) =>
diff --git a/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala
index f2dcad1ec..c25b7d6e7 100644
--- a/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala
+++ b/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala
@@ -39,7 +39,6 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
     '\b' -> "\\b",
     '\u000b' -> "\\v",
     '\f' -> "\\f",
-    '\u001b' -> "\\027"
   )
 
   override def strLiteralUnicode(code: Char): String =
@@ -71,7 +70,7 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
   override def doArrayLiteral(t: DataType, value: Seq[Ast.expr]): String =
     "{" + value.map((v) => translate(v)).mkString(", ") + "}"
   override def doByteArrayLiteral(arr: Seq[Byte]): String =
-    "\"" + decEscapeByteArray(arr) + "\""
+    "\"" + Utils.hexEscapeByteArray(arr) + "\""
   override def doByteArrayNonLiteral(values: Seq[Ast.expr]): String =
     // It is assumed that every expression produces integer in the range [0; 255]
     "string.char(" + values.map(translate).mkString(", ") + ")"
@@ -189,14 +188,4 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
     case Ast.unaryop.Not => "not"
     case _ => super.unaryOp(op)
   }
-
-  /**
-   * Converts byte array (Seq[Byte]) into decimal-escaped Lua-style literal
-   * characters (i.e. like \255).
-   *
-   * @param arr byte array to escape
-   * @return array contents decimal-escaped as string
-   */
-  private def decEscapeByteArray(arr: Seq[Byte]): String =
-    arr.map((x) => "\\%03d".format(x & 0xff)).mkString
 }
diff --git a/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala
index 788d49182..97aaad0c4 100644
--- a/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala
+++ b/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala
@@ -38,6 +38,13 @@ class RustTranslator(provider: TypeProvider, config: RuntimeConfig)
   override def strLiteralGenericCC(code: Char): String =
     strLiteralUnicode(code)
 
+  /**
+    * Hex escapes of the form `\xHH` in Rust allow only codes in the range 0x00 - 0x7f, so we use Unicode escapes instead.
+    *
+    * @see https://doc.rust-lang.org/reference/tokens.html#examples
+    * @param code character code to represent
+    * @return string literal representation of given code
+    */
   override def strLiteralUnicode(code: Char): String =
     "\\u{%x}".format(code.toInt)