diff --git a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala index 355bf975b..884568a1a 100644 --- a/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala +++ b/jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala @@ -407,7 +407,7 @@ class TranslatorSpec extends AnyFunSpec { GoCompiler -> "[]uint8{34, 0, 10, 64, 65, 66, 92}", JavaCompiler -> "new byte[] { 34, 0, 10, 64, 65, 66, 92 }", JavaScriptCompiler -> "[34, 0, 10, 64, 65, 66, 92]", - LuaCompiler -> "\"\\034\\000\\010\\064\\065\\066\\092\"", + LuaCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"", PerlCompiler -> "pack('C*', (34, 0, 10, 64, 65, 66, 92))", PHPCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"", PythonCompiler -> "b\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"", @@ -420,7 +420,7 @@ class TranslatorSpec extends AnyFunSpec { GoCompiler -> "[]uint8{255, 0, 255}", JavaCompiler -> "new byte[] { -1, 0, -1 }", JavaScriptCompiler -> "[255, 0, 255]", - LuaCompiler -> "\"\\255\\000\\255\"", + LuaCompiler -> "\"\\xFF\\x00\\xFF\"", PerlCompiler -> "pack('C*', (255, 0, 255))", PHPCompiler -> "\"\\xFF\\x00\\xFF\"", PythonCompiler -> "b\"\\xFF\\x00\\xFF\"", @@ -435,7 +435,7 @@ class TranslatorSpec extends AnyFunSpec { GoCompiler -> "len([]uint8{0, 1, 2})", JavaCompiler -> "new byte[] { 0, 1, 2 }.length", JavaScriptCompiler -> "[0, 1, 2].length", - LuaCompiler -> "#\"\\000\\001\\002\"", + LuaCompiler -> "#\"\\x00\\x01\\x02\"", PerlCompiler -> "length(pack('C*', (0, 1, 2)))", PHPCompiler -> "strlen(\"\\x00\\x01\\x02\")", PythonCompiler -> "len(b\"\\x00\\x01\\x02\")", @@ -555,14 +555,14 @@ class TranslatorSpec extends AnyFunSpec { full("\"str\\0next\"", CalcIntType, CalcStrType, ResultMap( CppCompiler -> "std::string(\"str\\000next\", 8)", CSharpCompiler -> "\"str\\0next\"", - GoCompiler -> "\"str\\000next\"", + GoCompiler -> "\"str\\x00next\"", JavaCompiler -> "\"str\\000next\"", 
JavaScriptCompiler -> "\"str\\x00next\"", - LuaCompiler -> "\"str\\000next\"", - PerlCompiler -> "\"str\\000next\"", - PHPCompiler -> "\"str\\000next\"", - PythonCompiler -> "u\"str\\000next\"", - RubyCompiler -> "\"str\\000next\"" + LuaCompiler -> "\"str\\x00next\"", + PerlCompiler -> "\"str\\x00next\"", + PHPCompiler -> "\"str\\x00next\"", + PythonCompiler -> "u\"str\\x00next\"", + RubyCompiler -> "\"str\\x00next\"" )) } diff --git a/shared/src/main/scala/io/kaitai/struct/JSON.scala b/shared/src/main/scala/io/kaitai/struct/JSON.scala index b86b3a25f..f30906d1e 100644 --- a/shared/src/main/scala/io/kaitai/struct/JSON.scala +++ b/shared/src/main/scala/io/kaitai/struct/JSON.scala @@ -28,7 +28,6 @@ object JSON extends CommonLiterals { } } - /** octal escapes (which [[translators.CommonLiterals.strLiteralGenericCC]] uses by default) are not allowed in JSON */ override def strLiteralGenericCC(code: Char): String = strLiteralUnicode(code) def stringToJson(str: String): String = diff --git a/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala b/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala index 11e87e7cb..b1e9b039a 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala @@ -34,7 +34,7 @@ trait CommonLiterals { /** * Handle ASCII character conversion for inlining into string literals. * Default implementation consults [[asciiCharQuoteMap]] first, then - * just dumps it as is if it's a printable ASCII charcter, or calls + * just dumps it as is if it's a printable ASCII character, or calls * [[strLiteralGenericCC]] if it's a control character. * @param code character code to convert into string for inclusion in * a string literal @@ -53,18 +53,14 @@ trait CommonLiterals { /** * Converts generic control character code into something that's allowed - * inside a string literal. 
Default implementation uses octal encoding, + * inside a string literal. Default implementation uses hex encoding, * which is ok for most C-derived languages. * - * Note that we use strictly 3 octal digits to work around potential - * problems with following decimal digits, i.e. "\0" + "2" that would be - * parsed as single character "\02" = "\x02", instead of two characters - * "\x00\x32". * @param code character code to represent * @return string literal representation of given code */ def strLiteralGenericCC(code: Char): String = - "\\%03o".format(code.toInt) + "\\x%02x".format(code.toInt) /** * Converts Unicode (typically, non-ASCII) character code into something diff --git a/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala index b5686af2a..3dfb18cb3 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala @@ -118,6 +118,21 @@ class CppTranslator(provider: TypeProvider, importListSrc: CppImportList, import override def doByteArrayLiteral(arr: Seq[Byte]): String = "std::string(\"" + Utils.hexEscapeByteArray(arr) + "\", " + arr.length + ")" + /** + * Hex escapes in C++ are not limited in length, so we use octal, as they are shorter. + * + * Note that we use strictly 3 octal digits to work around potential + * problems with following decimal digits, i.e. "\0" + "2" that would be + * parsed as single character "\02" = "\x02", instead of two characters + * "\x00\x32". 
+ * + * @see https://en.cppreference.com/w/cpp/language/escape + * @param code character code to represent + * @return string literal representation of given code + */ + override def strLiteralGenericCC(code: Char): String = + "\\%03o".format(code.toInt) + override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = { (detectType(left), detectType(right), op) match { case (_: IntType, _: IntType, Ast.operator.Mod) => diff --git a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala index 9f0cafbab..c8fa55686 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala @@ -11,19 +11,6 @@ class JavaScriptTranslator(provider: TypeProvider) extends BaseTranslator(provid override def doByteArrayNonLiteral(elts: Seq[Ast.expr]): String = s"new Uint8Array([${elts.map(translate).mkString(", ")}])" - /** - * JavaScript rendition of common control character that would use hex form, - * not octal. "Octal" control character string literals might be accepted - * in non-strict JS mode, but in strict mode only hex or unicode are ok. - * Here we'll use hex, as they are shorter. 
- * - * @see https://github.com/kaitai-io/kaitai_struct/issues/279 - * @param code character code to represent - * @return string literal representation of given code - */ - override def strLiteralGenericCC(code: Char): String = - "\\x%02x".format(code.toInt) - override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = { (detectType(left), detectType(right), op) match { case (_: IntType, _: IntType, Ast.operator.Div) => diff --git a/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala index aa584b0ce..884c015c3 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala @@ -45,6 +45,21 @@ class JavaTranslator(provider: TypeProvider, importList: ImportList) extends Bas override def doByteArrayNonLiteral(elts: Seq[expr]): String = s"new byte[] { ${elts.map(translate).mkString(", ")} }" + /** + * Java does not support two-digit hex escape sequences, so use octal, as they are shorter. + * + * Note that we use strictly 3 octal digits to work around potential + * problems with following decimal digits, i.e. "\0" + "2" that would be + * parsed as single character "\02" = "\x02", instead of two characters + * "\x00\x32". 
+ * + * @see https://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.6 + * @param code character code to represent + * @return string literal representation of given code + */ + override def strLiteralGenericCC(code: Char): String = + "\\%03o".format(code.toInt) + override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = { (detectType(left), detectType(right), op) match { case (_: IntType, _: IntType, Ast.operator.Mod) => diff --git a/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala index c9ef6ce0f..94afc1e1d 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala @@ -39,7 +39,6 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base '\b' -> "\\b", '\u000b' -> "\\v", '\f' -> "\\f", - '\u001b' -> "\\027" ) override def strLiteralUnicode(code: Char): String = @@ -187,12 +186,12 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base } /** - * Converts byte array (Seq[Byte]) into decimal-escaped Lua-style literal - * characters (i.e. like \255). + * Converts byte array (Seq[Byte]) into hex-escaped Lua-style literal + * characters (i.e. like \xFF). 
* * @param arr byte array to escape - * @return array contents decimal-escaped as string + * @return array contents hex-escaped as string */ private def decEscapeByteArray(arr: Seq[Byte]): String = - arr.map((x) => "\\%03d".format(x & 0xff)).mkString + arr.map((x) => "\\x%02X".format(x & 0xff)).mkString } diff --git a/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala b/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala index d72ac5573..e6990b92e 100644 --- a/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala +++ b/shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala @@ -23,6 +23,13 @@ class RustTranslator(provider: TypeProvider, config: RuntimeConfig) extends Base '\\' -> "\\\\" ) + /** + * Hex escapes of the form `\xHH` in Rust allow only codes in the range 0x00 - 0x7f. + * + * @see https://doc.rust-lang.org/reference/tokens.html#examples + * @param code character code to represent + * @return string literal representation of given code + */ override def strLiteralUnicode(code: Char): String = "\\u{%x}".format(code.toInt)