Skip to content

Commit

Permalink
Merge branch 'master' into rust_basic_support_v2
Browse files Browse the repository at this point in the history
  • Loading branch information
generalmimon committed Sep 1, 2024
2 parents 24f04d9 + 8610e1d commit f360132
Show file tree
Hide file tree
Showing 48 changed files with 917 additions and 360 deletions.
2 changes: 1 addition & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
* Implement parsed data validations using `valid` key ([#435](https://github.com/kaitai-io/kaitai_struct/issues/435))
* Implement compile-time `sizeof` and `bitsizeof` operators ([#84](https://github.com/kaitai-io/kaitai_struct/issues/84))
* Type-based: `sizeof<u4>`, `bitsizeof<b13>`, `sizeof<user_type>`
* Value-based: `file_header._sizeof`, `flags._bitsizeof` (`file_header`, `flags` are fields defined in the current type)
* Value-based: `file_header._sizeof` (`file_header` is a field defined in the current type)
* Implement little-endian bit-sized integers ([docs](https://doc.kaitai.io/user_guide.html#bit-ints-le))
* Support choosing endianness using `le` / `be` suffix: `type: b12le`, `type: b1be`
* Add `meta/bit-endian` key for selecting default bit endianness (`le` / `be`)
Expand Down
80 changes: 58 additions & 22 deletions jvm/src/main/scala/io/kaitai/struct/JavaMain.scala
Original file line number Diff line number Diff line change
Expand Up @@ -210,35 +210,71 @@ object JavaMain {
// Windows, custom install path with spaces and non-latin chars:
// /G:/%d0%b3%d0%b4%d0%b5-%d1%82%d0%be%20%d1%82%d0%b0%d0%bc/lib/io.kaitai.kaitai-struct-compiler-0.10-SNAPSHOT.jar

val fStr = classOf[JavaMain].getProtectionDomain.getCodeSource.getLocation.getPath
Log.importOps.info(() => s"home path: location = $fStr")

if (fStr.endsWith(".jar")) {
val fDec = URLDecoder.decode(fStr, "UTF-8")
Log.importOps.info(() => s"... URL-decoded = $fDec")

val homeFile = new File(fDec).getParentFile.getParentFile
Log.importOps.info(() => s"... home = $homeFile")

if (homeFile.exists) {
val homeFormat = new File(homeFile, "formats")
Log.importOps.info(() => s"... formats = $homeFormat")
if (homeFormat.exists) {
Some(homeFormat.toString)
try {
optionOrLog(
classOf[JavaMain].getProtectionDomain.getCodeSource,
"home path: unable to run getCodeSource(), got null"
).flatMap(sourceCode => optionOrLog(
sourceCode.getLocation,
"home path: unable to run getLocation(), got null"
)).flatMap(location => optionOrLog(
location.getPath,
"home path: unable to run getPath(), got null"
)).flatMap(fStr => {
Log.importOps.info(() => s"home path: location = $fStr")

if (fStr.endsWith(".jar")) {
val fDec = URLDecoder.decode(fStr, "UTF-8")
Log.importOps.info(() => s"... URL-decoded = $fDec")

val homeFile = new File(fDec).getParentFile.getParentFile
Log.importOps.info(() => s"... home = $homeFile")

if (homeFile.exists) {
val homeFormat = new File(homeFile, "formats")
Log.importOps.info(() => s"... formats = $homeFormat")
if (homeFormat.exists) {
Some(homeFormat.toString)
} else {
Log.importOps.info(() => "... home formats dir doesn't exist => fail")
None
}
} else {
Log.importOps.info(() => s"... home doesn't exist => no home import paths")
None
}
} else {
Log.importOps.info(() => "... home formats dir doesn't exist => fail")
Log.importOps.info(() => s"... not a jar, we're not running a packaged app => no home")
None
}
} else {
Log.importOps.info(() => s"... home doesn't exist => no home import paths")
})
} catch {
case se: SecurityException =>
Log.importOps.info(() => s"home path: unable to run getProtectionDomain(), got SecurityException $se")
None
}
} else {
Log.importOps.info(() => s"... not a jar, we're not running a packaged app => no home")
None
}
}

/**
 * Wraps a possibly-null value (typically returned by a Java API) into an
 * [[scala.Option]], logging `errMsg` when the value turns out to be null.
 *
 * In a chain of `flatMap()` calls a null simply short-circuits the remaining
 * steps; routing every step through this helper additionally records which
 * step produced the null, making failures easier to diagnose.
 *
 * @param nullableValue value which may be null
 * @param errMsg message logged through `Log.importOps.info` if `nullableValue` is null
 * @tparam T type of the potentially null value
 * @return `Some(nullableValue)` when it is non-null, `None` otherwise
 */
private def optionOrLog[T](nullableValue: T, errMsg: String): Option[T] = {
  val wrapped = Option(nullableValue)
  if (wrapped.isEmpty) {
    // Nothing to pass further down the chain: leave a trace of the failed step
    Log.importOps.info(() => errMsg)
  }
  wrapped
}

/**
 * Import paths taken from the `KSPATH` environment variable.
 *
 * The variable's value is split on the platform path separator
 * (`File.pathSeparatorChar`); when the variable is not set, the result is
 * an empty list.
 */
private def envPaths: List[String] =
  sys.env.get("KSPATH") match {
    case Some(ksPath) => ksPath.split(File.pathSeparatorChar).toList
    case None => Nil
  }

Expand Down
15 changes: 15 additions & 0 deletions jvm/src/test/scala/io/kaitai/struct/exprlang/ExpressionsSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,21 @@ class ExpressionsSpec extends AnyFunSpec {
Expressions.parse("foo.bar") should be (Attribute(Name(identifier("foo")),identifier("bar")))
}

// Pins how the expression parser treats backslashes in plain string literals:
// single-quoted literals come out verbatim, double-quoted literals come out
// with their escape sequences resolved.
describe("strings") {
it("single-quoted") {
// No escape processing: every backslash sequence is kept exactly as written
// \" -> \"
// \\ -> \\
Expressions.parse(""" ' \" \\ ' """) should be(Str(" \\\" \\\\ "))
Expressions.parse(""" 'ASCII\\x' """) should be(Str("ASCII\\\\x"))
}
it("double-quoted") {
// Escape sequences are replaced by the character they denote
// \" -> "
// \\ -> \
Expressions.parse(""" " \" \\ " """) should be(Str(" \" \\ "))
Expressions.parse(""" "ASCII\\'x" """) should be(Str("ASCII\\'x"))
}
}

describe("f-strings") {
it("parses f-string with just a string") {
Expressions.parse("f\"abc\"") should be(InterpolatedStr(Seq(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ class TranslatorSpec extends AnyFunSpec {
full("42.to_s", CalcIntType, CalcStrType, ResultMap(
CppCompiler -> "kaitai::kstream::to_string(42)",
CSharpCompiler -> "42.ToString()",
GoCompiler -> "strconv.Itoa(int64(42))",
GoCompiler -> "strconv.FormatInt(int64(42), 10)",
JavaCompiler -> "Long.toString(42)",
JavaScriptCompiler -> "(42).toString()",
LuaCompiler -> "tostring(42)",
Expand All @@ -200,7 +200,7 @@ class TranslatorSpec extends AnyFunSpec {
full("(a + 42).to_s", CalcIntType, CalcStrType, ResultMap(
CppCompiler -> "kaitai::kstream::to_string(a() + 42)",
CSharpCompiler -> "(A + 42).ToString()",
GoCompiler -> "strconv.Itoa(int64(this.A + 42))",
GoCompiler -> "strconv.FormatInt(int64(this.A + 42), 10)",
JavaCompiler -> "Long.toString(a() + 42)",
JavaScriptCompiler -> "(this.a + 42).toString()",
LuaCompiler -> "tostring(self.a + 42)",
Expand All @@ -213,7 +213,7 @@ class TranslatorSpec extends AnyFunSpec {
full("a + 42.to_s", CalcStrType, CalcStrType, ResultMap(
CppCompiler -> "a() + kaitai::kstream::to_string(42)",
CSharpCompiler -> "A + 42.ToString()",
GoCompiler -> "this.A + strconv.Itoa(int64(42))",
GoCompiler -> "this.A + strconv.FormatInt(int64(42), 10)",
JavaCompiler -> "a() + Long.toString(42)",
JavaScriptCompiler -> "this.a + (42).toString()",
LuaCompiler -> "self.a .. tostring(42)",
Expand Down Expand Up @@ -701,7 +701,7 @@ class TranslatorSpec extends AnyFunSpec {
PerlCompiler -> "substr(\"foobar\", 2, 4 - 2)",
PHPCompiler -> "\\Kaitai\\Struct\\Stream::substring(\"foobar\", 2, 4)",
PythonCompiler -> "u\"foobar\"[2:4]",
RubyCompiler -> "\"foobar\"[2..4 - 1]"
RubyCompiler -> "\"foobar\"[2...4]"
))

// substring() call on concatenation of strings: for some languages, concatenation needs to be
Expand All @@ -716,7 +716,7 @@ class TranslatorSpec extends AnyFunSpec {
PerlCompiler -> "substr($self->foo() . $self->bar(), 2, 4 - 2)",
PHPCompiler -> "\\Kaitai\\Struct\\Stream::substring($this->foo() . $this->bar(), 2, 4)",
PythonCompiler -> "(self.foo + self.bar)[2:4]",
RubyCompiler -> "(foo + bar)[2..4 - 1]"
RubyCompiler -> "(foo + bar)[2...4]"
))

// substring() call with non-left-associative "from" and "to": for languages where subtraction
Expand All @@ -731,7 +731,7 @@ class TranslatorSpec extends AnyFunSpec {
PerlCompiler -> "substr($self->foo(), 10 - 7, (10 - 3) - (10 - 7))", // TODO: PerlCompiler -> "substr($self->foo(), 10 - 7, 10 - 3 - (10 - 7))",
PHPCompiler -> "\\Kaitai\\Struct\\Stream::substring($this->foo(), 10 - 7, 10 - 3)",
PythonCompiler -> "self.foo[10 - 7:10 - 3]",
RubyCompiler -> "foo[10 - 7..(10 - 3) - 1]" // TODO: RubyCompiler -> "foo[10 - 7..10 - 3 - 1]"
RubyCompiler -> "foo[10 - 7...10 - 3]"
))

// substring() call with "to" using `<<` which is lower precedence than `+` or `-`: if such
Expand All @@ -746,7 +746,7 @@ class TranslatorSpec extends AnyFunSpec {
PerlCompiler -> "substr($self->foo(), 10 - 7, (10 << 2) - (10 - 7))",
PHPCompiler -> "\\Kaitai\\Struct\\Stream::substring($this->foo(), 10 - 7, 10 << 2)",
PythonCompiler -> "self.foo[10 - 7:10 << 2]",
RubyCompiler -> "foo[10 - 7..(10 << 2) - 1]"
RubyCompiler -> "foo[10 - 7...10 << 2]"
))

// substring() call with "from" using `<<` which is lower precedence than `+` or `-`: if such
Expand All @@ -761,7 +761,7 @@ class TranslatorSpec extends AnyFunSpec {
PerlCompiler -> "substr($self->foo(), 10 << 1, 42 - (10 << 1))",
PHPCompiler -> "\\Kaitai\\Struct\\Stream::substring($this->foo(), 10 << 1, 42)",
PythonCompiler -> "self.foo[10 << 1:42]",
RubyCompiler -> "foo[10 << 1..42 - 1]"
RubyCompiler -> "foo[10 << 1...42]"
))
}
}
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version = 1.9.7
sbt.version = 1.10.0
2 changes: 1 addition & 1 deletion shared/src/main/scala/io/kaitai/struct/ClassCompiler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,12 @@ class ClassCompiler(
compileInstanceDoc(instName, instSpec)
lang.instanceCheckCacheAndReturn(instName, dataType)

lang.instanceSetCalculated(instName)
instSpec match {
case vi: ValueInstanceSpec =>
lang.attrParseIfHeader(instName, vi.ifExpr)
lang.instanceCalculate(instName, dataType, vi.value)
lang.attrParseIfFooter(vi.ifExpr)
lang.instanceSetCalculated(instName)
case pi: ParseInstanceSpec =>
lang.attrParse(pi, instName, endian)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,7 @@ class ConstructClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extend
val subcon2 = blt.terminator match {
case None => subcon
case Some(term) =>
val termStr = "\\x%02X".format(term & 0xff)
s"NullTerminated($subcon, term=b'$termStr', include=${translator.doBoolLiteral(blt.include)})"
s"NullTerminated($subcon, term=${translator.doByteArrayLiteral(term)}, include=${translator.doBoolLiteral(blt.include)})"
}
val subcon3 = blt.padRight match {
case None => subcon2
Expand All @@ -176,9 +175,8 @@ class ConstructClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extend
}

def attrBytesTerminatedType(btt: BytesTerminatedType, subcon: String): String = {
val termStr = "\\x%02X".format(btt.terminator & 0xff)
s"NullTerminated($subcon, " +
s"term=b'$termStr', " +
s"term=${translator.doByteArrayLiteral(btt.terminator)}, " +
s"include=${translator.doBoolLiteral(btt.include)}, " +
s"consume=${translator.doBoolLiteral(btt.consume)})"
}
Expand Down
67 changes: 35 additions & 32 deletions shared/src/main/scala/io/kaitai/struct/EncodingList.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,40 @@ object EncodingList {
* generate a KSC warning no matter which form was used.
*/
val canonicalToAlias = Map(
"ASCII" -> Set("US-ASCII"),
"UTF-8" -> Set("UTF8", "ISO-10646/UTF-8", "ISO-10646/UTF8"),
"UTF-16LE" -> Set("UTF16LE"),
"UTF-16BE" -> Set("UTF16BE"),
"UTF-32LE" -> Set("UTF32LE"),
"UTF-32BE" -> Set("UTF32BE"),
"ISO-8859-1" -> Set("ISO8859-1", "ISO_8859-1", "ISO88591", "8859_1", "8859-1", "88591", "LATIN1", "IBM819", "CP819", "csISOLatin1", "iso-ir-100", "windows-28591", "WE8ISO8859P1"),
"ISO-8859-2" -> Set("ISO8859-2", "ISO_8859-2", "ISO88592", "8859_2", "8859-2", "88592", "LATIN2", "IBM1111", "CP1111", "csISOLatin2", "iso-ir-101", "windows-28592"),
"ISO-8859-3" -> Set("ISO8859-3", "ISO_8859-3", "ISO88593", "8859_3", "8859-3", "88593", "LATIN3", "csISOLatin3", "iso-ir-109", "windows-28593"),
"ISO-8859-4" -> Set("ISO8859-4", "ISO_8859-4", "ISO88594", "8859_4", "8859-4", "88594", "LATIN4", "csISOLatin4", "iso-ir-110", "windows-28594"),
"ISO-8859-5" -> Set("ISO8859-5", "ISO_8859-5", "ISO88595", "8859_5", "8859-5", "88595", "csISOLatinCyrillic", "ISO-IR-144", "windows-28595"),
"ISO-8859-6" -> Set("ISO8859-6", "ISO_8859-6", "ISO88596", "8859_6", "8859-6", "88596", "iso-ir-127", "ECMA-114", "ASMO-708", "csISOLatinArabic", "windows-28596"),
"ISO-8859-7" -> Set("ISO8859-7", "ISO_8859-7", "ISO88597", "8859_7", "8859-7", "88597"),
"ISO-8859-8" -> Set("ISO8859-8", "ISO_8859-8", "ISO88598", "8859_8", "8859-8", "88598"),
"ISO-8859-9" -> Set("ISO8859-9", "ISO_8859-9", "ISO88599", "8859_9", "8859-9", "88599"),
"ISO-8859-10" -> Set("ISO8859-10", "ISO_8859-10", "ISO885910", "8859_10", "8859-10", "885910"),
"ISO-8859-11" -> Set("ISO8859-11", "ISO_8859-11", "ISO885911", "8859_11", "8859-11", "885911"),
"ISO-8859-13" -> Set("ISO8859-13", "ISO_8859-13", "ISO885913", "8859_13", "8859-13", "885913"),
"ISO-8859-14" -> Set("ISO8859-14", "ISO_8859-14", "ISO885914", "8859_14", "8859-14", "885914"),
"ISO-8859-15" -> Set("ISO8859-15", "ISO_8859-15", "ISO885915", "8859_15", "8859-15", "885915"),
"ISO-8859-16" -> Set("ISO8859-16", "ISO_8859-16", "ISO885916", "8859_16", "8859-16", "885916"),
"windows-1250" -> Set("CP1250"),
"windows-1251" -> Set("CP1251"),
"windows-1252" -> Set("CP1252"),
"windows-1253" -> Set("CP1253"),
"windows-1254" -> Set("CP1254"),
"windows-1255" -> Set("CP1255"),
"windows-1256" -> Set("CP1256"),
"windows-1257" -> Set("CP1257"),
"windows-1258" -> Set("CP1258"),
"IBM437" -> Set("cp437", "437", "csibm437"),
"IBM866" -> Set("cp866", "866", "csibm866"),
"ASCII" -> Set("US-ASCII", "US_ASCII", "IBM367", "cp367", "csASCII", "iso-ir-6"),
"UTF-8" -> Set("UTF8", "UTF_8", "ISO-10646/UTF-8", "ISO-10646/UTF8", "cp65001", "csUTF8", "unicode-1-1-utf-8", "unicode-2-0-utf-8"),
"UTF-16BE" -> Set("UTF16BE", "UTF16-BE", "UTF-16-BE", "UTF_16BE", "UTF16_BE", "UTF_16_BE", "csUTF16BE"),
"UTF-16LE" -> Set("UTF16LE", "UTF16-LE", "UTF-16-LE", "UTF_16LE", "UTF16_LE", "UTF_16_LE", "csUTF16LE"),
"UTF-32BE" -> Set("UTF32BE", "UTF32-BE", "UTF-32-BE", "UTF_32BE", "UTF32_BE", "UTF_32_BE", "csUTF32BE"),
"UTF-32LE" -> Set("UTF32LE", "UTF32-LE", "UTF-32-LE", "UTF_32LE", "UTF32_LE", "UTF_32_LE", "csUTF32LE"),
"ISO-8859-1" -> Set("ISO8859-1", "ISO_8859-1", "ISO88591", "ISO_8859_1", "ISO8859_1", "8859_1", "8859-1", "88591", "latin1", "L1", "csISOLatin1", "iso-ir-100", "IBM819", "cp819", "windows-28591"),
"ISO-8859-2" -> Set("ISO8859-2", "ISO_8859-2", "ISO88592", "ISO_8859_2", "ISO8859_2", "8859_2", "8859-2", "88592", "latin2", "L2", "csISOLatin2", "iso-ir-101", "IBM1111", "windows-28592"),
"ISO-8859-3" -> Set("ISO8859-3", "ISO_8859-3", "ISO88593", "ISO_8859_3", "ISO8859_3", "8859_3", "8859-3", "88593", "latin3", "L3", "csISOLatin3", "iso-ir-109", "windows-28593"),
"ISO-8859-4" -> Set("ISO8859-4", "ISO_8859-4", "ISO88594", "ISO_8859_4", "ISO8859_4", "8859_4", "8859-4", "88594", "latin4", "L4", "csISOLatin4", "iso-ir-110", "windows-28594"),
"ISO-8859-5" -> Set("ISO8859-5", "ISO_8859-5", "ISO88595", "ISO_8859_5", "ISO8859_5", "8859_5", "8859-5", "88595", "cyrillic", "csISOLatinCyrillic", "iso-ir-144", "windows-28595"),
"ISO-8859-6" -> Set("ISO8859-6", "ISO_8859-6", "ISO88596", "ISO_8859_6", "ISO8859_6", "8859_6", "8859-6", "88596", "arabic", "csISOLatinArabic", "iso-ir-127", "windows-28596", "ECMA-114", "ASMO-708"),
"ISO-8859-7" -> Set("ISO8859-7", "ISO_8859-7", "ISO88597", "ISO_8859_7", "ISO8859_7", "8859_7", "8859-7", "88597", "greek", "greek8", "csISOLatinGreek", "iso-ir-126", "windows-28597", "ECMA-118", "ELOT_928"),
"ISO-8859-8" -> Set("ISO8859-8", "ISO_8859-8", "ISO88598", "ISO_8859_8", "ISO8859_8", "8859_8", "8859-8", "88598", "hebrew", "csISOLatinHebrew", "iso-ir-138", "windows-28598"),
"ISO-8859-9" -> Set("ISO8859-9", "ISO_8859-9", "ISO88599", "ISO_8859_9", "ISO8859_9", "8859_9", "8859-9", "88599", "latin5", "L5", "csISOLatin5", "iso-ir-148", "windows-28599"),
"ISO-8859-10" -> Set("ISO8859-10", "ISO_8859-10", "ISO885910", "ISO_8859_10", "ISO8859_10", "8859_10", "8859-10", "885910", "latin6", "L6", "csISOLatin6", "iso-ir-157"),
"ISO-8859-11" -> Set("ISO8859-11", "ISO_8859-11", "ISO885911", "ISO_8859_11", "ISO8859_11", "8859_11", "8859-11", "885911", "thai", "csTIS620", "TIS-620"),
"ISO-8859-13" -> Set("ISO8859-13", "ISO_8859-13", "ISO885913", "ISO_8859_13", "ISO8859_13", "8859_13", "8859-13", "885913", "latin7", "L7", "csISO885913", "windows-28603"),
"ISO-8859-14" -> Set("ISO8859-14", "ISO_8859-14", "ISO885914", "ISO_8859_14", "ISO8859_14", "8859_14", "8859-14", "885914", "latin8", "L8", "csISO885914", "iso-ir-199", "iso-celtic"),
"ISO-8859-15" -> Set("ISO8859-15", "ISO_8859-15", "ISO885915", "ISO_8859_15", "ISO8859_15", "8859_15", "8859-15", "885915", "latin9", "L9", "csISO885915", "windows-28605"),
"ISO-8859-16" -> Set("ISO8859-16", "ISO_8859-16", "ISO885916", "ISO_8859_16", "ISO8859_16", "8859_16", "8859-16", "885916", "latin10", "L10", "csISO885916"),
"windows-1250" -> Set("cp1250", "cswindows1250"),
"windows-1251" -> Set("cp1251", "cswindows1251"),
"windows-1252" -> Set("cp1252", "cswindows1252"),
"windows-1253" -> Set("cp1253", "cswindows1253"),
"windows-1254" -> Set("cp1254", "cswindows1254"),
"windows-1255" -> Set("cp1255", "cswindows1255"),
"windows-1256" -> Set("cp1256", "cswindows1256"),
"windows-1257" -> Set("cp1257", "cswindows1257"),
"windows-1258" -> Set("cp1258", "cswindows1258"),
"IBM437" -> Set("cp437", "437", "csPC8CodePage437"),
"IBM866" -> Set("cp866", "866", "csIBM866"),
"Shift_JIS" -> Set("Shift-JIS", "ShiftJIS", "S-JIS", "S_JIS", "SJIS", "PCK", "csShiftJIS"),
"Big5" -> Set("csBig5"),
"EUC-KR" -> Set("EUCKR", "EUC_KR", "csEUCKR", "korean", "iso-ir-149"),
)
}
6 changes: 3 additions & 3 deletions shared/src/main/scala/io/kaitai/struct/GoClassCompiler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,16 @@ class GoClassCompiler(
lang.instanceHeader(className, instName, dataType, instSpec.isNullable)
lang.instanceCheckCacheAndReturn(instName, dataType)

lang.instanceSetCalculated(instName)
instSpec match {
case vi: ValueInstanceSpec =>
lang.attrParseIfHeader(instName, vi.ifExpr)
lang.instanceCalculate(instName, dataType, vi.value)
lang.attrParseIfFooter(vi.ifExpr)
case i: ParseInstanceSpec =>
lang.attrParse(i, instName, endian)
case pi: ParseInstanceSpec =>
lang.attrParse(pi, instName, endian)
}

lang.instanceSetCalculated(instName)
lang.instanceReturn(instName, dataType)
lang.instanceFooter
}
Expand Down
Loading

0 comments on commit f360132

Please sign in to comment.