From eca16ce9ade73bb73faf70adeb68cf75a41d9f65 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 15 Mar 2024 20:28:08 +0500 Subject: [PATCH 1/4] Remove unused argument --- .../io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala index 67f090937..ad5b1c308 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala @@ -43,7 +43,7 @@ class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep { } object CanonicalizeEncodingNames { - def canonicalizeName(original: String, unrecognizedIsError: Boolean = true): (String, Option[CompilationProblem with PathLocalizable]) = { + def canonicalizeName(original: String): (String, Option[CompilationProblem with PathLocalizable]) = { // Try exact match with canonical list if (EncodingList.canonicalToAlias.contains(original)) { (original, None) From 8fc05aa4bbb1c158bb4f3c14a5dc3069be96af82 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 15 Mar 2024 20:52:57 +0500 Subject: [PATCH 2/4] Make all methods in CanonicalizeEncodingNames static so they can be tested --- .../kaitai/struct/precompile/CanonicalizeEncodingNames.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala index ad5b1c308..6f1120a9c 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala @@ -9,7 +9,9 @@ import io.kaitai.struct.Platform class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep { override def run(): Iterable[CompilationProblem] = specs.mapRec(canonicalize) +} +object CanonicalizeEncodingNames { def canonicalize(curClass: ClassSpec): Iterable[CompilationProblem] = { val metaProblems = canonicalizeMeta(curClass.meta) val seqProblems = curClass.seq.flatMap(attr => canonicalizeMember(attr)) @@ -40,9 +42,7 @@ class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep { None }).map(problem => problem.localizedInPath(member.path ++ List("encoding"))) } -} -object CanonicalizeEncodingNames { def canonicalizeName(original: String): (String, Option[CompilationProblem with PathLocalizable]) = { // Try exact match with canonical list if (EncodingList.canonicalToAlias.contains(original)) { From 6f9bbb518c3d1e2263cfa2fe78ff409e89838909 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 15 Mar 2024 21:28:43 +0500 Subject: [PATCH 3/4] Make parse and value instance constructors available for unit testing Private parameters prevents creating classes in tests ant the argument order not very pleasant --- .../kaitai/struct/format/InstanceSpec.scala | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/shared/src/main/scala/io/kaitai/struct/format/InstanceSpec.scala b/shared/src/main/scala/io/kaitai/struct/format/InstanceSpec.scala index 840d32c96..4a66307ed 100644 --- a/shared/src/main/scala/io/kaitai/struct/format/InstanceSpec.scala +++ b/shared/src/main/scala/io/kaitai/struct/format/InstanceSpec.scala @@ -11,10 +11,10 @@ sealed abstract class InstanceSpec(val doc: DocSpec) extends MemberSpec { case class ValueInstanceSpec( id: InstanceIdentifier, path: List[String], - private val _doc: DocSpec, value: Ast.expr, - ifExpr: Option[Ast.expr], - var dataTypeOpt: Option[DataType] + ifExpr: Option[Ast.expr] = None, + var dataTypeOpt: Option[DataType] = None, + val _doc: DocSpec = DocSpec.EMPTY, ) extends InstanceSpec(_doc) { override def dataType: DataType = { dataTypeOpt match { @@ -29,12 +29,12 @@ case class ValueInstanceSpec( case class ParseInstanceSpec( id: InstanceIdentifier, path: List[String], - private val _doc: DocSpec, dataType: DataType, - cond: ConditionalSpec, - pos: Option[Ast.expr], - io: Option[Ast.expr], - valid: Option[ValidationSpec] + cond: ConditionalSpec = ConditionalSpec(None, NoRepeat), + pos: Option[Ast.expr] = None, + io: Option[Ast.expr] = None, + valid: Option[ValidationSpec] = None, + val _doc: DocSpec = DocSpec.EMPTY, ) extends InstanceSpec(_doc) with AttrLikeSpec { override def isLazy = true } @@ -69,10 +69,10 @@ object InstanceSpec { ValueInstanceSpec( id, path, - DocSpec.fromYaml(srcMap, path), value2, ifExpr, - None + None, + DocSpec.fromYaml(srcMap, path), ) case None => // normal parse instance @@ -86,7 +86,7 @@ object InstanceSpec { val a = AttrSpec.fromYaml(fakeAttrMap, path, metaDef, id) val valid = srcMap.get("valid").map(ValidationSpec.fromYaml(_, path ++ List("valid"))) - ParseInstanceSpec(id, path, a.doc, a.dataType, a.cond, pos, io, valid) + ParseInstanceSpec(id, path, a.dataType, a.cond, pos, io, valid, a.doc) } } } From 9374160222d8f24910722adc2c6bff1005d1b476 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 15 Mar 2024 22:04:34 +0500 Subject: [PATCH 4/4] Fix excess warnings about inherited encoding attributes --- .../CanonicalizeEncodingNames$Test.scala | 64 +++++++++++++++++++ .../struct/ConstructClassCompiler.scala | 2 +- .../kaitai/struct/GraphvizClassCompiler.scala | 4 +- .../io/kaitai/struct/datatype/DataType.scala | 21 +++++- .../struct/precompile/CalculateSeqSizes.scala | 2 +- .../CanonicalizeEncodingNames.scala | 3 +- 6 files changed, 89 insertions(+), 7 deletions(-) diff --git a/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala b/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala index ea7fb8046..010303484 100644 --- a/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala +++ b/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala @@ -1,5 +1,8 @@ package io.kaitai.struct.precompile +import io.kaitai.struct.datatype.DataType.{CalcBytesType, StrFromBytesType} +import io.kaitai.struct.exprlang.Ast +import io.kaitai.struct.format._ import io.kaitai.struct.problems._ import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers._ @@ -44,5 +47,66 @@ class CanonicalizeEncodingNames$Test extends AnyFunSpec { Locale.setDefault(oldLocale) } } + + describe("do not report warnings for members with derived encoding") { + it("in parameters") { + val problems = CanonicalizeEncodingNames.canonicalizeMember(ParamDefSpec( + path = List("param"), + id = NumberedIdentifier(1), + dataType = StrFromBytesType(CalcBytesType, "utf-8", false), + )).toList + problems should be(List(EncodingNameWarning( + "UTF-8", "utf-8", + ProblemCoords(None, Some(List("param", "encoding")), None, None) + ))) + + val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ParamDefSpec( + path = List("param"), + id = NumberedIdentifier(1), + dataType = StrFromBytesType(CalcBytesType, "utf-8", true), + )).toList + noProblems should be(List()) + } + + it("in parse instance") { + val problems = CanonicalizeEncodingNames.canonicalizeMember(ParseInstanceSpec( + id = InstanceIdentifier("parse_instance"), + path = List("parse", "instance"), + dataType = StrFromBytesType(CalcBytesType, "utf-8", false), + )).toList + problems should be(List(EncodingNameWarning( + "UTF-8", "utf-8", + ProblemCoords(None, Some(List("parse", "instance", "encoding")), None, None) + ))) + + val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ParseInstanceSpec( + id = InstanceIdentifier("parse_instance"), + path = List("parse", "instance"), + dataType = StrFromBytesType(CalcBytesType, "utf-8", true), + )).toList + noProblems should be(List()) + } + + it("in value instance") { + val problems = CanonicalizeEncodingNames.canonicalizeMember(ValueInstanceSpec( + id = InstanceIdentifier("value_instance"), + path = List("value", "instance"), + value = Ast.expr.Str("value"), + dataTypeOpt = Some(StrFromBytesType(CalcBytesType, "utf-8", false)), + )).toList + problems should be(List(EncodingNameWarning( + "UTF-8", "utf-8", + ProblemCoords(None, Some(List("value", "instance", "encoding")), None, None) + ))) + + val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ValueInstanceSpec( + id = InstanceIdentifier("value_instance"), + path = List("value", "instance"), + value = Ast.expr.Str("value"), + dataTypeOpt = Some(StrFromBytesType(CalcBytesType, "utf-8", true)), + )).toList + noProblems should be(List()) + } + } } } diff --git a/shared/src/main/scala/io/kaitai/struct/ConstructClassCompiler.scala b/shared/src/main/scala/io/kaitai/struct/ConstructClassCompiler.scala index 7e85e6a81..192259fff 100644 --- a/shared/src/main/scala/io/kaitai/struct/ConstructClassCompiler.scala +++ b/shared/src/main/scala/io/kaitai/struct/ConstructClassCompiler.scala @@ -133,7 +133,7 @@ class ConstructClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extend attrBytesLimitType(blt) case btt: BytesTerminatedType => attrBytesTerminatedType(btt, "GreedyBytes") - case StrFromBytesType(bytes, encoding) => + case StrFromBytesType(bytes, encoding, _) => bytes match { case BytesEosType(terminator, include, padRight, process) => s"GreedyString(encoding='$encoding')" diff --git a/shared/src/main/scala/io/kaitai/struct/GraphvizClassCompiler.scala b/shared/src/main/scala/io/kaitai/struct/GraphvizClassCompiler.scala index b41a45023..0e8386cb1 100644 --- a/shared/src/main/scala/io/kaitai/struct/GraphvizClassCompiler.scala +++ b/shared/src/main/scala/io/kaitai/struct/GraphvizClassCompiler.scala @@ -241,7 +241,7 @@ class GraphvizClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extends dataType match { case _: BytesEosType => END_OF_STREAM case blt: BytesLimitType => expressionSize(blt.size, attrName) - case StrFromBytesType(basedOn, _) => dataTypeSizeAsString(basedOn, attrName) + case StrFromBytesType(basedOn, _, _) => dataTypeSizeAsString(basedOn, attrName) case utb: UserTypeFromBytes => dataTypeSizeAsString(utb.bytes, attrName) case EnumType(_, basedOn) => dataTypeSizeAsString(basedOn, attrName) case _ => @@ -425,7 +425,7 @@ object GraphvizClassCompiler extends LanguageCompilerStatic { args += "ignore EOS" args.mkString(", ") case _: BytesType => "" - case StrFromBytesType(basedOn, encoding) => + case StrFromBytesType(basedOn, encoding, _) => val bytesStr = dataTypeName(basedOn) val comma = if (bytesStr.isEmpty) "" else ", " s"str($bytesStr$comma$encoding)" diff --git a/shared/src/main/scala/io/kaitai/struct/datatype/DataType.scala b/shared/src/main/scala/io/kaitai/struct/datatype/DataType.scala index f32fc2d5f..9df11ca94 100644 --- a/shared/src/main/scala/io/kaitai/struct/datatype/DataType.scala +++ b/shared/src/main/scala/io/kaitai/struct/datatype/DataType.scala @@ -92,7 +92,24 @@ object DataType { abstract class StrType extends DataType case object CalcStrType extends StrType - case class StrFromBytesType(bytes: BytesType, var encoding: String) extends StrType + /** + * A type that have the `str` and `strz` built-in Kaitai types. + * + * @param bytes An underlying bytes of the string + * @param encoding An encoding used to convert byte array into string + * @param isEncodingDerived A flag that is `true` when encoding is derived from + * `meta/encoding` key of a type in which attribute with this type is + * defined and `false` when encoding defined in the attribute + */ + case class StrFromBytesType( + bytes: BytesType, + var encoding: String, + // FIXME: Actually, this should not be a part of KSY model used to code generation, + // but be a part of intermediate model. The current design of KSC does not have + // that intermediate model yet, so we put this flag here. See discussion at + // https://github.com/kaitai-io/kaitai_struct_compiler/pull/278#discussion_r1527312479 + isEncodingDerived: Boolean, + ) extends StrType case object CalcBooleanType extends BooleanType @@ -400,7 +417,7 @@ object DataType { } val bat = arg2.getByteArrayType(path) - StrFromBytesType(bat, enc) + StrFromBytesType(bat, enc, arg.encoding.isEmpty) case _ => val typeWithArgs = Expressions.parseTypeRef(dt) if (arg.size.isEmpty && !arg.sizeEos && arg.terminator.isEmpty) { diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/CalculateSeqSizes.scala b/shared/src/main/scala/io/kaitai/struct/precompile/CalculateSeqSizes.scala index 61f21ae69..4d28c263b 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/CalculateSeqSizes.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/CalculateSeqSizes.scala @@ -113,7 +113,7 @@ object CalculateSeqSizes { case None => DynamicSized } case _: BytesTerminatedType => DynamicSized - case StrFromBytesType(basedOn, _) => dataTypeByteSize(basedOn) + case StrFromBytesType(basedOn, _, _) => dataTypeByteSize(basedOn) case utb: UserTypeFromBytes => dataTypeByteSize(utb.bytes) case cutb: CalcUserTypeFromBytes => dataTypeByteSize(cutb.bytes) case st: SwitchType => DynamicSized // FIXME: it's really possible get size if st.hasSize diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala index 6f1120a9c..80ff95fe2 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala @@ -36,7 +36,8 @@ object CanonicalizeEncodingNames { case strType: StrFromBytesType => val (newEncoding, problem1) = canonicalizeName(strType.encoding) strType.encoding = newEncoding - problem1 + // Do not report problem if encoding was derived from `meta/encoding` key + if (strType.isEncodingDerived) None else problem1 case _ => // not a string type = no problem None