Skip to content

Commit

Permalink
Merge pull request #265 from kaitai-io/fix-touppercase
Browse files Browse the repository at this point in the history
CanonicalizeEncodingNames: fix locale-sensitive `toUpperCase`
  • Loading branch information
generalmimon authored Mar 3, 2024
2 parents 89cf9b7 + 89226c1 commit 29bced3
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package io.kaitai.struct.precompile
import io.kaitai.struct.problems._
import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.should.Matchers._
import java.util.Locale

class CanonicalizeEncodingNames$Test extends AnyFunSpec {
describe("CanonicalizeEncodingNames.") {
Expand All @@ -29,5 +30,19 @@ class CanonicalizeEncodingNames$Test extends AnyFunSpec {
newEncoding should be("ISO-8859-1")
problem should be(Some(EncodingNameWarning("ISO-8859-1", "iSo-8859-1")))
}

it("reports warning and fixes bad capitalization for 'iSo-8859-1' even in Turkish locale") {
  // This test only covers the case conversion in the `canonicalizeName` implementation,
  // not the case conversions used when initializing the `aliasToCanonical` map.
  //
  // The JVM default locale is process-wide state, so remember the current value and
  // restore it in `finally`, whether the assertions pass or throw.
  val oldLocale = Locale.getDefault
  // `Locale.forLanguageTag` is used instead of the `Locale(String)` constructor, which is
  // deprecated since Java 19; both produce the same language-only "tr" locale.
  Locale.setDefault(Locale.forLanguageTag("tr"))
  try {
    val (newEncoding, problem) = CanonicalizeEncodingNames.canonicalizeName("iSo-8859-1")
    newEncoding should be("ISO-8859-1")
    problem should be(Some(EncodingNameWarning("ISO-8859-1", "iSo-8859-1")))
  } finally {
    Locale.setDefault(oldLocale)
  }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import io.kaitai.struct.datatype.DataType.StrFromBytesType
import io.kaitai.struct.format._
import io.kaitai.struct.precompile.CanonicalizeEncodingNames._
import io.kaitai.struct.problems._
import io.kaitai.struct.Platform

class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep {
override def run(): Iterable[CompilationProblem] = specs.mapRec(canonicalize)
Expand Down Expand Up @@ -48,7 +49,7 @@ object CanonicalizeEncodingNames {
(original, None)
} else {
// See if any aliases match
aliasToCanonical.get(original.toUpperCase) match {
aliasToCanonical.get(Platform.toUpperLocaleInsensitive(original)) match {
case Some(canonical) =>
(
canonical,
Expand All @@ -65,7 +66,7 @@ object CanonicalizeEncodingNames {

/**
  * Lookup table from the upper-cased spelling of every encoding name listed in
  * `EncodingList.canonicalToAlias` (each alias and each canonical name itself)
  * to the canonical name.
  *
  * Keys are upper-cased with `Platform.toUpperLocaleInsensitive` rather than plain
  * `String.toUpperCase`, whose result depends on the JVM default locale (for example,
  * under a Turkish locale "i" does not upper-case to "I"). Lookups into this map must
  * upper-case their argument the same way, otherwise keys and queries will not match.
  */
private val aliasToCanonical: Map[String, String] =
  EncodingList.canonicalToAlias.flatMap { case (canonical, aliases) =>
    // Every alias points at the canonical name it belongs to.
    aliases.map(alias => (Platform.toUpperLocaleInsensitive(alias), canonical))
  } ++
    // Canonical names map to themselves, so wrong-case canonical spellings resolve too.
    EncodingList.canonicalToAlias.keys.map(x => Platform.toUpperLocaleInsensitive(x) -> x)
}

0 comments on commit 29bced3

Please sign in to comment.