From 7c3f52cf3c8158b63776df353e78e8ee4730d1d7 Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Wed, 6 Mar 2024 21:35:17 +0100 Subject: [PATCH 1/2] Fix occasional failed imports due to race conditions Fixes https://github.com/kaitai-io/kaitai_struct/issues/951 Until now, KSC had occasional problems with resolving imported top-level types, reporting `unable to find type ''` errors from time to time (see https://github.com/kaitai-io/kaitai_struct/issues/951 for more details and reproduction instructions). It turned out that the importing code ran on multiple threads that were concurrently modifying/reading shared non-thread-safe `HashMap`s without any synchronization, which resulted in race conditions. I thought it would be best to eliminate the race conditions by switching to some thread-safe counterpart to `mutable.HashMap`. After some googling, it turned out that there are two viable options, [`scala.collection.concurrent.TrieMap`](https://www.scala-lang.org/api/2.13.13/scala/collection/concurrent/TrieMap.html) and [`java.util.concurrent.ConcurrentHashMap`](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/ConcurrentHashMap.html). However, `TrieMap` doesn't seem to be implemented for Scala.js (https://github.com/scala-js/scala-js/issues/4866) and it is `final`, so we cannot use it as a base class of `ClassSpecs` instead of `mutable.HashMap` that we're using now. `ConcurrentHashMap` (despite being in the `java.util.concurrent.*` package, suggesting that it would only be available on JVM) surprisingly appears to be available in Scala.js (https://github.com/scala-js/scala-js/issues/1487), so in theory we could use it even in `shared/src/...` code, but I haven't figured out how to make `ClassSpecs` extend from it. It must be added that since Scala 2.13, the inheritance from `HashMap` was deprecated, so we'll have to find another way, but for now it works. The unsynchronized hash map accesses in the `JavaClassSpecs.cached()` are in JVM-specific code (`jvm/src/...` folder), so I've just changed the private `relFiles` and `absFiles` fields to use `ConcurrentHashMap`, thus resolving the race conditions here. (`TrieMap` would work too - there's probably no practical difference for our use case.) For the `LoadImports.loadImport()` method, I've just wrapped the accesses to shared `ClassSpecs` in a manual `synchronized` block. According to some on the internet, using `synchronized` is kind of discouraged due to being error-prone in favor of using existing thread-safe types or immutable types (Scala generally seems to prefer immutable types, but I can't imagine how they could be used in this case), but I believe it's the easiest way to solve the problem here. --- .../io/kaitai/struct/formats/JavaClassSpecs.scala | 7 +++++-- .../io/kaitai/struct/precompile/LoadImports.scala | 10 ++++++++-- .../io/kaitai/struct/precompile/ResolveTypes.scala | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala b/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala index 4966dc5d8..55583788f 100644 --- a/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala +++ b/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala @@ -8,6 +8,9 @@ import java.io.{File, FileNotFoundException} import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future +import scala.collection.concurrent +import java.util.concurrent.ConcurrentHashMap +import scala.jdk.CollectionConverters._ /** * Java implementation of ClassSpec container, doing imports from local files. @@ -15,8 +18,8 @@ import scala.concurrent.Future class JavaClassSpecs(relPath: String, absPaths: Seq[String], firstSpec: ClassSpec) extends ClassSpecs(firstSpec) { - private val relFiles = mutable.Map[String, ClassSpec]() - private val absFiles = mutable.Map[String, ClassSpec]() + private val relFiles: concurrent.Map[String, ClassSpec] = new ConcurrentHashMap[String, ClassSpec]().asScala + private val absFiles: concurrent.Map[String, ClassSpec] = new ConcurrentHashMap[String, ClassSpec]().asScala override def importRelative(name: String, path: List[String], inFile: Option[String]): Future[Option[ClassSpec]] = Future { Log.importOps.info(() => s".. importing relative $name") diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala b/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala index b04e40af4..304a3e3f1 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala @@ -76,8 +76,14 @@ class LoadImports(specs: ClassSpecs) { // // In theory, duplicate imports shouldn't be returned at all by // import* methods due to caching, but we won't rely on it here. - if (!specs.contains(specName)) { - specs(specName) = spec + val isNewSpec = specs.synchronized { + val isNew = !specs.contains(specName) + if (isNew) { + specs(specName) = spec + } + isNew + } + if (isNewSpec) { processClass(spec, ImportPath.updateWorkDir(workDir, impPath)) } else { Log.importOps.warn(() => s"... we have that already, ignoring") diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/ResolveTypes.scala b/shared/src/main/scala/io/kaitai/struct/precompile/ResolveTypes.scala index 23730902e..cbdbabd74 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/ResolveTypes.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/ResolveTypes.scala @@ -80,6 +80,7 @@ class ResolveTypes(specs: ClassSpecs, topClass: ClassSpec, opaqueTypes: Boolean) (Some(ClassSpec.opaquePlaceholder(typeName)), None) } else { // Opaque types are disabled => that is an error + Log.typeResolve.info(() => " => ??? (opaque type are disabled => error)") (None, Some(TypeNotFoundErr(typeName, curClass, path))) } case Some(x) => From 1a713100c40159df202f5582c93afb4ba2e87964 Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Fri, 8 Mar 2024 22:42:06 +0100 Subject: [PATCH 2/2] Explain why hash maps for imports need synchronization See https://github.com/kaitai-io/kaitai_struct_compiler/pull/270#discussion_r1518229002 --- .../scala/io/kaitai/struct/formats/JavaClassSpecs.scala | 6 ++++++ .../scala/io/kaitai/struct/precompile/LoadImports.scala | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala b/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala index 55583788f..368084e57 100644 --- a/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala +++ b/jvm/src/main/scala/io/kaitai/struct/formats/JavaClassSpecs.scala @@ -18,6 +18,12 @@ import scala.jdk.CollectionConverters._ class JavaClassSpecs(relPath: String, absPaths: Seq[String], firstSpec: ClassSpec) extends ClassSpecs(firstSpec) { + // We're using thread-safe `ConcurrentHashMap` for `relFiles` and `absFiles`, + // because these hash maps may be mutated concurrently by multiple threads in + // `JavaClassSpecs.cached()`. Using a non-thread-safe hash map here could + // occasionally cause `cacheMap.get(name)` in `JavaClassSpecs.cached()` to + // fail internally and throw an `ArrayIndexOutOfBoundsException`, see + // https://github.com/kaitai-io/kaitai_struct/issues/951 private val relFiles: concurrent.Map[String, ClassSpec] = new ConcurrentHashMap[String, ClassSpec]().asScala private val absFiles: concurrent.Map[String, ClassSpec] = new ConcurrentHashMap[String, ClassSpec]().asScala diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala b/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala index 304a3e3f1..084df223e 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/LoadImports.scala @@ -76,6 +76,15 @@ class LoadImports(specs: ClassSpecs) { // // In theory, duplicate imports shouldn't be returned at all by // import* methods due to caching, but we won't rely on it here. + // + // The `synchronized` block is necessary because this code is run + // concurrently by multiple threads (each resolving different imports) + // and `specs` is a shared non-thread-safe `HashMap`. Without this + // synchronization, a few imports were occasionally missing from + // `specs` due to a race condition, and even (though rarely) the + // implementation of `specs.contains()` could fail internally with an + // `ArrayIndexOutOfBoundsException`. For more details, see + // https://github.com/kaitai-io/kaitai_struct/issues/951 val isNewSpec = specs.synchronized { val isNew = !specs.contains(specName) if (isNew) {