From 05ac22643b101e2eb1b5847af40f07f9997d3b44 Mon Sep 17 00:00:00 2001 From: Sebastian Schuberth Date: Mon, 3 Jan 2022 18:06:30 +0100 Subject: [PATCH] analyzer: Change `PackageCurationProvider` to support bulk requests Requesting curations for multiple packages / ids at once can be much more performant than single request, depending on the actual provider implementation. Partly resolves #3905. Signed-off-by: Sebastian Schuberth --- .../src/main/kotlin/AnalyzerResultBuilder.kt | 9 ++- .../main/kotlin/PackageCurationProvider.kt | 6 +- .../ClearlyDefinedPackageCurationProvider.kt | 71 +++++++++++-------- .../FallbackPackageCurationProvider.kt | 15 +++- .../curation/SimplePackageCurationProvider.kt | 3 +- .../curation/Sw360PackageCurationProvider.kt | 4 +- ...yDefinedPackageCurationProviderMockTest.kt | 5 +- ...earlyDefinedPackageCurationProviderTest.kt | 16 ++--- .../FilePackageCurationProviderTest.kt | 12 ++-- 9 files changed, 84 insertions(+), 57 deletions(-) diff --git a/analyzer/src/main/kotlin/AnalyzerResultBuilder.kt b/analyzer/src/main/kotlin/AnalyzerResultBuilder.kt index 17adf050ee1e4..235aaa419c78d 100644 --- a/analyzer/src/main/kotlin/AnalyzerResultBuilder.kt +++ b/analyzer/src/main/kotlin/AnalyzerResultBuilder.kt @@ -80,9 +80,12 @@ class AnalyzerResultBuilder(private val curationProvider: PackageCurationProvide * independently of a [ProjectAnalyzerResult]. */ fun addPackages(packageSet: Set): AnalyzerResultBuilder { - packages += packageSet.map { pkg -> - val curations = curationProvider.getCurationsFor(pkg.id) - curations.fold(pkg.toCuratedPackage()) { cur, packageCuration -> + val idsToPackages = packageSet.associateBy { it.id } + + curationProvider.getCurationsFor(idsToPackages.keys).forEach { (id, curations) -> + val pkg = idsToPackages.getValue(id) + + packages += curations.fold(pkg.toCuratedPackage()) { cur, packageCuration -> log.debug { "Applying curation '$packageCuration' to package '${pkg.id.toCoordinates()}'." } diff --git a/analyzer/src/main/kotlin/PackageCurationProvider.kt b/analyzer/src/main/kotlin/PackageCurationProvider.kt index 263632845e715..b8aad8f461b4a 100644 --- a/analyzer/src/main/kotlin/PackageCurationProvider.kt +++ b/analyzer/src/main/kotlin/PackageCurationProvider.kt @@ -31,11 +31,11 @@ fun interface PackageCurationProvider { * A provider that does not provide any curations. */ @JvmField - val EMPTY = PackageCurationProvider { emptyList() } + val EMPTY = PackageCurationProvider { emptyMap() } } /** - * Get all available [PackageCuration]s for the provided [pkgId]. + * Get all available [PackageCuration]s for the provided [pkgIds]. */ - fun getCurationsFor(pkgId: Identifier): List + fun getCurationsFor(pkgIds: Collection): Map> } diff --git a/analyzer/src/main/kotlin/curation/ClearlyDefinedPackageCurationProvider.kt b/analyzer/src/main/kotlin/curation/ClearlyDefinedPackageCurationProvider.kt index 9f8d9ce156f96..91a2663d057ee 100644 --- a/analyzer/src/main/kotlin/curation/ClearlyDefinedPackageCurationProvider.kt +++ b/analyzer/src/main/kotlin/curation/ClearlyDefinedPackageCurationProvider.kt @@ -30,6 +30,7 @@ import okhttp3.OkHttpClient import org.ossreviewtoolkit.analyzer.PackageCurationProvider import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService +import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.Coordinates import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.Server import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.SourceLocation import org.ossreviewtoolkit.clients.clearlydefined.ComponentType @@ -92,14 +93,18 @@ class ClearlyDefinedPackageCurationProvider( private val service by lazy { ClearlyDefinedService.create(serverUrl, client ?: OkHttpClientHelper.buildClient()) } - override fun getCurationsFor(pkgId: Identifier): List { - val (type, provider) = pkgId.toClearlyDefinedTypeAndProvider() ?: return emptyList() - val namespace = pkgId.namespace.takeUnless { it.isEmpty() } ?: "-" + override fun getCurationsFor(pkgIds: Collection): Map> { + val coordinatesToIds = pkgIds.mapNotNull { pkgId -> + pkgId.toClearlyDefinedTypeAndProvider()?.let { (type, provider) -> + val namespace = pkgId.namespace.takeUnless { it.isEmpty() } ?: "-" + Coordinates(type, provider, namespace, pkgId.name, pkgId.version) to pkgId + } + }.toMap() - val curation = runCatching { + val contributedCurations = runCatching { // TODO: Maybe make PackageCurationProvider.getCurationsFor() a suspend function; then all derived // classes could deal with coroutines more easily. - runBlocking(Dispatchers.IO) { service.getCuration(type, provider, namespace, pkgId.name, pkgId.version) } + runBlocking(Dispatchers.IO) { service.getCurations(coordinatesToIds.keys) } }.onFailure { e -> when (e) { is HttpException -> { @@ -110,47 +115,51 @@ class ClearlyDefinedPackageCurationProvider( log.warn { val message = e.response()?.errorBody()?.string() ?: e.collectMessagesAsString() - "Getting curations for '${pkgId.toCoordinates()}' failed with code ${e.code()}: $message" + "Getting curations failed with code ${e.code()}: $message" } } } is JsonMappingException -> { e.showStackTrace() - - log.warn { "Deserializing the ClearlyDefined curation for '${pkgId.toCoordinates()}' failed." } + log.warn { "Deserializing the curations failed." } } else -> { e.showStackTrace() - - log.warn { - "Querying ClearlyDefined curation for '${pkgId.toCoordinates()}' failed: " + - e.collectMessagesAsString() - } + log.warn { "Querying curations failed: ${e.collectMessagesAsString()}" } } } - }.getOrNull() ?: return emptyList() + }.getOrNull() ?: return emptyMap() - val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense -> - // Only take curations of good quality (i.e. those not using deprecated identifiers) and in particular none - // that contain "OTHER" as a license, also see https://github.com/clearlydefined/curated-data/issues/7836. - runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull() - } + val curations = mutableMapOf>() - val sourceLocation = curation.described?.sourceLocation.toArtifactOrVcs() + contributedCurations.forEach { (_, contributions) -> + contributions.curations.forEach { (coordinates, curation) -> + val pkgId = coordinatesToIds.getValue(coordinates) - val pkgCuration = PackageCuration( - id = pkgId, - data = PackageCurationData( - concludedLicense = declaredLicenseParsed, - homepageUrl = curation.described?.projectWebsite?.toString(), - sourceArtifact = sourceLocation as? RemoteArtifact, - vcs = sourceLocation as? VcsInfoCurationData, - comment = "Provided by ClearlyDefined." - ) - ) + val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense -> + // Only take curations of good quality (i.e. those not using deprecated identifiers) and in + // particular none that contain "OTHER" as a license, also see + // https://github.com/clearlydefined/curated-data/issues/7836. + runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull() + } + + val sourceLocation = curation.described?.sourceLocation.toArtifactOrVcs() + + curations.getOrPut(pkgId) { mutableListOf() } += PackageCuration( + id = pkgId, + data = PackageCurationData( + concludedLicense = declaredLicenseParsed, + homepageUrl = curation.described?.projectWebsite?.toString(), + sourceArtifact = sourceLocation as? RemoteArtifact, + vcs = sourceLocation as? VcsInfoCurationData, + comment = "Provided by ClearlyDefined." + ) + ) + } + } - return listOf(pkgCuration) + return curations } } diff --git a/analyzer/src/main/kotlin/curation/FallbackPackageCurationProvider.kt b/analyzer/src/main/kotlin/curation/FallbackPackageCurationProvider.kt index afa8f6cf3c067..f80d1574bcc2d 100644 --- a/analyzer/src/main/kotlin/curation/FallbackPackageCurationProvider.kt +++ b/analyzer/src/main/kotlin/curation/FallbackPackageCurationProvider.kt @@ -21,12 +21,23 @@ package org.ossreviewtoolkit.analyzer.curation import org.ossreviewtoolkit.analyzer.PackageCurationProvider import org.ossreviewtoolkit.model.Identifier +import org.ossreviewtoolkit.model.PackageCuration /** * A curation provider that does not provide any curations on its own, but searches the given list of [providers] for * the first matching curation, and falls back to the next provider in the list if there is no match. */ class FallbackPackageCurationProvider(private val providers: List) : PackageCurationProvider { - override fun getCurationsFor(pkgId: Identifier) = - providers.asSequence().map { it.getCurationsFor(pkgId) }.find { it.isNotEmpty() }.orEmpty() + override fun getCurationsFor(pkgIds: Collection): Map> { + val curations = mutableMapOf>() + val remainingPkgIds = pkgIds.toMutableSet() + + for (provider in providers) { + if (remainingPkgIds.isEmpty()) break + curations += provider.getCurationsFor(remainingPkgIds) + remainingPkgIds -= curations.keys + } + + return curations + } } diff --git a/analyzer/src/main/kotlin/curation/SimplePackageCurationProvider.kt b/analyzer/src/main/kotlin/curation/SimplePackageCurationProvider.kt index a8a9d093f587d..875472ad3bda0 100644 --- a/analyzer/src/main/kotlin/curation/SimplePackageCurationProvider.kt +++ b/analyzer/src/main/kotlin/curation/SimplePackageCurationProvider.kt @@ -29,5 +29,6 @@ import org.ossreviewtoolkit.model.PackageCuration open class SimplePackageCurationProvider( val packageCurations: Collection ) : PackageCurationProvider { - override fun getCurationsFor(pkgId: Identifier) = packageCurations.filter { it.isApplicable(pkgId) } + override fun getCurationsFor(pkgIds: Collection) = + pkgIds.associateWith { pkgId -> packageCurations.filter { it.isApplicable(pkgId) } } } diff --git a/analyzer/src/main/kotlin/curation/Sw360PackageCurationProvider.kt b/analyzer/src/main/kotlin/curation/Sw360PackageCurationProvider.kt index e3d20722772df..70cf918de9bf6 100644 --- a/analyzer/src/main/kotlin/curation/Sw360PackageCurationProvider.kt +++ b/analyzer/src/main/kotlin/curation/Sw360PackageCurationProvider.kt @@ -54,7 +54,9 @@ class Sw360PackageCurationProvider(sw360Configuration: Sw360StorageConfiguration jsonMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) ) - override fun getCurationsFor(pkgId: Identifier): List { + override fun getCurationsFor(pkgIds: Collection) = pkgIds.associateWith { getCurationsFor(it) } + + private fun getCurationsFor(pkgId: Identifier): List { val name = listOfNotNull(pkgId.namespace, pkgId.name).joinToString("/") val sw360ReleaseClient = sw360Connection.releaseAdapter diff --git a/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderMockTest.kt b/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderMockTest.kt index 1449b5eb83281..4345f8a244012 100644 --- a/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderMockTest.kt +++ b/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderMockTest.kt @@ -26,7 +26,7 @@ import com.github.tomakehurst.wiremock.client.WireMock.get import com.github.tomakehurst.wiremock.core.WireMockConfiguration import io.kotest.core.spec.style.WordSpec -import io.kotest.matchers.collections.beEmpty +import io.kotest.matchers.maps.beEmpty import io.kotest.matchers.should import java.time.Duration @@ -68,8 +68,9 @@ class ClearlyDefinedPackageCurationProviderMockTest : WordSpec({ } val provider = ClearlyDefinedPackageCurationProvider("http://localhost:${server.port()}", client) + val ids = listOf(Identifier("Maven:some-ns:some-component:1.2.3")) - provider.getCurationsFor(Identifier("Maven:some-ns:some-component:1.2.3")) should beEmpty() + provider.getCurationsFor(ids) should beEmpty() } } }) diff --git a/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderTest.kt b/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderTest.kt index 55b04f3ca239c..65d8788f3ea67 100644 --- a/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderTest.kt +++ b/analyzer/src/test/kotlin/curation/ClearlyDefinedPackageCurationProviderTest.kt @@ -20,8 +20,8 @@ package org.ossreviewtoolkit.analyzer.curation import io.kotest.core.spec.style.WordSpec -import io.kotest.matchers.collections.beEmpty -import io.kotest.matchers.collections.haveSize +import io.kotest.matchers.maps.beEmpty +import io.kotest.matchers.maps.haveSize import io.kotest.matchers.should import io.kotest.matchers.shouldBe @@ -35,10 +35,10 @@ class ClearlyDefinedPackageCurationProviderTest : WordSpec({ val provider = ClearlyDefinedPackageCurationProvider() val identifier = Identifier("Maven:javax.servlet:javax.servlet-api:3.1.0") - val curations = provider.getCurationsFor(identifier) + val curations = provider.getCurationsFor(listOf(identifier)) curations should haveSize(1) - curations.first().data.concludedLicense shouldBe + curations.values.flatten().first().data.concludedLicense shouldBe "CDDL-1.0 OR GPL-2.0-only WITH Classpath-exception-2.0".toSpdx() } @@ -46,7 +46,7 @@ class ClearlyDefinedPackageCurationProviderTest : WordSpec({ val provider = ClearlyDefinedPackageCurationProvider() val identifier = Identifier("NPM:@scope:name:1.2.3") - val curations = provider.getCurationsFor(identifier) + val curations = provider.getCurationsFor(listOf(identifier)) curations should beEmpty() } @@ -57,17 +57,17 @@ class ClearlyDefinedPackageCurationProviderTest : WordSpec({ val provider = ClearlyDefinedPackageCurationProvider(Server.DEVELOPMENT) val identifier = Identifier("NPM:@nestjs:platform-express:6.2.3") - val curations = provider.getCurationsFor(identifier) + val curations = provider.getCurationsFor(listOf(identifier)) curations should haveSize(1) - curations.first().data.concludedLicense shouldBe "Apache-1.0".toSpdx() + curations.values.flatten().first().data.concludedLicense shouldBe "Apache-1.0".toSpdx() } "return no curation for a non-existing dummy Maven package" { val provider = ClearlyDefinedPackageCurationProvider() val identifier = Identifier("Maven:group:name:1.2.3") - val curations = provider.getCurationsFor(identifier) + val curations = provider.getCurationsFor(listOf(identifier)) curations should beEmpty() } diff --git a/analyzer/src/test/kotlin/curation/FilePackageCurationProviderTest.kt b/analyzer/src/test/kotlin/curation/FilePackageCurationProviderTest.kt index 90c8cfd0e72da..f7402858921d6 100644 --- a/analyzer/src/test/kotlin/curation/FilePackageCurationProviderTest.kt +++ b/analyzer/src/test/kotlin/curation/FilePackageCurationProviderTest.kt @@ -50,7 +50,7 @@ class FilePackageCurationProviderTest : StringSpec() { ) idsWithExistingCurations.forEach { - val curations = provider.getCurationsFor(it) + val curations = provider.getCurationsFor(listOf(it)).values.flatten() curations should haveSize(1) } @@ -68,7 +68,7 @@ class FilePackageCurationProviderTest : StringSpec() { ) idsWithExistingCurations.forEach { - val curations = provider.getCurationsFor(it) + val curations = provider.getCurationsFor(listOf(it)).values.flatten() curations should haveSize(1) } @@ -78,7 +78,7 @@ class FilePackageCurationProviderTest : StringSpec() { val provider = FilePackageCurationProvider(curationsFile) val identifier = Identifier("maven", "org.hamcrest", "hamcrest-core", "1.3") - val curations = provider.getCurationsFor(identifier) + val curations = provider.getCurationsFor(listOf(identifier)).values.flatten() curations should haveSize(4) curations.forEach { @@ -96,9 +96,9 @@ class FilePackageCurationProviderTest : StringSpec() { val idMaxVersion = Identifier("npm", "", "ramda", "0.25.0") val idOutVersion = Identifier("npm", "", "ramda", "0.26.0") - val curationsMinVersion = provider.getCurationsFor(idMinVersion) - val curationsMaxVersion = provider.getCurationsFor(idMaxVersion) - val curationsOutVersion = provider.getCurationsFor(idOutVersion) + val curationsMinVersion = provider.getCurationsFor(listOf(idMinVersion)).values.flatten() + val curationsMaxVersion = provider.getCurationsFor(listOf(idMaxVersion)).values.flatten() + val curationsOutVersion = provider.getCurationsFor(listOf(idOutVersion)).values.flatten() curationsMinVersion should haveSize(1) (provider.packageCurations - curationsMinVersion).forEach {