Skip to content

Commit

Permalink
analyzer: Change PackageCurationProvider to support bulk requests
Browse files Browse the repository at this point in the history
Requesting curations for multiple packages / ids at once can be much
more performant than single request, depending on the actual provider
implementation.

Partly resolves #3905.

Signed-off-by: Sebastian Schuberth <[email protected]>
  • Loading branch information
sschuberth committed Jan 4, 2022
1 parent 89f21c7 commit 05ac226
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 57 deletions.
9 changes: 6 additions & 3 deletions analyzer/src/main/kotlin/AnalyzerResultBuilder.kt
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,12 @@ class AnalyzerResultBuilder(private val curationProvider: PackageCurationProvide
* independently of a [ProjectAnalyzerResult].
*/
fun addPackages(packageSet: Set<Package>): AnalyzerResultBuilder {
packages += packageSet.map { pkg ->
val curations = curationProvider.getCurationsFor(pkg.id)
curations.fold(pkg.toCuratedPackage()) { cur, packageCuration ->
val idsToPackages = packageSet.associateBy { it.id }

curationProvider.getCurationsFor(idsToPackages.keys).forEach { (id, curations) ->
val pkg = idsToPackages.getValue(id)

packages += curations.fold(pkg.toCuratedPackage()) { cur, packageCuration ->
log.debug {
"Applying curation '$packageCuration' to package '${pkg.id.toCoordinates()}'."
}
Expand Down
6 changes: 3 additions & 3 deletions analyzer/src/main/kotlin/PackageCurationProvider.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ fun interface PackageCurationProvider {
* A provider that does not provide any curations.
*/
@JvmField
val EMPTY = PackageCurationProvider { emptyList() }
val EMPTY = PackageCurationProvider { emptyMap() }
}

/**
* Get all available [PackageCuration]s for the provided [pkgId].
* Get all available [PackageCuration]s for the provided [pkgIds].
*/
fun getCurationsFor(pkgId: Identifier): List<PackageCuration>
fun getCurationsFor(pkgIds: Collection<Identifier>): Map<Identifier, List<PackageCuration>>
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import okhttp3.OkHttpClient

import org.ossreviewtoolkit.analyzer.PackageCurationProvider
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.Coordinates
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.Server
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.SourceLocation
import org.ossreviewtoolkit.clients.clearlydefined.ComponentType
Expand Down Expand Up @@ -92,14 +93,18 @@ class ClearlyDefinedPackageCurationProvider(

private val service by lazy { ClearlyDefinedService.create(serverUrl, client ?: OkHttpClientHelper.buildClient()) }

override fun getCurationsFor(pkgId: Identifier): List<PackageCuration> {
val (type, provider) = pkgId.toClearlyDefinedTypeAndProvider() ?: return emptyList()
val namespace = pkgId.namespace.takeUnless { it.isEmpty() } ?: "-"
override fun getCurationsFor(pkgIds: Collection<Identifier>): Map<Identifier, List<PackageCuration>> {
val coordinatesToIds = pkgIds.mapNotNull { pkgId ->
pkgId.toClearlyDefinedTypeAndProvider()?.let { (type, provider) ->
val namespace = pkgId.namespace.takeUnless { it.isEmpty() } ?: "-"
Coordinates(type, provider, namespace, pkgId.name, pkgId.version) to pkgId
}
}.toMap()

val curation = runCatching {
val contributedCurations = runCatching {
// TODO: Maybe make PackageCurationProvider.getCurationsFor() a suspend function; then all derived
// classes could deal with coroutines more easily.
runBlocking(Dispatchers.IO) { service.getCuration(type, provider, namespace, pkgId.name, pkgId.version) }
runBlocking(Dispatchers.IO) { service.getCurations(coordinatesToIds.keys) }
}.onFailure { e ->
when (e) {
is HttpException -> {
Expand All @@ -110,47 +115,51 @@ class ClearlyDefinedPackageCurationProvider(

log.warn {
val message = e.response()?.errorBody()?.string() ?: e.collectMessagesAsString()
"Getting curations for '${pkgId.toCoordinates()}' failed with code ${e.code()}: $message"
"Getting curations failed with code ${e.code()}: $message"
}
}
}

is JsonMappingException -> {
e.showStackTrace()

log.warn { "Deserializing the ClearlyDefined curation for '${pkgId.toCoordinates()}' failed." }
log.warn { "Deserializing the curations failed." }
}

else -> {
e.showStackTrace()

log.warn {
"Querying ClearlyDefined curation for '${pkgId.toCoordinates()}' failed: " +
e.collectMessagesAsString()
}
log.warn { "Querying curations failed: ${e.collectMessagesAsString()}" }
}
}
}.getOrNull() ?: return emptyList()
}.getOrNull() ?: return emptyMap()

val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense ->
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in particular none
// that contain "OTHER" as a license, also see https://github.com/clearlydefined/curated-data/issues/7836.
runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull()
}
val curations = mutableMapOf<Identifier, MutableList<PackageCuration>>()

val sourceLocation = curation.described?.sourceLocation.toArtifactOrVcs()
contributedCurations.forEach { (_, contributions) ->
contributions.curations.forEach { (coordinates, curation) ->
val pkgId = coordinatesToIds.getValue(coordinates)

val pkgCuration = PackageCuration(
id = pkgId,
data = PackageCurationData(
concludedLicense = declaredLicenseParsed,
homepageUrl = curation.described?.projectWebsite?.toString(),
sourceArtifact = sourceLocation as? RemoteArtifact,
vcs = sourceLocation as? VcsInfoCurationData,
comment = "Provided by ClearlyDefined."
)
)
val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense ->
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in
// particular none that contain "OTHER" as a license, also see
// https://github.com/clearlydefined/curated-data/issues/7836.
runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull()
}

val sourceLocation = curation.described?.sourceLocation.toArtifactOrVcs()

curations.getOrPut(pkgId) { mutableListOf() } += PackageCuration(
id = pkgId,
data = PackageCurationData(
concludedLicense = declaredLicenseParsed,
homepageUrl = curation.described?.projectWebsite?.toString(),
sourceArtifact = sourceLocation as? RemoteArtifact,
vcs = sourceLocation as? VcsInfoCurationData,
comment = "Provided by ClearlyDefined."
)
)
}
}

return listOf(pkgCuration)
return curations
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,23 @@ package org.ossreviewtoolkit.analyzer.curation

import org.ossreviewtoolkit.analyzer.PackageCurationProvider
import org.ossreviewtoolkit.model.Identifier
import org.ossreviewtoolkit.model.PackageCuration

/**
* A curation provider that does not provide any curations on its own, but searches the given list of [providers] for
* the first matching curation, and falls back to the next provider in the list if there is no match.
*/
class FallbackPackageCurationProvider(private val providers: List<PackageCurationProvider>) : PackageCurationProvider {
override fun getCurationsFor(pkgId: Identifier) =
providers.asSequence().map { it.getCurationsFor(pkgId) }.find { it.isNotEmpty() }.orEmpty()
override fun getCurationsFor(pkgIds: Collection<Identifier>): Map<Identifier, List<PackageCuration>> {
val curations = mutableMapOf<Identifier, List<PackageCuration>>()
val remainingPkgIds = pkgIds.toMutableSet()

for (provider in providers) {
if (remainingPkgIds.isEmpty()) break
curations += provider.getCurationsFor(remainingPkgIds)
remainingPkgIds -= curations.keys
}

return curations
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ import org.ossreviewtoolkit.model.PackageCuration
open class SimplePackageCurationProvider(
val packageCurations: Collection<PackageCuration>
) : PackageCurationProvider {
override fun getCurationsFor(pkgId: Identifier) = packageCurations.filter { it.isApplicable(pkgId) }
override fun getCurationsFor(pkgIds: Collection<Identifier>) =
pkgIds.associateWith { pkgId -> packageCurations.filter { it.isApplicable(pkgId) } }
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ class Sw360PackageCurationProvider(sw360Configuration: Sw360StorageConfiguration
jsonMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
)

override fun getCurationsFor(pkgId: Identifier): List<PackageCuration> {
override fun getCurationsFor(pkgIds: Collection<Identifier>) = pkgIds.associateWith { getCurationsFor(it) }

private fun getCurationsFor(pkgId: Identifier): List<PackageCuration> {
val name = listOfNotNull(pkgId.namespace, pkgId.name).joinToString("/")
val sw360ReleaseClient = sw360Connection.releaseAdapter

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import com.github.tomakehurst.wiremock.client.WireMock.get
import com.github.tomakehurst.wiremock.core.WireMockConfiguration

import io.kotest.core.spec.style.WordSpec
import io.kotest.matchers.collections.beEmpty
import io.kotest.matchers.maps.beEmpty
import io.kotest.matchers.should

import java.time.Duration
Expand Down Expand Up @@ -68,8 +68,9 @@ class ClearlyDefinedPackageCurationProviderMockTest : WordSpec({
}

val provider = ClearlyDefinedPackageCurationProvider("http://localhost:${server.port()}", client)
val ids = listOf(Identifier("Maven:some-ns:some-component:1.2.3"))

provider.getCurationsFor(Identifier("Maven:some-ns:some-component:1.2.3")) should beEmpty()
provider.getCurationsFor(ids) should beEmpty()
}
}
})
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
package org.ossreviewtoolkit.analyzer.curation

import io.kotest.core.spec.style.WordSpec
import io.kotest.matchers.collections.beEmpty
import io.kotest.matchers.collections.haveSize
import io.kotest.matchers.maps.beEmpty
import io.kotest.matchers.maps.haveSize
import io.kotest.matchers.should
import io.kotest.matchers.shouldBe

Expand All @@ -35,18 +35,18 @@ class ClearlyDefinedPackageCurationProviderTest : WordSpec({
val provider = ClearlyDefinedPackageCurationProvider()

val identifier = Identifier("Maven:javax.servlet:javax.servlet-api:3.1.0")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should haveSize(1)
curations.first().data.concludedLicense shouldBe
curations.values.flatten().first().data.concludedLicense shouldBe
"CDDL-1.0 OR GPL-2.0-only WITH Classpath-exception-2.0".toSpdx()
}

"return no curation for a non-existing dummy NPM package" {
val provider = ClearlyDefinedPackageCurationProvider()

val identifier = Identifier("NPM:@scope:name:1.2.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should beEmpty()
}
Expand All @@ -57,17 +57,17 @@ class ClearlyDefinedPackageCurationProviderTest : WordSpec({
val provider = ClearlyDefinedPackageCurationProvider(Server.DEVELOPMENT)

val identifier = Identifier("NPM:@nestjs:platform-express:6.2.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should haveSize(1)
curations.first().data.concludedLicense shouldBe "Apache-1.0".toSpdx()
curations.values.flatten().first().data.concludedLicense shouldBe "Apache-1.0".toSpdx()
}

"return no curation for a non-existing dummy Maven package" {
val provider = ClearlyDefinedPackageCurationProvider()

val identifier = Identifier("Maven:group:name:1.2.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should beEmpty()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class FilePackageCurationProviderTest : StringSpec() {
)

idsWithExistingCurations.forEach {
val curations = provider.getCurationsFor(it)
val curations = provider.getCurationsFor(listOf(it)).values.flatten()

curations should haveSize(1)
}
Expand All @@ -68,7 +68,7 @@ class FilePackageCurationProviderTest : StringSpec() {
)

idsWithExistingCurations.forEach {
val curations = provider.getCurationsFor(it)
val curations = provider.getCurationsFor(listOf(it)).values.flatten()

curations should haveSize(1)
}
Expand All @@ -78,7 +78,7 @@ class FilePackageCurationProviderTest : StringSpec() {
val provider = FilePackageCurationProvider(curationsFile)

val identifier = Identifier("maven", "org.hamcrest", "hamcrest-core", "1.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier)).values.flatten()

curations should haveSize(4)
curations.forEach {
Expand All @@ -96,9 +96,9 @@ class FilePackageCurationProviderTest : StringSpec() {
val idMaxVersion = Identifier("npm", "", "ramda", "0.25.0")
val idOutVersion = Identifier("npm", "", "ramda", "0.26.0")

val curationsMinVersion = provider.getCurationsFor(idMinVersion)
val curationsMaxVersion = provider.getCurationsFor(idMaxVersion)
val curationsOutVersion = provider.getCurationsFor(idOutVersion)
val curationsMinVersion = provider.getCurationsFor(listOf(idMinVersion)).values.flatten()
val curationsMaxVersion = provider.getCurationsFor(listOf(idMaxVersion)).values.flatten()
val curationsOutVersion = provider.getCurationsFor(listOf(idOutVersion)).values.flatten()

curationsMinVersion should haveSize(1)
(provider.packageCurations - curationsMinVersion).forEach {
Expand Down

0 comments on commit 05ac226

Please sign in to comment.