From faee3a8f3241fd6b6f318c8a16d2cb72575f272f Mon Sep 17 00:00:00 2001 From: kwahlin Date: Thu, 16 Nov 2023 16:36:59 +0100 Subject: [PATCH] Guarantee meaningful closeMatch links by matching primary contributors --- librisworks/scripts/merge-works.groovy | 20 +++++++++++-------- .../groovy/se/kb/libris/mergeworks/Doc.groovy | 4 ++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/librisworks/scripts/merge-works.groovy b/librisworks/scripts/merge-works.groovy index e9e4a8b5cd..3888450c26 100644 --- a/librisworks/scripts/merge-works.groovy +++ b/librisworks/scripts/merge-works.groovy @@ -50,14 +50,14 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) { cluster -> } } - List linkableWorkIris = uniqueWorksAndTheirInstances.findResults { it.getV1().workIri() } + List linkableWorks = uniqueWorksAndTheirInstances.findResults { workDoc, _ -> workDoc.workIri() ? workDoc : null } uniqueWorksAndTheirInstances.each { Doc workDoc, List instanceDocs -> // Link more instances to existing linked work if (workDoc.existsInStorage && !workDoc.instanceData && instanceDocs) { replaceWorkData(workDoc, c.merge([workDoc] + instanceDocs)) // TODO: Update adminMetadata? To say that additional instances may have contributed to the linked work. - addCloseMatch(workDoc, linkableWorkIris) + addCloseMatch(workDoc, linkableWorks) saveAndLink(workDoc, instanceDocs, workDoc.existsInStorage) writeWorkReport(docs, workDoc, instanceDocs, WorkStatus.UPDATED) return @@ -65,13 +65,13 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) { cluster -> // New merged work if (!workDoc.existsInStorage && !workDoc.instanceData) { addAdminMetadata(workDoc, instanceDocs.collect { ['@id': it.recordIri()] }) - addCloseMatch(workDoc, linkableWorkIris) + addCloseMatch(workDoc, linkableWorks) saveAndLink(workDoc, instanceDocs, workDoc.existsInStorage) writeWorkReport(docs, workDoc, instanceDocs, WorkStatus.NEW) return } // Local work, save if new closeMatch links created - if (workDoc.instanceData && addCloseMatch(workDoc, linkableWorkIris)) { + if (workDoc.instanceData && addCloseMatch(workDoc, linkableWorks)) { saveAndLink(workDoc) } } @@ -171,12 +171,16 @@ static void replaceWorkData(Doc workDoc, Map replacement) { workDoc.workData.putAll(replacement) } -boolean addCloseMatch(Doc workDoc, List workIris) { - def linkable = (workIris - workDoc.thingIri()).collect { ['@id': it] } +boolean addCloseMatch(Doc workDoc, List linkableWorks) { + def linkTo = linkableWorks.findAll { d -> + d.workIri() != workDoc.thingIri() + && d.primaryContributor() == workDoc.primaryContributor() + }.collect { ['@id': it.workIri()] } + def closeMatch = asList(workDoc.workData['closeMatch']) - if (linkable && !closeMatch.containsAll(linkable)) { - workDoc.workData['closeMatch'] = (closeMatch + linkable).unique() + if (linkTo && !closeMatch.containsAll(linkTo)) { + workDoc.workData['closeMatch'] = (closeMatch + linkTo).unique() return true } diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy index e6e45c8896..a06ae51cc3 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy @@ -178,6 +178,10 @@ class Doc { asList(instanceData?.reproductionOf) } + Map primaryContributor() { + contribution().findResult { it['@type'] == 'PrimaryContribution' ? asList(it.agent).find() : null } + } + String editionStatement() { instanceData?.editionStatement }