From 0e247939c972df2a356137f31daa53d4bea1e679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kalle=20W=C3=A5hlin?= <72360110+kwahlin@users.noreply.github.com> Date: Thu, 16 Nov 2023 08:01:31 +0100 Subject: [PATCH] Feature/lxl 4279 (#1325) Finalize work extraction/merging logic * Add more generic titles * Remove commented-out code used for testing stuff * Don't try to correct illustrator/translator * Add cleanup scripts to globalchanges-1.33.sh * Make sure PrimaryContribution comes first in contribution * Reorganize scripts Put "cluster bound" script in librisworks/run.sh and not cluster bound script in whelktool/globalchanges-1.33.sh * Make 9pu follow illustrator to instance * Add missing lifeSpan to local agent if found in cluster * Make all subtitles non-distinguishing * Fix work title shape (exclude subtitle) * Remove redundant method * Raise too-large-result limit * Remove irrelevant title components * Remove unnecessary condition since checked elsewhere * Include work titles when clustering * If the work title of one record matches the instance title of another record, these two records now end up in the same cluster. * Use existing methods in find-work-clusters.groovy instead of declaring new ones. 
* Exit bash script immediately if Whelktool script fails * Handle unexpected datatype in instanceOf * Add missing exclamation mark * Change clustering order * Cluster by titles before merging overlapping clusters * Side effect: Necessary to make Elastic queries from each record individually in first clustering step * Add missing -D parameter * Save only modified * Avoid IndexOutOfBoundsException * Avoid null in tryAddLifeSpanToLocalAgent * Update path to relators * Fix broken linkedAgentToLifeSpan * Avoid repeated period in work mainTitle * Make subtitle/titleRemainder distinguishing again * Restore structure * Save before reporting to make sure no data is changed before saving * Extend ignored-subtitles.txt * Require that PrimaryContribution has an agent in SVSK selection * Add explanation + example on when to drop only part of subtitle --- librisworks/run.sh | 42 ++-- .../add-missing-contribution-data.groovy | 71 +++--- .../scripts/contributions-to-instance.groovy | 15 +- librisworks/scripts/find-work-clusters.groovy | 119 +++------- .../lxl-4150-deduplicate-contribution.groovy | 17 +- ...ove-illustrativecontent-to-instance.groovy | 19 ++ librisworks/scripts/merge-works.groovy | 41 ++-- librisworks/scripts/title-clusters.groovy | 4 +- .../se/kb/libris/mergeworks/DisplayDoc.groovy | 15 +- .../groovy/se/kb/libris/mergeworks/Doc.groovy | 21 +- .../se/kb/libris/mergeworks/Util.groovy | 59 +++-- .../mergeworks/compare/TranslationOf.groovy | 2 +- .../resources/merge-works/generic-titles.txt | 31 +++ .../merge-works/ignored-subtitles.txt | 210 +++++++++++++++++- whelktool/globalchanges-1.33.sh | 4 +- .../10}/elib-unspecified-contributor.groovy | 0 ...ove-illustrativecontent-to-instance.groovy | 15 -- 17 files changed, 438 insertions(+), 247 deletions(-) create mode 100644 librisworks/scripts/lxl-4221-move-illustrativecontent-to-instance.groovy rename {librisworks/scripts => whelktool/scripts/2023/10}/elib-unspecified-contributor.groovy (100%) delete mode 100644 
whelktool/scripts/cleanups/2023/07/lxl-4221-move-illustrativecontent-to-instance.groovy diff --git a/librisworks/run.sh b/librisworks/run.sh index 18648843b5..d258e1f4f8 100755 --- a/librisworks/run.sh +++ b/librisworks/run.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -eu count_lines() { if [ -f $1 ]; then @@ -30,20 +31,20 @@ NORMALIZATIONS_DIR=$REPORT_DIR/normalizations MERGED_WORKS_DIR=$REPORT_DIR/merged-works ALL=$CLUSTERS_DIR/1-all -MERGED=$CLUSTERS_DIR/2-merged -TITLES=$CLUSTERS_DIR/3-titles +TITLES=$CLUSTERS_DIR/2-titles +MERGED=$CLUSTERS_DIR/3-merged SWEDISH_FICTION=$CLUSTERS_DIR/4-swedish-fiction NO_ANONYMOUS_TRANSLATIONS=$CLUSTERS_DIR/5-no-anonymous-translations mkdir -p $CLUSTERS_DIR $NORMALIZATIONS_DIR $MERGED_WORKS_DIR $ALL $MERGED $TITLES $SWEDISH_FICTION $NO_ANONYMOUS_TRANSLATIONS LANGUAGE_IN_TITLE=$NORMALIZATIONS_DIR/1-titles-with-language -ELIB_DESIGNERS=$NORMALIZATIONS_DIR/2-elib-cover-designer +ILL_CONTENT=$NORMALIZATIONS_DIR/2-illustrative-content DEDUPLICATE_CONTRIBUTIONS=$NORMALIZATIONS_DIR/3-deduplicate-contributions ADD_MISSING_CONTRIBUTION_DATA=$NORMALIZATIONS_DIR/4-add-missing-contribution-data ROLES_TO_INSTANCE=$NORMALIZATIONS_DIR/5-roles-to-instance -# Clustering step 1 TODO: run only on recently updated records after first run +# Clustering TODO: run only on recently updated records after first run echo "Finding new clusters..." time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -jar $JAR_FILE \ $ARGS --report $ALL/$WHELKTOOL_REPORT $SCRIPTS_DIR/find-work-clusters.groovy >$ALL/$CLUSTER_TSV 2>/dev/null @@ -53,24 +54,25 @@ if [ $NUM_CLUSTERS == 0 ]; then exit 0 fi -# Clustering step 2 +# Filter out duplicates +sort -uo $ALL/$CLUSTER_TSV $ALL/$CLUSTER_TSV + echo -echo "Merging clusters..." +echo "Finding title clusters..." 
time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -Dclusters=$ALL/$CLUSTER_TSV -jar $JAR_FILE \ - $ARGS --report $MERGED/$WHELKTOOL_REPORT $SCRIPTS_DIR/merge-clusters.groovy >$MERGED/$CLUSTER_TSV 2>/dev/null -NUM_CLUSTERS=$(count_lines $MERGED/$CLUSTER_TSV) -echo "Merged into $NUM_CLUSTERS clusters" + $ARGS --report $TITLES/$WHELKTOOL_REPORT $SCRIPTS_DIR/title-clusters.groovy >$TITLES/$CLUSTER_TSV 2>/dev/null +NUM_CLUSTERS=$(count_lines $TITLES/$CLUSTER_TSV) +echo "$NUM_CLUSTERS title clusters found" if [ $NUM_CLUSTERS == 0 ]; then exit 0 fi -# Clustering step 3 echo -echo "Finding title clusters..." -time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -Dclusters=$MERGED/$CLUSTER_TSV -jar $JAR_FILE \ - $ARGS --report $TITLES/$WHELKTOOL_REPORT $SCRIPTS_DIR/title-clusters.groovy >$TITLES/$CLUSTER_TSV 2>/dev/null -NUM_CLUSTERS=$(count_lines $TITLES/$CLUSTER_TSV) -echo "$NUM_CLUSTERS title clusters found" +echo "Merging clusters..." +time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -Dclusters=$TITLES/$CLUSTER_TSV -jar $JAR_FILE \ + $ARGS --report $MERGED/$WHELKTOOL_REPORT $SCRIPTS_DIR/merge-clusters.groovy >$MERGED/$CLUSTER_TSV 2>/dev/null +NUM_CLUSTERS=$(count_lines $MERGED/$CLUSTER_TSV) +echo "Merged into $NUM_CLUSTERS clusters" if [ $NUM_CLUSTERS == 0 ]; then exit 0 fi @@ -78,7 +80,7 @@ fi # Filter: Swedish fiction echo echo "Filtering on Swedish fiction..." 
-time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -Dclusters=$TITLES/$CLUSTER_TSV -jar $JAR_FILE \ +time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -Dclusters=$MERGED/$CLUSTER_TSV -jar $JAR_FILE \ $ARGS --report $SWEDISH_FICTION/$WHELKTOOL_REPORT $SCRIPTS_DIR/swedish-fiction.groovy >$SWEDISH_FICTION/$CLUSTER_TSV 2>/dev/null NUM_CLUSTERS=$(count_lines $SWEDISH_FICTION/$CLUSTER_TSV) echo "Found $NUM_CLUSTERS title clusters with Swedish fiction" @@ -94,10 +96,10 @@ time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -Dclusters=$SWEDIS echo "$(count_lines $LANGUAGE_IN_TITLE/MODIFIED.txt) records affected, report in $LANGUAGE_IN_TITLE" echo -echo "Specifying designer roles in Elib records..." # NOTE: Not dependent on clustering, can be run anytime after ContributionByRoleStep has been deployed. -time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -jar $JAR_FILE \ - $ARGS --report $ELIB_DESIGNERS $SCRIPTS_DIR/elib-unspecified-contributor.groovy 2>/dev/null -echo "$(count_lines $ELIB_DESIGNERS/MODIFIED.txt) records affected, report in $ELIB_DESIGNERS" +echo "Moving illustrativeContent to instance..." +time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -Dclusters=$SWEDISH_FICTION/$CLUSTER_TSV -jar $JAR_FILE \ + $ARGS --report $ILL_CONTENT $SCRIPTS_DIR/lxl-4221-move-illustrativecontent-to-instance.groovy 2>/dev/null +echo "$(count_lines $ILL_CONTENT/MODIFIED.txt) records affected, report in $ILL_CONTENT" echo echo "Merging contribution objects with same agent..." 
diff --git a/librisworks/scripts/add-missing-contribution-data.groovy b/librisworks/scripts/add-missing-contribution-data.groovy index c8ec1384b7..e9f87032cd 100644 --- a/librisworks/scripts/add-missing-contribution-data.groovy +++ b/librisworks/scripts/add-missing-contribution-data.groovy @@ -1,5 +1,6 @@ import groovy.transform.Memoized import org.apache.commons.lang3.StringUtils + import whelk.Document import java.util.concurrent.ConcurrentHashMap @@ -20,6 +21,9 @@ linkedFoundInCluster.println(['id', 'matched agent', 'agent occurs in (examples) roleAddedFromRespStatement = getReportWriter("role-added-from-respStatement.tsv") roleAddedFromRespStatement.println(['id', 'agent name', 'added roles', 'resp statement'].join('\t')) +lifeSpanFoundInCluster = getReportWriter("life-span-found-in-cluster.tsv") +lifeSpanFoundInCluster.println(['id', 'agent name', 'lifeSpan', 'agent occurs with lifeSpan in (examples)'].join('\t')) + respStatementLinkedAgentFoundInCluster = getReportWriter("respStatement-linked-agent-found-in-cluster.tsv") respStatementLinkedAgentFoundInCluster.println(['id', 'agent name', 'matched agent', 'resp statement roles', 'agent occurs in (examples)', 'resp statement'].join('\t')) @@ -37,15 +41,13 @@ titleMovedToTranslationOf = getReportWriter("title-moved-to-translationOf.tsv") originalWorkFoundInCluster = getReportWriter("original-work-found-in-cluster.tsv") originalWorkFoundInCluster.println(['id', 'added translationOf', 'translationOf occurs in (examples)'].join('\t')) -illVsTrl = getReportWriter("ill-vs-trl.tsv") -illVsTrl.println(['id', 'removed/replaced role', 'agent name', 'resp statement'].join('\t')) - def clusters = new File(System.getProperty('clusters')).collect { it.split('\t').collect { it.trim() } } idToCluster = initIdToCluster(clusters) nameToAgents = new ConcurrentHashMap() agentToRolesToIds = new ConcurrentHashMap>() -agentToLifeSpan = new ConcurrentHashMap() +linkedAgentToLifeSpan = new ConcurrentHashMap() 
+localAgentToLifeSpansToIds = new ConcurrentHashMap>() idToTranslationOf = new ConcurrentHashMap() // Populate maps @@ -59,8 +61,13 @@ selectByIds(clusters.flatten()) { bib -> asList(c.agent).each { Map agent -> def agentStr = toString(agent) def loadedAgent = loadIfLink(agent) - if (agent.containsKey(ID_KEY)) { - agentToLifeSpan.computeIfAbsent(agentStr, f -> lifeSpan(loadedAgent)) + if (loadedAgent.lifeSpan) { + if (agent.containsKey(ID_KEY)) { + linkedAgentToLifeSpan.computeIfAbsent(agentStr, f -> lifeSpan(loadedAgent)) + } else { + def lifeSpansToIds = localAgentToLifeSpansToIds.computeIfAbsent(agentStr, f -> new ConcurrentHashMap()) + lifeSpansToIds.computeIfAbsent(agent.lifeSpan, f -> new ConcurrentHashMap().newKeySet()).add(id) + } } ([loadedAgent] + asList(loadedAgent.hasVariant)).each { a -> String agentName = name(a) @@ -125,6 +132,7 @@ selectByIds(clusters.flatten()) { bib -> modified |= tryLinkAgent(c, id) // if there are more roles stated in responsibilityStatement other than the existing ones in this contribution, add those modified |= tryAddRolesFromRespStatement(c, contributionsInRespStatement, respStatement, id) + modified |= tryAddLifeSpanToLocalAgent(c, id) } // drop "implicit authors", e.g. 
Astrid Lindgren in "Astrid Lindgren ; illustrerad av Ilon Wikland" (likely to already exist) @@ -202,7 +210,7 @@ boolean tryLinkAgent(Map contribution, String id) { if (!names) return // get linked agents with matching name def matchingLinkedAgents = nameToAgents.subMap(names).values().flatten().toSet().findAll { a -> - looksLikeIri(a) && !yearMismatch(lifeSpan(agent), agentToLifeSpan[a]) + looksLikeIri(a) && !yearMismatch(lifeSpan(agent), linkedAgentToLifeSpan[a]) } for (agentIri in matchingLinkedAgents) { // roles that the linked agent appears as and in which records respectively @@ -260,15 +268,6 @@ boolean tryAddRolesFromRespStatement(Map contribution, Map contributionsInRespSt } def modified = false - - def incorrectIllOrTrl = findIncorrectIllVsTrl(currentRoles, rolesOfInterest) - if (incorrectIllOrTrl) { - currentRoles.remove(toIdMap(incorrectIllOrTrl)) - contribution['role'] = currentRoles - roleToIds[toIdMap(incorrectIllOrTrl)].remove(id) - illVsTrl.println([id, roleShort(incorrectIllOrTrl), name, respStatement].join('\t')) - modified = true - } def newRoles = rolesOfInterest - currentRoles if (newRoles) { // add new roles (replace existing unspecifiedContributor) @@ -287,6 +286,28 @@ boolean tryAddRolesFromRespStatement(Map contribution, Map contributionsInRespSt return modified } +boolean tryAddLifeSpanToLocalAgent(Map contribution, String id) { + def agent = asList(contribution.agent).find() + if (agent instanceof Map && !agent[ID_KEY] && !agent.lifeSpan) { + def names = agentToNames[toString(agent)] + if (!names) return + def matchingLocalAgentsWithLifeSpan = nameToAgents.subMap(names).values().flatten().toSet().findAll { a -> + !looksLikeIri(a) && localAgentToLifeSpansToIds[a] + } + for (localAgent in matchingLocalAgentsWithLifeSpan) { + def lifeSpanToIds = localAgentToLifeSpansToIds[localAgent] + def lifeSpanInCluster = lifeSpanToIds.find { _, ids -> idToCluster[id].intersect(ids) }?.key + if (lifeSpanInCluster) { + agent['lifeSpan'] = 
lifeSpanInCluster + def examples = idToCluster[id].intersect(lifeSpanToIds[lifeSpanInCluster]).take(3) + lifeSpanFoundInCluster.println([id, name(agent), lifeSpanInCluster, examples].join('\t')) + return true + } + } + } + return false +} + boolean tryAddLinkedAgentContributionsFromRespStatement(List contribution, Map contributionsInRespStatement, String respStatement, String id) { if (contributionsInRespStatement.isEmpty()) return false @@ -431,13 +452,6 @@ boolean tryAddRole(Map contribution, String id) { || r == toIdMap(Relator.PRIMARY_RIGHTS_HOLDER.iri) || (r in adapterEditor && currentRoles.intersect(adapterEditor))) }.collect { it.key } - - def illAndTrl = [toIdMap(Relator.TRANSLATOR.iri), toIdMap(Relator.ILLUSTRATOR.iri)] - - if ((currentRoles + rolesInCluster).containsAll(illAndTrl)) { - rolesInCluster -= illAndTrl - } - def newRoles = rolesInCluster - currentRoles if (newRoles) { contribution['role'] = noRole(currentRoles) ? newRoles : currentRoles + newRoles @@ -647,17 +661,6 @@ static List nameParts(String s) { s.split(' ').findAll() } -static String findIncorrectIllVsTrl(List currentRoles, List rolesInRespStatement) { - if ((currentRoles + rolesInRespStatement)[ID_KEY].containsAll([Relator.ILLUSTRATOR.iri, Relator.TRANSLATOR.iri])) { - if (!rolesInRespStatement[ID_KEY].contains(Relator.ILLUSTRATOR.iri)) { - return Relator.ILLUSTRATOR.iri - } - if (!rolesInRespStatement[ID_KEY].contains(Relator.TRANSLATOR.iri)) { - return Relator.TRANSLATOR.iri - } - } -} - def toIdMap(String iri) { [(ID_KEY): iri] } \ No newline at end of file diff --git a/librisworks/scripts/contributions-to-instance.groovy b/librisworks/scripts/contributions-to-instance.groovy index b114ee1036..02c458640f 100644 --- a/librisworks/scripts/contributions-to-instance.groovy +++ b/librisworks/scripts/contributions-to-instance.groovy @@ -12,12 +12,13 @@ report = getReportWriter('report.tsv') def clusters = new File(System.getProperty('clusters')).collect { it.split('\t').collect { it.trim() 
} } def whelk = getWhelk() -def instanceRolesByDomain = whelk.resourceCache.relators.findResults { +def instanceRolesByDomain = whelk.resourceCache.relatorResources.relators.findResults { if (it.domain) { def domain = whelk.jsonld.toTermKey(it.domain[ID_KEY]) if (whelk.jsonld.isSubClassOf(domain, 'Embodiment')) it.subMap([ID_KEY]) } } + def instanceRoles = instanceRolesByDomain + [Relator.ILLUSTRATOR, Relator.AUTHOR_OF_INTRO, Relator.AUTHOR_OF_AFTERWORD].collect { [(ID_KEY): it.iri] } def ill = [(ID_KEY): Relator.ILLUSTRATOR.iri] @@ -77,6 +78,10 @@ selectByIds(clusters.flatten()) { bib -> if (id in keepIllustratorOnWorkForIds[illustrator]) { toInstance.remove(ill) } + def pu = asList(contribution.role).find { it == [(ID_KEY): Relator.PRIMARY_RIGHTS_HOLDER.iri] } + if (pu) { + toInstance.add(pu) + } } if (toInstance) { instance['contribution'] = asList(instance['contribution']) + c.clone().tap { it['role'] = toInstance } @@ -103,14 +108,6 @@ boolean isPrimaryContribution(Map contribution) { contribution[TYPE_KEY] == 'PrimaryContribution' } -//boolean has9pu(Map contribution) { -// asList(contribution.role).contains([(ID_KEY): Relator.PRIMARY_RIGHTS_HOLDER.iri]) -//} -// -//boolean isStillImage(Map work) { -// asList(work.contentType).contains([(ID_KEY): 'https://id.kb.se/term/rda/StillImage']) -//} - boolean isPictureBook(Map work) { def picBookTerms = [ 'https://id.kb.se/term/barngf/Bilderb%C3%B6cker', diff --git a/librisworks/scripts/find-work-clusters.groovy b/librisworks/scripts/find-work-clusters.groovy index 90bcfaa446..b69a4b0ffb 100644 --- a/librisworks/scripts/find-work-clusters.groovy +++ b/librisworks/scripts/find-work-clusters.groovy @@ -2,54 +2,32 @@ * (When running, redirect STDERR to avoid annoying prints from whelktool) */ -import java.util.concurrent.ConcurrentHashMap - PrintWriter failedQueries = getReportWriter("failed-queries") PrintWriter tooLargeResult = getReportWriter("too-large-result") -//def yesterday = new 
SimpleDateFormat('yyyy-MM-dd').with { sdf -> -// Calendar.getInstance().with { c -> -// c.add(Calendar.DATE, -1) -// sdf.format(c.getTime()) -// } -//} - -//def where = """ -// collection = '%s' -// AND (modified::date = '$yesterday' -// OR (data#>>'{@graph,0,generationDate}')::date = '$yesterday') -//""" - -visited = Collections.newSetFromMap(new ConcurrentHashMap()) // TODO: remove? -//instancesOfUpdatedLinkedWorks = Collections.synchronizedSet([] as Set) -// -//selectBySqlWhere(String.format(where, 'auth')) { -// def thing = it.graph[1] -// if (Normalizers.isInstanceOf(it.whelk.jsonld, thing, 'Work')) { -// selectBySqlWhere("collection = 'bib' and data#>>'{@graph,1,instanceOf,@id}' = '${thing['@id']}'") { -// instancesOfUpdatedLinkedWorks.add(it.doc.shortId) -// } -// } -//} - def process = { bib -> - if (!visited.add(bib.doc.shortId)) - return - try { - def q = buildQuery(bib) - if (!q) { - return - } + def instance = bib.graph[1] + def work = loadIfLink(instance.instanceOf) - List ids = queryIds(q).collect() + if (!work) return - if (ids.size() > 200) { - tooLargeResult.println("Results: ${ids.size()} Query: ${q}") + def titles = [instance, work].grep().collect { title(it) }.grep().unique() + + Set ids = [] + + titles.each { + def q = buildQuery(work, it) + if (!q) { + return + } + ids.addAll(queryIds(q)) } - else if (ids.size() > 1) { - visited.addAll(ids) - println(ids.join('\t')) + + if (ids.size() > 1000) { + tooLargeResult.println("Results: ${ids.size()} Id: ${bib.doc.shortId} Titles: ${titles}") + } else if (ids.size() > 1) { + println(ids.sort().join('\t')) } } catch (Exception e) { @@ -59,49 +37,39 @@ def process = { bib -> } } -//selectByIds(instancesOfUpdatedLinkedWorks) { -// process(it) -//} - -// TODO: Change when starting to run regularly -//selectBySqlWhere(String.format(where, 'bib')) { selectByCollection('bib') { process(it) } -Map> buildQuery(bib) { - def title = title(bib) - - if (!title) - return null - +Map> buildQuery(Map work, String 
title) { Map> query = [ "q" : ["*"], "@type" : ["Instance"], "hasTitle.mainTitle": [esSafe(title)], ] - insertLinkedAgents(bib) - def card = bib.asCard(true) + insertLinkedAgents(work) + def card = getWhelk().jsonld.toCard(work, false, true) - def author = primaryContributor(card).collect{ esSafe(it) } + def author = primaryContributor(card).collect { esSafe(it) } if (author) { query["or-instanceOf.contribution._str"] = author query["or-instanceOf.contribution.agent._str"] = author return query } - def allContributors = contributors(card).collect{ esSafe(it) } + def allContributors = contributors(card).collect { esSafe(it) } if (allContributors) { query["or-instanceOf.contribution._str"] = allContributors query["or-instanceOf.contribution.agent._str"] = allContributors return query } + return null } -private void insertLinkedAgents(bib) { - getPathSafe(bib.doc.data, ['@graph', 1, 'instanceOf', 'contribution']).each { +private void insertLinkedAgents(work) { + asList(work['contribution']).each { def agent = asList(it.agent).find() if (agent && agent['@id']) { it.agent = loadThing(agent['@id']) @@ -109,21 +77,21 @@ private void insertLinkedAgents(bib) { } } -private String title(bib) { - return getPathSafe(bib.doc.data, ['@graph', 1, 'hasTitle', 0, 'mainTitle']) +private String title(Map thing) { + return getAtPath(thing, ['hasTitle', 0, 'mainTitle']) } -private List primaryContributor(bib) { - contributorStrings(getPathSafe(bib, ['@graph', 1, 'instanceOf', 'contribution'], []).find { it['@type'] == "PrimaryContribution" }) +private List primaryContributor(work) { + contributorStrings(asList(work['contribution']).find { it['@type'] == "PrimaryContribution" }) } -private List contributors(bib) { - getPathSafe(bib, ['@graph', 1, 'instanceOf', 'contribution'], []).collect { contributorStrings(it) }.grep().flatten() +private List contributors(work) { + asList(work['contribution']).collect { contributorStrings(it) }.grep().flatten() } -//getPathSafe(contribution, 
['_str'])?.with { String s -> s.replaceAll(/[^ \p{IsAlphabetic}]/, '') } +//getAtPath(contribution, ['_str'])?.with { String s -> s.replaceAll(/[^ \p{IsAlphabetic}]/, '') } private List contributorStrings(contribution) { - List variants = asList(contribution?.agent) + asList(getPathSafe(contribution, ['agent', 'hasVariant'])) + List variants = asList(contribution?.agent) + asList(getAtPath(contribution, ['agent', 'hasVariant'])) variants.grep().collect { name(it) }.grep() } @@ -139,19 +107,8 @@ private String esSafe(String s) { s.replaceAll('[+|"\\-*~]', " ") } -private Object getPathSafe(item, path, defaultTo = null) { - if (!item) { - return defaultTo - } - - for (p in path) { - if (item[p] != null) { - item = item[p] - } else { - return defaultTo - } - } - return item +private loadIfLink(Map work) { + work?['@id'] ? loadThing(work['@id']) : work } private Map loadThing(def id) { @@ -160,8 +117,4 @@ private Map loadThing(def id) { thing = t.graph[1] } return thing -} - -private static List asList(Object o) { - (o ?: []).with { it instanceof List ? it : [it] } } \ No newline at end of file diff --git a/librisworks/scripts/lxl-4150-deduplicate-contribution.groovy b/librisworks/scripts/lxl-4150-deduplicate-contribution.groovy index daff28bdee..8174b3d5d5 100644 --- a/librisworks/scripts/lxl-4150-deduplicate-contribution.groovy +++ b/librisworks/scripts/lxl-4150-deduplicate-contribution.groovy @@ -14,19 +14,16 @@ selectByIds(ids) { bib -> ? primaryContributionIdx : contribution.findIndexOf { asList(it.agent) == d } def mergeInto = contribution[mergeIntoIdx] - def roles = [] + def roles = contribution.findResults { asList(it.agent) == d ? 
asList(it.role) : null }.flatten().unique() + if (roles) mergeInto['role'] = roles + + def idx = 0 contribution.removeAll { - if (asList(it.agent) == d) { - roles += asList(it.role) - return true - } - return false + def removeIf = asList(it.agent) == d && idx != mergeIntoIdx + idx += 1 + return removeIf } - - if (roles) mergeInto['role'] = roles.unique() - - contribution.add(mergeIntoIdx, mergeInto) } if (duplicates) { diff --git a/librisworks/scripts/lxl-4221-move-illustrativecontent-to-instance.groovy b/librisworks/scripts/lxl-4221-move-illustrativecontent-to-instance.groovy new file mode 100644 index 0000000000..a44ea82fea --- /dev/null +++ b/librisworks/scripts/lxl-4221-move-illustrativecontent-to-instance.groovy @@ -0,0 +1,19 @@ +def ids = new File(System.getProperty('clusters')) + .readLines() + .collect { it.split('\t').collect { it.trim()} } + .flatten() + +selectByIds(ids) { + def instance = it.graph[1] + def work = instance.instanceOf + + if (!work || work['@id']) return + + def illContent = work.remove('illustrativeContent') + + if (illContent) { + instance['illustrativeContent'] = (asList(instance['illustrativeContent']) + asList(illContent)).unique() + + it.scheduleSave() + } +} \ No newline at end of file diff --git a/librisworks/scripts/merge-works.groovy b/librisworks/scripts/merge-works.groovy index 5dc5842083..e9e4a8b5cd 100644 --- a/librisworks/scripts/merge-works.groovy +++ b/librisworks/scripts/merge-works.groovy @@ -53,23 +53,26 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) { cluster -> List linkableWorkIris = uniqueWorksAndTheirInstances.findResults { it.getV1().workIri() } uniqueWorksAndTheirInstances.each { Doc workDoc, List instanceDocs -> - if (!workDoc.instanceData) { - if (workDoc.existsInStorage) { - if (instanceDocs) { - replaceWorkData(workDoc, c.merge([workDoc] + instanceDocs)) - // TODO: Update adminMetadata? To say that additional instances may have contributed to the linked work. 
- writeWorkReport(docs, workDoc, instanceDocs, WorkStatus.UPDATED) - } - } else { - addAdminMetadata(workDoc, instanceDocs.collect { ['@id': it.recordIri()] }) - writeWorkReport(docs, workDoc, instanceDocs, WorkStatus.NEW) - } + // Link more instances to existing linked work + if (workDoc.existsInStorage && !workDoc.instanceData && instanceDocs) { + replaceWorkData(workDoc, c.merge([workDoc] + instanceDocs)) + // TODO: Update adminMetadata? To say that additional instances may have contributed to the linked work. addCloseMatch(workDoc, linkableWorkIris) saveAndLink(workDoc, instanceDocs, workDoc.existsInStorage) - } else { - if (addCloseMatch(workDoc, linkableWorkIris)) { - saveAndLink(workDoc) - } + writeWorkReport(docs, workDoc, instanceDocs, WorkStatus.UPDATED) + return + } + // New merged work + if (!workDoc.existsInStorage && !workDoc.instanceData) { + addAdminMetadata(workDoc, instanceDocs.collect { ['@id': it.recordIri()] }) + addCloseMatch(workDoc, linkableWorkIris) + saveAndLink(workDoc, instanceDocs, workDoc.existsInStorage) + writeWorkReport(docs, workDoc, instanceDocs, WorkStatus.NEW) + return + } + // Local work, save if new closeMatch links created + if (workDoc.instanceData && addCloseMatch(workDoc, linkableWorkIris)) { + saveAndLink(workDoc) } } @@ -96,9 +99,11 @@ void saveAndLink(Doc workDoc, Collection instanceDocs = [], boolean existsI } } - selectByIds(instanceDocs.collect { it.shortId() }) { - it.graph[1]['instanceOf'] = ['@id': workDoc.thingIri()] - it.scheduleSave(changedBy: changedBy, generationProcess: generationProcess) + if (!instanceDocs.isEmpty()) { + selectByIds(instanceDocs.collect { it.shortId() }) { + it.graph[1]['instanceOf'] = ['@id': workDoc.thingIri()] + it.scheduleSave(changedBy: changedBy, generationProcess: generationProcess) + } } } diff --git a/librisworks/scripts/title-clusters.groovy b/librisworks/scripts/title-clusters.groovy index 38bf8ab226..28e6a0eab8 100644 --- a/librisworks/scripts/title-clusters.groovy +++ 
b/librisworks/scripts/title-clusters.groovy @@ -22,7 +22,9 @@ Collection> titleClusters(Collection docs) { static Collection> partitionByTitle(Collection docs) { return partition(docs) { Doc a, Doc b -> - !a.flatInstanceTitle().intersect(b.flatInstanceTitle()).isEmpty() + def aTitles = a.flatInstanceTitle() + a.flatWorkTitle() + def bTitles = b.flatInstanceTitle() + b.flatWorkTitle() + !aTitles.intersect(bTitles).isEmpty() } } diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/DisplayDoc.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/DisplayDoc.groovy index 5de250c66b..3fed6e1987 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/DisplayDoc.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/DisplayDoc.groovy @@ -2,6 +2,7 @@ package se.kb.libris.mergeworks import whelk.Document import whelk.JsonLd +import whelk.util.DocumentUtil class DisplayDoc { Doc doc @@ -75,7 +76,7 @@ class DisplayDoc { private List contributorStrings() { List path = doc.instanceData ? ['instanceOf', 'contribution'] : ['contribution'] - List contribution = Util.getPathSafe(getFramed(), path, []) + List contribution = DocumentUtil.getAtPath(getFramed(), path, []) return contribution.collect { Map c -> contributionStr(c) @@ -101,7 +102,7 @@ class DisplayDoc { List classificationStrings() { List path = doc.instanceData ? 
['instanceOf', 'classification'] : ['classification'] - List classification = Util.getPathSafe(getFramed(), path, []) + List classification = DocumentUtil.getAtPath(getFramed(), path, []) classification.collect { c -> StringBuilder s = new StringBuilder() @@ -146,13 +147,9 @@ class DisplayDoc { Map getFramed() { if (!framed) { - if (doc.existsInStorage) { - framed = JsonLd.frame(doc.thingIri(), doc.whelk.loadEmbellished(doc.shortId()).data) - } else { - Document copy = doc.document.clone() - doc.whelk.embellish(copy) - framed = JsonLd.frame(doc.thingIri(), copy.data) - } + Document copy = doc.document.clone() + doc.whelk.embellish(copy) + framed = JsonLd.frame(doc.thingIri(), copy.data) } return framed diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy index 8118d34a87..e6e45c8896 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy @@ -4,6 +4,7 @@ import whelk.Document import whelk.JsonLd import whelk.Whelk import whelk.datatool.DocumentItem +import whelk.util.DocumentUtil import static Util.asList import static Util.Relator @@ -40,6 +41,7 @@ class Doc { Map workData List flatInstanceTitle + List flatWorkTitle DisplayDoc display @@ -66,7 +68,7 @@ class Doc { void setData() { if (mainEntity()['instanceOf']) { instanceData = mainEntity() - workData = instanceData['instanceOf'] + workData = asList(instanceData['instanceOf']).find() } else { workData = mainEntity() } @@ -112,6 +114,14 @@ class Doc { asList(workData['hasTitle']) } + List flatWorkTitle() { + if (!flatWorkTitle) { + flatWorkTitle = Util.getFlatTitle(workTitle()) + } + + return flatWorkTitle + } + List instanceTitle() { asList(instanceData?.hasTitle) } @@ -181,7 +191,7 @@ class Doc { } int numPages() { - String extent = Util.getPathSafe(extent(), [0, 'label', 0]) ?: Util.getPathSafe(extent(), [0, 'label'], '') + String extent 
= DocumentUtil.getAtPath(extent(), [0, 'label', 0]) ?: DocumentUtil.getAtPath(extent(), [0, 'label'], '') return numPages(extent) } @@ -215,7 +225,7 @@ class Doc { boolean isMaybeAggregate() { hasPart() || classification().any { it.inScheme?.code =~ /[Kk]ssb/ && it.code?.contains('(s)') } - || !contribution().any { it['@type'] == 'PrimaryContribution' } + || !contribution().any { it['@type'] == 'PrimaryContribution' && it['agent'] } || hasRelationshipWithContribution() } @@ -318,4 +328,9 @@ class Doc { workData.remove('_editionStatement') workData.remove('_numPages') } + + void sortContribution() { + // PrimaryContribution first + contribution()?.sort {it['@type'] != 'PrimaryContribution' } + } } \ No newline at end of file diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy index 1f9149af5e..e67ba0dd66 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy @@ -8,10 +8,7 @@ import whelk.util.Unicode import static se.kb.libris.mergeworks.compare.IntendedAudience.preferredComparisonOrder class Util { - static def titleComponents = ['mainTitle', 'titleRemainder', 'subtitle', 'hasPart', 'partNumber', 'partName', 'marc:parallelTitle', 'marc:equalTitle'] - - static def titleVariant = ['Title', 'ParallelTitle'] - // removed 'VariantTitle', 'CoverTitle' since they sometimes contain random generic stuff like "Alibis filmroman", "Kompisböcker för de yngsta" + static def titleComponents = ['mainTitle', 'titleRemainder', 'subtitle', 'hasPart', 'partNumber', 'partName'] static enum Relator { TRANSLATOR('https://id.kb.se/relator/translator'), @@ -36,6 +33,9 @@ class Util { } } + static def noise = + [",", '"', "'", "ʹ", "ʼ", '[', ']', ',', '.', '.', ':', ';', '-', '(', ')', ' the ', '-', '–', '+', '!', '?',].collectEntries { [it, ' '] } + private static Set IGNORED_SUBTITLES = 
Util.class.getClassLoader() .getResourceAsStream('merge-works/ignored-subtitles.txt') .readLines().grep().collect(Util.&normalize) as Set @@ -44,9 +44,6 @@ class Util { .getResourceAsStream('merge-works/generic-titles.txt') .readLines().grep().collect(Util.&normalize) as Set - static def noise = - [",", '"', "'", "ʹ", "ʼ", '[', ']', ',', '.', '.', ':', ';', '-', '(', ')', ' the ', '-', '–', '+', '!', '?',].collectEntries { [it, ' '] } - static List asList(Object o) { (o ?: []).with { it instanceof List ? it : [it] } @@ -94,6 +91,8 @@ class Util { if (genericSubtitle(value)) { new DocumentUtil.Remove() } else { + // Remove substring after colon if identified as generic + // Example: "klanen Kennedy : roman" -> "klanen Kennedy" ((List) value.split(':')).with { if (it.size() > 1 && genericSubtitle(it.last().trim())) { new DocumentUtil.Replace(value.replaceFirst(~/\s*:.+$/, '')) @@ -131,17 +130,6 @@ class Util { return Unicode.removeDiacritics(Unicode.normalizeForSearch(StringUtils.normalizeSpace(" $s ".toLowerCase().replace(noise)))) } - static Object getPathSafe(item, path, defaultTo = null) { - for (p in path) { - if ((item instanceof Collection || item instanceof Map) && item[p] != null) { - item = item[p] - } else { - return defaultTo - } - } - return item - } - static List getFlatTitle(List hasTitle) { flatTitles(hasTitle) .grep(isTitle) @@ -186,20 +174,17 @@ class Util { null ] - static Map appendTitlePartsToMainTitle(Map title, String partNumber, String partName = null) { + static void appendTitlePartsToMainTitle(Map title, String partNumber, String partName = null) { + if (title['mainTitle'][-1] != '.') { + title['mainTitle'] += '.' + } if (partNumber && partName) { - title['mainTitle'] += ". $partNumber, $partName" + title['mainTitle'] += " $partNumber, $partName" } else if (partNumber) { - title['mainTitle'] += ". $partNumber" + title['mainTitle'] += " $partNumber" } else if (partName) { - title['mainTitle'] += ". 
$partName" + title['mainTitle'] += " $partName" } - - title.remove('partNumber') - title.remove('partName') - title.remove('hasPart') - - return title } static String findTitlePart(List title, String prop) { @@ -210,7 +195,6 @@ class Util { // Return the most common title for the best encodingLevel static def bestTitle(Collection docs) { - // TODO: which title to pick when matched with already existing linked work? def linkedWorkTitle = docs.findResult { it.workIri() ? it.workData['hasTitle'] : null } if (linkedWorkTitle) { return linkedWorkTitle @@ -222,11 +206,15 @@ class Util { def partNumber = findTitlePart(bestInstanceTitle, 'partNumber') def partName = findTitlePart(bestInstanceTitle, 'partName') + def workTitleShape = { it.subMap(['@type', 'mainTitle', 'subtitle', 'titleRemainder', 'source']) } + if (bestWorkTitle) { - return bestWorkTitle.collect { appendTitlePartsToMainTitle(it, partNumber) } + return bestWorkTitle.each { appendTitlePartsToMainTitle(it, partNumber) } + .collect(workTitleShape) } - return bestInstanceTitle.collect { appendTitlePartsToMainTitle(it, partNumber, partName) } + return bestInstanceTitle.each { appendTitlePartsToMainTitle(it, partNumber, partName) } + .collect(workTitleShape) } static def mostCommonHighestEncodingLevel(Collection docs, Closure> findMostCommon) { @@ -311,7 +299,14 @@ class Util { }.with { preferredComparisonOrder(it) } def workClusters = partition(docs, { Doc a, Doc b -> c.sameWork(a, b) }) - .each { work -> work.each { doc -> doc.removeComparisonProps() } } + .each { work -> + work.each { doc -> + doc.removeComparisonProps() + // List order may be shuffled when comparing works. + // Make sure PrimaryContribution always comes first in contribution. 
+ doc.sortContribution() + } + } return workClusters } diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/TranslationOf.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/TranslationOf.groovy index 717b528e85..dd0dc578d7 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/TranslationOf.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/TranslationOf.groovy @@ -14,7 +14,7 @@ class TranslationOf implements ValuePicker { // We assume that there are never more than one object in translationOf a = Util.asList(a)[0] b = Util.asList(b)[0] - (!a && !b) || (a && b && c.isEqual(noTypeNoTitle(a), noTypeNoTitle(b)) && noTitleOrSameTitle(a, b)) + a && b && c.isEqual(noTypeNoTitle(a), noTypeNoTitle(b)) && noTitleOrSameTitle(a, b) } @Override diff --git a/librisworks/src/main/resources/merge-works/generic-titles.txt b/librisworks/src/main/resources/merge-works/generic-titles.txt index afc73c6ba8..ccd0d3440e 100644 --- a/librisworks/src/main/resources/merge-works/generic-titles.txt +++ b/librisworks/src/main/resources/merge-works/generic-titles.txt @@ -1,15 +1,31 @@ artiklar +C. A. Ehrensvärds skrifter collected plays dagböcker dikter +Dikter i urval dramatik +E. J. Stagnelii samlade skrifter +Elsa Beskows sagor +Erik Axel Karlfeldt +Erik Gustaf Geijers samlade skrifter +Esaias Tegnérs samlade skrifter essäer folksagor folkvisor fragment +Fredmans epistlar +Fredmans sånger +Fria fantasier, hvilka betraktade såsom ett helt, af herr Hugo Löwenstjerna stundom kallades Törnrosens bok stundom En irrande hind +Gluntarne +H. C. 
Andersens bästa sagor +Johan Ludvig Runebergs efterlemnade skrifter +Johan Ludvig Runebergs samlade arbeten +Johan Ludvig Runebergs samlade skrifter korrespondens krönikor lyrik +Läsning för barn memoarer noveller pjäser @@ -23,12 +39,27 @@ rapport report romaner sagor +Samlade arbeten +Samlade berättelser samlade dikter +Samlade noveller och berättelser samlade pjäser samlade skrifter samlade verk +Samlade vitterhetsarbeten +Samlade vitterhets-arbeten +Shakespeares dramatiska arbeten skrifter skådespel sonetter +Svenska ordspråk +Svenska ordstäv tecknade serier +Tusen och en natt +Valda berättelser +Valda dikter +Valda skrifter +Valda stycken +Valda verk +Visor urval \ No newline at end of file diff --git a/librisworks/src/main/resources/merge-works/ignored-subtitles.txt b/librisworks/src/main/resources/merge-works/ignored-subtitles.txt index 4dea8de2e6..74bfc3f693 100644 --- a/librisworks/src/main/resources/merge-works/ignored-subtitles.txt +++ b/librisworks/src/main/resources/merge-works/ignored-subtitles.txt @@ -1,77 +1,265 @@ a comedy +aforismer a history +äktenskapshistoria +alex king-thriller a novel a play a romance a tale -aforismer +äventyrsberättelse +äventyrsberättelse för ungdom +äventyrsroman +barnberättelse för små och stora berättelse berättelse för barn +berättelse för barn och ungdom berättelse för flickor berättelse för pojkar berättelse för unga flickor +berättelse för ungdom +berättelse från 1700-talets senare del +berättelse från kristi tid +berättelse från nittonde seklet +berättelse från öarna +berättelse från okristen tid +berättelse från sista finska kriget +berättelse från skärgården +berättelse från sörmland +berättelse från västra skärgården +berättelsen om en amerikansk familj berättelser berättelser för barn +berättelser för unga och gamla +berättelser från alla tidehvarf +berättelser och skisser +berättelse ur folklifvet bilderbok +bonderoman +bröderna paine på nya äventyr +bröderna perry +bygdeberättelser +chesapeake shores-roman 
comédie contos +coq rouge +dalgliesh-deckare +dalziel och pascoe-roman deckare +deckare med ester karlsson med k deckarroman +den tyske tonsättaren adrian leverkühns liv skildrat av en vän +detektivhistoria från antikens rom detektivroman dikt dikter +dikt i tre sånger +diktsamling +dirk pitt-roman +dokumentärroman drama efterlämnade dikter ein coq-rouge-thriller -ein roman eine erzählung +ein roman +elak skolgosses minnen +elva brev från marcus mezentius manilianus om våren år trettio efter kristus erzählung erzählungen -essays +espen arnakkes kommentarer till jantelagen essäer +essayer +essays +ett äktenskaps roman +ett fall för anastasia kamenskaja vid moskvapolisen +ett fall för dr siri ett fall för kay scarpetta +ett fall för kommissarie brunetti +ett fall för kommissarie çetin ikmen +ett fall för kommissarie santos +ett fall med dunder brak +ett nytt fall för maria wern spänningsroman +familjeroman +familjs förfall +fantasyroman +femton böcker ur den egyptiske läkaren sinuhes liv omkr 1390-1335 f kr +folklivsberättelse fortælling +framtidsroman +göran persson-deckare +hägringar från reformationstiden +hans ungdoms öden och äventyr i många länder intill år 1527 sanningsenligt framställda av honom själv i tio böcker +harry bosch-deckare +harry hole-thriller +heroisk berättelse +historia från idyllens och revolutionernas tidehvarf +historia om rätt och orätt +historier +historisk äventyrsroman +historisk berättelse +historisk homan-deckare +historisk kriminalroman historisk roman +historisk romantisk skildring +historiskt-romantiska skildringar från snapphanefejden +homan-bok homandeckare +homan-deckare i 1700-talsmiljö +humoresker +humoristisk berättelse +indisk berättelse jack reacher-thriller +jakt- fiske- och bygdehistorier +kärlekshistoria +kärleksroman +kåserier +kent mortland-thriller komedi komedi i fyra akter +kommissarie gamache-deckare +konrad sejer-deckare krimi kriminalroman -kärlekshistoria -kärleksroman -kåserier +kriminalroman från sandhamn 
+kustroman +lagerlöfs homeros erland lagerlöfs klassiska översättning +läkarroman lustspel i en akt +människa ur det förgångna rannsakad och hörd om sina levnadsomständigheter roman +morden i sandhamn +nathalie svensson- och johan axberg-deckare nouvelles novela novell novelle noveller +novelletter +novellsamling +novell ur spelreglerna +nutidsroman +ny kriminalroman om hammarbypolisen +öfvers +originalberättelse +originalroman +pennritning pjäs +poem +poėma +poesi polisroman +politisk thriller povesti powieść -poėma +prosa +prosadikter +psykologisk thriller +rammakaren theodor marklunds egen redogörelse +religiöst poem reseguide resehandbok +revy om människan i tid och rum rikosromaani +robert krüger-deckare +roland hassel-thriller romaani romaani rikoksesta roman -roman om ett brott -roman om skivvärlden romanas romance +romanen om Elling +roman från 2000-talet +roman från värend 1650 +roman från värend på 1790-talet +roman i femtiotre tablåer +roman i två delar +roman om baltutlämningen +roman om det närvarande +roman om ett brott +roman om frihet +roman om skivvärlden +romans +romantisk berättelse romanzo +romaunt i tolv böcker rövarroman runoja saga +saga på vers med rim som barnen får hitta på alldeles själva +saga på vers med rim som barnen får hitta på alldeles själva bilderbok +saga på vers med rim som barnen får hitta på själva sagor +sällsam historia +samling berättelser +sång +sånger sann historia -skildringar +sannsaga +science fiction +serieroman +själavårdsbok +själs utvecklingshistoria +självbiografisk berättelse +sjöroman +skaldestycke skáldsaga +skälmroman +skärgårdsberättelse +sketch +skildring +skildringar +skildringar från attentatens och jubelfesternas tidehvarf +skildringar från attentatens och jubelfesternas tidevarv +skildringar ur artist- och författarelivet +skildringar ur artist- och författarlifvet +skildringar ur artist- och författarlivet +skildringar ur stockholmslifvet +skildring från franska revolutionen +skisser +skizz 
+skolflickshistoria +släkten lackland +släktsaga +småstadskrönika +soldatfamiljs historia +sommarbok för stora och små spänningsroman +spänningsroman med anna pigeon +spänningsroman med erik winter +spionroman +spökhistoria +sprakfåle-bok +standardupplaga +sten wall-deckare +stockholmsroman stories +svensk originalberättelse +svensk originalroman +svensk sjömans äventyr +svenskt original +svenskt original för stockholms-tidningen +sv orig +tavla ur livet +teckningar +teckningar med text +teckningar ur hvardagslifvet +teckningar ur vardagslivet +teckning ur det verkliga lifvet +teckning ur det verkliga livet thriller +tio böcker om hans jordiska liv omkring 520-450 +tom grip-thriller +tommy flisen-deckare +tvillingdetektiverna +tvillingdetektiverna på nya äventyr +ungdomsbok ungdomsroman -(Efterlämnade dikter.) +ur Dan Henrys minnen +ur en stockholms-detektivs minnen +ur Kalle Svenssons dagbok +ur numas arkiv +värmlandsberättelse +växelringning på ett gammalt tema i två korta anslag och två hela långringningar +vildmarksroman +virgin river-roman +wexforddeckare +will trent-deckare diff --git a/whelktool/globalchanges-1.33.sh b/whelktool/globalchanges-1.33.sh index c9f0d8025b..5d1011e1d2 100644 --- a/whelktool/globalchanges-1.33.sh +++ b/whelktool/globalchanges-1.33.sh @@ -1,4 +1,6 @@ #!/bin/bash set -euxo pipefail time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -jar build/libs/whelktool.jar --report reports/$ENV-$(date +%Y%m%d-%H%M%S) --skip-index scripts/2023/08/lxl-4243-move-out-solitary-contentType-from-hasPart.groovy -time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -jar build/libs/whelktool.jar --report reports/$ENV-$(date +%Y%m%d-%H%M%S) --skip-index scripts/cleanups/2023/07/lxl-4221-move-illustrativecontent-to-instance.groovy \ No newline at end of file +time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -jar build/libs/whelktool.jar --report reports/$ENV-$(date +%Y%m%d-%H%M%S) --skip-index 
scripts/cleanups/2023/05/gf-cleanup.groovy +time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -jar build/libs/whelktool.jar --report reports/$ENV-$(date +%Y%m%d-%H%M%S) --skip-index scripts/cleanups/2020/08/lxl-3294-move-bearer-like-gfs-from-work-to-instance.groovy +time java -Dxl.secret.properties=$HOME/secret.properties-$ENV -jar build/libs/whelktool.jar --report reports/$ENV-$(date +%Y%m%d-%H%M%S) --skip-index scripts/2023/10/elib-unspecified-contributor.groovy diff --git a/librisworks/scripts/elib-unspecified-contributor.groovy b/whelktool/scripts/2023/10/elib-unspecified-contributor.groovy similarity index 100% rename from librisworks/scripts/elib-unspecified-contributor.groovy rename to whelktool/scripts/2023/10/elib-unspecified-contributor.groovy diff --git a/whelktool/scripts/cleanups/2023/07/lxl-4221-move-illustrativecontent-to-instance.groovy b/whelktool/scripts/cleanups/2023/07/lxl-4221-move-illustrativecontent-to-instance.groovy deleted file mode 100644 index ebca8f7133..0000000000 --- a/whelktool/scripts/cleanups/2023/07/lxl-4221-move-illustrativecontent-to-instance.groovy +++ /dev/null @@ -1,15 +0,0 @@ -def where = """ - collection = 'bib' - and deleted = false - and data#>>'{@graph,1,instanceOf,@type}' = 'Text' - and data#>'{@graph,1,instanceOf, illustrativeContent}' is not null -""" - -selectBySqlWhere(where) { - def instance = it.graph[1] - def work = instance.instanceOf - - instance['illustrativeContent'] = (asList(instance['illustrativeContent']) + asList(work.remove('illustrativeContent'))).unique() - - it.scheduleSave() -} \ No newline at end of file