From 86d697b2ea6d210a25ba9aff8443572ed1311cd2 Mon Sep 17 00:00:00 2001 From: kwahlin Date: Fri, 22 Nov 2024 08:49:01 +0100 Subject: [PATCH] Support replacing Subdivision with GenreForm --- .../datatool/bulkchange/BulkJobDocument.java | 3 +- .../datatool/bulkchange/Specification.java | 8 +-- .../removeSubdivision.groovy | 49 ++++++++++++++----- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java b/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java index 41b9bb250e..8949dd5175 100644 --- a/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java +++ b/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java @@ -66,11 +66,10 @@ public String key() { public static final String TARGET_FORM_KEY = "bulk:targetForm"; public static final String COMMENT_KEY = "comment"; public static final String LABEL_KEY = "label"; - public static final String ADD_KEY = "bulk:add"; public static final String KEEP_KEY = "bulk:keep"; public static final String DEPRECATE_KEY = "bulk:deprecate"; public static final String REMOVE_SUBDIVISION_KEY = "bulk:removeSubdivision"; - public static final String ADD_SUBJECT_KEY = "bulk:addSubject"; + public static final String ADD_TERM_KEY = "bulk:addTerm"; public static final String SCRIPT_KEY = "bulk:script"; public static final String EXECUTION_KEY = "bulk:execution"; public static final String EXECUTION_TYPE = "bulk:Execution"; diff --git a/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java b/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java index ff6abc78b9..55cd67e404 100644 --- a/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java +++ b/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java @@ -2,10 +2,8 @@ import com.google.common.collect.Maps; import org.apache.commons.io.IOUtils; -import whelk.Document; import whelk.Whelk; import whelk.datatool.Script; -import whelk.datatool.form.ModifiedThing; import whelk.datatool.form.Transform; import java.io.IOException; @@ -15,9 +13,7 @@ import java.util.List; import java.util.Map; -import static whelk.JsonLd.GRAPH_KEY; -import static whelk.JsonLd.RECORD_KEY; -import static whelk.datatool.bulkchange.BulkJobDocument.ADD_SUBJECT_KEY; +import static whelk.datatool.bulkchange.BulkJobDocument.ADD_TERM_KEY; import static whelk.datatool.bulkchange.BulkJobDocument.KEEP_KEY; import static whelk.datatool.bulkchange.BulkJobDocument.MATCH_FORM_KEY; import static whelk.datatool.bulkchange.BulkJobDocument.DEPRECATE_KEY; @@ -80,7 +76,7 @@ public Script getScript(String bulkJobId) { record Other(String name, Map parameters) implements Specification { private static final Map> ALLOWED_SCRIPTS_PARAMS = Map.of( - "removeSubdivision", List.of(REMOVE_SUBDIVISION_KEY, ADD_SUBJECT_KEY) + "removeSubdivision", List.of(REMOVE_SUBDIVISION_KEY, ADD_TERM_KEY) ); @Override diff --git a/whelktool/src/main/resources/bulk-change-scripts/removeSubdivision.groovy b/whelktool/src/main/resources/bulk-change-scripts/removeSubdivision.groovy index 276634d58f..10e37c6dfb 100644 --- a/whelktool/src/main/resources/bulk-change-scripts/removeSubdivision.groovy +++ b/whelktool/src/main/resources/bulk-change-scripts/removeSubdivision.groovy @@ -8,34 +8,37 @@ */ -import whelk.JsonLd import whelk.Whelk +import whelk.datatool.DocumentItem import whelk.util.DocumentUtil import static whelk.JsonLd.ID_KEY +import static whelk.JsonLd.TYPE_KEY import static whelk.JsonLd.asList import static whelk.converter.JsonLDTurtleConverter.toTurtleNoPrelude -import static whelk.datatool.bulkchange.BulkJobDocument.ADD_SUBJECT_KEY +import static whelk.datatool.bulkchange.BulkJobDocument.ADD_TERM_KEY import static whelk.datatool.bulkchange.BulkJobDocument.REMOVE_SUBDIVISION_KEY +Whelk whelk = getWhelk() + Map inScheme List removeSubdivision = asList(parameters.get(REMOVE_SUBDIVISION_KEY)).collect { Map copy = new HashMap((Map) it) - inScheme = copy.remove('inScheme') + inScheme = (Map) copy.remove('inScheme') return copy } -Map addSubject = parameters.get(ADD_SUBJECT_KEY) +Map addTerm = parameters.get(ADD_TERM_KEY) +String addTermType = addTerm ? getType(addTerm) : null -def process = { doc -> +def process = { DocumentItem doc -> Map thing = doc.graph[1] as Map - if (thing[JsonLd.TYPE_KEY] == 'ComplexSubject') { + if (thing[TYPE_KEY] == 'ComplexSubject') { return } - Set modifiedListPaths = [] as Set def modified = DocumentUtil.traverse(thing) { value, path -> - if (value instanceof Map && value[JsonLd.TYPE_KEY] == 'ComplexSubject') { + if (value instanceof Map && value[TYPE_KEY] == 'ComplexSubject') { var t = asList(value.get('termComponentList')) if ((!inScheme || inScheme == value['inScheme']) && t.containsAll(removeSubdivision)) { var parentPath = path.size() > 1 ? path.dropRight(1) : null @@ -43,8 +46,17 @@ def process = { doc -> var parent = DocumentUtil.getAtPath(thing, parentPath) if (parent instanceof List) { modifiedListPaths.add(parentPath) - if (addSubject) { - parent.add(addSubject) + if (whelk.jsonld.isSubClassOf(addTermType, 'Subject')) { + parent.add(addTerm) + } else if (whelk.jsonld.isSubClassOf(addTermType, 'GenreForm')) { + var grandParent = DocumentUtil.getAtPath(thing, parentPath.dropRight(1)) + if (grandParent instanceof Map) { + def genreForm = asList(grandParent['genreForm']) + if (!genreForm.contains(addTerm)) { + genreForm.add(addTerm) + } + grandParent['genreForm'] = genreForm + } } } } @@ -81,7 +93,6 @@ linked.each { l -> } } if (!blank.isEmpty()) { - Whelk whelk = getWhelk() /* Querying records containing the given combination of blank subdivisions is very slow so we have to run a separate query for each subdivision. However the maximum number of results from a Sparql query is 100k so if we just take the @@ -109,7 +120,7 @@ static DocumentUtil.Operation mapSubject(Map complexSubject, termComponentList, } if (t2.size() == 1) { def remaining = t2.first() - if (complexSubject['inScheme'] && !remaining[ID_KEY]) { + if (complexSubject['inScheme'] && !remaining['inScheme'] && !remaining[ID_KEY]) { remaining['inScheme'] = complexSubject['inScheme'] } return new DocumentUtil.Replace(remaining) @@ -118,4 +129,16 @@ static DocumentUtil.Operation mapSubject(Map complexSubject, termComponentList, Map result = new HashMap(complexSubject) result.termComponentList = t2 return new DocumentUtil.Replace(result) -} \ No newline at end of file +} + +String getType(Map term) { + if (term[ID_KEY]) { + String type + selectByIds([term[ID_KEY]]) { + type = it.doc.getThingType() + } + return type + } + return term[TYPE_KEY] +} +