diff --git a/housekeeping/src/main/groovy/whelk/housekeeping/BulkChangePreviewAPI.java b/housekeeping/src/main/groovy/whelk/housekeeping/BulkChangePreviewAPI.java index 7efe1fd7cd..e413692468 100644 --- a/housekeeping/src/main/groovy/whelk/housekeeping/BulkChangePreviewAPI.java +++ b/housekeeping/src/main/groovy/whelk/housekeeping/BulkChangePreviewAPI.java @@ -26,7 +26,6 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException; -import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -179,6 +178,7 @@ private Map makePreviewChangeSet(RecordedChange recordChange) { var id = (String) recordCopy.get(ID_KEY); var result = getChangeSetsMap(beforeDoc, afterDoc, id); + ((Map) DocumentUtil.getAtPath(result, List.of("changeSets", 0))).put("version", beforeDoc.getThing()); ((Map) DocumentUtil.getAtPath(result, List.of("changeSets", 1))).put("version", diff --git a/whelktool/src/main/groovy/whelk/datatool/WhelkTool.groovy b/whelktool/src/main/groovy/whelk/datatool/WhelkTool.groovy index d73ae891ed..566bbb812f 100644 --- a/whelktool/src/main/groovy/whelk/datatool/WhelkTool.groovy +++ b/whelktool/src/main/groovy/whelk/datatool/WhelkTool.groovy @@ -25,6 +25,7 @@ import javax.script.Bindings import javax.script.CompiledScript import javax.script.ScriptEngineManager import javax.script.SimpleBindings +import java.nio.charset.StandardCharsets import java.time.ZonedDateTime import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.ConcurrentLinkedQueue @@ -762,6 +763,7 @@ class WhelkTool { cli.idchg(longOpt: 'allow-id-removal', '[UNSAFE] Allow script to remove document ids, e.g. 
sameAs.') cli.sv(longOpt: 'skip-validation', '[UNSAFE] Skip JSON-LD validation before saving to database.') cli.n(longOpt: 'stats-num-ids', args: 1, 'Number of ids to print per entry in STATISTICS.txt.') + cli.p(longOpt: 'parameters', args: 1, argName: 'PARAMETER-FILE', 'Path to JSON file with parameters to script') def options = cli.parse(args) if (options.h) { @@ -776,6 +778,11 @@ class WhelkTool { Script script = null try { script = new FileScript(scriptPath) + + String paramPath = options.p + if (paramPath) { + script.setParameters(mapper.readValue(new File(paramPath).getText("UTF-8"), Map)) + } } catch (IOException e) { System.err.println("Could not load script [$scriptPath] : $e") diff --git a/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJob.java b/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJob.java index e6ce4cae66..68da78ece3 100644 --- a/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJob.java +++ b/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJob.java @@ -85,6 +85,7 @@ protected WhelkTool buildWhelkTool(BulkJobDocument jobDoc) throws IOException { var bulkJobThingId = stripSuffix(id, HASH_IT) + HASH_IT; Script script = jobDoc.getSpecification().getScript(bulkJobThingId); + WhelkTool tool = new WhelkTool(whelk, script, reportDir(systemId), WhelkTool.getDEFAULT_STATS_NUM_IDS()); // TODO for now setting changedBy only works for loud changes (!minorChange in PostgreSQLComponent) tool.setDefaultChangedBy(jobDoc.getChangeAgentId()); diff --git a/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java b/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java index 889d553b69..9c37cc774e 100644 --- a/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java +++ b/whelktool/src/main/java/whelk/datatool/bulkchange/BulkJobDocument.java @@ -20,7 +20,8 @@ public enum SpecType implements JsonLdKey { Update("bulk:Update"), Delete("bulk:Delete"), Create("bulk:Create"), - Merge("bulk:Merge"); + 
Merge("bulk:Merge"), + Other("bulk:Other"); private final String key; @@ -63,6 +64,7 @@ public String key() { public static final String LABEL_KEY = "label"; public static final String KEEP_KEY = "bulk:keep"; public static final String DEPRECATE_KEY = "bulk:deprecate"; + public static final String SCRIPT_KEY = "bulk:script"; private static final List STATUS_PATH = List.of(JsonLd.GRAPH_KEY, 1, STATUS_KEY); private static final List UPDATE_TIMESTAMP_PATH = List.of(JsonLd.GRAPH_KEY, 1, SHOULD_UPDATE_TIMESTAMP_KEY); @@ -128,6 +130,10 @@ public Specification getSpecification() { get(spec, List.of(DEPRECATE_KEY, "*", ID_KEY), Collections.emptyList()), get(spec, List.of(KEEP_KEY, ID_KEY), "") ); + case SpecType.Other -> new Specification.Other( + get(spec, SCRIPT_KEY, null), + spec + ); case null -> throw new ModelValidationException(String.format("Bad %s %s: %s", CHANGE_SPEC_KEY, JsonLd.TYPE_KEY, specType)); }; diff --git a/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java b/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java index a3bfc77f58..d338686037 100644 --- a/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java +++ b/whelktool/src/main/java/whelk/datatool/bulkchange/Specification.java @@ -1,5 +1,6 @@ package whelk.datatool.bulkchange; +import com.google.common.collect.Maps; import org.apache.commons.io.IOUtils; import whelk.Document; import whelk.Whelk; @@ -11,6 +12,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -21,7 +23,7 @@ import static whelk.datatool.bulkchange.BulkJobDocument.DEPRECATE_KEY; import static whelk.datatool.bulkchange.BulkJobDocument.TARGET_FORM_KEY; -public sealed interface Specification permits Specification.Create, Specification.Delete, Specification.Merge, Specification.Update { +public sealed interface Specification permits Specification.Create, Specification.Delete, 
Specification.Merge, Specification.Update, Specification.Other {
 
     Script getScript(String bulkJobId);
 
@@ -123,6 +125,42 @@ public Script getScript(String bulkJobId) {
         }
     }
 
+    /**
+     * Bulk change that runs one of the bundled, allow-listed scripts.
+     *
+     * @param name       script name without the {@code .groovy} suffix; {@code getScript}
+     *                   rejects a {@code null} or unknown name
+     * @param parameters the change specification; only the keys allow-listed for the
+     *                   script are handed to it
+     */
+    record Other(String name, Map parameters) implements Specification {
+        /** Script name mapped to the specification keys that script may receive. */
+        private static final Map<String, List<String>> ALLOWED_SCRIPTS_PARAMS = Map.of(
+                "removeTopicSubdivision", List.of(DEPRECATE_KEY, KEEP_KEY)
+        );
+
+        /**
+         * Loads the named script from the classpath and passes it the allow-listed parameters.
+         *
+         * @throws IllegalArgumentException if {@code name} is {@code null} or not allow-listed
+         */
+        @Override
+        public Script getScript(String bulkJobId) {
+            // Map.of() maps throw NullPointerException on a null key lookup, so guard first.
+            List<String> allowedKeys = name == null ? null : ALLOWED_SCRIPTS_PARAMS.get(name);
+            if (allowedKeys == null) {
+                throw new IllegalArgumentException("Script " + name + " not supported");
+            }
+
+            Script s = new Script(loadClasspathScriptSource(name + ".groovy"), bulkJobId);
+
+            Map<String, Object> params = new HashMap<>();
+            params.putAll(Maps.filterKeys(parameters, allowedKeys::contains));
+            s.setParameters(params);
+            return s;
+        }
+    }
+
     private static String loadClasspathScriptSource(String scriptName) {
         String path = "bulk-change-scripts/" + scriptName;
         try (InputStream scriptStream = Specification.class.getClassLoader().getResourceAsStream(path)) {
diff --git a/whelktool/src/main/resources/bulk-change-scripts/removeTopicSubdivision.groovy b/whelktool/src/main/resources/bulk-change-scripts/removeTopicSubdivision.groovy
new file mode 100644
index 0000000000..dc625f1d42
--- /dev/null
+++ b/whelktool/src/main/resources/bulk-change-scripts/removeTopicSubdivision.groovy
@@ -0,0 +1,66 @@
+/**
+ * Remove all uses of a certain TopicSubdivision within ComplexSubject
+ * The TopicSubdivision itself is not removed, only the usages.
+ *
+ * Parameters:
+ * bulk:deprecate - Link(s) to the subdivision(s) whose usages are removed
+ * bulk:keep - Optional link to a regular Topic, added to the list holding each changed ComplexSubject
+ */
+
+import whelk.JsonLd
+import whelk.util.DocumentUtil
+
+import static whelk.JsonLd.ID_KEY
+import static whelk.datatool.bulkchange.BulkJobDocument.DEPRECATE_KEY
+import static whelk.datatool.bulkchange.BulkJobDocument.KEEP_KEY
+
+List obsoleteLinks = asList(parameters.get(DEPRECATE_KEY))
+Map replacementLink = parameters.get(KEEP_KEY)
+
+obsoleteLinks.each { obsoleteLink ->
+    selectByIds([obsoleteLink[ID_KEY]]) { subdivisionDoc ->
+        selectByIds(subdivisionDoc.getDependers()) { doc ->
+            Map mainEntity = doc.graph[1] as Map
+            // Documents whose main entity is itself a ComplexSubject are left untouched
+            if (mainEntity[JsonLd.TYPE_KEY] == 'ComplexSubject') {
+                return
+            }
+
+            def changed = DocumentUtil.traverse(mainEntity) { node, path ->
+                if (!(node instanceof Map) || node[JsonLd.TYPE_KEY] != 'ComplexSubject') {
+                    return DocumentUtil.NOP
+                }
+                def components = asList(node.get('termComponentList'))
+                if (!(obsoleteLink in components)) {
+                    return DocumentUtil.NOP
+                }
+                // TODO? add way to do this with an op? SplitReplace? [Replace, Insert]?
+                if (replacementLink && path.size() > 1) {
+                    def container = DocumentUtil.getAtPath(mainEntity, path.dropRight(1))
+                    if (container instanceof List && !container.contains(replacementLink)) {
+                        container.add(replacementLink)
+                    }
+                }
+                return mapSubject(node, components, obsoleteLink)
+            }
+
+            if (changed) {
+                doc.scheduleSave(loud: isLoudAllowed)
+            }
+        }
+    }
+}
+
+static DocumentUtil.Operation mapSubject(Map subject, termComponentList, deprecateLink) {
+    def remaining = termComponentList.findAll { it != deprecateLink }
+    switch (remaining.size()) {
+        case 0: // no component left: the ComplexSubject goes away
+            return new DocumentUtil.Remove()
+        case 1: // one component left: it takes the ComplexSubject's place
+            return new DocumentUtil.Replace(remaining.first())
+        default: // otherwise keep the subject with the shortened list
+            Map reduced = new HashMap(subject)
+            reduced.termComponentList = remaining
+            return new DocumentUtil.Replace(reduced)
+    }
+}
\ No newline at end of file