Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/bulk merge #1509

Merged
merged 8 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 87 additions & 47 deletions rest/src/main/groovy/whelk/rest/api/BulkChangePreviewAPI.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.List;
import java.util.Map;

import static whelk.JsonLd.GRAPH_KEY;
import static whelk.JsonLd.ID_KEY;
import static whelk.JsonLd.RECORD_KEY;
import static whelk.util.Unicode.stripPrefix;
Expand Down Expand Up @@ -55,14 +56,18 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t
int limit = nonNegativeInt(request, "_limit", DEFAULT_LIMIT);
int offset = nonNegativeInt(request, "_offset", 0);

var changeDoc = load(systemId);
var jobDoc = load(systemId);
var spec = jobDoc.getSpecification();

// TODO? let Specifications create their own previews?
var result = switch (changeDoc.getSpecification()) {
case Specification.Update update -> makePreview(update, offset, limit, id);
case Specification.Delete delete -> makePreview(delete, offset, limit, id);
case Specification.Create create -> Collections.emptyMap(); //TODO
};
// TODO: Fetch ready-made changes
var ids = getIds(spec);
var totalItems = ids.size();
List<RecordChange> recordChanges = getRecordChanges(spec, ids, offset, limit);

var result = makePreview(recordChanges, totalItems, offset, limit, id);
if (spec instanceof Specification.Update) {
result.put("changeSets", ((Specification.Update) spec).getTransform(whelk).getChangeSets());
}

// TODO support turtle etc?
HttpTools.sendResponse(response, result, (String) MimeTypes.getJSONLD());
Expand All @@ -71,24 +76,58 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t
}
}

private Map<Object, Object> makePreview(Specification spec, int offset, int limit, String id) {
Map<Object, Object> result = new LinkedHashMap<>();
result.put(JsonLd.TYPE_KEY, BULK_CHANGE_PREVIEW_TYPE);
// TODO: Should be defined elsewhere (Whelktool)
/**
 * A record as it looks before and after a bulk change has been applied to it.
 * Either side may be {@code null}: a deletion, for instance, is represented
 * with {@code after == null}.
 *
 * @param before the document prior to the change, or {@code null}
 * @param after  the document once the change is applied, or {@code null}
 */
private record RecordChange(Document before, Document after) {
}

/**
 * Produces the before/after pairs for one page of the preview.
 * <p>
 * For updates and deletes the page {@code [offset, offset + limit)} of {@code ids}
 * is bulk-loaded. An update is applied to a clone of each loaded document so the
 * loaded instance stays available as the "before" side; a delete yields a pair
 * whose "after" side is {@code null}. Create and merge specifications currently
 * yield no changes.
 *
 * @param spec   the specification of the bulk job being previewed
 * @param ids    all ids selected by the specification
 * @param offset index of the first id on the requested page
 * @param limit  maximum number of ids on the requested page
 * @return the record changes for the requested page
 */
private List<RecordChange> getRecordChanges(Specification spec, List<String> ids, int offset, int limit) {
    return switch (spec) {
        case Specification.Update update -> {
            var pageIds = slice(ids, offset, offset + limit);
            var loaded = whelk.bulkLoad(pageIds).values();
            yield loaded.stream()
                    .map(original -> {
                        // Modify a copy; the loaded document is kept untouched as the "before" side.
                        var modified = original.clone();
                        update.modify(modified, whelk);
                        return new RecordChange(original, modified);
                    })
                    .toList();
        }
        case Specification.Delete ignored -> {
            var pageIds = slice(ids, offset, offset + limit);
            var loaded = whelk.bulkLoad(pageIds).values();
            yield loaded.stream()
                    .map(doomed -> new RecordChange(doomed, null))
                    .toList();
        }
        case Specification.Create ignored -> Collections.emptyList();
        case Specification.Merge ignored -> Collections.emptyList();
    };
}

var transform = spec.getTransform(whelk);
/**
 * Resolves the ids of all records selected by a specification.
 * <p>
 * Updates are queried by their transform and deletes by their match form.
 * Create and merge specifications currently select nothing.
 *
 * @param spec the specification of the bulk job being previewed
 * @return the selected ids, or an empty list for create and merge
 */
private List<String> getIds(Specification spec) {
    return switch (spec) {
        case Specification.Update update -> {
            var transform = update.getTransform(whelk);
            yield queryIdsByForm(transform);
        }
        case Specification.Delete delete -> {
            var matchForm = delete.getMatchForm(whelk);
            yield queryIdsByForm(matchForm);
        }
        case Specification.Create ignored -> Collections.emptyList();
        case Specification.Merge ignored -> Collections.emptyList();
    };
}

// TODO use COUNT + LIMIT & OFFSET and don't fetch all ids every time
private List<String> queryIdsByForm(Transform transform) {
var sparqlPattern = transform.getSparqlPattern(whelk.getJsonld().context);
var ids = whelk.getSparqlQueryClient().queryIdsByPattern(sparqlPattern).stream().sorted().toList();

var itemIds = slice(ids, offset, offset + limit);
var items = whelk.bulkLoad(itemIds)
.values()
return whelk.getSparqlQueryClient()
.queryIdsByPattern(sparqlPattern)
.stream()
.map(doc -> makePreviewChangeSet(doc, transform))
.sorted()
.toList();
}

private Map<Object, Object> makePreview(List<RecordChange> recordChanges, int totalItems, int offset, int limit, String id) {
Map<Object, Object> result = new LinkedHashMap<>();
result.put(JsonLd.TYPE_KEY, BULK_CHANGE_PREVIEW_TYPE);

var items = recordChanges.stream()
.map(this::makePreviewChangeSet)
.toList();

int totalItems = ids.size();
Offsets offsets = new Offsets(totalItems, limit, offset);
result.putAll(makeLink(id, offset, limit));
if (offsets.hasFirst()) {
Expand All @@ -106,9 +145,6 @@ private Map<Object, Object> makePreview(Specification spec, int offset, int limi
result.put("itemOffset", offset);
result.put("itemsPerPage", limit);
result.put("totalItems", totalItems);
if (spec instanceof Specification.Update) {
result.put("changeSets", transform.getChangeSets());
}
result.put("items", items);

return result;
Expand All @@ -130,39 +166,43 @@ private static Map<String, String> makeLink(String id, int offset, int limit) {

// FIXME mangle the data in a more ergonomic way
@SuppressWarnings("unchecked")
private Map<?,?> makePreviewChangeSet(Document doc, Transform transform) {
var thing = new LinkedHashMap<String, Object>(doc.getThing());
var record = new LinkedHashMap<String, Object>(doc.getRecord());
// Remove @id from record to prevent from being shown as a link in the diff view
record.remove(ID_KEY);
thing.put(RECORD_KEY, record);
if (transform instanceof Transform.MatchForm) {
var result = getChangeSetsMap(List.of(doc));
((Map<String,Object>) DocumentUtil.getAtPath(result, List.of("changeSets", 0))).put("version", doc.getThing());
return result;
private Map<?,?> makePreviewChangeSet(RecordChange recordChange) {
Document before = recordChange.before();
Document after = recordChange.after();
String id = null;
if (before != null) {
// Remove @id from record to prevent from being shown as a link in the diff view
id = (String) before.getRecord().remove(ID_KEY);
before.getThing().put(RECORD_KEY, before.getRecord());
} else {
// If there is no before version, create one with an empty main entity
before = new Document(Map.of(GRAPH_KEY, List.of(after.getRecord(), Collections.emptyMap())));
}
if (after != null) {
id = (String) after.getRecord().remove(ID_KEY);
after.getThing().put(RECORD_KEY, before.getRecord());
} else {
// If there is no after version, create one with an empty main entity
after = new Document(Map.of(GRAPH_KEY, List.of(before.getRecord(), Collections.emptyMap())));
}
var modified = new ModifiedThing(thing, transform, whelk.getJsonld().repeatableTerms);
var beforeDoc = doc.clone();
var afterDoc = doc.clone();
((List<Map<?,?>>) beforeDoc.data.get(JsonLd.GRAPH_KEY)).set(1, modified.getBefore());
((List<Map<?,?>>) afterDoc.data.get(JsonLd.GRAPH_KEY)).set(1, modified.getAfter());
var result = getChangeSetsMap(List.of(beforeDoc, afterDoc));
((Map<String,Object>) DocumentUtil.getAtPath(result, List.of("changeSets", 0))).put("version", modified.getBefore());
((Map<String,Object>) DocumentUtil.getAtPath(result, List.of("changeSets", 1))).put("version", modified.getAfter());
var result = getChangeSetsMap(before, after, id);
((Map<String,Object>) DocumentUtil.getAtPath(result, List.of("changeSets", 0))).put("version",
before.getThing());
((Map<String,Object>) DocumentUtil.getAtPath(result, List.of("changeSets", 1))).put("version",
after.getThing());

return result;
}


@SuppressWarnings("unchecked")
private Map<?, ?> getChangeSetsMap(List<Document> docs) {
var beforeDoc = docs.getFirst();
var versions = docs.stream()
.map(version -> new DocumentVersion(version, "", ""))
.toList();
private Map<?, ?> getChangeSetsMap(Document before, Document after, String id) {
var versions = List.of(
new DocumentVersion(before, "", ""),
new DocumentVersion(after, "", "")
);
History history = new History(versions, whelk.getJsonld());
var result = history.m_changeSetsMap;
result.put(ID_KEY, beforeDoc.getCompleteId());
result.put(ID_KEY, id);
return result;
}

Expand Down
25 changes: 4 additions & 21 deletions whelktool/src/main/groovy/whelk/datatool/WhelkTool.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -912,29 +912,12 @@ class DocumentItem {
whelk.storage.loadAllVersions(doc.shortId)
}

Map asCard(boolean withSearchKey = false) {
return whelk.jsonld.toCard(doc.data, false, withSearchKey)
}

boolean modify(Map matchForm, Map targetForm) {
Map thing = (Map) this.graph[1]
thing[RECORD_KEY] = (Map) this.graph[0]

var m = new ModifiedThing(
thing,
new Transform(matchForm, targetForm, whelk),
whelk.jsonld.repeatableTerms)

this.graph[1] = m.after
this.graph[0] = m.after.remove(RECORD_KEY)

return m.isModified()
/**
 * Looks up, via storage, the dependers of this item's document
 * (identified by its short id).
 */
def getDependers() {
    def storage = whelk.storage
    return storage.getDependers(doc.shortId)
}

boolean matches(Map matchForm) {
Map thing = (Map) this.graph[1]
thing[RECORD_KEY] = (Map) this.graph[0]
return new Transform.MatchForm(matchForm, whelk).matches(thing)
/**
 * Converts this item's document data to its card form.
 *
 * @param withSearchKey forwarded to {@code toCard}; defaults to {@code false}
 * @return the card representation produced by {@code whelk.jsonld.toCard}
 */
Map asCard(boolean withSearchKey = false) {
    def jsonld = whelk.jsonld
    return jsonld.toCard(doc.data, false, withSearchKey)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,10 +356,6 @@ class Transform {
}

boolean mapMatches(Map matchForm, Map bNode) {
return _mapMatches(matchForm, bNode)
}

private boolean _mapMatches(Map matchForm, Map bNode) {
if (matchForm == null || bNode == null) {
return false
}
Expand Down Expand Up @@ -389,7 +385,7 @@ class Transform {
if (matchForm.size() > bNode.size()) {
return false
}
return comparator.isSubset(matchForm, bNode, this::_mapMatches)
return comparator.isSubset(matchForm, bNode, this::mapMatches)
}

private boolean exactMatches(Map matchForm, Map bNode) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.util.List;
import java.util.Map;

import static whelk.JsonLd.ID_KEY;
import static whelk.util.JsonLdKey.fromKey;

// All terms are defined in https://github.com/libris/definitions/blob/develop/source/vocab/platform.ttl
Expand All @@ -18,7 +19,8 @@ public class BulkJobDocument extends Document {
public enum SpecType implements JsonLdKey {
Update("bulk:Update"),
Delete("bulk:Delete"),
Create("bulk:Create");
Create("bulk:Create"),
Merge("bulk:Merge");

private final String key;

Expand Down Expand Up @@ -59,6 +61,8 @@ public String key() {
public static final String TARGET_FORM_KEY = "bulk:targetForm";
public static final String COMMENT_KEY = "comment";
public static final String LABEL_KEY = "label";
public static final String KEEP_KEY = "bulk:keep";
public static final String DEPRECATE_KEY = "bulk:deprecate";

private static final List<Object> STATUS_PATH = List.of(JsonLd.GRAPH_KEY, 1, STATUS_KEY);
private static final List<Object> UPDATE_TIMESTAMP_PATH = List.of(JsonLd.GRAPH_KEY, 1, SHOULD_UPDATE_TIMESTAMP_KEY);
Expand Down Expand Up @@ -120,6 +124,10 @@ public Specification getSpecification() {
case SpecType.Create -> new Specification.Create(
get(spec, TARGET_FORM_KEY, Collections.emptyMap())
);
case SpecType.Merge -> new Specification.Merge(
get(spec, List.of(DEPRECATE_KEY, "*", ID_KEY), Collections.emptyList()),
get(spec, List.of(KEEP_KEY, ID_KEY), "")
);
case null -> throw new ModelValidationException(String.format("Bad %s %s: %s",
CHANGE_SPEC_KEY, JsonLd.TYPE_KEY, specType));
};
Expand Down
Loading
Loading