From 4755c9ed90234f05e4d73db4cf0b06e7cf393648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 17 Dec 2024 11:27:37 +0100 Subject: [PATCH] Add functionality for sorting facets/observations into a tree --- .../groovy/whelk/rest/api/SearchUtils2.java | 4 +- .../main/groovy/whelk/search2/FacetTree.java | 68 ++++++++ .../src/main/groovy/whelk/search2/Stats.java | 11 +- .../groovy/whelk/search2/FacetTreeSpec.groovy | 164 ++++++++++++++++++ 4 files changed, 244 insertions(+), 3 deletions(-) create mode 100644 whelk-core/src/main/groovy/whelk/search2/FacetTree.java create mode 100644 whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy diff --git a/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java b/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java index 80d73f8ea9..0658f0f5e3 100644 --- a/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java +++ b/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java @@ -27,10 +27,12 @@ public class SearchUtils2 { private final QueryUtil queryUtil; private final Disambiguate disambiguate; + private final JsonLd jsonLd; SearchUtils2(Whelk whelk) { this.queryUtil = new QueryUtil(whelk); this.disambiguate = new Disambiguate(whelk); + this.jsonLd = whelk.getJsonld(); } Map doSearch(Map queryParameters) throws InvalidQueryException, IOException { @@ -114,7 +116,7 @@ public Map getPartialCollectionView(QueryResult queryResult, view.put("search", Map.of("mapping", List.of(qt.toSearchMapping(queryParams.getNonQueryParams(0))))); view.putAll(Pagination.makeLinks(queryResult.numHits, queryUtil.maxItems(), freeText, fullQuery, queryParams)); view.put("items", queryResult.collectItems(queryUtil.getApplyLensFunc(queryParams))); - view.put("stats", new Stats(disambiguate, queryUtil, qt, queryResult, queryParams, appParams).build()); + view.put("stats", new Stats(disambiguate, queryUtil, qt, queryResult, queryParams, appParams, jsonLd).build()); if (!queryResult.spell.isEmpty()) { view.put("_spell", buildSpellSuggestions(queryResult, qt, queryParams.getNonQueryParams(0))); } diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java new file mode 100644 index 0000000000..6d11eed385 --- /dev/null +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -0,0 +1,68 @@ +package whelk.search2; + +import whelk.JsonLd; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; + +import static whelk.util.DocumentUtil.getAtPath; + +public class FacetTree { + + private final JsonLd jsonLd; + + public FacetTree(JsonLd jsonLd) { + this.jsonLd = jsonLd; + } + + public List> sortObservationsAsTree(List> observations) { + List> tree = new ArrayList<>(); + Queue> queue = new ConcurrentLinkedQueue<>(); + + observations.forEach(observation -> { + var parent = findParent(observation, observations); + + if (parent == null) { + tree.add(observation); + queue.add(observation); + } + }); + + while (!queue.isEmpty()) { + var observation = queue.remove(); + var children = findChildren(observation, observations); + if (!children.isEmpty()) { + queue.addAll(children); + observation.put("children", children); + } + } + return List.copyOf(tree); + } + + private Map findParent(Map observation, List> observations) { + return observations.stream() + .filter(o -> isSubCLass(observation, o)) + .findFirst().orElse(null); + } + + private List> findChildren(Map observation, List> observations) { + return observations.stream() + .filter(o -> isSubCLass(o, observation)) + .collect(Collectors.toList()); + } + + private boolean isSubCLass(Map obsA, Map obsB) { + String idA = get(obsA, List.of("object", "@id"), ""); + String idB = get(obsB, List.of("object", "@id"), ""); + return !idA.equals(idB) && jsonLd.isSubClassOf(idA, idB); + } + + @SuppressWarnings("unchecked") + private static T get(Object m, List path, T defaultTo) { + return (T) getAtPath(m, path, defaultTo); + } +} diff --git a/whelk-core/src/main/groovy/whelk/search2/Stats.java b/whelk-core/src/main/groovy/whelk/search2/Stats.java index fb0af4008b..778065b817 100644 --- a/whelk-core/src/main/groovy/whelk/search2/Stats.java +++ b/whelk-core/src/main/groovy/whelk/search2/Stats.java @@ -34,19 +34,23 @@ public class Stats { private final AppParams appParams; private final QueryTree queryTree; private final QueryUtil queryUtil; + private final JsonLd jsonLd; public Stats(Disambiguate disambiguate, QueryUtil queryUtil, QueryTree queryTree, QueryResult queryResult, QueryParams queryParams, - AppParams appParams) { + AppParams appParams, + JsonLd jsonLd + ) { this.disambiguate = disambiguate; this.queryResult = queryResult; this.queryParams = queryParams; this.appParams = appParams; this.queryTree = queryTree; this.queryUtil = queryUtil; + this.jsonLd = jsonLd; } public Map build() { @@ -123,6 +127,9 @@ private Map buildSliceByDimension(Map(); var isRange = rangeProps.contains(property); var observations = getObservations(buckets, isRange ? queryTree.removeTopLevelPropValueWithRangeIfPropEquals(property) : queryTree, nonQueryParams); + if (property.name().equals(Disambiguate.RDF_TYPE)) { + observations = new FacetTree(jsonLd).sortObservationsAsTree(observations); + } if (!observations.isEmpty()) { if (isRange) { sliceNode.put("search", getRangeTemplate(property, makeParams(nonQueryParams))); @@ -139,7 +146,7 @@ private Map buildSliceByDimension(Map> getObservations(Map buckets, QueryTree qt, Map nonQueryParams) { - List> observations = new ArrayList<>(); + List> observations = new ArrayList<>(); buckets.forEach((pv, count) -> { Map observation = new LinkedHashMap<>(); diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy new file mode 100644 index 0000000000..f69bc1a349 --- /dev/null +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -0,0 +1,164 @@ +package whelk.search2 + +import spock.lang.Specification +import whelk.JsonLd + +class FacetTreeSpec extends Specification { + + JsonLd jsonLd + + void setup() { + jsonLd = GroovyMock(JsonLd.class) + } + + def "Single observation should return list with one observation"() { + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] + } + + def "Sort one parent and one child"() { + given: + jsonLd.isSubClassOf("child", "parent") >> { + true + } + jsonLd.isSubClassOf("parent", "child") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]], + ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "children": [["object": ["@id": "child"]]]]] + } + + def "Sort one parent with two children"() { + given: + jsonLd.isSubClassOf("child1", "parent") >> { + true + } + jsonLd.isSubClassOf("child2", "parent") >> { + true + } + jsonLd.isSubClassOf("parent", "child1") >> { + false + } + jsonLd.isSubClassOf("parent", "child2") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], + "children": [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]]]] + } + + def "Sort one parent with one child that has one child"() { + given: + jsonLd.isSubClassOf("child1", "parent") >> { + true + } + jsonLd.isSubClassOf("child2", "parent") >> { + false + } + jsonLd.isSubClassOf("child2", "child1") >> { + true + } + jsonLd.isSubClassOf("parent", "child1") >> { + false + } + jsonLd.isSubClassOf("parent", "child2") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], + "children": [["object": ["@id": "child1"], + "children": [["object": ["@id": "child2"]]]]]]] + } + + def "One parent, two children"() { + given: + jsonLd.isSubClassOf("child1", "root") >> { + true + } + jsonLd.isSubClassOf("child2", "root") >> { + true + } + jsonLd.isSubClassOf("root", "child1") >> { + false + } + jsonLd.isSubClassOf("root", "child2") >> { + false + } + jsonLd.isSubClassOf("child1", "child2") >> { + false + } + jsonLd.isSubClassOf("child2", "child1") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "root"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "children" : [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]]]] + } + + def "Three root nodes"() { + given: + jsonLd.isSubClassOf(_, _) >> false + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "root1"]], + ["object": ["@id": "root2"]], + ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], + ["object": ["@id": "root2"]], + ["object": ["@id": "root3"]]] + } + + def "Children should not be considered parents of themselves"() { + given: + jsonLd.isSubClassOf(_, _) >> true + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] + } +}