diff --git a/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java b/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java index 47f2f549f2..c893941cc3 100644 --- a/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java +++ b/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java @@ -64,7 +64,7 @@ Map doSearch(Map queryParameters) throws Inval Map esQueryDsl = getEsQueryDsl(qTree, queryParams, appParams.statsRepr); - QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl)); + QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl), queryParams.debug); Map partialCollectionView = getPartialCollectionView(queryRes, qTree, queryParams, appParams); @@ -73,7 +73,7 @@ Map doSearch(Map queryParameters) throws Inval debugView.put(QueryParams.Debug.ES_QUERY, esQueryDsl); } if (queryParams.debug.contains(QueryParams.Debug.ES_SCORE)) { - debugView.put(QueryParams.Debug.ES_SCORE, queryRes.scores); + debugView.put(QueryParams.Debug.ES_SCORE, queryRes.collectScores()); } if (!debugView.isEmpty()) { partialCollectionView.put(QueryParams.ApiParams.DEBUG, debugView); diff --git a/whelk-core/src/main/groovy/whelk/search2/QueryResult.java b/whelk-core/src/main/groovy/whelk/search2/QueryResult.java index 89a5c1b167..9a685c2e8c 100644 --- a/whelk-core/src/main/groovy/whelk/search2/QueryResult.java +++ b/whelk-core/src/main/groovy/whelk/search2/QueryResult.java @@ -2,68 +2,78 @@ import whelk.Document; import whelk.JsonLd; +import whelk.util.DocumentUtil; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.function.Function; +import java.util.function.Predicate; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static whelk.search2.QueryUtil.castToStringObjectMap; import static whelk.util.DocumentUtil.getAtPath; +import static whelk.util.DocumentUtil.traverse; public class QueryResult { public final int numHits; - private final List esItems; public final List aggs; public final List pAggs; public final List spell; - public final List> scores; - public QueryResult(Map esResponse) { + private final List esItems; + private final List debug; + + public QueryResult(Map esResponse, List debug) { var normResponse = normalizeResponse(esResponse); + this.debug = debug; this.numHits = getNumHits(normResponse); this.esItems = collectEsItems(normResponse); this.aggs = Aggs.collectAggResult(normResponse); this.pAggs = Aggs.collectPAggResult(normResponse); this.spell = Spell.collectSuggestions(normResponse); - this.scores = collectScores(normResponse); + } + + public QueryResult(Map esResponse) { + this(esResponse, List.of()); } public List> collectItems(Function, Map> applyLens) { return esItems.stream().map(item -> item.toLd(applyLens)).toList(); } + public List> collectScores() { + return esItems.stream().map(EsItem::getScoreData).filter(Predicate.not(Map::isEmpty)).toList(); + } + private static int getNumHits(Map esResponse) { return (int) getAtPath(esResponse, List.of("hits", "total", "value"), 1); } - private static List collectEsItems(Map esResponse) { + private List collectEsItems(Map esResponse) { return ((List) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList())) .stream() .map(Map.class::cast) .map(hit -> { var item = castToStringObjectMap(hit.get("_source")); item.put("_id", hit.get("_id")); + if (debug.contains(QueryParams.Debug.ES_SCORE)) { + item.put("_score", hit.get("_score")); + item.put("_explanation", hit.get("_explanation")); + } return item; }) .map(EsItem::new) .toList(); } - private static List> collectScores(Map esResponse) { - return ((List) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList())) - .stream() - .filter(m -> ((Map) m).get("_score") != null) - .map(QueryUtil::castToStringObjectMap) - .filter(m -> m.keySet().retainAll(List.of("_id", "_score", "_explanation"))) - .toList(); - } - private static Map normalizeResponse(Map esResponse) { var norm = new LinkedHashMap(); esResponse.forEach((k, v) -> @@ -76,29 +86,42 @@ private static Map normalizeResponse(Map esResponse) { return norm; } - static class EsItem { - private final Map map; - - EsItem(Map map) { - this.map = map; - } - + private record EsItem(Map map) { private Map toLd(Function, Map> applyLens) { LdItem ldItem = new LdItem(applyLens.apply(map)); + // ISNIs and ORCIDs are indexed with and without spaces, remove the one with spaces. ldItem.normalizeIsniAndOrcid(); // reverseLinks must be re-added because they might get filtered out in applyLens(). getReverseLinks().ifPresent(ldItem::addReverseLinks); + + getScoreExplanation().ifPresent(ldItem::addScore); + return ldItem.map; } + private Map getScoreData() { + Map scoreMap = new LinkedHashMap<>(); + if (map.get("_score") != null) { + scoreMap.put("_id", map.get("_id")); + scoreMap.put("_score", map.get("_score")); + scoreMap.put("_explanation", map.get("_explanation")); + } + return scoreMap; + } + private Optional> getReverseLinks() { return Optional.ofNullable(map.get("reverseLinks")) .map(QueryUtil::castToStringObjectMap); } + + private Optional> getScoreExplanation() { + return Optional.ofNullable(map.get("_explanation")) + .map(QueryUtil::castToStringObjectMap); + } } - static class LdItem { + private static class LdItem { private final Map map; LdItem(Map map) { @@ -127,6 +150,44 @@ private void addReverseLinks(Map reverseLinks) { map.put("reverseLinks", reverseLinks); } + private void addScore(Map scoreExplanation) { + var scorePerField = getScorePerField(scoreExplanation); + if (!scorePerField.isEmpty()) { + var totalScore = scorePerField.values().stream().reduce((double) 0, Double::sum); + map.put("_debug", Map.of("_score", Map.of("_total", totalScore, "_perField", scorePerField))); + } + } + + private static Map getScorePerField(Map scoreExplanation) { + Map scorePerField = new HashMap<>(); + + traverse(scoreExplanation, (value, path) -> { + if (value instanceof Map m) { + String description = (String) m.get("description"); + if (description.contains("[PerFieldSimilarity]")) { + Double score = (Double) m.get("value"); + if (score > 0) { + scorePerField.put(parseField(description), score); + } + } + } + return new DocumentUtil.Nop(); + }); + + return scorePerField.entrySet() + .stream() + .sorted(Map.Entry.comparingByValue(Collections.reverseOrder())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (o, n) -> n, LinkedHashMap::new)); + } + + private static String parseField(String description) { + Matcher m = Pattern.compile("^weight\\(.+:((\".+\")|[^ ]+)").matcher(description); + if (m.find()) { + return m.group().replace("weight(", ""); + } + return description; + } + private static String makeFindOLink(String iri) { return Document.getBASE_URI() .resolve("find?o=" + URLEncoder.encode(iri, StandardCharsets.UTF_8))