Skip to content

Commit

Permalink
Include score by field for each item in result (#1540)
Browse files Browse the repository at this point in the history
* Include score by field for each item in result

* Include complete explain in each item too
  • Loading branch information
kwahlin authored Dec 18, 2024
1 parent b1a03ec commit 30390c2
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 32 deletions.
13 changes: 3 additions & 10 deletions rest/src/main/groovy/whelk/rest/api/SearchUtils2.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,12 @@ Map<String, Object> doSearch(Map<String, String[]> queryParameters) throws Inval

Map<String, Object> esQueryDsl = getEsQueryDsl(qTree, queryParams, appParams.statsRepr);

QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl));
QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl), queryParams.debug);

Map<String, Object> partialCollectionView = getPartialCollectionView(queryRes, qTree, queryParams, appParams);

Map<String, Object> debugView = new HashMap<>();
if (queryParams.debug.contains(QueryParams.Debug.ES_QUERY)) {
debugView.put(QueryParams.Debug.ES_QUERY, esQueryDsl);
}
if (queryParams.debug.contains(QueryParams.Debug.ES_SCORE)) {
debugView.put(QueryParams.Debug.ES_SCORE, queryRes.scores);
}
if (!debugView.isEmpty()) {
partialCollectionView.put(QueryParams.ApiParams.DEBUG, debugView);
if (!queryParams.debug.contains(QueryParams.Debug.ES_QUERY)) {
partialCollectionView.put(QueryParams.ApiParams.DEBUG, Map.of(QueryParams.Debug.ES_QUERY, esQueryDsl));
}

return partialCollectionView;
Expand Down
89 changes: 67 additions & 22 deletions whelk-core/src/main/groovy/whelk/search2/QueryResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,46 @@

import whelk.Document;
import whelk.JsonLd;
import whelk.util.DocumentUtil;

import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static whelk.search2.QueryUtil.castToStringObjectMap;
import static whelk.util.DocumentUtil.getAtPath;
import static whelk.util.DocumentUtil.traverse;

public class QueryResult {
public final int numHits;
private final List<EsItem> esItems;
public final List<Aggs.Aggregation> aggs;
public final List<Aggs.Bucket> pAggs;
public final List<Spell.Suggestion> spell;
public final List<Map<String, Object>> scores;

public QueryResult(Map<?, ?> esResponse) {
private final List<EsItem> esItems;
private final List<String> debug;

public QueryResult(Map<?, ?> esResponse, List<String> debug) {
var normResponse = normalizeResponse(esResponse);
this.debug = debug;
this.numHits = getNumHits(normResponse);
this.esItems = collectEsItems(normResponse);
this.aggs = Aggs.collectAggResult(normResponse);
this.pAggs = Aggs.collectPAggResult(normResponse);
this.spell = Spell.collectSuggestions(normResponse);
this.scores = collectScores(normResponse);
}

public QueryResult(Map<?, ?> esResponse) {
this(esResponse, List.of());
}

public List<Map<String, Object>> collectItems(Function<Map<String, Object>, Map<String, Object>> applyLens) {
Expand All @@ -42,28 +52,23 @@ private static int getNumHits(Map<String, Object> esResponse) {
return (int) getAtPath(esResponse, List.of("hits", "total", "value"), 1);
}

private static List<EsItem> collectEsItems(Map<String, Object> esResponse) {
private List<EsItem> collectEsItems(Map<String, Object> esResponse) {
return ((List<?>) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList()))
.stream()
.map(Map.class::cast)
.map(hit -> {
var item = castToStringObjectMap(hit.get("_source"));
item.put("_id", hit.get("_id"));
if (debug.contains(QueryParams.Debug.ES_SCORE)) {
item.put("_score", hit.get("_score"));
item.put("_explanation", hit.get("_explanation"));
}
return item;
})
.map(EsItem::new)
.toList();
}

private static List<Map<String, Object>> collectScores(Map<String, Object> esResponse) {
return ((List<?>) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList()))
.stream()
.filter(m -> ((Map<?, ?>) m).get("_score") != null)
.map(QueryUtil::castToStringObjectMap)
.filter(m -> m.keySet().retainAll(List.of("_id", "_score", "_explanation")))
.toList();
}

private static Map<String, Object> normalizeResponse(Map<?, ?> esResponse) {
var norm = new LinkedHashMap<String, Object>();
esResponse.forEach((k, v) ->
Expand All @@ -76,29 +81,32 @@ private static Map<String, Object> normalizeResponse(Map<?, ?> esResponse) {
return norm;
}

static class EsItem {
private final Map<String, Object> map;

EsItem(Map<String, Object> map) {
this.map = map;
}

private record EsItem(Map<String, Object> map) {
private Map<String, Object> toLd(Function<Map<String, Object>, Map<String, Object>> applyLens) {
LdItem ldItem = new LdItem(applyLens.apply(map));

// ISNIs and ORCIDs are indexed with and without spaces, remove the one with spaces.
ldItem.normalizeIsniAndOrcid();
// reverseLinks must be re-added because they might get filtered out in applyLens().
getReverseLinks().ifPresent(ldItem::addReverseLinks);

getScoreExplanation().ifPresent(ldItem::addScore);

return ldItem.map;
}

private Optional<Map<String, Object>> getReverseLinks() {
return Optional.ofNullable(map.get("reverseLinks"))
.map(QueryUtil::castToStringObjectMap);
}

private Optional<Map<String, Object>> getScoreExplanation() {
return Optional.ofNullable(map.get("_explanation"))
.map(QueryUtil::castToStringObjectMap);
}
}

static class LdItem {
private static class LdItem {
private final Map<String, Object> map;

LdItem(Map<String, Object> map) {
Expand Down Expand Up @@ -127,6 +135,43 @@ private void addReverseLinks(Map<String, Object> reverseLinks) {
map.put("reverseLinks", reverseLinks);
}

private void addScore(Map<String, Object> scoreExplanation) {
var scorePerField = getScorePerField(scoreExplanation);
var totalScore = scorePerField.values().stream().reduce((double) 0, Double::sum);
var scoreData = Map.of("_total", totalScore, "_perField", scorePerField, "_explain", scoreExplanation);
map.put("_debug", Map.of("_score", scoreData));
}

private static Map<String, Double> getScorePerField(Map<String, Object> scoreExplanation) {
Map<String, Double> scorePerField = new HashMap<>();

traverse(scoreExplanation, (value, path) -> {
if (value instanceof Map<?, ?> m) {
String description = (String) m.get("description");
if (description.contains("[PerFieldSimilarity]")) {
Double score = (Double) m.get("value");
if (score > 0) {
scorePerField.put(parseField(description), score);
}
}
}
return new DocumentUtil.Nop();
});

return scorePerField.entrySet()
.stream()
.sorted(Map.Entry.comparingByValue(Collections.reverseOrder()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (o, n) -> n, LinkedHashMap::new));
}

private static String parseField(String description) {
Matcher m = Pattern.compile("^weight\\(.+:((\".+\")|[^ ]+)").matcher(description);
if (m.find()) {
return m.group().replace("weight(", "");
}
return description;
}

private static String makeFindOLink(String iri) {
return Document.getBASE_URI()
.resolve("find?o=" + URLEncoder.encode(iri, StandardCharsets.UTF_8))
Expand Down

0 comments on commit 30390c2

Please sign in to comment.