diff --git a/rest/src/main/groovy/whelk/rest/api/SearchUtils.groovy b/rest/src/main/groovy/whelk/rest/api/SearchUtils.groovy index 499a7f9e1f..3e8654c063 100644 --- a/rest/src/main/groovy/whelk/rest/api/SearchUtils.groovy +++ b/rest/src/main/groovy/whelk/rest/api/SearchUtils.groovy @@ -16,9 +16,11 @@ import whelk.search.ElasticFind import whelk.search.RangeParameterPrefix import whelk.util.DocumentUtil +import static whelk.search.ESQuery.Connective.AND +import static whelk.search.ESQuery.Connective.OR + @Log class SearchUtils { - final static int DEFAULT_LIMIT = 200 final static int MAX_LIMIT = 4000 final static int DEFAULT_OFFSET = 0 @@ -165,10 +167,16 @@ class SearchUtils { return item } } - + + def multiSelectable = ESQuery.multiSelectFacets(queryParameters) def aggregations = ((Map) esResult['aggregations']) - def selectedFacets = ((Map) mappingsAndPageParams.v2) + def selectedFacets = ((Map) mappingsAndPageParams.v2) + + // Filter out already selected facets (if not in multi-select group) selectedFacets.each { k, v -> + if (k in multiSelectable) { + return + } k = stripPrefix((String) k, ESQuery.AND_PREFIX) k = stripPrefix((String) k, ESQuery.OR_PREFIX) ((List) aggregations[k]?['buckets'])?.removeIf { it['key'] in v } @@ -178,7 +186,8 @@ class SearchUtils { if (addStats == null || (addStats == 'true' || addStats == 'on')) { stats = buildStats(lookup, aggregations, makeFindUrl(SearchType.ELASTIC, stripNonStatsParams(pageParams)), - (total > 0 && !predicates) ? reverseObject : null) + (total > 0 && !predicates) ? reverseObject : null, + multiSelectable.collectEntries{ [(it) : selectedFacets[it] ?: []] } as Map) } if (!stats) { log.debug("No stats found for query: ${queryParameters}") @@ -341,11 +350,11 @@ class SearchUtils { * Build aggregation statistics for ES result. * */ - private Map buildStats(Lookup lookup, Map aggregations, String baseUrl, String reverseObject) { - return addSlices(lookup, [:], aggregations, baseUrl, reverseObject) + private Map buildStats(Lookup lookup, Map aggregations, String baseUrl, String reverseObject, Map multiSelected) { + return addSlices(lookup, [:], aggregations, baseUrl, reverseObject, multiSelected) } - private Map addSlices(Lookup lookup, Map stats, Map aggregations, String baseUrl, String reverseObject) { + private Map addSlices(Lookup lookup, Map stats, Map aggregations, String baseUrl, String reverseObject, Map multiSelected) { Map sliceMap = aggregations.inject([:]) { acc, key, aggregation -> String baseUrlForKey = removeWildcardForKey(baseUrl, key) List observations = [] @@ -354,13 +363,30 @@ class SearchUtils { aggregation['buckets'].each { bucket -> String itemId = bucket['key'] - String searchPageUrl = "${baseUrlForKey}&${ESQuery.AND_PREFIX}${makeParam(key, itemId)}" - Map observation = ['totalItems': bucket.getAt('doc_count'), - 'view': [(JsonLd.ID_KEY): searchPageUrl], - 'object': lookup.chip(itemId)] - - observations << observation + if (key in multiSelected.keySet()) { + String param = makeParam(key, itemId) + boolean isSelected = escapeQueryParam(itemId) in multiSelected[key] + String searchPageUrl = isSelected + ? baseUrlForKey.replace("&${param}", '') // FIXME: generate up-link in a cleaner way + : "${baseUrlForKey}&${param}" + + Map observation = ['totalItems': bucket.getAt('doc_count'), + 'view': [(JsonLd.ID_KEY): searchPageUrl], + '_selected': isSelected, + 'object': lookup.chip(itemId)] + + observations << observation + } + else { + String searchPageUrl = "${baseUrlForKey}&${ESQuery.AND_PREFIX}${makeParam(key, itemId)}" + + Map observation = ['totalItems': bucket.getAt('doc_count'), + 'view': [(JsonLd.ID_KEY): searchPageUrl], + 'object': lookup.chip(itemId)] + + observations << observation + } } if (observations) { @@ -803,7 +829,8 @@ class SearchUtils { } String key = path.join('.') int limit = slice['itemLimit'] - statsfind[key] = ['sort': 'value', 'sortOrder': 'desc', 'size': limit] + def connective = slice['connective']?.equals(OR.toString()) ? OR : AND + statsfind[key] = ['sort': 'value', 'sortOrder': 'desc', 'size': limit, 'connective': connective] } return statsfind } diff --git a/rest/src/test/groovy/whelk/rest/api/SearchUtilsSpec.groovy b/rest/src/test/groovy/whelk/rest/api/SearchUtilsSpec.groovy index 1383d7a429..081c64ca40 100644 --- a/rest/src/test/groovy/whelk/rest/api/SearchUtilsSpec.groovy +++ b/rest/src/test/groovy/whelk/rest/api/SearchUtilsSpec.groovy @@ -6,6 +6,10 @@ import spock.lang.Specification import whelk.JsonLd import whelk.exception.InvalidQueryException import whelk.rest.api.SearchUtils.SearchType +import whelk.search.ESQuery + +import static whelk.search.ESQuery.Connective.AND +import static whelk.search.ESQuery.Connective.OR class SearchUtilsSpec extends Specification { @@ -313,17 +317,17 @@ class SearchUtilsSpec extends Specification { def slices = [ [ "dimensionChain": [["inverseOfTerm": "itemOf"], "heldBy"], "itemLimit": 1000 ], [ "dimensionChain": ["instanceOf", "language"], "itemLimit": 100 ], - [ "dimensionChain": ["publication", "year"], "itemLimit": 500 ], + [ "dimensionChain": ["publication", "year"], "itemLimit": 500, connective: "OR" ], [ "dimensionChain": ["rdf:type"], "itemLimit": 100 ], [ "dimensionChain": ["inScheme"], "itemLimit": 100 ], ] def expected = [ - // NOTE: the .@id should be added if the leafs are defined as ObjectProperty - '@reverse.itemOf.heldBy': [sort: 'value', sortOrder: 'desc', size:1000], // .@id - 'instanceOf.language': [sort: 'value', sortOrder: 'desc', size:100], // .@id - 'publication.year': [sort: 'value', sortOrder: 'desc', size:500], - '@type': [sort: 'value', sortOrder: 'desc', size:100], - 'inScheme': [sort: 'value', sortOrder: 'desc', size:100], // .@id + // NOTE: the .@id should be added if the leafs are defined as ObjectProperty + '@reverse.itemOf.heldBy': [sort: 'value', sortOrder: 'desc', size:1000, connective: AND], // .@id + 'instanceOf.language': [sort: 'value', sortOrder: 'desc', size:100, connective: AND], // .@id + 'publication.year': [sort: 'value', sortOrder: 'desc', size:500, connective: OR], + '@type': [sort: 'value', sortOrder: 'desc', size:100, connective: AND], + 'inScheme': [sort: 'value', sortOrder: 'desc', size:100, connective: AND], // .@id ] expect: search.buildStatsReprFromSliceSpec(slices) == expected diff --git a/whelk-core/src/main/groovy/whelk/search/ESQuery.groovy b/whelk-core/src/main/groovy/whelk/search/ESQuery.groovy index 6135b51c94..f3cfd8dfcd 100644 --- a/whelk-core/src/main/groovy/whelk/search/ESQuery.groovy +++ b/whelk-core/src/main/groovy/whelk/search/ESQuery.groovy @@ -7,7 +7,6 @@ import groovy.transform.TypeCheckingMode import groovy.util.logging.Log4j2 as Log import whelk.JsonLd import whelk.Whelk -import whelk.component.ElasticSearch import whelk.exception.InvalidQueryException import whelk.util.DocumentUtil import whelk.util.Unicode @@ -18,6 +17,11 @@ import static whelk.util.Jackson.mapper @CompileStatic @Log class ESQuery { + enum Connective { + AND, + OR + } + private Whelk whelk private JsonLd jsonld private Set keywordFields @@ -34,6 +38,8 @@ class ESQuery { private static final String NOT_PREFIX = 'not-' private static final String EXISTS_PREFIX = 'exists-' + private static final String FILTERED_AGG = 'a' + private static final Map recordsOverCacheRecordsBoost = [ 'bool': ['should': [ ['constant_score': [ @@ -88,7 +94,7 @@ class ESQuery { @CompileStatic(TypeCheckingMode.SKIP) Map doQuery(Map queryParameters, suggest = null) { Map esQuery = getESQuery(queryParameters, suggest) - Map esResponse = hideKeywordFields(whelk.elastic.query(esQuery)) + Map esResponse = hideKeywordFields(moveFilteredAggregationsToTopLevel(whelk.elastic.query(esQuery))) if ('esQuery' in queryParameters.get('_debug')) { esResponse._debug = [esQuery: esQuery] } @@ -98,7 +104,7 @@ class ESQuery { @CompileStatic(TypeCheckingMode.SKIP) Map doQueryIds(Map queryParameters) { Map esQuery = getESQuery(queryParameters) - Map esResponse = hideKeywordFields(whelk.elastic.queryIds(esQuery)) + Map esResponse = hideKeywordFields(moveFilteredAggregationsToTopLevel(whelk.elastic.queryIds(esQuery))) if ('esQuery' in queryParameters.get('_debug')) { esResponse._debug = [esQuery: esQuery] } @@ -122,6 +128,7 @@ class ESQuery { List siteFilter // any k=v param - FILTER query (same key => OR, different key => AND) List filters + Map multiSelectFilters if (suggest && !whelk.jsonld.locales.contains(suggest)) { throw new InvalidQueryException("Parameter '_suggest' value '${suggest}' invalid, must be one of ${whelk.jsonld.locales}") @@ -136,13 +143,13 @@ class ESQuery { if (queryParameters.containsKey('o')) { queryParameters.put('_links', queryParameters.get('o')) } - + q = Unicode.normalizeForSearch(getQueryString(queryParameters)) (limit, offset) = getPaginationParams(queryParameters) sortBy = getSortClauses(queryParameters) siteFilter = getSiteFilter(queryParameters) - aggQuery = getAggQuery(queryParameters) - filters = getFilters(queryParameters) + (filters, multiSelectFilters) = getFilters(queryParameters) + aggQuery = getAggQuery(queryParameters, multiSelectFilters) def isSimple = isSimple(q) String queryMode = isSimple ? 'simple_query_string' : 'query_string' @@ -270,6 +277,10 @@ class ESQuery { query['aggs'] = aggQuery } + if (multiSelectFilters) { + query['post_filter'] = ['bool': ['must' : multiSelectFilters.values()]] + } + query['track_total_hits'] = true return query @@ -515,10 +526,11 @@ class ESQuery { * */ @CompileStatic(TypeCheckingMode.SKIP) - List getFilters(Map queryParameters) { + Tuple2 getFilters(Map queryParameters) { def queryParametersCopy = new HashMap<>(queryParameters) List filters = [] List filtersForNot = [] + Map multiSelectFilters = [:] def (handledParameters, rangeFilters) = makeRangeFilters(queryParametersCopy) handledParameters.each {queryParametersCopy.remove(it)} @@ -546,8 +558,15 @@ class ESQuery { } } - getOrGroups(notNested).each { m -> - filters << createBoolFilter(m) + Set multiSelectable = multiSelectFacets(queryParameters) + getOrGroups(notNested).each { Map m -> + if (m.size() == 1 && m.keySet().first() in multiSelectable) { + multiSelectFilters[m.keySet().first()] = createBoolFilter(m) + } + else { + filters << createBoolFilter(m) + } + } notNestedGroupsForNot.each { m -> filtersForNot << createBoolFilter(m) @@ -561,11 +580,7 @@ class ESQuery { allFilters['must_not'] = filtersForNot } - if (allFilters) { - return [['bool': allFilters]] - } else { - return null - } + return new Tuple2(allFilters ? [['bool': allFilters]] : null, multiSelectFilters) } private getPrefixIfExists(String key) { @@ -771,25 +786,20 @@ class ESQuery { * */ @CompileStatic(TypeCheckingMode.SKIP) - Map getAggQuery(Map queryParameters) { - if (!('_statsrepr' in queryParameters)) { + Map getAggQuery(Map queryParameters, Map multiSelectFilters) { + Map statsrepr = getStatsRepr(queryParameters) + if (statsrepr.isEmpty()) { Map defaultQuery = [(JsonLd.TYPE_KEY): ['terms': ['field': JsonLd.TYPE_KEY]]] return defaultQuery } - - Map statsrepr = mapper.readValue(queryParameters.get('_statsrepr')[0], Map) - - return buildAggQuery(statsrepr) + return buildAggQuery(statsrepr, multiSelectFilters) } @CompileStatic(TypeCheckingMode.SKIP) - private Map buildAggQuery(def tree, int size=10) { + private Map buildAggQuery(def tree, Map multiSelectFilters, int size=10) { Map query = [:] List keys = [] - // In Python, the syntax for iterating over each item in a - // list and for iterating over each key in a dict is the - // same. That's not the case for Groovy, hence the following if (tree instanceof Map) { keys = tree.keySet() as List } else if (tree instanceof List) { @@ -799,18 +809,38 @@ class ESQuery { keys.each { key -> String sort = tree[key]?.sort =='key' ? '_key' : '_count' def sortOrder = tree[key]?.sortOrder =='asc' ? 'asc' : 'desc' + def filters = multiSelectFilters.findAll { it.key != key }.values() String termPath = getInferredTermPath(key) - query[termPath] = ['terms': [ - 'field': termPath, - 'size': tree[key]?.size ?: size, - 'order': [(sort):sortOrder]]] + + query[termPath] = [ + 'aggs' : [ + (FILTERED_AGG): ['terms': [ + 'field': termPath, + 'size' : tree[key]?.size ?: size, + 'order': [(sort): sortOrder]] + ] + ], + 'filter': ['bool': ['must': filters]] + ] if (tree[key].subItems instanceof Map) { - query[termPath]['aggs'] = buildAggQuery(tree[key].subItems, size) + query[termPath]['aggs'] = buildAggQuery(tree[key].subItems, multiSelectFilters, size) } } return query } + @CompileStatic(TypeCheckingMode.SKIP) + static Set multiSelectFacets(Map queryParameters) { + getStatsRepr(queryParameters).findResults { key, value -> + value['connective'] == Connective.OR.toString() ? key : null + } as Set + } + + @CompileStatic(TypeCheckingMode.SKIP) + private static Map getStatsRepr(Map queryParameters) { + mapper.readValue(queryParameters.get('_statsrepr')?[0] ?: '{}', Map) + } + /** * Construct "range query" filters for parameters prefixed with any {@link RangeParameterPrefix} * Ranges for different parameters are ANDed together @@ -931,6 +961,21 @@ class ESQuery { return result } + static Map moveFilteredAggregationsToTopLevel(Map esResponse) { + if (!esResponse['aggregations']) { + return esResponse + } + + Map aggregations = (Map) esResponse['aggregations'] + aggregations.keySet().each { k -> + if (aggregations[k][FILTERED_AGG]) { + aggregations[k] = aggregations[k][FILTERED_AGG] + } + } + + return esResponse + } + /** * Hide any `.keyword` field in ES response. * diff --git a/whelk-core/src/test/groovy/whelk/search/ESQuerySpec.groovy b/whelk-core/src/test/groovy/whelk/search/ESQuerySpec.groovy index 5dd44f0e2e..43ba18b2c6 100644 --- a/whelk-core/src/test/groovy/whelk/search/ESQuerySpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search/ESQuerySpec.groovy @@ -1,5 +1,6 @@ package whelk.search +import groovy.json.JsonOutput import spock.lang.Specification import whelk.JsonLd @@ -68,8 +69,10 @@ class ESQuerySpec extends Specification { } def "should get filters"(Map params, List result) { + given: + def (filters, postFilters) = es.getFilters(params) expect: - es.getFilters(params) == result + filters == result where: params | result [:] | null @@ -179,22 +182,14 @@ class ESQuerySpec extends Specification { Map emptyAggs = [:] Map emptyAggsResult = [(JsonLd.TYPE_KEY): ['terms': ['field': JsonLd.TYPE_KEY]]] Map simpleAggs = ['_statsrepr': ['{"foo": {"sort": "key", "sortOrder": "desc", "size": 5}}']] - Map simpleAggsResult = ['foo': ['terms': ['field': 'foo', 'size': 5, 'order': ['_key': 'desc']]]] + Map simpleAggsResult = ['foo':['aggs':['a':['terms':['field':'foo', 'size':5, 'order':['_key':'desc']]]], 'filter':['bool':['must':[]]]]] Map subAggs = ['_statsrepr': ['{"bar": {"subItems": {"foo": {"sort": "key"}}}}']] // `bar` has a keyword field in the mappings - Map subAggsResult = ['bar.keyword': ['terms': ['field': 'bar.keyword', - 'size': 10, - 'order': ['_count': 'desc']], - 'aggs': [ - 'foo': ['terms': ['field': 'foo', - 'size': 10, - 'order': ['_key': 'desc']]] - ]]] - + Map subAggsResult = ['bar.keyword':[aggs:[foo:[aggs:[a:[terms:[field:'foo', size:10, order:[_key:'desc']]]], filter:[bool:[must:[]]]]], filter:[bool:[must:[]]]]] then: - es.getAggQuery(emptyAggs) == emptyAggsResult - es.getAggQuery(simpleAggs) == simpleAggsResult - es.getAggQuery(subAggs) == subAggsResult + es.getAggQuery(emptyAggs, [:]) == emptyAggsResult + JsonOutput.toJson(es.getAggQuery(simpleAggs, [:])) == JsonOutput.toJson(simpleAggsResult) + JsonOutput.toJson(es.getAggQuery(subAggs, [:])) == JsonOutput.toJson(subAggsResult) } def "should get keyword fields"() {