From c28af7b33f609a23b3c2c2e60883abd75726ab91 Mon Sep 17 00:00:00 2001 From: Nicola Tarocco Date: Fri, 30 Aug 2024 18:13:14 +0200 Subject: [PATCH] search: enable AND operator and cross_field - enable searching in multiple fields with AND operator --- .github/workflows/tests.yml | 45 ++++++-------- cds_ils/config.py | 17 ++++++ run-tests.sh | 32 ++++++++-- setup.cfg | 2 +- tests/api/test_documents_search.py | 88 +++++++++++++++++++++++++++ tests/conftest.py | 1 - tests/migrator/rules/test_books.py | 2 - tests/migrator/rules/test_journals.py | 3 +- tests/migrator/rules/test_series.py | 3 +- tests/migrator/rules/test_standard.py | 3 +- tests/migrator/test_fields_utils.py | 3 +- 11 files changed, 154 insertions(+), 45 deletions(-) create mode 100644 tests/api/test_documents_search.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 56c969058..fe87bb8e6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2020 CERN. +# Copyright (C) 2024 CERN. # # CDS-ILS is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -23,7 +23,7 @@ on: default: "Manual trigger" jobs: - Python_Tests: + Python: runs-on: ubuntu-latest strategy: matrix: @@ -35,54 +35,43 @@ jobs: DB: ${{ matrix.db-service }} SEARCH: ${{ matrix.search-service }} EXTRAS: tests + steps: - - name: Install ldap dependencies + - name: Install LDAP dependencies run: | sudo apt-get update sudo apt-get install libsasl2-dev libldap2-dev libssl-dev - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - - name: Generate dependencies - run: | - python -m pip install --upgrade pip py setuptools wheel requirements-builder - requirements-builder -e $EXTRAS --level=pypi setup.py > .pypi-${{ matrix.python-version }}-requirements.txt - - - name: Cache pip - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('.pypi-${{ matrix.python-version }}-requirements.txt') }} + cache: pip + cache-dependency-path: setup.cfg - name: Install dependencies run: | - pip install -r .pypi-${{ matrix.python-version }}-requirements.txt - pip install .[$EXTRAS] + pip install ".[$EXTRAS]" pip freeze - docker --version - docker compose --version + docker version - name: Run tests - run: | - ./run-tests.sh + run: ./run-tests.sh - Node_Tests: - runs-on: ubuntu-20.04 + JS: + runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - - name: Use Node.js v18.x - uses: actions/setup-node@v3 + - name: Use Node.js v14.x + uses: actions/setup-node@v4 with: - node-version: "18.x" + node-version: "14.x" - name: Install & Build working-directory: ./ui diff --git a/cds_ils/config.py b/cds_ils/config.py index 7b08eb16a..fdbc826d4 100644 --- a/cds_ils/config.py +++ b/cds_ils/config.py @@ -87,7 +87,24 @@ def _parse_env_bool(var_name, default=None): def query_params_modifier(extra_params): """Modifier for parameters to dsl Query function.""" + # The `AND` operator, by default, will return hits where the query tokens are all + # in the same field. `cross_fields` must be applied to enable cross fields searches. extra_params["default_operator"] = "AND" + # The `cross_fields` search type requires all searchable fields to have the same + # analyzers. In the example below, `title` and `authors.full_name` must have the + # same analyzer defined in the mappings. + # + # "query_string": { + # "query": "Theory Schwartz", + # "type": "cross_fields", + # "fields": ["title", "authors.full_name", "*"] + # "analyzer": "custom_analyzer" + # } + # + # Explanation: + # "((+title:theory +authors.full_name:theory) | + # (+authors.full_name:schwartz +title:schwartz))" + extra_params["type"] = "cross_fields" ############################################################################### diff --git a/run-tests.sh b/run-tests.sh index e93f7332e..a784cac84 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -*- coding: utf-8 -*- # -# Copyright (C) 2019-2020 CERN. +# Copyright (C) 2019-2024 CERN. # # CDS-ILS is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. @@ -12,13 +12,35 @@ set -o errexit # Quit on unbound symbols set -o nounset -# Always bring down docker services -function cleanup() { +# Define function for bringing down services +function cleanup { eval "$(docker-services-cli down --env)" } -trap cleanup EXIT + +# Check for arguments +# Note: "-k" would clash with "pytest" +keep_services=0 +pytest_args=() +for arg in $@; do + # from the CLI args, filter out some known values and forward the rest to "pytest" + # note: we don't use "getopts" here b/c of some limitations (e.g. long options), + # which means that we can't combine short options (e.g. "./run-tests -Kk pattern") + case ${arg} in + -K|--keep-services) + keep_services=1 + ;; + *) + pytest_args+=( ${arg} ) + ;; + esac +done + +if [[ ${keep_services} -eq 0 ]]; then + trap cleanup EXIT +fi + python -m check_manifest eval "$(docker-services-cli up --db ${DB:-postgresql} --search ${SEARCH:-opensearch2} --cache ${CACHE:-redis} --env)" -python -m pytest +python -m pytest ${pytest_args[@]+"${pytest_args[@]}"} tests_exit_code=$? exit "$tests_exit_code" diff --git a/setup.cfg b/setup.cfg index e35ce941d..ae1c4e6cf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,7 +27,7 @@ zip_safe = False install_requires = fuzzywuzzy>=0.18.0 python-ldap>=3.4.0,<3.5.0 - invenio-app-ils[lorem,opensearch2,postgresql]==4.0.0rc1 + invenio-app-ils[lorem,opensearch2]==4.1.0 sentry-sdk>=0.10.2 # migrator deps cds-dojson==0.9.0 diff --git a/tests/api/test_documents_search.py b/tests/api/test_documents_search.py new file mode 100644 index 000000000..2b6aa8d7a --- /dev/null +++ b/tests/api/test_documents_search.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# CDS-ILS is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. + +"""Tests search OR/AND operators. + +These tests asserts that when using the AND operator, the `cross_fields` +parameter is required. The `Dreams` word is contained in the `title`, the +`Caroline` word in the author. +The assumption here is that the documents index has the same analyzers +for all searchable fields. +""" + +from copy import deepcopy + +import pytest +from flask import url_for + +from cds_ils import config + + +@pytest.fixture(scope="module") +def app_config_or(app_config): + """Change config to use the search OR operator.""" + + def and_operator(extra_params={}): + extra_params["default_operator"] = "OR" + + app_config["RECORDS_REST_ENDPOINTS"] = deepcopy(config.RECORDS_REST_ENDPOINTS) + app_config["RECORDS_REST_ENDPOINTS"]["docid"]["search_query_parser"] = and_operator + return app_config + + +def test_search_or_operator(app_config_or, app, client, testdata, json_headers): + """Test searching using OR operator.""" + url = url_for("invenio_records_rest.docid_list") + # test that searching with OR operator will find the document + response = client.get(f"{url}?q=Dreams%20Caroline", headers=json_headers) + assert response.status_code == 200 + assert response.get_json()["hits"]["total"] == 1 + + +@pytest.fixture(scope="module") +def app_config_and(app_config): + """Change config to use the search AND operator.""" + + def and_operator(extra_params={}): + extra_params["default_operator"] = "AND" + + app_config["RECORDS_REST_ENDPOINTS"] = deepcopy(config.RECORDS_REST_ENDPOINTS) + app_config["RECORDS_REST_ENDPOINTS"]["docid"]["search_query_parser"] = and_operator + return app_config + + +def test_search_and_operator(app_config_only_and, app, client, testdata, json_headers): + """Test searching using AND operator.""" + url = url_for("invenio_records_rest.docid_list") + # test that searching with AND operator without cross-fields will return 0 results + response = client.get(f"{url}?q=Dreams%20Caroline", headers=json_headers) + assert response.status_code == 200 + assert response.get_json()["hits"]["total"] == 0 + + +@pytest.fixture(scope="module") +def app_config_and_cross_fields(app_config): + """Change config to use the search AND operator and cross_fields.""" + + def and_operator(extra_params={}): + extra_params["default_operator"] = "AND" + extra_params["type"] = "cross_fields" + + app_config["RECORDS_REST_ENDPOINTS"] = deepcopy(config.RECORDS_REST_ENDPOINTS) + app_config["RECORDS_REST_ENDPOINTS"]["docid"]["search_query_parser"] = and_operator + return app_config + + +def test_search_and_operator( + app_config_and_cross_fields, app, client, testdata, json_headers +): + """Test searching using AND operator.""" + url = url_for("invenio_records_rest.docid_list") + # test that searching with AND operator with cross-fields will return the result + response = client.get(f"{url}?q=Dreams%20Caroline", headers=json_headers) + assert response.status_code == 200 + assert response.get_json()["hits"]["total"] == 1 diff --git a/tests/conftest.py b/tests/conftest.py index cc53e9499..aa3702b27 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,6 @@ # the terms of the MIT License; see LICENSE file for more details. """Common pytest fixtures and plugins.""" -from __future__ import absolute_import, print_function import os diff --git a/tests/migrator/rules/test_books.py b/tests/migrator/rules/test_books.py index 8fd9a0393..9a04f322b 100644 --- a/tests/migrator/rules/test_books.py +++ b/tests/migrator/rules/test_books.py @@ -18,8 +18,6 @@ # 59 Temple Place, Suite 330, Boston, MA 02D111-1307, USA. """Book fields tests.""" -from __future__ import absolute_import, print_function, unicode_literals - import datetime import pytest diff --git a/tests/migrator/rules/test_journals.py b/tests/migrator/rules/test_journals.py index 6509b9dee..d5553cb23 100644 --- a/tests/migrator/rules/test_journals.py +++ b/tests/migrator/rules/test_journals.py @@ -16,9 +16,8 @@ # You should have received a copy of the GNU General Public License # along with Invenio; if not, write to the Free Software Foundation, Inc., # 59 Temple Place, Suite 330, Boston, MA 02D111-1307, USA. -"""Journal fields tests.""" -from __future__ import absolute_import, print_function, unicode_literals +"""Journal fields tests.""" import pytest from cds_dojson.marc21.utils import create_record diff --git a/tests/migrator/rules/test_series.py b/tests/migrator/rules/test_series.py index dbbc5d15d..b04473cd6 100644 --- a/tests/migrator/rules/test_series.py +++ b/tests/migrator/rules/test_series.py @@ -4,9 +4,8 @@ # # CDS-ILS is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. -"""CDS-ILS Test series fields.""" -from __future__ import absolute_import, print_function, unicode_literals +"""CDS-ILS Test series fields.""" import pytest from cds_dojson.marc21.utils import create_record diff --git a/tests/migrator/rules/test_standard.py b/tests/migrator/rules/test_standard.py index 78b2dcbcf..c9a4ab1cd 100644 --- a/tests/migrator/rules/test_standard.py +++ b/tests/migrator/rules/test_standard.py @@ -4,9 +4,8 @@ # # CDS-ILS is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. -"""Standard fields tests.""" -from __future__ import absolute_import, print_function, unicode_literals +"""Standard fields tests.""" from cds_dojson.marc21.utils import create_record from flask import current_app diff --git a/tests/migrator/test_fields_utils.py b/tests/migrator/test_fields_utils.py index 830dadf2b..9cd0c80d1 100644 --- a/tests/migrator/test_fields_utils.py +++ b/tests/migrator/test_fields_utils.py @@ -4,9 +4,8 @@ # # CDS-ILS is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. -"""CDS-ILS fields utilities.""" -from __future__ import absolute_import +"""CDS-ILS fields utilities.""" import pytest from dojson.errors import IgnoreKey