Skip to content

Commit

Permalink
Merge pull request #1614 from MTG/solr9
Browse files Browse the repository at this point in the history
Solr 9
  • Loading branch information
ffont authored Dec 19, 2023
2 parents 1fff715 + 01f64e5 commit daeba9e
Show file tree
Hide file tree
Showing 55 changed files with 2,813 additions and 3,559 deletions.
2 changes: 1 addition & 1 deletion DEVELOPERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ If a new search engine backend class is to be implemented, it must closely follo
utils.search.SearchEngineBase docstrings. There is a Django management command that can be used in order to test
the implementation of a search backend. You can run it like:

docker-compose run --rm web python manage.py test_search_engine_backend -fsw --backend utils.search.backends.solr451custom.Solr451CustomSearchEngine
docker-compose run --rm web python manage.py test_search_engine_backend -fsw --backend utils.search.backends.solr9pysolr.Solr9PySolrSearchEngine

Please read the documentation of the management command carefully to better understand how it works and how it
performs the testing.
Expand Down
14 changes: 13 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ volumes:
pgdata:
m2home:
solr5data:
solr9data:
fswebhome:

services:
Expand Down Expand Up @@ -52,11 +53,22 @@ services:
image: solr:5.5.5
volumes:
- solr5data:/opt/solr/server/solr/mycores
- ./utils/search/solr5.5.5/cores:/cores
- ./utils/search/solr5.5.5/cores:/opt/solr/server/solr/configsets/
- ./utils/search/solr5.5.5/docker-entrypoint:/docker-entrypoint-initdb.d
ports:
- "${FS_BIND_HOST:-127.0.0.1}:${LOCAL_PORT_PREFIX}8983:8983"

solr9:
build:
context: ./
dockerfile: ./docker/Dockerfile.solr
volumes:
- ./utils/search/solr9/cores:/var/solr/data/configsets
- solr9data:/var/solr
- ./utils/search/solr9/docker-entrypoint:/docker-entrypoint-initdb.d
ports:
- "${FS_BIND_HOST:-127.0.0.1}:${LOCAL_PORT_PREFIX}8984:8983"

# Redis (caches)
redis:
image: redis:alpine
Expand Down
21 changes: 21 additions & 0 deletions docker/Dockerfile.solr
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Freesound development solr image

# This file is needed to fix file permission issues when running solr in docker.
#
# In our test_search_engine_backend command, we use the Solr API to create a core from a configSet.
# In order to use this API, configSets must be available at $SOLR_HOME/configsets.
# However, the official Solr Dockerfile defines $SOLR_HOME as /var/solr/data, but only creates /var/solr
# in the Dockerfile.
# When mounting ./utils/search/solr9/cores:/var/solr/data/configsets as a volume in docker-compose.yml,
# docker will create any non-existent directories with an owner of root.
# In this case, /var/solr/data/ doesn't yet exist so it gets created as owned by root when mounting
# configSets as /var/solr/data/configsets in the container.
# This means that the precreate-core script (running as the solr user, uid 8983) is unable to write a new core to
# the data directory, as it's owned by root.
#
# By creating /var/solr/data ahead of time owned by the solr user, docker won't set the owner to root, allowing
# the setup scripts to work and configSets to be used.

FROM solr:9.2.1

RUN mkdir /var/solr/data
24 changes: 0 additions & 24 deletions docker/search/src/main/resources/log4j.properties

This file was deleted.

8 changes: 8 additions & 0 deletions forum/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ def forum_pre_save_set_slug(sender, instance, **kwargs):
instance.name_slug = slugify(instance.name)


@receiver(pre_save, sender=Forum)
def forum_pre_save_set_slug(sender, instance, **kwargs):
    """Fill in ``name_slug`` from ``name`` before a forum is saved.

    The slug is only generated when the instance has no ``id`` and its
    ``name_slug`` is empty; in any other case the instance is left untouched.
    """
    if instance.id or instance.name_slug:
        # Either the instance already has an id or a slug is already set.
        return
    instance.name_slug = slugify(instance.name)


class Thread(models.Model):
forum = models.ForeignKey(Forum, on_delete=models.CASCADE)
author = models.ForeignKey(User, on_delete=models.CASCADE)
Expand Down
5 changes: 2 additions & 3 deletions freesound/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,9 +618,8 @@
SEARCH_FORUM_SORT_DEFAULT = SEARCH_FORUM_SORT_OPTION_THREAD_DATE_FIRST

SEARCH_ENGINE_BACKEND_CLASS = 'utils.search.backends.solr555pysolr.Solr555PySolrSearchEngine'
SOLR5_SOUNDS_URL = "http://search:8983/solr/freesound/"
SOLR5_FORUM_URL = "http://search:8983/solr/forum/"

SOLR5_BASE_URL = "http://search:8983/solr"
SOLR9_BASE_URL = "http://solr9:8983/solr"

# -------------------------------------------------------------------------------
# Similarity client settings
Expand Down
2 changes: 1 addition & 1 deletion search/management/commands/reindex_search_engine_forum.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,6 @@ def handle(self, *args, **options):
# passed, this bit of code should remove no posts.
indexed_post_ids = get_all_post_ids_from_search_engine()
post_ids_to_delete = list(set(indexed_post_ids).difference(all_posts.values_list('id', flat=True)))
console_logger.info("Deleting %d non-existing posts form the search engine", len(post_ids_to_delete))
console_logger.info("Deleting %d non-existing posts from the search engine", len(post_ids_to_delete))
if post_ids_to_delete:
delete_posts_from_search_engine(post_ids_to_delete)
6 changes: 2 additions & 4 deletions search/management/commands/reindex_search_engine_sounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
# Get all indexed sound IDs and remove them
clear_index = options['clear_index']
indexed_sound_ids = None
if clear_index:
delete_all_sounds_from_search_engine()

Expand All @@ -68,9 +67,8 @@ def handle(self, *args, **options):
# Delete all sounds in the search engine which are not found in the DB. This part of code is to make sure that
# no "leftover" sounds remain in the search engine, but should normally do nothing, specially if the
# "clear_index" option is passed
if indexed_sound_ids is None:
indexed_sound_ids = get_all_sound_ids_from_search_engine()
indexed_sound_ids = get_all_sound_ids_from_search_engine()
sound_ids_to_delete = list(set(indexed_sound_ids).difference(sounds_to_index_ids))
console_logger.info("Deleting %d non-existing sounds form the search engine", len(sound_ids_to_delete))
console_logger.info("Deleting %d non-existing sounds from the search engine", len(sound_ids_to_delete))
if sound_ids_to_delete:
delete_sounds_from_search_engine(sound_ids_to_delete)
55 changes: 46 additions & 9 deletions search/management/commands/test_search_engine_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from django.conf import settings
from django.core.management.base import BaseCommand
import requests

from utils.search import get_search_engine
from utils.search.backends.test_search_engine_backend import TestSearchEngineBackend
Expand All @@ -34,10 +35,30 @@
global_output_file = None


def core_exists(solr_base_url, core_name):
    """Tell whether the Solr server at ``solr_base_url`` reports a core named ``core_name``.

    The core admin STATUS action is queried for the given core. A JSON answer is
    inspected through its ``status`` entry for the core; if the body can't be parsed
    as JSON, the raw text is searched for the empty XML entry of the core instead.

    Args:
        solr_base_url: base URL of the Solr server (the admin API is reached under ``/admin/cores``).
        core_name: name of the core to look for.

    Returns:
        bool: True if the status entry for the core is not empty, False otherwise.

    Raises:
        requests.HTTPError: if the status request is answered with an error status code.
    """
    status_url = f'{solr_base_url}/admin/cores?action=STATUS&core={core_name}'
    response = requests.get(status_url)
    response.raise_for_status()
    try:
        payload = response.json()
    except ValueError:
        # Solr 5 returns xml. "Empty list" means that the core does not exist
        empty_core_entry = f"""<lst name="{core_name}"/></lst>"""
        return empty_core_entry not in response.text
    # An empty status dictionary for the core is treated as "core does not exist"
    return payload['status'][core_name] != {}


def create_core(solr_base_url, core_name, configSet, delete_core=False):
    """Create a Solr core from a configSet, optionally replacing an existing core.

    Args:
        solr_base_url: base URL of the Solr server (the admin API is reached under ``/admin/cores``).
        core_name: name of the core to create.
        configSet: name of the configSet the new core is created from.
        delete_core: if True and the core already exists, unload it (deleting its
            instance directory) before creating it again.

    Raises:
        Exception: if the core already exists and ``delete_core`` is False.
        requests.HTTPError: if Solr answers the STATUS, UNLOAD or CREATE request with an
            error status code.
    """
    if core_exists(solr_base_url, core_name):
        if not delete_core:
            raise Exception(f"Core {core_name} already exists, use --force to delete it.")
        unload_response = requests.get(
            f'{solr_base_url}/admin/cores?action=UNLOAD&core={core_name}&deleteInstanceDir=true')
        # Fail here rather than trying to create a core on top of one that could not be removed
        unload_response.raise_for_status()
    create_response = requests.get(
        f'{solr_base_url}/admin/cores?action=CREATE&name={core_name}&configSet={configSet}')
    # Without this check a failed creation would only surface later as unrelated test failures
    create_response.raise_for_status()


class Command(BaseCommand):
help = 'Test a search engine backend and output test results. To run these tests, a search engine backend is' \
'expected to be running with some sounds/forum posts indexed in accordance to Sound and Post objects' \
'from the database. The Freesound development data will work nicely with these tests. After running the' \
help = 'Test a search engine backend and output test results. To run these tests, a search engine backend is ' \
'expected to be running. A new core is created for these tests and is populated with some '
'sounds/forum posts. The Freesound development data will work nicely with these tests. After running the' \
'tests, DB contents will not be changed, but it could happen that the search engine index is not left' \
'in the exact same state. Therefore, this command SHOULD NOT be run in a production database.' \
'' \
Expand All @@ -53,7 +74,14 @@ def add_arguments(self, parser):
action='store',
dest='backend_class',
default=settings.SEARCH_ENGINE_BACKEND_CLASS,
help='Path to the backend class to test, eg: utils.search.backends.solr451custom.Solr451CustomSearchEngine')
help='Path to the backend class to test, eg: utils.search.backends.solr9pysolr.Solr9PySolrSearchEngine')

# --force controls what happens when the test cores already exist: with it they are
# unloaded and created again, without it the command stops with an error.
parser.add_argument(
    '--force',
    action='store_true',
    dest='force_create_core',
    default=False,
    help='If the test cores already exist, delete them and create them again')

parser.add_argument(
'-s', '--sound_methods',
Expand All @@ -79,29 +107,38 @@ def add_arguments(self, parser):
def handle(self, *args, **options):

if not settings.DEBUG:
raise Exception('Running search engine tests in a production deployment. This should not be done as'
'running these tests will modify the contents of the production search engine index'
raise Exception('Running search engine tests in a production deployment. This should not be done as '
'running these tests will modify the contents of the production search engine index '
'and leave it in a "wrong" state.')

# Instantiate search engine
try:
search_engine = get_search_engine(backend_class=options['backend_class'])
search_engine = get_search_engine(
backend_class=options['backend_class']
)
except ValueError:
raise Exception('Wrong backend name format. Should be a path like '
'utils.search.backends.solr451custom.Solr451CustomSearchEngine')
'utils.search.backends.solr9pysolr.Solr9PySolrSearchEngine')
except ImportError as e:
raise Exception(f'Backend class to test could not be imported: {e}')

console_logger.info(f"Testing search engine backend: {options['backend_class']}")
backend_name = options['backend_class']
write_output = options['write_output']

# Create the engine above to get the base url for that engine and check that the given class exists.
# Then create temporary cores using this base url and re-create the engine with these core urls.
create_core(search_engine.solr_base_url, "engine_test_freesound", "freesound", delete_core=options['force_create_core'])
create_core(search_engine.solr_base_url, "engine_test_forum", "forum", delete_core=options['force_create_core'])
sounds_index_url = f'{search_engine.solr_base_url}/engine_test_freesound'
forum_index_url = f'{search_engine.solr_base_url}/engine_test_forum'

if not options['sound_methods'] and not options['forum_methods']:
console_logger.info('None of sound methods or forum methods were selected, so nothing will be tested. '
'Use the -s, -f or both options to test sound and/or forum methods.')


backend_test = TestSearchEngineBackend(backend_name, write_output)
backend_test = TestSearchEngineBackend(backend_name, write_output, sounds_index_url=sounds_index_url, forum_index_url=forum_index_url)
if options['sound_methods']:
backend_test.test_search_enginge_backend_sounds()

Expand Down
6 changes: 4 additions & 2 deletions utils/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,20 @@
from django.conf import settings


def get_search_engine(backend_class=settings.SEARCH_ENGINE_BACKEND_CLASS):
def get_search_engine(backend_class=settings.SEARCH_ENGINE_BACKEND_CLASS, sounds_index_url=None, forum_index_url=None):
"""Return SearchEngine class instance to carry out search engine actions
Args:
backend_class: path to the search engine backend class (defaults to settings.SEARCH_ENGINE_BACKEND_CLASS)
sounds_index_url: url of the sounds index in solr. If not set, use the default URL for the backend
forum_index_url: url of the forum index in solr. If not set, use the default URL for the backend
Returns:
utils.search.SearchEngineBase: search engine backend class instance
"""
module_name, class_name = backend_class.rsplit('.', 1)
module = importlib.import_module(module_name)
return getattr(module, class_name)()
return getattr(module, class_name)(sounds_index_url, forum_index_url)


class SearchResults:
Expand Down
Loading

0 comments on commit daeba9e

Please sign in to comment.