From 91ece985202769b992d0f061b6e36053c4cd20ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ben=20St=C3=A4hli?= Date: Mon, 22 Jan 2024 17:48:28 +0100 Subject: [PATCH] initial --- .github/workflows/ci.yml | 59 ++++++++ .github/workflows/release.yml | 28 ++++ .gitignore | 63 +++++++++ AUTHORS | 7 + CHANGELOG.txt | 2 + DESCRIPTION | 2 + LICENSE | 21 +++ MANIFEST.in | 10 ++ README.md | 127 ++++++++++++++++++ manage.py | 12 ++ postgres_searchindex/__init__.py | 0 postgres_searchindex/admin.py | 19 +++ postgres_searchindex/apps.py | 11 ++ postgres_searchindex/base.py | 62 +++++++++ postgres_searchindex/conf.py | 5 + postgres_searchindex/contrib/__init__.py | 0 .../contrib/djangocms/__init__.py | 0 .../contrib/djangocms/base.py | 33 +++++ .../contrib/djangocms/cms_apps.py | 14 ++ .../contrib/djangocms/helpers.py | 73 ++++++++++ .../contrib/djangocms/index_sources.py | 52 +++++++ postgres_searchindex/exceptions.py | 6 + postgres_searchindex/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../commands/postgres_searchindex_rebuild.py | 10 ++ .../commands/postgres_searchindex_update.py | 41 ++++++ .../migrations/0001_initial.py | 37 +++++ postgres_searchindex/migrations/__init__.py | 0 postgres_searchindex/models.py | 40 ++++++ postgres_searchindex/source_pool.py | 84 ++++++++++++ .../postgres_searchindex/search.html | 54 ++++++++ postgres_searchindex/urls.py | 7 + postgres_searchindex/views.py | 41 ++++++ release.txt | 83 ++++++++++++ requirements_dev.txt | 11 ++ setup.cfg | 3 + setup.py | 60 +++++++++ tox.ini | 48 +++++++ translations.sh | 5 + 39 files changed, 1130 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml create mode 100644 .gitignore create mode 100644 AUTHORS create mode 100644 CHANGELOG.txt create mode 100644 DESCRIPTION create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100755 manage.py create mode 100644 postgres_searchindex/__init__.py 
create mode 100644 postgres_searchindex/admin.py create mode 100644 postgres_searchindex/apps.py create mode 100644 postgres_searchindex/base.py create mode 100644 postgres_searchindex/conf.py create mode 100644 postgres_searchindex/contrib/__init__.py create mode 100644 postgres_searchindex/contrib/djangocms/__init__.py create mode 100644 postgres_searchindex/contrib/djangocms/base.py create mode 100644 postgres_searchindex/contrib/djangocms/cms_apps.py create mode 100644 postgres_searchindex/contrib/djangocms/helpers.py create mode 100644 postgres_searchindex/contrib/djangocms/index_sources.py create mode 100644 postgres_searchindex/exceptions.py create mode 100644 postgres_searchindex/management/__init__.py create mode 100644 postgres_searchindex/management/commands/__init__.py create mode 100644 postgres_searchindex/management/commands/postgres_searchindex_rebuild.py create mode 100644 postgres_searchindex/management/commands/postgres_searchindex_update.py create mode 100644 postgres_searchindex/migrations/0001_initial.py create mode 100644 postgres_searchindex/migrations/__init__.py create mode 100644 postgres_searchindex/models.py create mode 100644 postgres_searchindex/source_pool.py create mode 100644 postgres_searchindex/templates/postgres_searchindex/search.html create mode 100644 postgres_searchindex/urls.py create mode 100644 postgres_searchindex/views.py create mode 100644 release.txt create mode 100644 requirements_dev.txt create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tox.ini create mode 100755 translations.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5e83fb3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,59 @@ +name: CI + +on: [push, pull_request, ] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.8, 3.9, "3.10", "3.11"] + steps: + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python }} + uses: 
actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install Tox and any other packages + run: pip install tox tox-gh-actions + - name: Run Tox + # Run tox using the version of Python in `PATH` + # run: tox -e py${{ matrix.python }}-${{ matrix.django }} + run: tox + + coverage: + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.9] + steps: + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install flake8 + run: pip install tox + - name: Run tox with coverage + # Run tox using the version of Python in `PATH` + # run: tox -e py${{ matrix.python }}-${{ matrix.django }} + run: tox -e py39-django32-coverage + + + lint: + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.9] + steps: + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install flake8 + run: pip install flake8 + - name: Run flake8 + # Run tox using the version of Python in `PATH` + # run: tox -e py${{ matrix.python }}-${{ matrix.django }} + run: flake8 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..a8779f5 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,28 @@ +name: Release on PYPI +on: + push: + branches: [main] + + +jobs: + build-and-publish: + runs-on: ubuntu-latest + if: github.repository == 'bnzk/django-postgres-searchindex' + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade setuptools wheel twine + - name: Build + run: | + python setup.py sdist + python setup.py bdist_wheel --universal + - name: Publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USER_BNZK }} + TWINE_PASSWORD: ${{ 
secrets.PYPI_PW_BNZK }} + run: twine upload dist/* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e7bc9fa --- /dev/null +++ b/.gitignore @@ -0,0 +1,63 @@ +.idea/ +*.egg-info/ +*.pyc +*.coverage +*coverage/ +db.sqlite +dist/ +docs/_build/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +# Sphinx documentation +docs/_build/ + diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..6e334cc --- /dev/null +++ b/AUTHORS @@ -0,0 +1,7 @@ +Current or previous core committers + +Ben Stähli + +Contributors (in alphabetical order) + +* Your name could stand here :) diff --git a/CHANGELOG.txt b/CHANGELOG.txt new file mode 100644 index 0000000..8cd4949 --- /dev/null +++ b/CHANGELOG.txt @@ -0,0 +1,2 @@ +=== (ongoing) === + diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..80e63f3 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,2 @@ +Everything in postgres, accessible via Django ORM, using +postgres fulltext search capabilities \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c029d84 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Ben Stähli + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..8f172a6 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,10 @@ +include AUTHORS +include LICENSE +include DESCRIPTION +include CHANGELOG.txt +include README.md +graft postgres_searchindex +global-exclude __pycache__ +global-exclude *.orig *.pyc *.log *.swp +prune postgres_searchindex/tests/coverage +prune postgres_searchindex/.ropeproject diff --git a/README.md b/README.md new file mode 100644 index 0000000..580ecf8 --- /dev/null +++ b/README.md @@ -0,0 +1,127 @@ +# django-postgres-searchindex + +[![CI](https://img.shields.io/github/actions/workflow/status/bnzk/django-postgres-searchindex/ci.yml?style=flat-square&logo=github "CI")](https://github.com/bnzk/django-postgres-searchindex/actions/workflows/ci.yml) +[![Version](https://img.shields.io/pypi/v/django-postgres-searchindex.svg?style=flat-square "Version")](https://pypi.python.org/pypi/django-postgres-searchindex/) +[![Licence](https://img.shields.io/github/license/bnzk/django-postgres-searchindex.svg?style=flat-square "Licence")](https://pypi.python.org/pypi/django-postgres-searchindex/) +[![PyPI Downloads](https://img.shields.io/pypi/dm/django-postgres-searchindex?style=flat-square "PyPi 
Downloads")](https://pypistats.org/packages/django-postgres-searchindex) + +A bit like django-haystack, but everything in postgres, accessible via Django ORM, using +postgres fulltext search capabilities. The goal is to ease setup and +maintenance for smaller and medium sized projects - without dependencies on +search technology like elastic, solr or whoosh. + +During conception, I was thinking about developing a backend for django-haystack, but +decided against it, to be able to develop from the ground up, as simple as possible. The +project could still provide a haystack backend one day, but it was just not my priority. + +## Quickstart + +Describe, index, search. + +### Define index(es) in django settings + +Default value, simplest possible configuration: + +``` +POSTGRES_SEARCHINDEX = { + "default": {}, +} +``` + +Example for a multilanguage setup: + +``` +POSTGRES_SEARCHINDEX = { + "de": { + "kwargs": { + "language": "de", + } + }, + "fr": { + "kwargs": { + "language": "fr", + } + }, +} +``` + +More complex configurations could include django's `SITE_ID` or other relevant information +in searchindex key and kwargs. + +### Define sources + +Example, hopefully self-explanatory. + +``` +from django.utils.html import strip_tags +from postgres_searchindex.base import IndexSource / MultiLanguageIndexSource +from postgres_searchindex.source_pool import source_pool + +from news.models import News + + +@source_pool.register +class NewsIndexSource(IndexSource / MultiLanguageIndexSource): + model = News + + def get_title(self, obj): + return strip_tags(obj.description) + + def get_content(self, obj): + return strip_tags(obj.description) + + def get_queryset(self): + return self.model.objects.published() +``` + +### Populate the index + +Run `./manage.py postgres_searchindex_update` to update/build the index. + +If you want to control how things get indexed, you can check +your `IndexEntry` instances in Django admin. + +### Search! + +You can now search in your index. 
You are free to use [Django's builtin fulltext](https://docs.djangoproject.com/en/dev/ref/contrib/postgres/search/) +features as you like - as in the following example, or in a way more advanced manner. + +``` +from django.contrib.postgres.search import SearchVector +from postgres_searchindex.models import IndexEntry + +# this will return entries containing "überhaupt" and "uberhaupt" +IndexEntry.objects.annotate( + search=SearchVector("content", "title", config="german") +).filter(index_key=self.request.LANGUAGE_CODE, search="uberhaupt") + +``` + +There is a full example in the source: `views.py` and `urls.py` will give you an idea. + +To be done: |highlight:query templatefilter, to highlight the search query in the +search result text. + +### Keep the index fresh + +Either you'll regularly run `./manage.py postgres_searchindex_update`, or you'll +implement a realtime or near realtime solution, with signals, through the +`POSTGRES_SEARCHINDEX_SIGNAL_PROCESSOR` setting. + +There is currently one builtin processor (a second one is planned): + - `postgres_searchindex.signal_processors.RealtimeSyncedSignalProcessor` + - To be done! `postgres_searchindex.signal_processors.RealtimeCelerySignalProcessor` + +The async signal processor will require you to have celery configured. + + +## TODO + +- properly handle removal of instances +- use trigram search? +- create an index for `content` and `title` +- instant update index via signals (update/delete models) + - with celery? + - https://www.world-of-knives.ch/de/messershop/m-88-acier-japonais/ + - manage command: IndexEntry.objects.filter(original=None).delete() doesn't work? + check each model... 
diff --git a/manage.py b/manage.py new file mode 100755 index 0000000..6c87038 --- /dev/null +++ b/manage.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +import os +import sys + +if __name__ == "__main__": + os.environ.setdefault('DJANGO_SETTINGS_MODULE', + # 'postgres_searchindex.tests.south_settings') + 'postgres_searchindex.tests.settings') + + from django.core.management import execute_from_command_line + + execute_from_command_line(sys.argv) diff --git a/postgres_searchindex/__init__.py b/postgres_searchindex/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/postgres_searchindex/admin.py b/postgres_searchindex/admin.py new file mode 100644 index 0000000..4674942 --- /dev/null +++ b/postgres_searchindex/admin.py @@ -0,0 +1,19 @@ +from django.contrib import admin + +from .models import IndexEntry + + +@admin.register(IndexEntry) +class IndexEntryAdmin(admin.ModelAdmin): + list_display_links = ("title",) + list_display = ( + "index_key", + "title", + "url", + "modified_at", + ) + search_fields = ("title", "content") + list_filter = ( + "index_key", + "content_type", + ) diff --git a/postgres_searchindex/apps.py b/postgres_searchindex/apps.py new file mode 100644 index 0000000..2361458 --- /dev/null +++ b/postgres_searchindex/apps.py @@ -0,0 +1,11 @@ +from django.apps import AppConfig + + +class PostgresSearchIndexConfig(AppConfig): + name = "postgres_searchindex" + + def ready(self): + # discover all search indexes. 
+ from postgres_searchindex.source_pool import source_pool + + source_pool.discover() diff --git a/postgres_searchindex/base.py b/postgres_searchindex/base.py new file mode 100644 index 0000000..a7b0487 --- /dev/null +++ b/postgres_searchindex/base.py @@ -0,0 +1,62 @@ +from django.utils.translation import override + +from postgres_searchindex.models import IndexEntry + +# IndexEntry = apps.get_model("postgres_searchindex", "IndexEntry", require_ready=False) + + +class IndexSource: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def get_queryset(self): + return self.model.objects.all() + + def get_related_query_name(self): + related_query_name = self.model.index_entries.field.related_query_name() + return related_query_name + + def get_data(self, obj): + data = { + "id": obj.id, + "title": self.get_title(obj), + "content": self.get_content(obj), + "url": self.get_url(obj), + } + return data + + def update(self, index_key, obj): + kwargs = {self.get_related_query_name(): obj} + # why not get_or_create? because generic foreign key. 
+ try: + index_entry = IndexEntry.objects.get(index_key=index_key, **kwargs) + except IndexEntry.DoesNotExist: + index_entry = obj.index_entries.create(index_key=index_key) + data = self.get_data(obj) + index_entry.title = data["title"] + index_entry.content = str(data["content"]) + index_entry.url = data["url"] + index_entry.save() + + def get_title(self, obj): + return getattr(obj, "title", "") + + def get_content(self, obj): + return getattr(obj, "content", "") + + def get_url(self, obj): + return obj.get_absolute_url() + + def get_json(self, obj): + pass + + +class MultiLanguageIndexSource(IndexSource): + def __init__(self, **kwargs): + self.language = kwargs.pop("language") + self.kwargs = kwargs + + def get_data(self, obj): + with override(self.language): + data = super().get_data(obj) + return data diff --git a/postgres_searchindex/conf.py b/postgres_searchindex/conf.py new file mode 100644 index 0000000..346e3f1 --- /dev/null +++ b/postgres_searchindex/conf.py @@ -0,0 +1,5 @@ +LANGUAGE_2_PGCONFIG = { + "en": "english", + "de": "german", + "fr": "french", +} diff --git a/postgres_searchindex/contrib/__init__.py b/postgres_searchindex/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/postgres_searchindex/contrib/djangocms/__init__.py b/postgres_searchindex/contrib/djangocms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/postgres_searchindex/contrib/djangocms/base.py b/postgres_searchindex/contrib/djangocms/base.py new file mode 100644 index 0000000..3c304e7 --- /dev/null +++ b/postgres_searchindex/contrib/djangocms/base.py @@ -0,0 +1,33 @@ +from cms.models import CMSPlugin + +from postgres_searchindex.contrib.djangocms.helpers import ( + get_plugin_index_content, + get_request, +) + + +class PlaceholderIndexSourceMixin: + """ + helper mixin, for easier indexing of placeholderfields + use MultiLanguageIndexSource as source base class when using this mixin + """ + + def get_content(self, obj): + request = 
get_request(self.language) + return self.get_placeholder_content(obj, self.language, request) + + def get_plugin_queryset(self, language): + queryset = CMSPlugin.objects.filter(language=language) + return queryset + + def get_placeholder_content(self, obj, language, request): + placeholder = getattr(obj, self.placeholder_field_name, "content") + plugins = self.get_plugin_queryset(language).filter(placeholder=placeholder) + text = "" + for base_plugin in plugins: + text += self.get_plugin_search_text(base_plugin, request) + return text + + def get_plugin_search_text(self, base_plugin, request): + plugin_content = get_plugin_index_content(base_plugin, request) + return plugin_content.strip() diff --git a/postgres_searchindex/contrib/djangocms/cms_apps.py b/postgres_searchindex/contrib/djangocms/cms_apps.py new file mode 100644 index 0000000..2320820 --- /dev/null +++ b/postgres_searchindex/contrib/djangocms/cms_apps.py @@ -0,0 +1,14 @@ +from cms.app_base import CMSApp +from cms.apphook_pool import apphook_pool +from django.utils.translation import gettext_lazy as _ + + +@apphook_pool.register +class SearchAppHook(CMSApp): + name = _("Search Form") + # menus = [CategoryMenu, ] + + def get_urls(self, page=None, language=None, **kwargs): + return [ + "postgres_searchindex.urls", + ] diff --git a/postgres_searchindex/contrib/djangocms/helpers.py b/postgres_searchindex/contrib/djangocms/helpers.py new file mode 100644 index 0000000..59a9d8f --- /dev/null +++ b/postgres_searchindex/contrib/djangocms/helpers.py @@ -0,0 +1,73 @@ +from cms.toolbar.toolbar import CMSToolbar +from django.conf import settings +from django.contrib.auth.models import AnonymousUser +from django.template import Engine, RequestContext +from django.test import RequestFactory +from django.utils.html import strip_tags + + +def _render_plugin(plugin, context, renderer=None): + if renderer: + content = renderer.render_plugin( + instance=plugin, + context=context, + editable=False, + ) + else: + content = 
plugin.render_plugin(context) + return content + + +def get_plugin_index_content(base_plugin, request): + instance, plugin_type = base_plugin.get_plugin_instance() + + search_fields = getattr(instance, "search_fields", []) + if hasattr(instance, "search_fulltext"): + # check if the plugin instance has search enabled + search_contents = instance.search_fulltext + elif hasattr(base_plugin, "search_fulltext"): + # now check in the base plugin instance (CMSPlugin) + search_contents = base_plugin.search_fulltext + elif hasattr(plugin_type, "search_fulltext"): + # last check in the plugin class (CMSPluginBase) + search_contents = plugin_type.search_fulltext + else: + # disabled if there's search fields defined, + # otherwise it's enabled. + search_contents = not bool(search_fields) + + if search_contents: + context = RequestContext(request) + updates = {} + engine = Engine.get_default() + + for processor in engine.template_context_processors: + updates.update(processor(context.request)) + context.dicts[context._processors_index] = updates + + try: + # django-cms>=3.5 + renderer = request.toolbar.content_renderer + except AttributeError: + # django-cms>=3.4 + renderer = context.get("cms_content_renderer") + + plugin_content = _render_plugin(instance, context, renderer) + plugin_content = strip_tags(plugin_content) + + return plugin_content + + +def get_request(language=None): + """ + Returns a Request instance populated with cms specific attributes. + """ + request_factory = RequestFactory(HTTP_HOST=settings.ALLOWED_HOSTS[0]) + request = request_factory.get("/") + request.session = {} + request.LANGUAGE_CODE = language or settings.LANGUAGE_CODE + # Needed for plugin rendering. 
+ request.current_page = None + request.user = AnonymousUser() + request.toolbar = CMSToolbar(request) + return request diff --git a/postgres_searchindex/contrib/djangocms/index_sources.py b/postgres_searchindex/contrib/djangocms/index_sources.py new file mode 100644 index 0000000..ffd68e9 --- /dev/null +++ b/postgres_searchindex/contrib/djangocms/index_sources.py @@ -0,0 +1,52 @@ +from cms.models import Title +from django.db.models import Q +from django.utils import timezone + +from postgres_searchindex.base import MultiLanguageIndexSource +from postgres_searchindex.contrib.djangocms.base import PlaceholderIndexSourceMixin +from postgres_searchindex.source_pool import source_pool + + +class TitleIndexSource(PlaceholderIndexSourceMixin, MultiLanguageIndexSource): + model = Title + + def get_url(self, obj): + return obj.page.get_absolute_url() + + def get_page_placeholders(self, page): + """ + one day: allow specific configs, to include/exclude placeholders from indexing + """ + return page.placeholders.all() + + def get_placeholder_content(self, obj, language, request): + current_page = obj.page + placeholders = self.get_page_placeholders(current_page) + plugins = self.get_plugin_queryset(language).filter( + placeholder__in=placeholders + ) + text = "" + for base_plugin in plugins: + text += " " + self.get_plugin_search_text(base_plugin, request) + + return text + + def get_queryset(self): + queryset = ( + Title.objects.public() + .filter( + Q(page__publication_date__lt=timezone.now()) + | Q(page__publication_date__isnull=True), + Q(page__publication_end_date__gte=timezone.now()) + | Q(page__publication_end_date__isnull=True), + Q(redirect__exact="") | Q(redirect__isnull=True), + language=self.language, + ) + .select_related("page") + ) + # if GTE_CMS_35: + queryset = queryset.select_related("page__node") + return queryset.distinct() + + +source_pool.register(TitleIndexSource) diff --git a/postgres_searchindex/exceptions.py b/postgres_searchindex/exceptions.py new 
file mode 100644 index 0000000..05dcd40 --- /dev/null +++ b/postgres_searchindex/exceptions.py @@ -0,0 +1,6 @@ +class SourceAlreadyRegistered(Exception): + pass + + +class SourceNotRegistered(Exception): + pass diff --git a/postgres_searchindex/management/__init__.py b/postgres_searchindex/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/postgres_searchindex/management/commands/__init__.py b/postgres_searchindex/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/postgres_searchindex/management/commands/postgres_searchindex_rebuild.py b/postgres_searchindex/management/commands/postgres_searchindex_rebuild.py new file mode 100644 index 0000000..e2140ed --- /dev/null +++ b/postgres_searchindex/management/commands/postgres_searchindex_rebuild.py @@ -0,0 +1,10 @@ +from django.core.management import BaseCommand + + +class Command(BaseCommand): + help = "Try send something to Sentry!" + + def handle(self, *args, **options): + from xxx import yyy # noqa + + return diff --git a/postgres_searchindex/management/commands/postgres_searchindex_update.py b/postgres_searchindex/management/commands/postgres_searchindex_update.py new file mode 100644 index 0000000..9c1f8df --- /dev/null +++ b/postgres_searchindex/management/commands/postgres_searchindex_update.py @@ -0,0 +1,41 @@ +from django.conf import settings +from django.core.management import BaseCommand +from django.contrib.contenttypes.models import ContentType + +from postgres_searchindex.models import IndexEntry +from postgres_searchindex.source_pool import source_pool + + +class Command(BaseCommand): + help = "Update/build index" + + def handle(self, *args, **options): + for index_key, index in settings.POSTGRES_SEARCHINDEX.items(): + self.stdout.write("====================================") + self.stdout.write( + f"Updating index \"{index_key}\" with kwargs {index.get('kwargs', {})}" + ) + for source_name, source_cls in 
source_pool.get_sources().items(): + source = source_cls(**index.get("kwargs", {})) + self.stdout.write( + f"{source.model.__name__}. " + f"Indexing {source.get_queryset().count()} entries" + ) + # index + current_ids = [] + for obj in source.get_queryset(): + source.update(index_key, obj) + current_ids.append(obj.id) + # remove no more existing + content_type = ContentType.objects.get_for_model(source.model) + delete_result = ( + IndexEntry.objects.filter( + index_key=index_key, + content_type=content_type, + ) + .exclude( + object_id__in=current_ids, + ) + .delete() + ) + self.stdout.write(f"> Done. Removed from index: {delete_result[0]}") diff --git a/postgres_searchindex/migrations/0001_initial.py b/postgres_searchindex/migrations/0001_initial.py new file mode 100644 index 0000000..f8614c7 --- /dev/null +++ b/postgres_searchindex/migrations/0001_initial.py @@ -0,0 +1,37 @@ +# Generated by Django 3.2.21 on 2024-01-14 17:21 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('contenttypes', '0002_remove_content_type_name'), + ('sites', '0002_alter_domain_unique'), + ] + + operations = [ + migrations.CreateModel( + name='IndexEntry', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('modified_at', models.DateTimeField(auto_now=True)), + ('index_key', models.CharField(default='default', max_length=32)), + ('object_id', models.PositiveIntegerField()), + ('original_modified_at', models.DateTimeField(default=django.utils.timezone.now)), + ('title', models.CharField(max_length=1024)), + ('content', models.TextField(default='')), + ('url', models.TextField()), + ('content_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='contenttypes.contenttype')), + ('site_id', 
models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='sites.site')), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/postgres_searchindex/migrations/__init__.py b/postgres_searchindex/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/postgres_searchindex/models.py b/postgres_searchindex/models.py new file mode 100644 index 0000000..676225f --- /dev/null +++ b/postgres_searchindex/models.py @@ -0,0 +1,40 @@ +from django.contrib.contenttypes.fields import GenericForeignKey +from django.contrib.contenttypes.models import ContentType +from django.db import models +from django.utils import timezone + + +class IndexEntryBase(models.Model): + # meta fields + created_at = models.DateTimeField(auto_now_add=True) + modified_at = models.DateTimeField(auto_now=True) + index_key = models.CharField(max_length=32, default="default") + site_id = models.ForeignKey( + "sites.Site", + null=True, + on_delete=models.SET_NULL, + blank=True, + ) + # reference original + # TODO: rename content_type to original_content_type and object_id to original_id? 
+ content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) + object_id = models.PositiveIntegerField() + original = GenericForeignKey("content_type", "object_id") + original_modified_at = models.DateTimeField(default=timezone.now) + + # to be filled by apps/models + title = models.CharField( + max_length=1024, + ) + content = models.TextField(default="") + url = models.TextField() + + class Meta: + abstract = True + + def __str__(self): + return self.title + + +class IndexEntry(IndexEntryBase): + pass diff --git a/postgres_searchindex/source_pool.py b/postgres_searchindex/source_pool.py new file mode 100644 index 0000000..1b99158 --- /dev/null +++ b/postgres_searchindex/source_pool.py @@ -0,0 +1,84 @@ +from django.contrib.contenttypes.fields import GenericRelation +from django.core.exceptions import ImproperlyConfigured +from django.utils.module_loading import autodiscover_modules + +from postgres_searchindex.base import IndexSource +from postgres_searchindex.exceptions import ( + SourceAlreadyRegistered, + SourceNotRegistered, +) +from postgres_searchindex.models import IndexEntry + + +class SourcePool: + def __init__(self): + self.sources = {} + self.discovered = False + + def discover(self): + if self.discovered: + return + autodiscover_modules("index_sources") + self.discovered = True + + def clear(self): + self.discovered = False + self.sources = {} + + def register(self, source): + """ + Registers the given search source. + If an source is already registered, this will raise. + """ + if not issubclass(source, IndexSource): + raise ImproperlyConfigured( + "Sources must be subclasses of postgres_searchindex.source.IndexSource," + " %r is not." 
% source + ) + source_name = source.__name__ + if source_name in self.sources: + raise SourceAlreadyRegistered( + "Cannot register {!r}, an source with this name ({!r}) is already " + "registered.".format(source, source_name) + ) + # add generic relation to model + related_query_name = ( + f"original_{source.model._meta.app_label}_{source.model._meta.model_name}" + ) + source.model.add_to_class( + "index_entries", + GenericRelation( + IndexEntry, + related_query_name=related_query_name, + content_type_field="content_type", + object_id_field="object_id", + ), + ) + # add to registry + self.sources[source_name] = source + return source + + def unregister(self, source): + """ + Unregisters the given source(s). + + If a source isn't already registered, this will raise sourceNotRegistered. + """ + source_name = source.__name__ + if source_name not in self.sources: + raise SourceNotRegistered("The source %r is not registered" % source) + del self.sources[source_name] + + def get_sources(self): + self.discover() + return self.sources + + def get_source(self, name): + """ + Retrieve a source from the cache. + """ + self.discover() + return self.sources[name] + + +source_pool = SourcePool() diff --git a/postgres_searchindex/templates/postgres_searchindex/search.html b/postgres_searchindex/templates/postgres_searchindex/search.html new file mode 100644 index 0000000..50971a9 --- /dev/null +++ b/postgres_searchindex/templates/postgres_searchindex/search.html @@ -0,0 +1,54 @@ +{% extends "base.html" %} +{% load thumbnail %} +{% load static %} + + +{% block content %} + +
+ +
+
+ {{ form.q }} + +
+
+ + {% for object in object_list %} + + +
+
+ {% if object.original.image %} + {% thumbnail object.original.image 300x300 as resized %} + {{ object.title }} + {% endif %} +
+
+

+ {{ object.title }} +

+

+ {{ object.content|truncatechars:150 }} +

+

+ {{ object.url }} +

+
+
+
+ + {% empty %} + {% if request.GET.q %} +
+
+

+ Nichts gefunden! +

+
+
+ {% endif %} + {% endfor %} +
+
+{% endblock %}
diff --git a/postgres_searchindex/urls.py b/postgres_searchindex/urls.py
new file mode 100644
index 0000000..ff72b51
--- /dev/null
+++ b/postgres_searchindex/urls.py
@@ -0,0 +1,7 @@
+from django.urls import path
+
+from .views import SearchView
+
+urlpatterns = [
+    path("", SearchView.as_view(), name="postgres_searchindex_search"),
+]
diff --git a/postgres_searchindex/views.py b/postgres_searchindex/views.py
new file mode 100644
index 0000000..2c27b8d
--- /dev/null
+++ b/postgres_searchindex/views.py
@@ -0,0 +1,52 @@
+from django import forms
+from django.contrib.postgres.search import SearchVector
+from django.views.generic import ListView
+from textblocks.utils import textblock_lazy as _t
+
+from . import conf
+from postgres_searchindex.models import IndexEntry
+
+
+class SearchForm(forms.Form):
+    """Search form with a single text field, ``q``."""
+
+    q = forms.CharField(
+        widget=forms.TextInput(attrs={"placeholder": _t("Suchbegriff")})
+    )
+
+
+class SearchView(ListView):
+    """List the IndexEntry rows matching the submitted search term."""
+
+    model = IndexEntry
+    template_name = "postgres_searchindex/search.html"
+
+    def dispatch(self, request, *args, **kwargs):
+        # bind the form once; get_queryset and the template both use it
+        self.form = SearchForm(self.request.GET)
+        return super().dispatch(request, *args, **kwargs)
+
+    def get_context_data(self, **kwargs):
+        # forward kwargs, matching the signature of the overridden method
+        c = super().get_context_data(**kwargs)
+        c["form"] = self.form
+        return c
+
+    def get_queryset(self):
+        """
+        Return the entries matching ``q`` whose index_key is the request language.
+
+        An empty queryset is returned when the form is not valid.
+        """
+        if self.form.is_valid():
+            q = self.form.cleaned_data["q"]
+            # languages missing from the mapping fall back to "english"
+            config = conf.LANGUAGE_2_PGCONFIG.get(self.request.LANGUAGE_CODE, "english")
+            return IndexEntry.objects.annotate(
+                search=SearchVector(
+                    "content",
+                    "title",
+                    config=config,
+                )
+            ).filter(index_key=self.request.LANGUAGE_CODE, search=q)
+        return IndexEntry.objects.none()
diff --git a/release.txt b/release.txt
new file mode 100644
index 0000000..5716914
--- /dev/null
+++ b/release.txt
@@ -0,0 +1,83 @@
+# dont change any files besides CHANGELOG.txt and __init__.py during procedure.
+
+### V2: no git flow, pushes to the main branch trigger GitHub Actions, which then publish the release on PyPI
+
+# update everything
+git checkout main
+# make sure we're up to date
+git pull ; git pull upstream main
+git checkout develop
+# merge release changes
+git merge main
+
+# version bump in postgres_searchindex/__init__.py
+# last chance for changelog
+git commit -am'x.x.x release'
+git tag x.x.x
+
+# go to main branch
+git checkout main
+# merge
+git merge develop
+
+# push (and wait for build/publish)
+git push --all; git push --tags
+git push upstream --all; git push upstream --tags
+
+git checkout develop
+# add next version and DEV to version
+
+
+### V1: NO git flow, manual publishing
+
+# update everything
+git checkout release
+# make sure we're up to date
+git pull ; git pull upstream release
+git checkout develop
+# merge release changes
+git merge release
+
+# version bump in postgres_searchindex/__init__.py
+# last chance for changelog
+git commit -am'x.x.x release'
+git tag x.x.x
+
+# go to release branch
+git checkout release
+# merge
+git merge develop
+
+# push & build
+git push --all; git push --tags
+git push upstream --all; git push upstream --tags
+python setup.py sdist && python setup.py bdist_wheel --universal
+
+# upload
+twine upload dist/* -r pypi
+
+# back to normal
+git checkout develop
+
+# version bump to x.x.xDEV
+
+
+### V0: git flow, manual publishing
+
+# git flow release start xxxx
+# version bump in postgres_searchindex/__init__.py
+# last chance to update CHANGELOG!
+# git commit -am'version bump / changelog'
+# git flow release finish xxxx
+# git push --all; git push --tags
+# upstream: depends
+# git push upstream --all; git push upstream --tags
+# python setup.py sdist && python setup.py bdist_wheel --universal
+# twine upload dist/* -r pypitest
+# twine upload dist/* -r pypi
+# version bump to ne.xt.version.dev
+
+
+# legacy
+# python setup.py register -r pypitest && twine upload dist/* -r pypitest
+# python setup.py register -r pypi && twine upload dist/* -r pypi
diff --git a/requirements_dev.txt b/requirements_dev.txt
new file mode 100644
index 0000000..6b5e950
--- /dev/null
+++ b/requirements_dev.txt
@@ -0,0 +1,11 @@
+# mainly to have migrations compatible with the oldest supported django version
+django<3
+django-cms<3.8
+
+# test utils, so you can do a quick manage.py test without tox (that is, included as well)
+coverage
+django-coverage
+factory_boy
+mock
+selenium
+tox
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..c5e1ba0
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,3 @@
+[flake8]
+ignore = F999,E501,E128,E124
+exclude = .git,*/migrations/*,*/south_migrations/*,*/static/CACHE/*,.tox,build
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f9daf69
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,63 @@
+# -*- encoding: utf-8 -*-
+"""
+Python setup file for the postgres_searchindex app.
+
+In order to register your app at pypi.python.org, create an account at
+pypi.python.org and login, then register your new app like so:
+
+    python setup.py register
+
+If your name is still free, you can now make your first release but first you
+should check if you are uploading the correct files:
+
+    python setup.py sdist
+
+Inspect the output thoroughly. There shouldn't be any temp files and if your
+app includes staticfiles or templates, make sure that they appear in the list.
+If something is wrong, you need to edit MANIFEST.in and run the command again.
+
+If all looks good, you can make your first release:
+
+    python setup.py sdist upload
+
+For new releases, you need to bump the version number in
+postgres_searchindex/__init__.py and re-run the above command.
+
+For more information on creating source distributions, see
+http://docs.python.org/2/distutils/sourcedist.html
+
+"""
+import os
+from setuptools import setup, find_packages
+import postgres_searchindex as app
+
+
+def read(fname):
+    """Return the contents of the text file ``fname`` next to this file."""
+    path = os.path.join(os.path.dirname(__file__), fname)
+    # close the handle, and decode as UTF-8 regardless of the build locale
+    with open(path, encoding="utf-8") as f:
+        return f.read()
+
+
+install_requires = [
+    'django',
+]
+
+
+setup(
+    name="django-postgres-searchindex",
+    version=app.__version__,
+    description=read('DESCRIPTION'),
+    long_description=read('README.md'),
+    long_description_content_type="text/markdown",
+    license='The MIT License',
+    platforms=['OS Independent'],
+    keywords='django, postgres, search, index',
+    author='Ben Stähli',
+    author_email='bnzk@bnzk.ch',
+    url="https://github.com/bnzk/django-postgres-searchindex",
+    packages=find_packages(),
+    include_package_data=True,
+    install_requires=install_requires,
+)
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..42ba66e
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,48 @@
+
+[tox]
+envlist = py{38,39,310,311}-django{22,32,40,41,42}, py39-django32-coverage, flake8
+
+[gh-actions]
+python =
+    3.7: py37
+    3.8: py38
+    3.9: py39
+    3.10: py310
+    3.11: py311
+
+[testenv:flake8]
+deps =
+    flake8
+    flake8-print
+commands = flake8
+
+[testenv:py39-django32-coverage]
+allowlist_externals = echo
+commands =
+    # https://github.com/nedbat/coveragepy/issues/1272
+    pip install coverage<6
+    coverage erase
+    coverage run ./manage.py test
+    coverage report --include='postgres_searchindex*' --omit='*/tests/*' --omit='*/migrations/*'
+    coverage html --include='postgres_searchindex*' --omit='*/tests/*' --omit='*/migrations/*'
+    echo "opener htmlcov/index.html"
+
+[testenv]
+commands = python manage.py test
+setenv =
+    DJANGO_SETTINGS_MODULE=postgres_searchindex.tests.settings
+    PYTHONPATH={toxinidir}
+deps =
+    django22: Django>=2.2,<2.3
+    django32: Django>=3.1,<3.3
+    django40: Django>=4.0,<4.1
+    django41: Django>=4.1,<4.2
+    django42: Django>=4.2,<4.3
+    coverage
+    factory_boy
+    mock
+
+[testenv:py37-django40]
+allowlist_externals = echo
+deps =
+commands= echo "no django 4.0 on python 3.7!"
diff --git a/translations.sh b/translations.sh
new file mode 100755
index 0000000..9fb42e9
--- /dev/null
+++ b/translations.sh
@@ -0,0 +1,5 @@
+
+cd postgres_searchindex
+django-admin makemessages -l en -l de -l fr
+django-admin compilemessages
+cd ..