diff --git a/boofilsic/settings.py b/boofilsic/settings.py
index 2009f46a..0c5bc493 100644
--- a/boofilsic/settings.py
+++ b/boofilsic/settings.py
@@ -83,6 +83,8 @@
     NEODB_DOWNLOADER_RETRIES=(int, 3),
     # Number of marks required for an item to be included in discover
     NEODB_MIN_MARKS_FOR_DISCOVER=(int, 1),
+    # if True, discover only includes items with a title in NEODB_PREFERRED_LANGUAGES
+    NEODB_FILTER_LANGUAGE_FOR_DISCOVER=(bool, False),
     # Disable cron jobs, * for all
     NEODB_DISABLE_CRON_JOBS=(list, []),
     # federated search peers
@@ -215,6 +217,8 @@
 
 MIN_MARKS_FOR_DISCOVER = env("NEODB_MIN_MARKS_FOR_DISCOVER")
 
+FILTER_LANGUAGE_FOR_DISCOVER = env("NEODB_FILTER_LANGUAGE_FOR_DISCOVER")
+
 MASTODON_ALLOWED_SITES = env("NEODB_LOGIN_MASTODON_WHITELIST")
 
 # Allow user to login via any Mastodon/Pleroma sites
diff --git a/catalog/jobs/discover.py b/catalog/jobs/discover.py
index 0078b94a..8c5b15f0 100644
--- a/catalog/jobs/discover.py
+++ b/catalog/jobs/discover.py
@@ -3,13 +3,14 @@
 
 from django.conf import settings
 from django.core.cache import cache
-from django.db.models import Count, F
+from django.db.models import Count, F, Q
 from django.utils import timezone
 from loguru import logger
 
 from boofilsic.settings import MIN_MARKS_FOR_DISCOVER
 from catalog.models import *
 from common.models import BaseJob, JobManager
+from common.models.lang import PREFERRED_LOCALES
 from journal.models import (
     Collection,
     Comment,
@@ -33,12 +34,21 @@ class DiscoverGenerator(BaseJob):
     interval = timedelta(hours=1)
 
     def get_popular_marked_item_ids(self, category, days, exisiting_ids):
-        item_ids = [
-            m["item_id"]
-            for m in ShelfMember.objects.filter(q_item_in_category(category))
+        qs = (
+            ShelfMember.objects.filter(q_item_in_category(category))
             .filter(created_time__gt=timezone.now() - timedelta(days=days))
             .exclude(item_id__in=exisiting_ids)
-            .values("item_id")
+        )
+        if settings.FILTER_LANGUAGE_FOR_DISCOVER:
+            # OR together one JSON containment test per preferred locale;
+            # an empty Q() is a no-op filter, so no None/emptiness checks are needed
+            q = Q()
+            for loc in PREFERRED_LOCALES:
+                q |= Q(item__metadata__localized_title__contains=[{"lang": loc}])
+            qs = qs.filter(q)
+        item_ids = [
+            m["item_id"]
+            for m in qs.values("item_id")
             .annotate(num=Count("item_id"))
             .filter(num__gte=MIN_MARKS)
             .order_by("-num")[:MAX_ITEMS_PER_PERIOD]
diff --git a/catalog/sites/douban_game.py b/catalog/sites/douban_game.py
index 66c4b054..b15f65f0 100644
--- a/catalog/sites/douban_game.py
+++ b/catalog/sites/douban_game.py
@@ -5,6 +5,7 @@
 from catalog.common import *
 from catalog.models import *
 from common.models.lang import detect_language
+from common.models.misc import uniq
 
 from .douban import DoubanDownloader
 
@@ -89,7 +90,7 @@ def scrape(self):
         )
         img_url = img_url_elem[0].strip() if img_url_elem else None
 
-        titles = set([title] + other_title + ([orig_title] if orig_title else []))
+        titles = uniq([title] + other_title + ([orig_title] if orig_title else []))
         localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
         localized_desc = [{"lang": detect_language(brief), "text": brief}]
 
diff --git a/common/models/lang.py b/common/models/lang.py
index f797fb64..b40a5b1e 100644
--- a/common/models/lang.py
+++ b/common/models/lang.py
@@ -241,7 +241,21 @@
     "hak": _("Hakka Chinese"),
 }
 
-ZH_LOCALE_SUBTAGS_PRIO.keys()
+
+def get_preferred_locales():
+    """Return PREFERRED_LANGUAGES with "zh" expanded into its locale subtags."""
+    locales = []
+    for k in PREFERRED_LANGUAGES:
+        if k == "zh":
+            # ZH_LOCALE_SUBTAGS_PRIO keys first, then the ZH_LOCALE_SUBTAGS keys
+            locales.extend(ZH_LOCALE_SUBTAGS_PRIO)
+            locales.extend(ZH_LOCALE_SUBTAGS)
+        else:
+            locales.append(k)
+    return locales
+
+
+PREFERRED_LOCALES = get_preferred_locales()
 
 
 def get_base_lang_list():
diff --git a/compose.yml b/compose.yml
index 6d9966b5..93a11423 100644
--- a/compose.yml
+++ b/compose.yml
@@ -25,7 +25,7 @@ x-shared:
     NEODB_SITE_LINKS:
     NEODB_SITE_DESCRIPTION:
     NEODB_ALTERNATIVE_DOMAINS:
-    NEODB_LANGUAGE:
+    NEODB_PREFERRED_LANGUAGES:
     NEODB_ADMIN_USERNAMES:
     NEODB_INVITE_ONLY:
     NEODB_LOGIN_MASTODON_WHITELIST:
@@ -34,6 +34,7 @@ x-shared:
     NEODB_DISABLE_CRON_JOBS:
     NEODB_SEARCH_PEERS:
     NEODB_MIN_MARKS_FOR_DISCOVER:
+    NEODB_FILTER_LANGUAGE_FOR_DISCOVER:
     NEODB_SENTRY_DSN:
     TAKAHE_SENTRY_DSN:
     NEODB_SENTRY_SAMPLE_RATE: