From 560032985d62e2e8b1534e9967a02e37f5ad7217 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 30 Nov 2024 13:23:21 -0500 Subject: [PATCH] generalize some language utils and add pt-BR --- catalog/book/models.py | 4 +- catalog/common/models.py | 13 +-- catalog/common/tests.py | 14 ++- catalog/forms.py | 6 +- catalog/jobs/discover.py | 5 +- catalog/sites/apple_music.py | 8 +- catalog/sites/steam.py | 4 +- catalog/sites/tmdb.py | 4 +- common/models/__init__.py | 7 +- common/models/lang.py | 170 ++++++++++++++++++++++------------- users/middlewares.py | 5 +- 11 files changed, 144 insertions(+), 96 deletions(-) diff --git a/catalog/book/models.py b/catalog/book/models.py index 1b8fcf28..323c5b5e 100644 --- a/catalog/book/models.py +++ b/catalog/book/models.py @@ -40,11 +40,9 @@ from catalog.common.models import ( LIST_OF_ONE_PLUS_STR_SCHEMA, LOCALE_CHOICES_JSONFORM, - SCRIPT_CHOICES, LanguageListField, ) -from common.models.lang import get_current_locales -from common.models.misc import uniq +from common.models import uniq from .utils import * diff --git a/catalog/common/models.py b/catalog/common/models.py index b1435531..2c290460 100644 --- a/catalog/common/models.py +++ b/catalog/common/models.py @@ -20,14 +20,7 @@ from polymorphic.models import PolymorphicModel from catalog.common import jsondata -from common.models import ( - LANGUAGE_CHOICES, - LOCALE_CHOICES, - PREFERRED_LOCALES, - SCRIPT_CHOICES, -) -from common.models.lang import get_current_locales -from common.models.misc import uniq +from common.models import LANGUAGE_CHOICES, LOCALE_CHOICES, get_current_locales, uniq from .utils import item_cover_path, resource_cover_path @@ -614,7 +607,7 @@ def class_name(self) -> str: def get_localized_title(self) -> str | None: if self.localized_title: - locales = get_current_locales() + PREFERRED_LOCALES + locales = get_current_locales() for loc in locales: v = next( filter(lambda t: t["lang"] == loc, self.localized_title), {} @@ -624,7 +617,7 @@ def 
get_localized_title(self) -> str | None: def get_localized_description(self) -> str | None: if self.localized_description: - locales = get_current_locales() + PREFERRED_LOCALES + locales = get_current_locales() for loc in locales: v = next( filter(lambda t: t["lang"] == loc, self.localized_description), {} diff --git a/catalog/common/tests.py b/catalog/common/tests.py index b4395dc8..0df16a31 100644 --- a/catalog/common/tests.py +++ b/catalog/common/tests.py @@ -1,6 +1,14 @@ from django.test import TestCase -from common.models import detect_language +from common.models import ( + LANGUAGE_CHOICES, + LOCALE_CHOICES, + SCRIPT_CHOICES, + SITE_DEFAULT_LANGUAGE, + SITE_PREFERRED_LANGUAGES, + SITE_PREFERRED_LOCALES, + detect_language, +) class CommonTestCase(TestCase): @@ -13,3 +21,7 @@ def test_detect_lang(self): self.assertEqual(lang, "zh-cn") lang = detect_language("巫师3:狂猎 The Witcher 3: Wild Hunt") self.assertEqual(lang, "zh-cn") + + def test_lang_list(self): + self.assertGreaterEqual(len(SITE_PREFERRED_LANGUAGES), 1) + self.assertGreaterEqual(len(SITE_PREFERRED_LOCALES), 1) diff --git a/catalog/forms.py b/catalog/forms.py index a12d2ded..2cb49876 100644 --- a/catalog/forms.py +++ b/catalog/forms.py @@ -3,7 +3,7 @@ from catalog.models import * from common.forms import PreviewImageInput -from common.models import DEFAULT_CATALOG_LANGUAGE, detect_language, uniq +from common.models import SITE_DEFAULT_LANGUAGE, detect_language, uniq CatalogForms = {} @@ -77,9 +77,7 @@ def migrate_initial(self): for t in self.instance.other_title: titles.append({"lang": detect_language(t), "text": t}) if not titles: - titles = [ - {"lang": DEFAULT_CATALOG_LANGUAGE, "text": ""} - ] + titles = [{"lang": SITE_DEFAULT_LANGUAGE, "text": ""}] self.initial["localized_title"] = uniq(titles) # type:ignore if ( "localized_description" in self.Meta.fields diff --git a/catalog/jobs/discover.py b/catalog/jobs/discover.py index 53216b75..dc00bb07 100644 --- a/catalog/jobs/discover.py +++ 
b/catalog/jobs/discover.py @@ -9,8 +9,7 @@ from boofilsic.settings import MIN_MARKS_FOR_DISCOVER from catalog.models import * -from common.models import BaseJob, JobManager -from common.models.lang import PREFERRED_LOCALES +from common.models import SITE_PREFERRED_LOCALES, BaseJob, JobManager from journal.models import ( Collection, Comment, @@ -43,7 +42,7 @@ def get_popular_marked_item_ids(self, category, days, exisiting_ids): qs = qs.filter(local=True) if settings.DISCOVER_FILTER_LANGUAGE: q = None - for loc in PREFERRED_LOCALES: + for loc in SITE_PREFERRED_LOCALES: if q: q = q | Q(item__metadata__localized_title__contains=[{"lang": loc}]) else: diff --git a/catalog/sites/apple_music.py b/catalog/sites/apple_music.py index 4d11a3ca..3f8ca6bd 100644 --- a/catalog/sites/apple_music.py +++ b/catalog/sites/apple_music.py @@ -17,8 +17,8 @@ from catalog.common import * from catalog.models import * from common.models.lang import ( - DEFAULT_CATALOG_LANGUAGE, - PREFERRED_LANGUAGES, + SITE_DEFAULT_LANGUAGE, + SITE_PREFERRED_LANGUAGES, detect_language, ) from common.models.misc import uniq @@ -56,7 +56,7 @@ def id_to_url(cls, id_value): def get_locales(self): locales = {} - for lang in PREFERRED_LANGUAGES: + for lang in SITE_PREFERRED_LANGUAGES: match lang: case "zh": locales.update({"zh": ["cn", "tw", "hk", "sg"]}) @@ -98,7 +98,7 @@ def scrape(self): localized_title.append({"lang": tl, "text": title}) if brief: localized_desc.append({"lang": tl, "text": brief}) - if lang == DEFAULT_CATALOG_LANGUAGE or not matched_content: + if lang == SITE_DEFAULT_LANGUAGE or not matched_content: matched_content = content break except Exception: diff --git a/catalog/sites/steam.py b/catalog/sites/steam.py index 22bd7351..11867315 100644 --- a/catalog/sites/steam.py +++ b/catalog/sites/steam.py @@ -6,7 +6,7 @@ from catalog.common import * from catalog.models import * -from common.models.lang import PREFERRED_LANGUAGES +from common.models.lang import SITE_PREFERRED_LANGUAGES from 
journal.models.renderers import html_to_text from .igdb import search_igdb_by_3p_url @@ -16,7 +16,7 @@ def _get_preferred_languages(): langs = {} - for la in PREFERRED_LANGUAGES: + for la in SITE_PREFERRED_LANGUAGES: if la == "zh": langs.update({"zh-cn": "zh-CN", "zh-tw": "zh-TW"}) # zh-HK data is not good diff --git a/catalog/sites/tmdb.py b/catalog/sites/tmdb.py index b17b9ccf..760a8155 100644 --- a/catalog/sites/tmdb.py +++ b/catalog/sites/tmdb.py @@ -18,7 +18,7 @@ from catalog.common import * from catalog.movie.models import * from catalog.tv.models import * -from common.models.lang import PREFERRED_LANGUAGES +from common.models.lang import SITE_PREFERRED_LANGUAGES from .douban import * @@ -37,7 +37,7 @@ def _get_language_code(): def _get_preferred_languages(): langs = {} - for lang in PREFERRED_LANGUAGES: + for lang in SITE_PREFERRED_LANGUAGES: if lang == "zh": langs.update({"zh-cn": "zh-CN", "zh-tw": "zh-TW", "zh-hk": "zh-HK"}) else: diff --git a/common/models/__init__.py b/common/models/__init__.py index ee8c1fe0..5281699a 100644 --- a/common/models/__init__.py +++ b/common/models/__init__.py @@ -1,11 +1,12 @@ from .cron import BaseJob, JobManager from .lang import ( - DEFAULT_CATALOG_LANGUAGE, LANGUAGE_CHOICES, LOCALE_CHOICES, - PREFERRED_LANGUAGES, - PREFERRED_LOCALES, SCRIPT_CHOICES, + SITE_DEFAULT_LANGUAGE, + SITE_PREFERRED_LANGUAGES, + SITE_PREFERRED_LOCALES, detect_language, + get_current_locales, ) from .misc import uniq diff --git a/common/models/lang.py b/common/models/lang.py index e03e5dec..a15e1798 100644 --- a/common/models/lang.py +++ b/common/models/lang.py @@ -1,6 +1,23 @@ """ - language support utilities + Language support utilities + get site wide preferences: + SITE_DEFAULT_LANGUAGE + SITE_PREFERRED_LANGUAGES + SITE_PREFERRED_LOCALES + + get available choices based on site wide preferences: + LANGUAGE_CHOICES + LOCALE_CHOICES + SCRIPT_CHOICES + + based on user preferences: + get_current_locales() + + detect language based on text: + 
detect_language() + + references: https://en.wikipedia.org/wiki/IETF_language_tag """ @@ -13,9 +30,11 @@ from langdetect import detect from loguru import logger -PREFERRED_LANGUAGES: list[str] = settings.PREFERRED_LANGUAGES - -DEFAULT_CATALOG_LANGUAGE = PREFERRED_LANGUAGES[0] if PREFERRED_LANGUAGES else "en" +FALLBACK_LANGUAGE = "en" +SITE_PREFERRED_LANGUAGES: list[str] = settings.PREFERRED_LANGUAGES or [ + FALLBACK_LANGUAGE +] +SITE_DEFAULT_LANGUAGE: str = SITE_PREFERRED_LANGUAGES[0] ISO_639_1 = { "aa": _("Afar"), @@ -203,9 +222,8 @@ "ch": _("Chamorro"), "be": _("Belarusian"), "yo": _("Yoruba"), - "x": _("Unknown or Other"), } -TOP_USED_LANG = [ +TOP_USED_LANGUAGES = [ "en", "de", "es", @@ -221,46 +239,13 @@ "ar", "bn", ] -ZH_LOCALE_SUBTAGS_PRIO = { - "zh-cn": _("Simplified Chinese (Mainland)"), - "zh-tw": _("Traditional Chinese (Taiwan)"), - "zh-hk": _("Traditional Chinese (Hongkong)"), -} -ZH_LOCALE_SUBTAGS = { - "zh-sg": _("Simplified Chinese (Singapore)"), - "zh-my": _("Simplified Chinese (Malaysia)"), - "zh-mo": _("Traditional Chinese (Macau)"), -} -ZH_LANGUAGE_SUBTAGS_PRIO = { - "cmn": _("Mandarin Chinese"), - "yue": _("Yue Chinese"), -} -ZH_LANGUAGE_SUBTAGS = { - "nan": _("Min Nan Chinese"), - "wuu": _("Wu Chinese"), - "hak": _("Hakka Chinese"), -} +_UNKNOWN_LANGUAGE = ("x", _("Unknown or Other")) RE_LOCALIZED_SEASON_NUMBERS = re.compile( r"〇|一|二|三|四|五|六|七|八|九|零|十|\d|\s|\.|Season|Serie|S|#|第|季", flags=re.IGNORECASE, ) -def get_preferred_locales(): - locales = [] - for k in PREFERRED_LANGUAGES: - if k == "zh": - locales += list(ZH_LOCALE_SUBTAGS_PRIO.keys()) + list( - ZH_LOCALE_SUBTAGS.keys() - ) - else: - locales.append(k) - return locales - - -PREFERRED_LOCALES = get_preferred_locales() - - def localize_number(i: int) -> str: lang = get_language().lower() if lang == "zh" or lang.startswith("zh-"): @@ -278,9 +263,9 @@ def localize_number(i: int) -> str: return str(i) -def get_base_lang_list(): +def _get_base_language_list() -> dict[str, str]: langs = {} - 
for k in PREFERRED_LANGUAGES + TOP_USED_LANG: + for k in SITE_PREFERRED_LANGUAGES + TOP_USED_LANGUAGES: if k not in langs: if k in ISO_639_1: langs[k] = ISO_639_1[k] @@ -292,50 +277,111 @@ def get_base_lang_list(): return langs -BASE_LANG_LIST: dict[str, Any] = get_base_lang_list() +_BASE_LANGUAGE_LIST: dict[str, Any] = _get_base_language_list() + + +_LOCALE_SUBTAGS_PRIO = { + "zh": { + "zh-cn": _("Simplified Chinese (Mainland)"), + "zh-tw": _("Traditional Chinese (Taiwan)"), + "zh-hk": _("Traditional Chinese (Hongkong)"), + }, + "pt": { + "pt": _("Portuguese"), + }, +} +_LOCALE_SUBTAGS_ADD = { + "pt": { + "pt-br": _("Portuguese (Brazil)"), + }, + "zh": { + "zh-sg": _("Simplified Chinese (Singapore)"), + "zh-my": _("Simplified Chinese (Malaysia)"), + "zh-mo": _("Traditional Chinese (Macau)"), + }, +} +_LOCALE_SUBTAGS_FALLBACK = ["zh"] +_LANGUAGE_SUBTAGS_PRIO = { + "zh": { + "cmn": _("Mandarin Chinese"), + "yue": _("Yue Chinese"), + } +} +_LANGUAGE_SUBTAGS_ADD = { + "nan": _("Min Nan Chinese"), + "wuu": _("Wu Chinese"), + "hak": _("Hakka Chinese"), +} + + +def get_preferred_locales() -> list[str]: + locales = [] + for k in SITE_PREFERRED_LANGUAGES: + if k in _LOCALE_SUBTAGS_PRIO: + locales += list(_LOCALE_SUBTAGS_PRIO[k].keys()) + list( + _LOCALE_SUBTAGS_ADD[k].keys() + ) + else: + locales.append(k) + return locales + + +SITE_PREFERRED_LOCALES = get_preferred_locales() -def get_locale_choices(): +def _get_locale_choices() -> list[tuple[str, str]]: choices = [] - for k, v in BASE_LANG_LIST.items(): - if k == "zh": - choices += ZH_LOCALE_SUBTAGS_PRIO.items() + for k, v in _BASE_LANGUAGE_LIST.items(): + if k in _LOCALE_SUBTAGS_PRIO: + choices += _LOCALE_SUBTAGS_PRIO[k].items() else: choices.append((k, v)) - choices += ZH_LOCALE_SUBTAGS.items() - choices.append(("zh", ISO_639_1["zh"])) + for v in _LOCALE_SUBTAGS_ADD.values(): + choices += v.items() + for k in _LOCALE_SUBTAGS_PRIO.keys(): + p = (k, ISO_639_1[k]) + if p not in choices: + choices.append(p) + 
choices.append(_UNKNOWN_LANGUAGE) return choices -def get_script_choices(): - return list(BASE_LANG_LIST.items()) +def _get_script_choices() -> list[tuple[str, str]]: + return list(_BASE_LANGUAGE_LIST.items()) + [_UNKNOWN_LANGUAGE] -def get_language_choices(): +def _get_language_choices() -> list[tuple[str, str]]: choices = [] - for k, v in BASE_LANG_LIST.items(): - if k == "zh": - choices += ZH_LANGUAGE_SUBTAGS_PRIO.items() + for k, v in _BASE_LANGUAGE_LIST.items(): + if k in _LANGUAGE_SUBTAGS_PRIO: + choices += _LANGUAGE_SUBTAGS_PRIO[k].items() else: choices.append((k, v)) - choices += ZH_LANGUAGE_SUBTAGS.items() + choices += _LANGUAGE_SUBTAGS_ADD.items() + choices.append(_UNKNOWN_LANGUAGE) return choices -LOCALE_CHOICES: list[tuple[str, Any]] = get_locale_choices() -SCRIPT_CHOICES: list[tuple[str, Any]] = get_script_choices() -LANGUAGE_CHOICES: list[tuple[str, Any]] = get_language_choices() +LOCALE_CHOICES: list[tuple[str, str]] = _get_locale_choices() +SCRIPT_CHOICES: list[tuple[str, str]] = _get_script_choices() +LANGUAGE_CHOICES: list[tuple[str, str]] = _get_language_choices() def get_current_locales() -> list[str]: lang = get_language().lower() if lang == "zh-hans": - return ["zh-cn", "zh-sg", "zh-my", "zh-hk", "zh-tw", "zh-mo", "en"] + locales = ["zh-cn", "zh-sg", "zh-my", "zh-hk", "zh-tw", "zh-mo", "en"] elif lang == "zh-hant": - return ["zh-tw", "zh-hk", "zh-mo", "zh-cn", "zh-sg", "zh-my", "en"] + locales = ["zh-tw", "zh-hk", "zh-mo", "zh-cn", "zh-sg", "zh-my", "en"] else: lng = lang.split("-") - return ["en"] if lng[0] == "en" else [lng[0], "en"] + locales = ["en"] if lng[0] == "en" else [lng[0], "en"] + for locale in SITE_PREFERRED_LOCALES: + if locale not in locales: + locales.append(locale) + if FALLBACK_LANGUAGE not in locales: + locales.append(FALLBACK_LANGUAGE) + return locales _eng = re.compile(r"^[A-Z-a-z0-9]+$") diff --git a/users/middlewares.py b/users/middlewares.py index ce9a1735..1bd2572a 100644 --- a/users/middlewares.py +++ 
b/users/middlewares.py @@ -14,10 +14,11 @@ def activate_language_for_user(user, request=None): user_language = settings.LANGUAGE_CODE # if user_language in dict(settings.LANGUAGES).keys(): translation.activate(user_language) - return translation.get_language() + if request: + request.LANGUAGE_CODE = translation.get_language() class LanguageMiddleware(LocaleMiddleware): def process_request(self, request): user = getattr(request, "user", None) - request.LANGUAGE_CODE = activate_language_for_user(user, request) + activate_language_for_user(user, request)