From db5ac01b55f5ed35b4ae44ff0b10648fef82d91e Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 05:44:13 -0700 Subject: [PATCH 01/11] chore: :arrow_up: update python version to 3.12 --- Dockerfile | 2 +- pyproject.toml | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 654c28f..d6a913b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.11 +FROM python:3.12 SHELL ["/bin/bash", "-c"] ARG ENV diff --git a/pyproject.toml b/pyproject.toml index 73e72ae..b851f19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "unicode-api" -version = "15.0.0" +version = "0.1.0" description = "API that provides access to detailed information for all characters, blocks and planes in the Unicode Standard (Built using FastAPI)." readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.12" license = {file = "LICENSE"} authors = [ {name = "Aaron Luna", email = "contact@aaronluna.dev"} @@ -16,7 +16,7 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 120 -target-version = ['py311'] +target-version = ['py312'] include = '\.pyi?$' exclude = ''' /( @@ -114,4 +114,7 @@ exclude_lines = [ ] [tool.coverage.html] -directory = "coverage_html" \ No newline at end of file +directory = "coverage_html" + +[tool.mypy] +python_version = "3.12" \ No newline at end of file From 816981335369a5f760acb8631cffc01010892bdd Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 05:55:42 -0700 Subject: [PATCH 02/11] refactor: :recycle: move app.data.cache module to app.core.cache and update import statements --- .../dependencies/block_name_resolver.py | 2 +- app/api/api_v1/dependencies/filter_params.py | 2 +- app/api/api_v1/dependencies/list_params.py | 2 +- .../dependencies/plane_abbrev_resolver.py | 2 +- app/api/api_v1/endpoints/blocks.py | 2 +- app/api/api_v1/endpoints/characters.py | 2 +- app/api/api_v1/endpoints/planes.py | 2 +- app/{data => core}/cache.py | 25 ++++--------------- app/db/character_props.py | 2 +- app/db/procs/get_char_details.py | 2 +- app/docs/api_docs/content/plane.py | 2 +- app/docs/dependencies/custom_parameters.py | 2 +- app/docs/dependencies/filter_tables.py | 2 +- app/main.py | 2 +- app/schemas/enums/block_name.py | 2 +- .../test_cached_data/test_cached_data.py | 2 +- .../test_get_character_details.py | 2 +- 17 files changed, 21 insertions(+), 36 deletions(-) rename app/{data => core}/cache.py (94%) diff --git a/app/api/api_v1/dependencies/block_name_resolver.py b/app/api/api_v1/dependencies/block_name_resolver.py index 19383dd..df65efd 100644 --- a/app/api/api_v1/dependencies/block_name_resolver.py +++ b/app/api/api_v1/dependencies/block_name_resolver.py @@ -3,7 +3,7 @@ from fastapi import HTTPException, Path, Query, status import app.db.models as db -from app.data.cache import cached_data +from app.core.cache import cached_data from app.docs.dependencies.custom_parameters import BLOCK_NAME_DESCRIPTION, CHAR_SEARCH_BLOCK_NAME_DESCRIPTION from app.schemas.enums.block_name import UnicodeBlockName diff --git a/app/api/api_v1/dependencies/filter_params.py b/app/api/api_v1/dependencies/filter_params.py index 851f515..b6a4057 100644 --- a/app/api/api_v1/dependencies/filter_params.py +++ b/app/api/api_v1/dependencies/filter_params.py @@ -3,7 +3,7 @@ from fastapi import HTTPException, Query, status from app.api.api_v1.dependencies.filter_param_matcher import filter_param_matcher -from app.data.cache import cached_data +from app.core.cache import cached_data from app.docs.dependencies.custom_parameters import ( CHAR_NAME_FILTER_DESCRIPTION, CJK_DEFINITION_FILTER_DESCRIPTION, diff --git a/app/api/api_v1/dependencies/list_params.py b/app/api/api_v1/dependencies/list_params.py index 2878e85..b5e2e14 100644 --- a/app/api/api_v1/dependencies/list_params.py +++ b/app/api/api_v1/dependencies/list_params.py @@ -3,7 +3,7 @@ from fastapi import HTTPException, Query, status from app.api.api_v1.dependencies.util import get_decimal_number_from_hex_codepoint -from app.data.cache import cached_data +from app.core.cache import cached_data from app.docs.dependencies.custom_parameters import ( ENDING_BEFORE_BLOCK_ID_DESCRIPTION, ENDING_BEFORE_CODEPOINT_DESCRIPTION, diff --git a/app/api/api_v1/dependencies/plane_abbrev_resolver.py b/app/api/api_v1/dependencies/plane_abbrev_resolver.py index 77d501f..712e9a5 100644 --- a/app/api/api_v1/dependencies/plane_abbrev_resolver.py +++ b/app/api/api_v1/dependencies/plane_abbrev_resolver.py @@ -2,7 +2,7 @@ from fastapi import HTTPException, Query, status -from app.data.cache import cached_data +from app.core.cache import cached_data from app.docs.dependencies.custom_parameters import PLANE_NAME_DESCRIPTION diff --git a/app/api/api_v1/endpoints/blocks.py b/app/api/api_v1/endpoints/blocks.py index 830a10b..6cd0c7c 100644 --- a/app/api/api_v1/endpoints/blocks.py +++ b/app/api/api_v1/endpoints/blocks.py @@ -11,7 +11,7 @@ ) from app.api.api_v1.pagination import paginate_search_results from app.config.api_settings import get_settings -from app.data.cache import cached_data +from app.core.cache import cached_data router = APIRouter() diff --git a/app/api/api_v1/endpoints/characters.py b/app/api/api_v1/endpoints/characters.py index 05d40af..9515d6d 100644 --- a/app/api/api_v1/endpoints/characters.py +++ b/app/api/api_v1/endpoints/characters.py @@ -12,7 +12,7 @@ from app.api.api_v1.dependencies.filter_param_matcher import filter_param_matcher from app.api.api_v1.endpoints.util import get_character_details from app.api.api_v1.pagination import paginate_search_results -from app.data.cache import cached_data +from app.core.cache import cached_data from app.data.encoding import get_codepoint_string from app.db.session import DBSession, get_session from app.docs.dependencies.custom_parameters import ( diff --git a/app/api/api_v1/endpoints/planes.py b/app/api/api_v1/endpoints/planes.py index 8d322f3..5b8e8a6 100644 --- a/app/api/api_v1/endpoints/planes.py +++ b/app/api/api_v1/endpoints/planes.py @@ -4,7 +4,7 @@ import app.db.models as db from app.config.api_settings import get_settings -from app.data.cache import cached_data +from app.core.cache import cached_data router = APIRouter() diff --git a/app/data/cache.py b/app/core/cache.py similarity index 94% rename from app/data/cache.py rename to app/core/cache.py index 0d3e964..df9a7be 100644 --- a/app/data/cache.py +++ b/app/core/cache.py @@ -5,7 +5,7 @@ import app.db.models as db from app.config.api_settings import get_settings -from app.data.constants import ( +from app.constants import ( ALL_CONTROL_CHARACTERS, ALL_UNICODE_CODEPOINTS, ASCII_HEX, @@ -163,16 +163,6 @@ def all_surrogate_codepoints(self) -> set[int]: def all_private_use_codepoints(self) -> set[int]: return self.get_all_codepoints_in_block_id_list(self.private_use_block_ids) - @property - def all_assigned_codepoints(self) -> set[int]: - return set( - list(self.all_non_unihan_codepoints) - + list(self.all_cjk_codepoints) - + list(self.all_tangut_codepoints) - + list(self.all_surrogate_codepoints) - + list(self.all_private_use_codepoints) - ) - @property def official_number_of_unicode_characters(self) -> int: # The "official" number of characters listed for each version of Unicode is the total number @@ -242,9 +232,6 @@ def get_unicode_plane_containing_block_id(self, block_id: int) -> db.UnicodePlan def codepoint_is_in_unicode_space(self, codepoint: int) -> bool: return codepoint in self.all_codepoints_in_unicode_space - def codepoint_is_assigned(self, codepoint: int) -> bool: - return codepoint in self.all_assigned_codepoints - def codepoint_is_noncharacter(self, codepoint: int) -> bool: return codepoint in self.all_noncharacter_codepoints @@ -335,13 +322,11 @@ def get_mapped_codepoint_from_hex(self, codepoint_hex: str) -> str: # pragma: n return self.get_mapped_codepoint_from_int(int(codepoint_hex, 16)) def get_mapped_codepoint_from_int(self, codepoint_dec: int) -> str: # pragma: no cover - if codepoint_dec not in ALL_UNICODE_CODEPOINTS: + if not codepoint_dec: + return "" + if not self.codepoint_is_in_unicode_space(codepoint_dec): return f"Invalid Codepoint ({codepoint_dec} is not within the Unicode codespace)" - return ( - f"{chr(codepoint_dec)} (U+{codepoint_dec:04X} {cached_data.get_character_name(codepoint_dec)})" - if codepoint_dec - else "" - ) + return f"{chr(codepoint_dec)} (U+{codepoint_dec:04X} {cached_data.get_character_name(codepoint_dec)})" def get_all_codepoints_in_block_id_list(self, block_id_list: list[int]) -> set[int]: blocks = [self.get_unicode_block_by_id(block_id) for block_id in block_id_list] diff --git a/app/db/character_props.py b/app/db/character_props.py index 722c587..4e55afd 100644 --- a/app/db/character_props.py +++ b/app/db/character_props.py @@ -9,7 +9,7 @@ DEFAULT_VO_U_BLOCK_NAMES, DEFAULT_VO_U_PLANE_NUMBERS, ) -from app.data.encoding import ( +from app.core.cache import cached_data get_codepoint_string, get_html_entities, get_uri_encoded_value, diff --git a/app/db/procs/get_char_details.py b/app/db/procs/get_char_details.py index 340eda7..a290237 100644 --- a/app/db/procs/get_char_details.py +++ b/app/db/procs/get_char_details.py @@ -6,7 +6,7 @@ from sqlmodel import column, select import app.db.models as db -from app.data.cache import cached_data +from app.core.cache import cached_data from app.db.character_props import PROPERTY_GROUPS from app.schemas.enums import CharacterFilterFlags, CharPropertyGroup diff --git a/app/docs/api_docs/content/plane.py b/app/docs/api_docs/content/plane.py index ef9a8fc..fe05a67 100644 --- a/app/docs/api_docs/content/plane.py +++ b/app/docs/api_docs/content/plane.py @@ -1,5 +1,5 @@ # flake8: noqa -from app.data.cache import cached_data +from app.core.cache import cached_data PLANE_ENDPOINTS = """
diff --git a/app/docs/dependencies/custom_parameters.py b/app/docs/dependencies/custom_parameters.py index 6d6cf16..5eb1bff 100644 --- a/app/docs/dependencies/custom_parameters.py +++ b/app/docs/dependencies/custom_parameters.py @@ -1,5 +1,5 @@ from app.config.api_settings import get_settings -from app.data.cache import cached_data +from app.core.cache import cached_data from app.data.encoding import get_uri_encoded_value from app.docs.dependencies import ( BIDI_CLASS_VALUES_TABLE, diff --git a/app/docs/dependencies/filter_tables.py b/app/docs/dependencies/filter_tables.py index 132c60a..0c62950 100644 --- a/app/docs/dependencies/filter_tables.py +++ b/app/docs/dependencies/filter_tables.py @@ -2,7 +2,7 @@ from sqlmodel import Session, distinct, select import app.db.models as db -from app.data.cache import cached_data +from app.core.cache import cached_data from app.db.engine import engine from app.docs.util import slugify from app.schemas.enums import ( diff --git a/app/main.py b/app/main.py index ab2fc67..089530c 100644 --- a/app/main.py +++ b/app/main.py @@ -12,10 +12,10 @@ from app.api.api_v1.api import router from app.config.api_settings import UnicodeApiSettings, get_settings +from app.core.cache import cached_data from app.core.logging import LOGGING_CONFIG from app.core.rate_limit import rate_limit from app.core.redis_client import redis -from app.data.cache import cached_data from app.docs.api_docs.swagger_ui import get_api_docs_for_swagger_ui, get_swagger_ui_html APP_FOLDER = Path(__file__).parent diff --git a/app/schemas/enums/block_name.py b/app/schemas/enums/block_name.py index bd21e73..7b0c70f 100644 --- a/app/schemas/enums/block_name.py +++ b/app/schemas/enums/block_name.py @@ -1,7 +1,7 @@ from enum import Enum import app.db.models as db -from app.data.cache import cached_data +from app.core.cache import cached_data from app.schemas.util import normalize_string_lm3 UnicodeBlockName = Enum( diff --git a/app/tests/test_cached_data/test_cached_data.py b/app/tests/test_cached_data/test_cached_data.py index 77ddcee..078ba7c 100644 --- a/app/tests/test_cached_data/test_cached_data.py +++ b/app/tests/test_cached_data/test_cached_data.py @@ -1,4 +1,4 @@ -from app.data.cache import cached_data +from app.core.cache import cached_data TOTAL_CHARACTERS_IN_UNICODE_V15_0 = 149186 diff --git a/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py b/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py index 4b47ce5..654aa92 100644 --- a/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py +++ b/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py @@ -3,7 +3,7 @@ import pytest -from app.data.cache import cached_data +from app.core.cache import cached_data from app.data.encoding import get_uri_encoded_value from app.db.character_props import PROPERTY_GROUPS from app.db.procs.get_char_details import get_prop_groups From 2eed229a5bfbb0a4927b5904a4336cbd853d3f47 Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:06:18 -0700 Subject: [PATCH 03/11] refactor: :recycle: move app.data.constants module to app.constants and update import statements --- app/config/api_settings.py | 2 +- app/{data => }/constants.py | 10 +++++++++- app/core/util.py | 2 +- app/data/scripts/bootstrap_unicode_data.py | 2 +- app/data/scripts/parse_xml_unicode_db.py | 1 + app/db/character_props.py | 5 ++--- app/schemas/enums/char_filter_flags.py | 2 +- app/schemas/enums/unicode_age.py | 2 +- 8 files changed, 17 insertions(+), 9 deletions(-) rename app/{data => }/constants.py (98%) diff --git a/app/config/api_settings.py b/app/config/api_settings.py index f91c4ba..9a02a29 100644 --- a/app/config/api_settings.py +++ b/app/config/api_settings.py @@ -8,7 +8,7 @@ import app.db.models as db from app.config.dotenv_file import read_dotenv_file -from app.data.constants import UNICODE_PLANES_DEFAULT, UNICODE_VERSION_RELEASE_DATES +from app.constants import UNICODE_PLANES_DEFAULT, UNICODE_VERSION_RELEASE_DATES class ApiSettingsDict(TypedDict): diff --git a/app/data/constants.py b/app/constants.py similarity index 98% rename from app/data/constants.py rename to app/constants.py index fdc16dc..a4e1cab 100644 --- a/app/data/constants.py +++ b/app/constants.py @@ -5,7 +5,15 @@ MAX_CODEPOINT = 1114111 ALL_UNICODE_CODEPOINTS = range(MAX_CODEPOINT + 1) ASCII_HEX = "0123456789ABCDEFabcdef" -CODEPOINT_WITH_PREFIX_REGEX = re.compile(r"(?:U\+([A-Fa-f0-9]{4,6}))") + +DATE_MONTH_NAME = "%b %d, %Y" + +CP_PREFIX_1_REGEX = re.compile(r"(?:U\+([A-Fa-f0-9]{4,6}))") +CP_PREFIX_1_REGEX_STRICT = re.compile(r"^U\+([A-Fa-f0-9]{4,6})$") +CP_PREFIX_2_REGEX_STRICT = re.compile(r"^0x([A-Fa-f0-9]{2,6})$") +CP_NO_PREFIX_REGEX_STRICT = re.compile(r"^([A-Fa-f0-9]{2,6})$") +CP_NEED_LEADING_ZEROS_REGEX = re.compile(r"^U\+([A-Fa-f0-9]{1,3})$") +CP_OUT_OF_RANGE_REGEX = re.compile(r"^(?:U\+)([A-Fa-f0-9]+)|(?:0x)?([A-Fa-f0-9]{7,})$") CharacterFlag = namedtuple("CharacterFlag", ["name", "alias", "db_column"]) diff --git a/app/core/util.py b/app/core/util.py index a1588fb..d3aa301 100644 --- a/app/core/util.py +++ b/app/core/util.py @@ -1,7 +1,7 @@ import time from datetime import datetime, timedelta, timezone, tzinfo -from app.data.constants import UNICODE_VERSION_RELEASE_DATES +from app.constants import DATE_MONTH_NAME, UNICODE_VERSION_RELEASE_DATES DATE_MONTH_NAME = "%b %d, %Y" diff --git a/app/data/scripts/bootstrap_unicode_data.py b/app/data/scripts/bootstrap_unicode_data.py index 2475f9a..ee7fced 100644 --- a/app/data/scripts/bootstrap_unicode_data.py +++ b/app/data/scripts/bootstrap_unicode_data.py @@ -2,8 +2,8 @@ import re from app.config.api_settings import UnicodeApiSettings, get_settings +from app.constants import SUPPORTED_UNICODE_VERSIONS from app.core.result import Result -from app.data.constants import SUPPORTED_UNICODE_VERSIONS SEMVER_REGEX = re.compile(r"^(?P(?:[1-9]\d*))\.(?P(?:[0-9]\d*))(?:\.(?P(?:[0-9]\d*)))?") diff --git a/app/data/scripts/parse_xml_unicode_db.py b/app/data/scripts/parse_xml_unicode_db.py index 6a0b059..460dfd1 100644 --- a/app/data/scripts/parse_xml_unicode_db.py +++ b/app/data/scripts/parse_xml_unicode_db.py @@ -5,6 +5,7 @@ from lxml.etree import _Element, _ElementTree from app.config.api_settings import UnicodeApiSettings +from app.constants import NULL_BLOCK, NULL_PLANE from app.core.result import Result from app.data.constants import NULL_BLOCK, NULL_PLANE from app.data.encoding import get_codepoint_string diff --git a/app/db/character_props.py b/app/db/character_props.py index 4e55afd..0695e19 100644 --- a/app/db/character_props.py +++ b/app/db/character_props.py @@ -1,8 +1,7 @@ from typing import Any -from app.data.cache import cached_data -from app.data.constants import ( - CODEPOINT_WITH_PREFIX_REGEX, +from app.constants import ( + CP_PREFIX_1_REGEX, DEFAULT_BC_AL_CODEPOINTS, DEFAULT_BC_ET_CODEPOINTS, DEFAULT_BC_R_CODEPOINTS, diff --git a/app/schemas/enums/char_filter_flags.py b/app/schemas/enums/char_filter_flags.py index a7c1090..54d0ec6 100644 --- a/app/schemas/enums/char_filter_flags.py +++ b/app/schemas/enums/char_filter_flags.py @@ -1,7 +1,7 @@ from enum import IntFlag, auto from typing import Self -from app.data.constants import CHAR_FLAG_MAP +from app.constants import CHAR_FLAG_MAP from app.schemas.util import normalize_string_lm3 diff --git a/app/schemas/enums/unicode_age.py b/app/schemas/enums/unicode_age.py index 9f3d5e4..23944d1 100644 --- a/app/schemas/enums/unicode_age.py +++ b/app/schemas/enums/unicode_age.py @@ -1,7 +1,7 @@ from enum import Enum from typing import Self -from app.data.constants import UNICODE_VERSION_RELEASE_DATES +from app.constants import UNICODE_VERSION_RELEASE_DATES UnicodeAge = Enum( "UnicodeAge", {f'V{ver.replace(".", "_")}': ver[:-2] for ver in list(UNICODE_VERSION_RELEASE_DATES.keys())} From 305874d2957f69263a06fc4cb39a3b6ec19f946b Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:10:13 -0700 Subject: [PATCH 04/11] =?UTF-8?q?refactor:=20=E2=99=BB=EF=B8=8F=20move=20a?= =?UTF-8?q?pp.data.encoding=20module=20to=20app.core.encoding=20and=20upda?= =?UTF-8?q?te=20import=20statements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/api/api_v1/endpoints/characters.py | 2 +- app/{data => core}/encoding.py | 0 app/data/scripts/parse_xml_unicode_db.py | 3 +-- app/db/character_props.py | 13 ++++--------- app/docs/dependencies/custom_parameters.py | 2 +- .../test_get_character_details.py | 2 +- 6 files changed, 8 insertions(+), 14 deletions(-) rename app/{data => core}/encoding.py (100%) diff --git a/app/api/api_v1/endpoints/characters.py b/app/api/api_v1/endpoints/characters.py index 9515d6d..a5b3288 100644 --- a/app/api/api_v1/endpoints/characters.py +++ b/app/api/api_v1/endpoints/characters.py @@ -13,7 +13,7 @@ from app.api.api_v1.endpoints.util import get_character_details from app.api.api_v1.pagination import paginate_search_results from app.core.cache import cached_data -from app.data.encoding import get_codepoint_string +from app.core.encoding import get_codepoint_string from app.db.session import DBSession, get_session from app.docs.dependencies.custom_parameters import ( UNICODE_CHAR_STRING_DESCRIPTION, diff --git a/app/data/encoding.py b/app/core/encoding.py similarity index 100% rename from app/data/encoding.py rename to app/core/encoding.py diff --git a/app/data/scripts/parse_xml_unicode_db.py b/app/data/scripts/parse_xml_unicode_db.py index 460dfd1..685d6a4 100644 --- a/app/data/scripts/parse_xml_unicode_db.py +++ b/app/data/scripts/parse_xml_unicode_db.py @@ -6,9 +6,8 @@ from app.config.api_settings import UnicodeApiSettings from app.constants import NULL_BLOCK, NULL_PLANE +from app.core.encoding import get_codepoint_string from app.core.result import Result -from app.data.constants import NULL_BLOCK, NULL_PLANE -from app.data.encoding import get_codepoint_string from app.data.scripts.script_types import AllParsedUnicodeData, BlockOrPlaneDetailsDict, CharDetailsDict from app.data.util.spinners import Spinner diff --git a/app/db/character_props.py b/app/db/character_props.py index 0695e19..c751f92 100644 --- a/app/db/character_props.py +++ b/app/db/character_props.py @@ -9,6 +9,7 @@ DEFAULT_VO_U_PLANE_NUMBERS, ) from app.core.cache import cached_data +from app.core.encoding import ( get_codepoint_string, get_html_entities, get_uri_encoded_value, @@ -747,7 +748,7 @@ "char_property": "EqUIdeo", "db_required": True, "db_column": True, - "response_value": lambda char: get_string_prop_value(char, "equivalent_unified_ideograph"), + "response_value": lambda char: get_char_and_unicode_hex_value(char, "equivalent_unified_ideograph"), }, { "name_in": "radical", @@ -1023,19 +1024,13 @@ def get_int_prop_value(char_props: dict[str, Any], prop_name: str) -> int: def get_char_and_unicode_hex_value(char_props: dict[str, Any], prop_name: str) -> str: prop_value = get_string_prop_value(char_props, prop_name) - return ( - cached_data.get_mapped_codepoint_from_hex(prop_value) - if prop_value and cached_data.codepoint_is_assigned(char_props["codepoint_dec"]) - else "" - ) + return cached_data.get_mapped_codepoint_from_hex(prop_value) def get_list_of_mapped_codepoints(input: str) -> list[str]: if not input: return [""] - return [ - cached_data.get_mapped_codepoint_from_hex(codepoint) for codepoint in CODEPOINT_WITH_PREFIX_REGEX.findall(input) - ] + return [cached_data.get_mapped_codepoint_from_hex(codepoint) for codepoint in CP_PREFIX_1_REGEX.findall(input)] def get_default_age(codepoint: int) -> str: diff --git a/app/docs/dependencies/custom_parameters.py b/app/docs/dependencies/custom_parameters.py index 5eb1bff..aa80367 100644 --- a/app/docs/dependencies/custom_parameters.py +++ b/app/docs/dependencies/custom_parameters.py @@ -1,6 +1,6 @@ from app.config.api_settings import get_settings from app.core.cache import cached_data -from app.data.encoding import get_uri_encoded_value +from app.core.encoding import get_uri_encoded_value from app.docs.dependencies import ( BIDI_CLASS_VALUES_TABLE, BLOCK_NAME_NO_LEGEND_TABLE, diff --git a/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py b/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py index 654aa92..d432fbd 100644 --- a/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py +++ b/app/tests/test_character_endpoints/test_get_unicode_character_details/test_get_character_details.py @@ -4,7 +4,7 @@ import pytest from app.core.cache import cached_data -from app.data.encoding import get_uri_encoded_value +from app.core.encoding import get_uri_encoded_value from app.db.character_props import PROPERTY_GROUPS from app.db.procs.get_char_details import get_prop_groups from app.schemas.enums import CharPropertyGroup From a3036f396bb4cf35c4bb2eec7ff192d465130772 Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:14:39 -0700 Subject: [PATCH 05/11] =?UTF-8?q?refactor:=20=E2=99=BB=EF=B8=8F=20consolid?= =?UTF-8?q?ate=20all=20util=20functions=20into=20=20app.core.util=20module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/config/api_settings.py | 5 ++-- app/core/rate_limit.py | 7 +---- app/core/util.py | 38 +++++++++++++++++--------- app/docs/api_docs/readme.py | 2 +- app/docs/api_docs/swagger_ui.py | 2 +- app/docs/dependencies/filter_tables.py | 2 +- app/docs/util.py | 10 ------- 7 files changed, 32 insertions(+), 34 deletions(-) delete mode 100644 app/docs/util.py diff --git a/app/config/api_settings.py b/app/config/api_settings.py index 9a02a29..ba2ed15 100644 --- a/app/config/api_settings.py +++ b/app/config/api_settings.py @@ -9,6 +9,7 @@ import app.db.models as db from app.config.dotenv_file import read_dotenv_file from app.constants import UNICODE_PLANES_DEFAULT, UNICODE_VERSION_RELEASE_DATES +from app.core.util import s class ApiSettingsDict(TypedDict): @@ -152,9 +153,9 @@ def api_settings_report(self) -> str: @property def rate_limit_settings_report(self) -> str: - rate = f"{self.RATE_LIMIT_PER_PERIOD} request{'s' if self.RATE_LIMIT_PER_PERIOD > 1 else ''}" + rate = f"{self.RATE_LIMIT_PER_PERIOD} request{s(self.RATE_LIMIT_PER_PERIOD)}" interval = self.RATE_LIMIT_PERIOD_SECONDS.total_seconds() - period = f"{interval} seconds" if interval > 1 else "second" + period = f"{interval}second{s(interval)}" rate_limit_settings = f"Rate Limit Settings: {rate} per {period}" burst_enabled = self.RATE_LIMIT_BURST > 1 if burst_enabled: # pragma: no cover diff --git a/app/core/rate_limit.py b/app/core/rate_limit.py index 24eee76..c30ff43 100644 --- a/app/core/rate_limit.py +++ b/app/core/rate_limit.py @@ -14,6 +14,7 @@ format_timedelta_str, get_duration_between_timestamps, get_time_until_timestamp, + s, ) RATE_LIMIT_ROUTE_REGEX = re.compile(r"^\/v1\/blocks|characters|codepoints|planes") @@ -157,10 +158,4 @@ def get_time_portion(ts: float) -> str: return dtaware_fromtimestamp(ts).time().strftime("%I:%M:%S.%f %p") -def s(x: list | int | float) -> str: - if isinstance(x, list): - return "s" if len(x) > 1 else "" - return "s" if x > 1 else "" - - rate_limit = RateLimit(redis) diff --git a/app/core/util.py b/app/core/util.py index d3aa301..ae2ded5 100644 --- a/app/core/util.py +++ b/app/core/util.py @@ -1,17 +1,29 @@ +import re import time from datetime import datetime, timedelta, timezone, tzinfo from app.constants import DATE_MONTH_NAME, UNICODE_VERSION_RELEASE_DATES -DATE_MONTH_NAME = "%b %d, %Y" + +def s(x: list | int | float) -> str: + if isinstance(x, list): + return "s" if len(x) > 1 else "" + return "s" if x > 1 else "" + + +def slugify(text: str) -> str: + text = text.lower().strip() + text = re.compile(r"\s+").sub("-", text) + text = re.compile(r"([^A-Za-z0-9-])+").sub("-", text) + text = re.compile(r"--+").sub("-", text) + text = re.compile(r"(^-|-$)").sub("", text) + return text def get_unicode_version_release_date(version: str) -> str: - return ( - release_date.strftime(DATE_MONTH_NAME) - if (release_date := UNICODE_VERSION_RELEASE_DATES.get(version, None)) - else "" - ) + if release_date := UNICODE_VERSION_RELEASE_DATES.get(version, None): + return release_date.strftime(DATE_MONTH_NAME) + return "" def make_tzaware(dt: datetime, use_tz: tzinfo | None = None, localize: bool = True) -> datetime: @@ -38,24 +50,24 @@ def format_timedelta_str(td: timedelta, precise: bool = True) -> str: if td.days < 0: td = -td duration = "-" - (milliseconds, microseconds) = divmod(td.microseconds, 1000) + (ms, us) = divmod(td.microseconds, 1000) (minutes, seconds) = divmod(td.seconds, 60) (hours, minutes) = divmod(minutes, 60) (years, days) = divmod(td.days, 365) if years > 0: duration += f"{years}y {days}d {hours:.0f}h {minutes:.0f}m {seconds}s" if precise else f"{years}y {days} days" elif days > 0: - duration += f"{days}d {hours:.0f}h {minutes:.0f}m {seconds}s" if precise else f"{days} days" + duration += f"{days}d {hours:.0f}h {minutes:.0f}m {seconds}s" if precise else f"{days} days {hours:.0f} hours" elif hours > 0: duration += f"{hours:.0f}h {minutes:.0f}m {seconds}s" if precise else f"{hours:.0f} hours {minutes:.0f} minutes" elif minutes > 0: - duration += f"{minutes:.0f}m {seconds}s" if precise else f"{minutes:.0f} minutes" + duration += f"{minutes:.0f}m {seconds}s {ms:.0f}ms" if precise else f"{minutes:.0f} minutes {seconds} seconds" elif seconds > 0: - duration += f"{seconds}s {milliseconds:.0f}ms" if precise else f"{seconds} seconds" - elif milliseconds > 0: - duration += f"{milliseconds}ms {microseconds}us" if precise else f"{milliseconds}ms" + duration += f"{seconds}s {ms:.0f}ms" if precise else f"{seconds} seconds" + elif ms > 0: + duration += f"{ms}ms {us}us" if precise else f"{ms}ms" else: - duration += f"{microseconds}us" + duration += f"{us}us" return duration diff --git a/app/docs/api_docs/readme.py b/app/docs/api_docs/readme.py index 3de258f..1e2e0e8 100644 --- a/app/docs/api_docs/readme.py +++ b/app/docs/api_docs/readme.py @@ -5,6 +5,7 @@ from app.config.api_settings import get_settings from app.core.result import Result +from app.core.util import slugify from app.docs.api_docs.content.block import BLOCK_ENDPOINTS, UNICODE_BLOCK_OBJECT_INTRO, UNICODE_BLOCK_OBJECT_PROPERTIES from app.docs.api_docs.content.character import ( CHARACTER_ENDPOINTS, @@ -44,7 +45,6 @@ SEARCH_HTML, ) from app.docs.api_docs.content.plane import PLANE_ENDPOINTS, UNICODE_PLANE_OBJECT_INTRO, UNICODE_PLANE_OBJECT_PROPERTIES -from app.docs.util import slugify @dataclass diff --git a/app/docs/api_docs/swagger_ui.py b/app/docs/api_docs/swagger_ui.py index 760c571..9e70a7e 100644 --- a/app/docs/api_docs/swagger_ui.py +++ b/app/docs/api_docs/swagger_ui.py @@ -6,6 +6,7 @@ from starlette.responses import HTMLResponse from app.config.api_settings import get_settings +from app.core.util import slugify from app.docs.api_docs.content.block import BLOCK_ENDPOINTS, UNICODE_BLOCK_OBJECT_INTRO, UNICODE_BLOCK_OBJECT_PROPERTIES from app.docs.api_docs.content.character import ( CHARACTER_ENDPOINTS, @@ -45,7 +46,6 @@ SEARCH_HTML, ) from app.docs.api_docs.content.plane import PLANE_ENDPOINTS, UNICODE_PLANE_OBJECT_INTRO, UNICODE_PLANE_OBJECT_PROPERTIES -from app.docs.util import slugify # fmt: off diff --git a/app/docs/dependencies/filter_tables.py b/app/docs/dependencies/filter_tables.py index 0c62950..da6297b 100644 --- a/app/docs/dependencies/filter_tables.py +++ b/app/docs/dependencies/filter_tables.py @@ -3,8 +3,8 @@ import app.db.models as db from app.core.cache import cached_data +from app.core.util import slugify from app.db.engine import engine -from app.docs.util import slugify from app.schemas.enums import ( BidirectionalClass, CharacterFilterFlags, diff --git a/app/docs/util.py b/app/docs/util.py deleted file mode 100644 index 0fb3409..0000000 --- a/app/docs/util.py +++ /dev/null @@ -1,10 +0,0 @@ -import re - - -def slugify(text: str) -> str: - text = text.lower().strip() - text = re.compile(r"\s+").sub("-", text) - text = re.compile(r"([^A-Za-z0-9-])+").sub("-", text) - text = re.compile(r"--+").sub("-", text) - text = re.compile(r"(^-|-$)").sub("", text) - return text From 56c966dfed4d995fa1ff23b21a59732db181490f Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:15:37 -0700 Subject: [PATCH 06/11] =?UTF-8?q?refactor:=20=E2=99=BB=EF=B8=8F=20various?= =?UTF-8?q?=20refactorings=20to=20app.api=5Fv1.dependencies.util=20module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/api/api_v1/dependencies/util.py | 83 +++++++++++++---------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/app/api/api_v1/dependencies/util.py b/app/api/api_v1/dependencies/util.py index 68d9006..334a7b9 100644 --- a/app/api/api_v1/dependencies/util.py +++ b/app/api/api_v1/dependencies/util.py @@ -1,16 +1,17 @@ -import re - from fastapi import HTTPException, status +from app.constants import ( + ASCII_HEX, + CP_NEED_LEADING_ZEROS_REGEX, + CP_NO_PREFIX_REGEX_STRICT, + CP_OUT_OF_RANGE_REGEX, + CP_PREFIX_1_REGEX_STRICT, + CP_PREFIX_2_REGEX_STRICT, + MAX_CODEPOINT, +) +from app.core.encoding import get_codepoint_string from app.core.result import Result -from app.data.constants import ASCII_HEX, MAX_CODEPOINT -from app.data.encoding import get_codepoint_string - -CP_PREFIX_1_REGEX = re.compile(r"^U\+([A-Fa-f0-9]{4,6})$") -CP_PREFIX_2_REGEX = re.compile(r"^0x([A-Fa-f0-9]{2,6})$") -CP_NO_PREFIX_REGEX = re.compile(r"^([A-Fa-f0-9]{2,6})$") -CP_NEED_LEADING_ZEROS_REGEX = re.compile(r"^U\+([A-Fa-f0-9]{1,3})$") -CP_OUT_OF_RANGE_REGEX = re.compile(r"^(?:U\+)([A-Fa-f0-9]+)|(?:0x)?([A-Fa-f0-9]{7,})$") +from app.core.util import s def get_decimal_number_from_hex_codepoint(codepoint: str, starting_after: bool = True) -> int: @@ -18,51 +19,53 @@ def get_decimal_number_from_hex_codepoint(codepoint: str, starting_after: bool = if result.failure: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=result.error) cp_hex = result.value or "0" - codepoint_dec = int(cp_hex, 16) - result = check_codepoint_is_in_unicode_range(codepoint_dec, starting_after) + cp_dec = int(cp_hex, 16) + result = check_codepoint_is_in_unicode_range(cp_dec, starting_after) if result.success: - return codepoint_dec + return cp_dec raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=result.error) -def get_codepoint_hex_from_string(s: str) -> Result[str]: - match = CP_PREFIX_1_REGEX.match(s) - if match: +def get_codepoint_hex_from_string(cp: str) -> Result[str]: + if match := CP_PREFIX_1_REGEX_STRICT.match(cp): return Result.Ok(match[1]) - match = CP_PREFIX_2_REGEX.match(s) - if match: + if match := CP_PREFIX_2_REGEX_STRICT.match(cp): return Result.Ok(match[1]) - match = CP_NO_PREFIX_REGEX.match(s) - if match: + if match := CP_NO_PREFIX_REGEX_STRICT.match(cp): return Result.Ok(match[1]) - return Result.Fail(get_error_message_for_invalid_codepoint_value(s)) + return Result.Fail(get_error_message_for_invalid_codepoint_value(cp)) + + +def check_codepoint_is_in_unicode_range(codepoint: int, starting_after: bool) -> Result[int]: + lower_limit = 0 if starting_after else 1 + upper_limit = MAX_CODEPOINT if starting_after else MAX_CODEPOINT + 1 + if codepoint in range(lower_limit, upper_limit + 1): + return Result.Ok(codepoint) + error = f"{get_codepoint_string(codepoint)} is not within the Unicode codespace (U+0000 to U+10FFFF)." + return Result.Fail(error) -def get_error_message_for_invalid_codepoint_value(s: str) -> str: - sanitized_codepoint = sanitize_codepoint_value(s) - if match := CP_NEED_LEADING_ZEROS_REGEX.search(s): +def get_error_message_for_invalid_codepoint_value(cp: str) -> str: + sanitized_codepoint = sanitize_codepoint_value(cp) + if match := CP_NEED_LEADING_ZEROS_REGEX.search(cp): return ( f"The value provided (U+{sanitized_codepoint.upper()}) is invalid because Unicode codepoint values " "prefixed with 'U+' must contain at least 4 hexadecimal digits. The correct way to request " f"the character assigned to codepoint 0x{match[1].upper()} is with the value " f"'{get_codepoint_string(int(match[1], 16))}', which adds the necessary leading zeros." ) - invalid_chars = get_invalid_hex_characters(sanitized_codepoint) - if invalid_chars: + if invalid_chars := get_invalid_hex_characters(sanitized_codepoint): return ( - f"The value provided ({s}) contains {len(invalid_chars)} invalid hexadecimal " - f"character{'s' if len(invalid_chars) > 1 else ''}: [{', '.join(invalid_chars)}]. " - "The codepoint value must be expressed as a hexadecimal value within range 0000...10FFFF, " - "optionally prefixed by 'U+'' or '0x'." + f"The value provided ({cp}) contains {len(invalid_chars)} invalid hexadecimal " + f"character{s(invalid_chars)}: [{', '.join(invalid_chars)}]. The codepoint value must be expressed " + "as a hexadecimal value within range 0000...10FFFF, optionally prefixed by 'U+'' or '0x'." ) - return ( - ( + if match := CP_OUT_OF_RANGE_REGEX.match(cp): + return ( f"U+{match[1] or match[2]} is not within the range of valid codepoints for Unicode characters " "(U+0000 to U+10FFFF)." ) - if (match := CP_OUT_OF_RANGE_REGEX.match(s)) - else "Error! Value provided is not a valid hexadecimal number." - ) + return "Error! Value provided is not a valid hexadecimal number." def sanitize_codepoint_value(codepoint: str) -> str: @@ -71,13 +74,3 @@ def sanitize_codepoint_value(codepoint: str) -> str: def get_invalid_hex_characters(s: str) -> list[str]: return sorted({char for char in s if char not in ASCII_HEX}) - - -def check_codepoint_is_in_unicode_range(codepoint: int, starting_after: bool) -> Result[int]: - lower_limit = 0 if starting_after else 1 - upper_limit = MAX_CODEPOINT if starting_after else MAX_CODEPOINT + 1 - if codepoint in range(lower_limit, upper_limit + 1): - return Result.Ok(codepoint) - cp_hex = get_codepoint_string(codepoint) - error = f"{cp_hex} is not within the range of valid codepoints for Unicode characters (U+0000 to U+10FFFF)." - return Result.Fail(error) From 29fecb51d7f1f4be6822e699b891e3b3f64a9dd1 Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:16:23 -0700 Subject: [PATCH 07/11] feat: :sparkles: improve rate limit logic and add client ip tracking --- app/core/rate_limit.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/app/core/rate_limit.py b/app/core/rate_limit.py index c30ff43..a112d64 100644 --- a/app/core/rate_limit.py +++ b/app/core/rate_limit.py @@ -82,9 +82,9 @@ def is_exceeded(self, request: Request) -> Result[None]: Adapted for Python from this article: https://vikas-kumar.medium.com/rate-limiting-techniques-245c3a5e9cad """ - if not self.apply_rate_limit_to_request(request): + client_ip = get_client_ip_address(request) + if not self.apply_rate_limit_to_request(request, client_ip): return Result.Ok() - client_ip = request.client.host if request.client else "localhost" arrived_at = self.redis.time() self.redis.setnx(client_ip, "0") try: @@ -101,10 +101,10 @@ def is_exceeded(self, request: Request) -> Result[None]: except LockError: # pragma: no cover return self.lock_error(client_ip) - def apply_rate_limit_to_request(self, request: Request): + def apply_rate_limit_to_request(self, request: Request, client_ip: str): if self.settings.is_test: return enable_rate_limit_feature_for_test(request) - return request_origin_is_external(request) and requested_route_is_rate_limited(request) # pragma: no cover + return rate_limit_applies_to_route(request) and client_ip_is_external(request, client_ip) # pragma: no cover def get_allowed_at(self, tat: float) -> float: return (dtaware_fromtimestamp(tat) - self.delay_tolerance_ms).timestamp() @@ -132,6 +132,12 @@ def lock_error(self, client) -> Result[None]: # pragma: no cover return Result.Fail(error) +def get_client_ip_address(request: Request) -> str: + if "x-forwarded-for" in request.headers: + return request.headers["x-forwarded-for"] + return request.client.host if request.client else "localhost" + + def enable_rate_limit_feature_for_test(request: Request) -> bool: if "x-verify-rate-limiting" in request.headers: return request.headers["x-verify-rate-limiting"] == "true" @@ -142,18 +148,18 @@ def enable_rate_limit_feature_for_test(request: Request) -> bool: return False # pragma: no cover -def request_origin_is_external(request: Request) -> bool: # pragma: no cover - if request.client.host in ["localhost", "127.0.0.1", "testserver"]: +def rate_limit_applies_to_route(request: Request) -> bool: # pragma: no cover + return bool(RATE_LIMIT_ROUTE_REGEX.search(request.url.path)) + + +def client_ip_is_external(request: Request, client_ip: str) -> bool: # pragma: no cover + if client_ip in ["localhost", "127.0.0.1", "testserver"] or client_ip.startswith("172.17.0."): return False if "sec-fetch-site" in request.headers: return request.headers["sec-fetch-site"] != "same-site" return True -def requested_route_is_rate_limited(request: Request) -> bool: # pragma: no cover - return bool(RATE_LIMIT_ROUTE_REGEX.search(request.url.path)) - - def get_time_portion(ts: float) -> str: return dtaware_fromtimestamp(ts).time().strftime("%I:%M:%S.%f %p") From a6b3a4d360ef1b3488ff02710b73b1b136f8e9e0 Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:21:57 -0700 Subject: [PATCH 08/11] chore: :label: add missing type references to app.data.scripts modules --- app/data/scripts/save_parsed_data_to_csv.py | 23 ++++++++++++++------- app/data/scripts/script_types.py | 1 + app/data/scripts/sync_req_files.py | 4 ++-- app/data/scripts/update_all_data.py | 2 +- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/app/data/scripts/save_parsed_data_to_csv.py b/app/data/scripts/save_parsed_data_to_csv.py index 291b773..971bfa1 100644 --- a/app/data/scripts/save_parsed_data_to_csv.py +++ b/app/data/scripts/save_parsed_data_to_csv.py @@ -1,5 +1,9 @@ +from pathlib import Path + import app.db.models as db +from app.config.api_settings import UnicodeApiSettings from app.core.result import Result +from app.data.scripts.script_types import BlockOrPlaneDetailsDict, CharDetailsDict, ParsedUnicodeData, UnicodeModel from app.data.util.spinners import Spinner from app.schemas.enums import ( BidirectionalBracketType, @@ -20,7 +24,12 @@ ONE_PERCENT = 0.01 -def save_parsed_data_to_csv(config, all_planes, all_blocks, all_chars): +def save_parsed_data_to_csv( + config: UnicodeApiSettings, + all_planes: list[BlockOrPlaneDetailsDict], + all_blocks: list[BlockOrPlaneDetailsDict], + all_chars: list[CharDetailsDict], +) -> Result[None]: all_non_unihan_chars = [update_char_dict_enum_values(char) for char in all_chars if not char["_unihan"]] all_unihan_chars = [update_char_dict_enum_values(char) for char in all_chars if char["_unihan"]] @@ -51,7 +60,7 @@ def save_parsed_data_to_csv(config, all_planes, all_blocks, all_chars): return Result.Ok() -def update_char_dict_enum_values(char_dict): +def update_char_dict_enum_values(char_dict: CharDetailsDict) -> CharDetailsDict: char_dict["general_category"] = GeneralCategory.from_code(char_dict["general_category"]).code char_dict["combining_class"] = get_combining_class(char_dict["combining_class"]).value char_dict["bidirectional_class"] = BidirectionalClass.from_code(char_dict["bidirectional_class"]).value @@ -71,19 +80,19 @@ def update_char_dict_enum_values(char_dict): return char_dict -def get_column_names(db_model, parsed): +def get_column_names(db_model: UnicodeModel, parsed: ParsedUnicodeData) -> list[str]: return [name for name in db_model.__fields__ if name in parsed] -def get_csv_rows_for_chunk(chunk, column_names): +def get_csv_rows_for_chunk(chunk: list[ParsedUnicodeData], column_names: list[str]) -> str: return "\n".join(get_csv_row_for_parsed_data(parsed, column_names) for parsed in chunk) -def get_csv_row_for_parsed_data(db_obj, column_names): +def get_csv_row_for_parsed_data(db_obj: ParsedUnicodeData, column_names: list[str]) -> str: return ",".join(sanitize_value_for_csv(db_obj.get(name, "")) for name in column_names) -def sanitize_value_for_csv(val): +def sanitize_value_for_csv(val: bool | int | str | float) -> str: if isinstance(val, str): val = val.replace(",", ";").replace("Nan", "") return ( @@ -101,7 +110,7 @@ def sanitize_value_for_csv(val): ) -def append_to_csv(csv_file, text): +def append_to_csv(csv_file: Path, text: str) -> None: with csv_file.open("a") as csv: csv.write(f"{text}\n") diff --git a/app/data/scripts/script_types.py b/app/data/scripts/script_types.py index 04a3c06..1b50f34 100644 --- a/app/data/scripts/script_types.py +++ b/app/data/scripts/script_types.py @@ -2,5 +2,6 @@ CharDetailsDict = dict[str, bool | int | str] BlockOrPlaneDetailsDict = dict[str, int | str] +ParsedUnicodeData = BlockOrPlaneDetailsDict | CharDetailsDict AllParsedUnicodeData = tuple[list[BlockOrPlaneDetailsDict], list[BlockOrPlaneDetailsDict], list[CharDetailsDict]] UnicodeModel = db.UnicodePlane | db.UnicodeBlock | db.UnicodeCharacter | db.UnicodeCharacterUnihan diff --git a/app/data/scripts/sync_req_files.py b/app/data/scripts/sync_req_files.py index b75ffc3..304b771 100644 --- a/app/data/scripts/sync_req_files.py +++ b/app/data/scripts/sync_req_files.py @@ -7,7 +7,7 @@ REQ_REGEX = re.compile(r"(?P[\w-]+)==(?P[\w.]+)") -def sync_requirements_files(project_dir: Path): +def sync_requirements_files(project_dir: Path) -> Result[None]: result = pin_requirements(project_dir) if result.failure: return Result.Fail(result.error) @@ -50,7 +50,7 @@ def parse_lock_file_entry(req: str) -> tuple[str, str] | None: return (package, version) -def update_req_file(req_file: Path, pinned: dict[str, str]): +def update_req_file(req_file: Path, pinned: dict[str, str]) -> None: updated_versions = {package: pinned.get(package) for package in parse_lock_file(req_file) if package in pinned} req_file.write_text("\n".join([f"{name}=={ver}" for name, ver in updated_versions.items()])) diff --git a/app/data/scripts/update_all_data.py b/app/data/scripts/update_all_data.py index 1da779d..69b6a2e 100644 --- a/app/data/scripts/update_all_data.py +++ b/app/data/scripts/update_all_data.py @@ -67,7 +67,7 @@ def update_json_files( all_planes: list[BlockOrPlaneDetailsDict], all_blocks: list[BlockOrPlaneDetailsDict], all_chars: list[CharDetailsDict], -): +) -> None: spinner = Spinner() spinner.start("Creating JSON files for parsed Unicode data...") config.PLANES_JSON.write_text(json.dumps(all_planes, indent=4)) From a8815f38be46f148b9040879eb172cc4ad96b28e Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:22:02 -0700 Subject: [PATCH 09/11] chore: :label: improve type references for app.core.result module --- app/core/result.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/app/core/result.py b/app/core/result.py index 85ee32e..236c2b8 100644 --- a/app/core/result.py +++ b/app/core/result.py @@ -1,12 +1,8 @@ -from __future__ import annotations - from collections.abc import Callable -from typing import Generic, TypeVar - -T = TypeVar("T") +from typing import Self, overload -class Result(Generic[T]): +class Result[T]: """Represent the outcome of an operation.""" def __init__(self, success: bool, value: T | None, error: str | None) -> None: @@ -22,32 +18,46 @@ def __str__(self) -> str: def __repr__(self) -> str: """Official string representation of a result.""" - detail = f", error={self.error!r}" if self.failure else f", value={self.value}" if self.value else "" - return f"" + detail = ( + f"value=None, error={self.error!r}" + if self.failure + else f"value={self.value!r}, error=None" + if self.value + else "value=None, error=None" + ) + return f"Result({"True" if self.success else "False"}, {detail})" @property def failure(self) -> bool: """Flag that indicates if the operation failed.""" return not self.success - def on_success(self, func: Callable, *args, **kwargs) -> Result[T]: + def on_success(self, func: Callable, *args, **kwargs) -> Self: """Pass result of successful operation (if any) to subsequent function.""" return self if self.failure else func(self.value, *args, **kwargs) if self.value else func(*args, **kwargs) - def on_failure(self, func: Callable, *args, **kwargs) -> Result[T]: + def on_failure(self, func: Callable, *args, **kwargs) -> Self: """Pass error message from failed operation to subsequent function.""" return self if self.success else func(self.error, *args, **kwargs) - def on_both(self, func: Callable, *args, **kwargs) -> Result[T]: + def on_both(self, func: Callable, *args, **kwargs) -> Self: """Pass result (either succeeded/failed) to subsequent function.""" return func(self, *args, **kwargs) @staticmethod - def Fail(error_message: str) -> Result[T]: # noqa: N802 + def Fail(error_message: str) -> Self: # noqa: N802 """Create a Result object for a failed operation.""" return Result(False, value=None, error=error_message) + @overload + @staticmethod + def Ok() -> "Result": ... + + @overload + @staticmethod + def Ok(value: T) -> "Result[T]": ... + @staticmethod - def Ok(value: T | None = None) -> Result[T]: # noqa: N802 + def Ok(value: T | None = None) -> "Result | Result[T]": # noqa: N802 """Create a Result object for a successful operation.""" return Result(True, value=value, error=None) From 8b349e4ae6b87677eada38948233a2596ea39e97 Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:23:00 -0700 Subject: [PATCH 10/11] test: :white_check_mark: update test case data for error message when user provides invalie codepoint format --- .../test_list_all_unicode_characters/data.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/tests/test_character_endpoints/test_list_all_unicode_characters/data.py b/app/tests/test_character_endpoints/test_list_all_unicode_characters/data.py index 16f470d..38ced09 100644 --- a/app/tests/test_character_endpoints/test_list_all_unicode_characters/data.py +++ b/app/tests/test_character_endpoints/test_list_all_unicode_characters/data.py @@ -156,9 +156,7 @@ "detail": "The value provided (U+49) is invalid because Unicode codepoint values prefixed with 'U+' must contain at least 4 hexadecimal digits. The correct way to request the character assigned to codepoint 0x49 is with the value 'U+0049', which adds the necessary leading zeros." } -INVALID_CODEPOINT_STRING_2 = { - "detail": "U+110000 is not within the range of valid codepoints for Unicode characters (U+0000 to U+10FFFF)." -} +INVALID_CODEPOINT_STRING_2 = {"detail": "U+110000 is not within the Unicode codespace (U+0000 to U+10FFFF)."} INVALID_BLOCK_NAME = { "detail": "'ancient numbers' does not match any valid Unicode block name. The following block names are similar to the name you provided: Ancient_Greek_Numbers (U+10140...U+1018F), Sinhala_Archaic_Numbers (U+111E0...U+111FF), Cuneiform_Numbers_and_Punctuation (U+12400...U+1247F), Ancient_Greek_Musical_Notation (U+1D200...U+1D24F), Aegean_Numbers (U+10100...U+1013F), Ancient_Symbols (U+10190...U+101CF)" From 2def64b172eb301b2dcf9349c38452f5b0ea24e9 Mon Sep 17 00:00:00 2001 From: Aaron Luna Date: Sat, 4 May 2024 06:23:19 -0700 Subject: [PATCH 11/11] chore: :arrow_up: update dependencies --- requirements-dev.txt | 12 +++++------ requirements-lock.txt | 48 ++++++++++++++++++++++++++++--------------- requirements.txt | 10 ++++----- 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 0fadeca..91eab32 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,10 +1,10 @@ -black==24.4.0 -coverage==7.4.4 +black==24.4.2 +coverage==7.5.0 ipython==8.23.0 isort==5.13.2 -mypy==1.9.0 +mypy==1.10.0 pip-upgrader==1.4.15 -pytest==8.1.1 +pytest==8.2.0 pytest-black==0.3.12 pytest-clarity==1.0.1 pytest-cov==5.0.0 @@ -13,7 +13,7 @@ pytest-mock==3.14.0 pytest-random-order==1.1.1 pytest-sugar==1.0.0 pyupgrade==3.15.2 -ruff==0.4.1 +ruff==0.4.3 snoop==0.4.3 -tox==4.14.2 +tox==4.15.0 trogon==0.5.0 \ No newline at end of file diff --git a/requirements-lock.txt b/requirements-lock.txt index 6746dd1..3c06270 100644 --- a/requirements-lock.txt +++ b/requirements-lock.txt @@ -7,7 +7,7 @@ asttokens==2.4.1 async-timeout==4.0.3 attrs==23.2.0 backcall==0.2.0 -black==24.4.0 +black==24.4.2 cachetools==5.3.3 certifi==2024.2.2 chardet==5.2.0 @@ -16,19 +16,23 @@ cheap-repr==0.5.1 click==8.1.7 colorama==0.4.6 colorclass==2.2.2 -coverage==7.4.4 +coverage==7.5.0 decorator==5.1.1 distlib==0.3.8 +dnspython==2.6.1 docopt==0.6.2 +email_validator==2.1.1 exceptiongroup==1.2.1 executing==2.0.1 fakeredis==2.22.0 -fastapi==0.110.2 -filelock==3.13.4 +fastapi==0.111.0 +fastapi-cli==0.0.2 +filelock==3.14.0 frozenlist==1.4.1 h11==0.14.0 halo==0.0.31 httpcore==1.0.5 +httptools==0.6.1 httpx==0.27.0 idna==3.7 importlib_metadata==7.1.0 @@ -36,34 +40,37 @@ iniconfig==2.0.0 ipython==8.23.0 isort==5.13.2 jedi==0.19.1 +Jinja2==3.1.3 linkify-it-py==2.0.3 log-symbols==0.0.14 lupa==2.1 lxml==5.2.1 markdown-it-py==3.0.0 +MarkupSafe==2.1.5 matplotlib-inline==0.1.7 mdit-py-plugins==0.4.0 mdurl==0.1.2 multidict==6.0.5 -mypy==1.9.0 +mypy==1.10.0 mypy-extensions==1.0.0 +orjson==3.10.3 packaging==24.0 parso==0.8.4 pathspec==0.12.1 pexpect==4.9.0 pickleshare==0.7.5 pip-upgrader==1.4.15 -platformdirs==4.2.0 +platformdirs==4.2.1 pluggy==1.5.0 pprintpp==0.4.0 prompt-toolkit==3.0.43 ptyprocess==0.7.0 pure-eval==0.2.2 -pydantic==2.7.0 -pydantic_core==2.18.1 +pydantic==2.7.1 +pydantic_core==2.18.2 Pygments==2.17.2 pyproject-api==1.6.1 -pytest==8.1.1 +pytest==8.2.0 pytest-black==0.3.12 pytest-clarity==1.0.1 pytest-cov==5.0.0 @@ -73,12 +80,16 @@ pytest-random-order==1.1.1 pytest-sugar==1.0.0 python-dateutil==2.9.0 python-dotenv==1.0.1 +python-multipart==0.0.9 pyupgrade==3.15.2 -rapidfuzz==3.8.1 -redis==5.0.3 +PyYAML==6.0.1 +rapidfuzz==3.9.0 +redis==5.0.4 requests==2.31.0 rich==13.7.1 -ruff==0.4.1 +ruff==0.4.3 +setuptools==69.5.1 +shellingham==1.5.4 six==1.16.0 sniffio==1.3.1 snoop==0.4.3 @@ -87,26 +98,31 @@ spinners==0.0.24 SQLAlchemy==2.0.29 SQLAlchemy-Utils==0.41.2 sqlalchemy2-stubs==0.0.2a37 -sqlmodel==0.0.16 +sqlmodel==0.0.18 stack-data==0.6.3 starlette==0.37.2 termcolor==2.4.0 terminaltables==3.1.10 -textual==0.57.1 +textual==0.58.1 tokenize-rt==5.2.0 toml==0.10.2 tomli==2.0.1 -tox==4.14.2 +tox==4.15.0 traitlets==5.14.3 tree-sitter==0.21.3 tree-sitter-languages==1.10.2 trogon==0.5.0 +typer==0.12.3 typing_extensions==4.11.0 uc-micro-py==1.0.3 +ujson==5.9.0 urllib3==2.2.1 uvicorn==0.29.0 -virtualenv==20.25.3 +uvloop==0.19.0 +virtualenv==20.26.1 watchfiles==0.21.0 wcwidth==0.2.13 +websockets==12.0 +wheel==0.43.0 yarl==1.9.4 zipp==3.18.1 diff --git a/requirements.txt b/requirements.txt index 86e628e..eda6332 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,17 @@ click==8.1.7 fakeredis==2.22.0 -fastapi==0.110.2 +fastapi==0.111.0 halo==0.0.31 httpx==0.27.0 lupa==2.1 lxml==5.2.1 -pydantic==2.7.0 +pydantic==2.7.1 python-dateutil==2.9.0 -rapidfuzz==3.8.1 -redis==5.0.3 +rapidfuzz==3.9.0 +redis==5.0.4 requests==2.31.0 SQLAlchemy==2.0.29 SQLAlchemy-Utils==0.41.2 -sqlmodel==0.0.16 +sqlmodel==0.0.18 uvicorn==0.29.0 watchfiles==0.21.0 \ No newline at end of file