From 9f9726bd99f014622ba8ed888ce6fcc8daf6da7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Zaczy=C5=84ski?= Date: Sun, 3 Sep 2023 12:34:41 +0200 Subject: [PATCH 1/4] Sort Unicode: Initial Commit With the Materials --- python-sort-unicode-strings/README.md | 36 + .../python-sort-unicode-strings.html | 8494 +++++++++++++++++ .../python-sort-unicode-strings.ipynb | 841 ++ python-sort-unicode-strings/requirements.txt | 90 + 4 files changed, 9461 insertions(+) create mode 100644 python-sort-unicode-strings/README.md create mode 100644 python-sort-unicode-strings/python-sort-unicode-strings.html create mode 100644 python-sort-unicode-strings/python-sort-unicode-strings.ipynb create mode 100644 python-sort-unicode-strings/requirements.txt diff --git a/python-sort-unicode-strings/README.md b/python-sort-unicode-strings/README.md new file mode 100644 index 0000000000..8a1d47c6ad --- /dev/null +++ b/python-sort-unicode-strings/README.md @@ -0,0 +1,36 @@ +# How to Sort Unicode Strings Alphabetically in Python? + +This folder contains sample code for the Real Python tutorial [How to Sort Unicode Strings Alphabetically in Python?](https://realpython.com/python-sort-unicode-strings/). + +## Usage + +You can either open the HTML file with the rendered Jupyter notebook or use a Jupyter server to view and interact with the notebook directly. + +## Installation + +Create and activate a new virtual environment: + +```shell +$ python3 -m venv venv/ +$ source venv/bin/activate +``` + +Install the necessary requirements into your virtual environment: + +```shell +(venv) $ python -m pip install -r requirements.txt +``` + +In case of problems, you can try installing these dependencies manually: + +- [JupyterLab](https://pypi.org/project/jupyterlab/) +- [natsort](https://pypi.org/project/natsort/) +- [PyICU](https://pypi.org/project/PyICU/) +- [pyuca](https://pypi.org/project/pyuca/) +- [Unidecode](https://pypi.org/project/Unidecode/) + +Open the notebook in Jupyter Lab: + +```shell +(venv) python -m jupyter lab python-sort-unicode-strings.ipynb +``` diff --git a/python-sort-unicode-strings/python-sort-unicode-strings.html b/python-sort-unicode-strings/python-sort-unicode-strings.html new file mode 100644 index 0000000000..99bf80ae34 --- /dev/null +++ b/python-sort-unicode-strings/python-sort-unicode-strings.html @@ -0,0 +1,8494 @@ + + + + + +python-sort-unicode-strings + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
+ + diff --git a/python-sort-unicode-strings/python-sort-unicode-strings.ipynb b/python-sort-unicode-strings/python-sort-unicode-strings.ipynb new file mode 100644 index 0000000000..24dd4ce225 --- /dev/null +++ b/python-sort-unicode-strings/python-sort-unicode-strings.ipynb @@ -0,0 +1,841 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a9c43238-2d32-42bc-8d55-2470951d30fd", + "metadata": {}, + "source": [ + "# How to Sort Unicode Strings Alphabetically in Python?" + ] + }, + { + "cell_type": "markdown", + "id": "1f606efe-37d9-4da2-9b91-0786ee06fae0", + "metadata": {}, + "source": [ + "## Lexicographic Sorting" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "03e763c4-4c4d-40ab-9e26-72eaa7fbab09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Ludmiła', 'Zbigniew', 'Łukasz', 'Żaneta']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "polish_names = [\"Zbigniew\", \"Ludmiła\", \"Żaneta\", \"Łukasz\"]\n", + "sorted(polish_names)" + ] + }, + { + "cell_type": "markdown", + "id": "c020b0a1-4a4a-44df-8c6b-332f5c4e57b2", + "metadata": {}, + "source": [ + "## Unicode Collation Algorithm" + ] + }, + { + "cell_type": "markdown", + "id": "b5cfebf0-3272-4a3a-a45d-fd24e0094994", + "metadata": {}, + "source": [ + "### pyuca" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "54c6e6b6-18ad-4f68-a791-5b2de4169e41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Ludmiła', 'Łukasz', 'Żaneta', 'Zbigniew']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pyuca\n", + "\n", + "collator = pyuca.Collator()\n", + "sorted(polish_names, key=collator.sort_key)" + ] + }, + { + "cell_type": "markdown", + "id": "bedd5ce9-3135-46b8-b797-9da4d499c875", + "metadata": {}, + "source": [ + "### PyICU" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1c46a105-984e-43f2-8182-7d487e9e9622", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Ludmiła', 'Łukasz', 'Zbigniew', 'Żaneta']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from icu import Collator, Locale\n", + "\n", + "collator = Collator.createInstance(Locale(\"pl_PL\"))\n", + "sorted(polish_names, key=collator.getSortKey)" + ] + }, + { + "cell_type": "markdown", + "id": "e60cef37-5a80-4cce-91c7-16801690ea09", + "metadata": {}, + "source": [ + "#### Locales" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a18046a8-cc6b-4ef3-84fc-8d58da68c9d8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['af', 'af_NA', 'af_ZA', 'agq', 'agq_CM']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from icu import Locale\n", + "list(Locale.getAvailableLocales())[:5]" + ] + }, + { + "cell_type": "markdown", + "id": "6645de46-7f9d-4f5b-be72-2683d197a527", + "metadata": {}, + "source": [ + "#### Rule-Based Collator" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "969077e1-c524-4173-9cd6-f514b0865244", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Ludmiła', 'Łukasz', 'Zbigniew', 'Żaneta']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from icu import RuleBasedCollator\n", + "collator = RuleBasedCollator(\n", + " \"\"\"\n", + " &A<ą<<<Ą\n", + " &C<ć<<<Ć\n", + " &E<ę<<<Ę\n", + " &L<ł<<<Ł\n", + " &N<ń<<<Ń\n", + " &O<ó<<<Ó\n", + " &S<ś<<<Ś\n", + " &Z<ź<<<Ź<ż<<<Ż\n", + " \"\"\"\n", + ")\n", + "sorted(polish_names, key=collator.getSortKey)" + ] + }, + { + "cell_type": "markdown", + "id": "468cdf0d-39c3-47f0-a3ae-54add7f45270", + "metadata": {}, + "source": [ + "## Python's `locale` Module" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b5f46445-b166-4858-9559-f9a90c7a9062", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Ludmiła', 'Łukasz', 'Zbigniew', 'Żaneta']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import locale\n", + "\n", + "locale.setlocale(locale.LC_COLLATE, \"pl_PL.UTF-8\")\n", + "sorted(polish_names, key=locale.strxfrm)" + ] + }, + { + "cell_type": "markdown", + "id": "0f6cc858-c872-409d-ae07-7587a7eb1412", + "metadata": {}, + "source": [ + "## Transliteration" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8eb2cbb1-eff4-4f0b-a301-fc39835ffa69", + "metadata": {}, + "outputs": [], + "source": [ + "pangrams = {\n", + " \"Czech\": \"Příliš žluťoučký kůň úpěl ďábelské ódy\",\n", + " \"Polish\": \"Pójdźmyż haftnąć z wklęsłości guberń\",\n", + " \"Icelandic\": \"Kæmi ný öxi hér, ykist þjófum nú bæði víl og ádrepa\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "82afd3c6-424a-441b-a720-3110c05c75a3", + "metadata": {}, + "source": [ + "### Python's `unicodedata` Module" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b328bee5-1da2-4968-9f8e-c27943214820", + "metadata": {}, + "outputs": [], + "source": [ + "import unicodedata\n", + "\n", + "def transliterate_v1(text: str) -> str:\n", + " return (\n", + " unicodedata.normalize(\"NFD\", text)\n", + " .encode(\"ascii\", errors=\"ignore\")\n", + " .decode(\"ascii\")\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "952a8a5c-10bc-47db-b9c0-d136e2246fbd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Czech\n", + "Příliš žluťoučký kůň úpěl ďábelské ódy\n", + "Prilis zlutoucky kun upel dabelske ody\n", + "\n", + "Polish\n", + "Pójdźmyż haftnąć z wklęsłości guberń\n", + "Pojdzmyz haftnac z wklesosci gubern\n", + "\n", + "Icelandic\n", + "Kæmi ný öxi hér, ykist þjófum nú bæði víl og ádrepa\n", + "Kmi ny oxi her, ykist jofum nu bi vil og adrepa\n", + "\n" + ] + } + ], + "source": [ + "for label, pangram in pangrams.items():\n", + " print(label, pangram, transliterate_v1(pangram), \"\", sep=\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "066447d0-a6ef-4b72-8f3b-327c5716cb82", + "metadata": {}, + "source": [ + "### Custom Translation Table" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "4b0a0b51-3d18-4c59-af87-6d22527ee07e", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import unicodedata\n", + "\n", + "def transliterate_v2(text: str, mapping: dict[str, str] = None) -> str:\n", + " combining_characters = \"\".join(\n", + " character\n", + " for code_point in range(sys.maxunicode)\n", + " if unicodedata.combining(character := chr(code_point))\n", + " )\n", + " if mapping:\n", + " src, dst = [\"\".join(x) for x in zip(*mapping.items())]\n", + " table = str.maketrans(src, dst, combining_characters)\n", + " else:\n", + " table = str.maketrans(dict.fromkeys(combining_characters))\n", + " return unicodedata.normalize(\"NFD\", text).translate(table)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e7e5852c-3344-4cfc-bea2-28d0eb6c9e26", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Czech\n", + "Příliš žluťoučký kůň úpěl ďábelské ódy\n", + "Prilis zlutoucky kun upel dabelske ody\n", + "\n", + "Polish\n", + "Pójdźmyż haftnąć z wklęsłości guberń\n", + "Pojdzmyz haftnac z wkleslosci gubern\n", + "\n", + "Icelandic\n", + "Kæmi ný öxi hér, ykist þjófum nú bæði víl og ádrepa\n", + "Kæmi ny oxi her, ykist þjofum nu bæði vil og adrepa\n", + "\n" + ] + } + ], + "source": [ + "mapping = {\"Ł\": \"L\", \"ł\": \"l\"}\n", + "for label, pangram in pangrams.items():\n", + " print(label, pangram, transliterate_v2(pangram, mapping), \"\", sep=\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "e9dc5201-ae4c-4240-bfe7-0b6fd9c47076", + "metadata": {}, + "source": [ + "### Unidecode" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2b42a243-7adf-442a-820b-f420be5c2b04", + "metadata": {}, + "outputs": [], + "source": [ + "import unidecode\n", + "\n", + "def transliterate_v3(text: str) -> str:\n", + " return unidecode.unidecode(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "349015e1-2167-4521-ac9f-f9b4c0ad7d08", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Czech\n", + "Příliš žluťoučký kůň úpěl ďábelské ódy\n", + "Prilis zlutoucky kun upel dabelske ody\n", + "\n", + "Polish\n", + "Pójdźmyż haftnąć z wklęsłości guberń\n", + "Pojdzmyz haftnac z wkleslosci gubern\n", + "\n", + "Icelandic\n", + "Kæmi ný öxi hér, ykist þjófum nú bæði víl og ádrepa\n", + "Kaemi ny oxi her, ykist thjofum nu baedi vil og adrepa\n", + "\n" + ] + } + ], + "source": [ + "for label, pangram in pangrams.items():\n", + " print(label, pangram, transliterate_v3(pangram), \"\", sep=\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "8bf35bce-44b5-4c1f-851c-f175dfbb9c19", + "metadata": {}, + "source": [ + "### PyICU Transliterator" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "cbda5081-f987-403f-a0a8-7616766e5d86", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Czech\n", + "Příliš žluťoučký kůň úpěl ďábelské ódy\n", + "Prilis zlutoucky kun upel dabelske ody\n", + "\n", + "Polish\n", + "Pójdźmyż haftnąć z wklęsłości guberń\n", + "Pojdzmyz haftnac z wkleslosci gubern\n", + "\n", + "Icelandic\n", + "Kæmi ný öxi hér, ykist þjófum nú bæði víl og ádrepa\n", + "Kaemi ny oxi her, ykist thjofum nu baedi vil og adrepa\n", + "\n" + ] + } + ], + "source": [ + "import icu\n", + "tr = icu.Transliterator.createInstance(\"Any-ASCII\")\n", + "\n", + "for label, pangram in pangrams.items():\n", + " print(label, pangram, tr.transliterate(pangram), \"\", sep=\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "680566f6-79c9-46a2-8755-fcf2c674435e", + "metadata": {}, + "source": [ + "## Case-Insensitive Sorting" + ] + }, + { + "cell_type": "markdown", + "id": "d445b3a0-afb7-41c5-95cd-b7c73ce01c2d", + "metadata": {}, + "source": [ + "### Case Folding" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "531999cf-a828-490b-a42b-a0b968280c04", + "metadata": {}, + "outputs": [], + "source": [ + "import functools\n", + "import unicodedata\n", + "\n", + "def case_insensitive(text: str) -> str:\n", + " nfd = functools.partial(unicodedata.normalize, \"NFD\")\n", + " return nfd(nfd(text).casefold())" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "29a9f7cf-cb6f-441c-b1f0-8816374bf90d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Éléphant', 'éléphant', 'hérissonne', 'poisson', 'Tortue']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animaux = [\"Tortue\", \"hérissonne\", \"Éléphant\", \"poisson\", \"éléphant\"]\n", + "sorted(animaux, key=case_insensitive)" + ] + }, + { + "cell_type": "markdown", + "id": "034ae6ce-0e6e-46df-b643-8fa578cba15a", + "metadata": {}, + "source": [ + "### PyICU" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "115bd56c-9a72-4964-985d-d70aa5642444", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Éléphant', 'éléphant', 'hérissonne', 'poisson', 'Tortue']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from icu import Collator, Locale\n", + "\n", + "collator = Collator.createInstance(Locale(\"fr_FR\"))\n", + "collator.setStrength(Collator.SECONDARY)\n", + "sorted(animaux, key=collator.getSortKey)" + ] + }, + { + "cell_type": "markdown", + "id": "0fa814fb-7e8f-4830-bc23-0b581bc156d3", + "metadata": {}, + "source": [ + "### pyuca\n", + "\n", + "Lowercase letters always come before uppercase letters:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "28cf5632-b03f-41f6-a680-f23a197c7dcd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['éléphant', 'Éléphant', 'hérissonne', 'poisson', 'Tortue']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pyuca import Collator\n", + "sorted(animaux, key=Collator().sort_key)" + ] + }, + { + "cell_type": "markdown", + "id": "1be29770-ec9c-46fb-8282-49a92a72a73d", + "metadata": {}, + "source": [ + "## Natural Sort Order" + ] + }, + { + "cell_type": "markdown", + "id": "aa4c482b-2650-46d2-a43b-163aa7406e3b", + "metadata": {}, + "source": [ + "### Regular Expressions" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6522bf3c-a7f3-41f2-ad2a-dda9acb46af7", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "\n", + "def natural_order(text: str) -> tuple[str | int, ...]:\n", + " return tuple(\n", + " int(chunk) if chunk.isdigit() else chunk\n", + " for chunk in re.split(r\"(\\d+)\", text)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "b68c42d5-9d08-4896-9b0d-3dcb6ea5872d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['log.1',\n", + " 'log.2',\n", + " 'log.3',\n", + " 'log.4',\n", + " 'log.5',\n", + " 'log.6',\n", + " 'log.7',\n", + " 'log.8',\n", + " 'log.9',\n", + " 'log.10',\n", + " 'log.11',\n", + " 'log.12',\n", + " 'log.13',\n", + " 'log.14',\n", + " 'log.15']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted([f\"log.{i}\" for i in range(1, 111)], key=natural_order)[:15]" + ] + }, + { + "cell_type": "markdown", + "id": "f09a10ff-004b-436a-bb19-6184f1f5db23", + "metadata": {}, + "source": [ + "### PyICU + natsort" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c60c52b9-40bf-4fc7-a2a6-51069370c83b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['błędy.log.1',\n", + " 'błędy.log.3',\n", + " 'Błędy.log.101',\n", + " 'Raport_kwiecień_2023.xlsx',\n", + " 'raport_maj-2023.xlsx']" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filenames = [\n", + " \"raport_maj-2023.xlsx\",\n", + " \"błędy.log.3\",\n", + " \"błędy.log.1\",\n", + " \"Raport_kwiecień_2023.xlsx\",\n", + " \"Błędy.log.101\",\n", + "]\n", + "\n", + "import locale\n", + "locale.setlocale(locale.LC_ALL, \"pl_PL.UTF-8\")\n", + "\n", + "from natsort import natsorted, ns\n", + "natsorted(filenames, alg=ns.LOCALE | ns.IGNORECASE)" + ] + }, + { + "cell_type": "markdown", + "id": "110b932b-6187-4625-8bda-b7f9a63d152c", + "metadata": {}, + "source": [ + "## Sorting Complex Objects" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "719b79fe-f1d7-42c9-8043-e66d43bef002", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Anna Nowak,\n", + " Anna Wójcik,\n", + " Ludmiła Wiśniewska,\n", + " Zbigniew Nowak,\n", + " Łukasz Kowalski,\n", + " Żaneta Jabłońska]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from typing import NamedTuple\n", + "\n", + "class Person(NamedTuple):\n", + " first_name: str\n", + " last_name: str\n", + "\n", + " def __repr__(self):\n", + " return f\"{self.first_name} {self.last_name}\"\n", + "\n", + "people = [\n", + " Person(\"Zbigniew\", \"Nowak\"),\n", + " Person(\"Anna\", \"Wójcik\"),\n", + " Person(\"Łukasz\", \"Kowalski\"),\n", + " Person(\"Żaneta\", \"Jabłońska\"),\n", + " Person(\"Anna\", \"Nowak\"),\n", + " Person(\"Ludmiła\", \"Wiśniewska\"),\n", + "]\n", + "\n", + "sorted(people)" + ] + }, + { + "cell_type": "markdown", + "id": "56c08bf3-a482-4d7d-929f-61cb51c8f3c4", + "metadata": {}, + "source": [ + "### pyuca" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "9659e59f-6071-42a2-b822-61c7e5abdd69", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Anna Nowak,\n", + " Anna Wójcik,\n", + " Ludmiła Wiśniewska,\n", + " Łukasz Kowalski,\n", + " Żaneta Jabłońska,\n", + " Zbigniew Nowak]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pyuca\n", + "\n", + "collator = pyuca.Collator()\n", + "\n", + "def compound_key(person: Person) -> tuple:\n", + " return (\n", + " collator.sort_key(person.first_name),\n", + " collator.sort_key(person.last_name),\n", + " )\n", + "\n", + "sorted(people, key=compound_key)" + ] + }, + { + "cell_type": "markdown", + "id": "33cf460f-6937-4566-b4f0-9d06d5a8b29c", + "metadata": {}, + "source": [ + "### PyICU" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "5be4e5d9-42b3-4369-9cb8-eb18adff4144", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Anna Nowak,\n", + " Anna Wójcik,\n", + " Ludmiła Wiśniewska,\n", + " Łukasz Kowalski,\n", + " Zbigniew Nowak,\n", + " Żaneta Jabłońska]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from icu import Collator, Locale\n", + "\n", + "collator = Collator.createInstance(Locale(\"pl_PL\"))\n", + "\n", + "def compound_key(person: Person) -> tuple:\n", + " return (\n", + " collator.getSortKey(person.first_name),\n", + " collator.getSortKey(person.last_name),\n", + " )\n", + "\n", + "sorted(people, key=compound_key)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python-sort-unicode-strings/requirements.txt b/python-sort-unicode-strings/requirements.txt new file mode 100644 index 0000000000..3ed53c3638 --- /dev/null +++ b/python-sort-unicode-strings/requirements.txt @@ -0,0 +1,90 @@ +anyio==4.0.0 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.2.3 +asttokens==2.3.0 +async-lru==2.0.4 +attrs==23.1.0 +Babel==2.12.1 +backcall==0.2.0 +beautifulsoup4==4.12.2 +bleach==6.0.0 +certifi==2023.7.22 +cffi==1.15.1 +charset-normalizer==3.2.0 +comm==0.1.4 +debugpy==1.6.7.post1 +decorator==5.1.1 +defusedxml==0.7.1 +executing==1.2.0 +fastjsonschema==2.18.0 +fqdn==1.5.1 +idna==3.4 +ipykernel==6.25.1 +ipython==8.15.0 +isoduration==20.11.0 +jedi==0.19.0 +Jinja2==3.1.2 +json5==0.9.14 +jsonpointer==2.4 +jsonschema==4.19.0 +jsonschema-specifications==2023.7.1 +jupyter-events==0.7.0 +jupyter-lsp==2.2.0 +jupyter_client==8.3.1 +jupyter_core==5.3.1 +jupyter_server==2.7.3 +jupyter_server_terminals==0.4.4 +jupyterlab==4.0.5 +jupyterlab-pygments==0.2.2 +jupyterlab_server==2.24.0 +MarkupSafe==2.1.3 +matplotlib-inline==0.1.6 +mistune==3.0.1 +natsort==8.4.0 +nbclient==0.8.0 +nbconvert==7.8.0 +nbformat==5.9.2 +nest-asyncio==1.5.7 +notebook_shim==0.2.3 +overrides==7.4.0 +packaging==23.1 +pandocfilters==1.5.0 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +platformdirs==3.10.0 +prometheus-client==0.17.1 +prompt-toolkit==3.0.39 +psutil==5.9.5 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pycparser==2.21 +Pygments==2.16.1 +PyICU==2.11 +python-dateutil==2.8.2 +python-json-logger==2.0.7 +pyuca==1.2 +PyYAML==6.0.1 +pyzmq==25.1.1 +referencing==0.30.2 +requests==2.31.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rpds-py==0.10.0 +Send2Trash==1.8.2 +six==1.16.0 +sniffio==1.3.0 +soupsieve==2.5 +stack-data==0.6.2 +terminado==0.17.1 +tinycss2==1.2.1 +tornado==6.3.3 +traitlets==5.9.0 +Unidecode==1.3.6 +uri-template==1.3.0 +urllib3==2.0.4 +wcwidth==0.2.6 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.6.2 From d715f3a379c4ccc20ca8a1c2bf9831d19f8246dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Zaczy=C5=84ski?= Date: Sun, 3 Sep 2023 12:39:35 +0200 Subject: [PATCH 2/4] Sort Unicode: Fix Materials Linters --- .../python-sort-unicode-strings.html | 21 +++++++++++++++++-- .../python-sort-unicode-strings.ipynb | 21 +++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/python-sort-unicode-strings/python-sort-unicode-strings.html b/python-sort-unicode-strings/python-sort-unicode-strings.html index 99bf80ae34..ef0eaeaf08 100644 --- a/python-sort-unicode-strings/python-sort-unicode-strings.html +++ b/python-sort-unicode-strings/python-sort-unicode-strings.html @@ -7635,6 +7635,7 @@

Locales