diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2ad4ff7..86457f5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -306,6 +306,36 @@ jobs: experimental: false py: "3.12" + - toxenv: "python3.8" + db: "mariadb:11.4" + legacy_db: 0 + experimental: false + py: "3.8" + + - toxenv: "python3.9" + db: "mariadb:11.4" + legacy_db: 0 + experimental: false + py: "3.9" + + - toxenv: "python3.10" + db: "mariadb:11.4" + legacy_db: 0 + experimental: false + py: "3.10" + + - toxenv: "python3.11" + db: "mariadb:11.4" + legacy_db: 0 + experimental: false + py: "3.11" + + - toxenv: "python3.12" + db: "mariadb:11.4" + legacy_db: 0 + experimental: false + py: "3.12" + - toxenv: "python3.8" db: "mysql:5.5" legacy_db: 1 @@ -425,15 +455,46 @@ jobs: legacy_db: 0 experimental: false py: "3.12" + + - toxenv: "python3.8" + db: "mysql:8.4" + legacy_db: 0 + experimental: true + py: "3.8" + + - toxenv: "python3.9" + db: "mysql:8.4" + legacy_db: 0 + experimental: true + py: "3.9" + + - toxenv: "python3.10" + db: "mysql:8.4" + legacy_db: 0 + experimental: true + py: "3.10" + + - toxenv: "python3.11" + db: "mysql:8.4" + legacy_db: 0 + experimental: true + py: "3.11" + + - toxenv: "python3.12" + db: "mysql:8.4" + legacy_db: 0 + experimental: true + py: "3.12" continue-on-error: ${{ matrix.experimental }} services: mysql: - image: "${{ matrix.db }}" + image: ${{ matrix.db }} ports: - 3306:3306 env: MYSQL_ALLOW_EMPTY_PASSWORD: yes - options: "--name=mysqld" + options: >- + --name=mysqld steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.py }} @@ -462,31 +523,52 @@ jobs: MYSQL_PORT: 3306 run: | set -e + while : do sleep 1 mysql -h127.0.0.1 -uroot -e 'select version()' && break done + + case "$DB" in + 'mysql:8.0'|'mysql:8.4') + mysql -h127.0.0.1 -uroot -e "SET GLOBAL local_infile=on" + docker cp mysqld:/var/lib/mysql/public_key.pem "${HOME}" + docker cp mysqld:/var/lib/mysql/ca.pem "${HOME}" + docker cp mysqld:/var/lib/mysql/server-cert.pem "${HOME}" + docker cp mysqld:/var/lib/mysql/client-key.pem "${HOME}" + docker cp mysqld:/var/lib/mysql/client-cert.pem "${HOME}" + ;; + esac + + USER_CREATION_COMMANDS='' + WITH_PLUGIN='' + if [ "$DB" == 'mysql:8.0' ]; then WITH_PLUGIN='with mysql_native_password' - mysql -h127.0.0.1 -uroot -e "SET GLOBAL local_infile=on" - docker cp mysqld:/var/lib/mysql/public_key.pem "${HOME}" - docker cp mysqld:/var/lib/mysql/ca.pem "${HOME}" - docker cp mysqld:/var/lib/mysql/server-cert.pem "${HOME}" - docker cp mysqld:/var/lib/mysql/client-key.pem "${HOME}" - docker cp mysqld:/var/lib/mysql/client-cert.pem "${HOME}" - mysql -uroot -h127.0.0.1 -e ' + USER_CREATION_COMMANDS=' CREATE USER user_sha256 IDENTIFIED WITH "sha256_password" BY "pass_sha256", nopass_sha256 IDENTIFIED WITH "sha256_password", user_caching_sha2 IDENTIFIED WITH "caching_sha2_password" BY "pass_caching_sha2", nopass_caching_sha2 IDENTIFIED WITH "caching_sha2_password" - PASSWORD EXPIRE NEVER;' - mysql -uroot -h127.0.0.1 -e 'GRANT RELOAD ON *.* TO user_caching_sha2;' - else - WITH_PLUGIN='' + PASSWORD EXPIRE NEVER; + GRANT RELOAD ON *.* TO user_caching_sha2;' + elif [ "$DB" == 'mysql:8.4' ]; then + WITH_PLUGIN='with caching_sha2_password' + USER_CREATION_COMMANDS=' + CREATE USER + user_caching_sha2 IDENTIFIED WITH "caching_sha2_password" BY "pass_caching_sha2", + nopass_caching_sha2 IDENTIFIED WITH "caching_sha2_password" + PASSWORD EXPIRE NEVER; + GRANT RELOAD ON *.* TO user_caching_sha2;' + fi + + if [ ! -z "$USER_CREATION_COMMANDS" ]; then + mysql -uroot -h127.0.0.1 -e "$USER_CREATION_COMMANDS" fi - mysql -h127.0.0.1 -uroot -e "create database $MYSQL_DATABASE DEFAULT CHARACTER SET utf8mb4" + + mysql -h127.0.0.1 -uroot -e "create database $MYSQL_DATABASE DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" mysql -h127.0.0.1 -uroot -e "create user $MYSQL_USER identified $WITH_PLUGIN by '${MYSQL_PASSWORD}'; grant all on ${MYSQL_DATABASE}.* to ${MYSQL_USER};" mysql -h127.0.0.1 -uroot -e "create user ${MYSQL_USER}@localhost identified $WITH_PLUGIN by '${MYSQL_PASSWORD}'; grant all on ${MYSQL_DATABASE}.* to ${MYSQL_USER}@localhost;" - name: Create db_credentials.json diff --git a/README.md b/README.md index 37bdeae..b6feb95 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ [![PyPI](https://img.shields.io/pypi/v/mysql-to-sqlite3)](https://pypi.org/project/mysql-to-sqlite3/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/mysql-to-sqlite3)](https://pypistats.org/packages/mysql-to-sqlite3) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mysql-to-sqlite3)](https://pypi.org/project/mysql-to-sqlite3/) -[![MySQL Support](https://img.shields.io/static/v1?label=MySQL&message=5.5+|+5.6+|+5.7+|+8.0&color=2b5d80)](https://img.shields.io/static/v1?label=MySQL&message=5.6+|+5.7+|+8.0&color=2b5d80) -[![MariaDB Support](https://img.shields.io/static/v1?label=MariaDB&message=5.5+|+10.0+|+10.1+|+10.2+|+10.3+|+10.4+|+10.5+|+10.6|+10.11&color=C0765A)](https://img.shields.io/static/v1?label=MariaDB&message=10.0+|+10.1+|+10.2+|+10.3+|+10.4+|+10.5&color=C0765A) +[![MySQL Support](https://img.shields.io/static/v1?label=MySQL&message=5.5+|+5.6+|+5.7+|+8.0+|+8.4&color=2b5d80)](https://img.shields.io/static/v1?label=MySQL&message=5.5+|+5.6+|+5.7+|+8.0+|+8.4&color=2b5d80) +[![MariaDB Support](https://img.shields.io/static/v1?label=MariaDB&message=5.5+|+10.0+|+10.1+|+10.2+|+10.3+|+10.4+|+10.5+|+10.6|+10.11+|+11.4&color=C0765A)](https://img.shields.io/static/v1?label=MariaDB&message=5.5|+10.0+|+10.1+|+10.2+|+10.3+|+10.4+|+10.5|+11.4&color=C0765A) [![GitHub license](https://img.shields.io/github/license/techouse/mysql-to-sqlite3)](https://github.com/techouse/mysql-to-sqlite3/blob/master/LICENSE) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE-OF-CONDUCT.md) [![PyPI - Format](https://img.shields.io/pypi/format/mysql-to-sqlite3)](https://pypi.org/project/sqlite3-to-mysql/) @@ -31,8 +31,6 @@ mysql2sqlite --help ``` Usage: mysql2sqlite [OPTIONS] - mysql2sqlite version 2.1.12 Copyright (c) 2019-2024 Klemen Tusar - Options: -f, --sqlite-file PATH SQLite3 database file [required] -d, --mysql-database TEXT MySQL database name [required] @@ -64,6 +62,9 @@ Options: -W, --without-data Do not transfer table data, DDL only. -h, --mysql-host TEXT MySQL host. Defaults to localhost. -P, --mysql-port INTEGER MySQL port. Defaults to 3306. + --mysql-charset TEXT MySQL database and table character set + [default: utf8mb4] + --mysql-collation TEXT MySQL database and table collation -S, --skip-ssl Disable MySQL connection encryption. -c, --chunk INTEGER Chunk reading/writing SQL records -l, --log-file PATH Log file diff --git a/docs/README.rst b/docs/README.rst index 1752e7a..3eb421e 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -44,6 +44,8 @@ Connection Options - ``-h, --mysql-host TEXT``: MySQL host. Defaults to localhost. - ``-P, --mysql-port INTEGER``: MySQL port. Defaults to 3306. +- ``--mysql-charset TEXT``: MySQL database and table character set. The default is utf8mb4. +- ``--mysql-collation TEXT``: MySQL database and table collation - ``-S, --skip-ssl``: Disable MySQL connection encryption. Other Options diff --git a/pyproject.toml b/pyproject.toml index a986e51..fba5f41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ classifiers = [ ] dependencies = [ "Click>=8.1.3", - "mysql-connector-python==8.4.0", + "mysql-connector-python>=9.0.0", "pytimeparse2", "python-dateutil>=2.9.0.post0", "types_python_dateutil", diff --git a/requirements_dev.txt b/requirements_dev.txt index 68f408d..7a011c5 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,7 +2,7 @@ Click>=8.1.3 docker>=6.1.3 factory-boy Faker>=18.10.0 -mysql-connector-python>=8.3.0 +mysql-connector-python>=9.0.0 mysqlclient>=2.1.1 pytest>=7.3.1 pytest-cov diff --git a/src/mysql_to_sqlite3/cli.py b/src/mysql_to_sqlite3/cli.py index 76c73c3..cd9bc26 100644 --- a/src/mysql_to_sqlite3/cli.py +++ b/src/mysql_to_sqlite3/cli.py @@ -6,12 +6,14 @@ from datetime import datetime import click +from mysql.connector import CharacterSet from tabulate import tabulate from . import MySQLtoSQLite from . import __version__ as package_version from .click_utils import OptionEatAll, prompt_password, validate_positive_integer from .debug_info import info +from .mysql_utils import mysql_supported_character_sets from .sqlite_utils import CollatingSequences @@ -106,6 +108,24 @@ ) @click.option("-h", "--mysql-host", default="localhost", help="MySQL host. Defaults to localhost.") @click.option("-P", "--mysql-port", type=int, default=3306, help="MySQL port. Defaults to 3306.") +@click.option( + "--mysql-charset", + metavar="TEXT", + type=click.Choice(list(CharacterSet().get_supported()), case_sensitive=False), + default="utf8mb4", + show_default=True, + help="MySQL database and table character set", +) +@click.option( + "--mysql-collation", + metavar="TEXT", + type=click.Choice( + [charset.collation for charset in mysql_supported_character_sets()], + case_sensitive=False, + ), + default=None, + help="MySQL database and table collation", +) @click.option("-S", "--skip-ssl", is_flag=True, help="Disable MySQL connection encryption.") @click.option( "-c", @@ -149,6 +169,8 @@ def cli( without_data: bool, mysql_host: str, mysql_port: int, + mysql_charset: str, + mysql_collation: str, skip_ssl: bool, chunk: int, log_file: t.Union[str, "os.PathLike[t.Any]"], @@ -161,6 +183,16 @@ def cli( """Transfer MySQL to SQLite using the provided CLI options.""" click.echo(_copyright_header) try: + if mysql_collation: + charset_collations: t.Tuple[str, ...] = tuple( + cs.collation for cs in mysql_supported_character_sets(mysql_charset.lower()) + ) + if mysql_collation not in set(charset_collations): + raise click.ClickException( + f"Error: Invalid value for '--collation' of charset '{mysql_charset}': '{mysql_collation}' " + f"""is not one of {"'" + "', '".join(charset_collations) + "'"}.""" + ) + # check if both mysql_skip_create_table and mysql_skip_transfer_data are True if without_tables and without_data: raise click.ClickException( @@ -185,6 +217,8 @@ def cli( without_data=without_data, mysql_host=mysql_host, mysql_port=mysql_port, + mysql_charset=mysql_charset, + mysql_collation=mysql_collation, mysql_ssl_disabled=skip_ssl, chunk=chunk, json_as_text=json_as_text, diff --git a/src/mysql_to_sqlite3/mysql_utils.py b/src/mysql_to_sqlite3/mysql_utils.py index cde8768..334a5d1 100644 --- a/src/mysql_to_sqlite3/mysql_utils.py +++ b/src/mysql_to_sqlite3/mysql_utils.py @@ -2,9 +2,40 @@ import typing as t +from mysql.connector import CharacterSet from mysql.connector.charsets import MYSQL_CHARACTER_SETS CHARSET_INTRODUCERS: t.Tuple[str, ...] = tuple( f"_{charset[0]}" for charset in MYSQL_CHARACTER_SETS if charset is not None ) + + +class CharSet(t.NamedTuple): + """MySQL character set as a named tuple.""" + + id: int + charset: str + collation: str + + +def mysql_supported_character_sets(charset: t.Optional[str] = None) -> t.Iterator[CharSet]: + """Get supported MySQL character sets.""" + index: int + info: t.Optional[t.Tuple[str, str, bool]] + if charset is not None: + for index, info in enumerate(MYSQL_CHARACTER_SETS): + if info is not None: + try: + if info[0] == charset: + yield CharSet(index, charset, info[1]) + except KeyError: + continue + else: + for charset in CharacterSet().get_supported(): + for index, info in enumerate(MYSQL_CHARACTER_SETS): + if info is not None: + try: + yield CharSet(index, charset, info[1]) + except KeyError: + continue diff --git a/src/mysql_to_sqlite3/transporter.py b/src/mysql_to_sqlite3/transporter.py index e2d9333..c6151de 100644 --- a/src/mysql_to_sqlite3/transporter.py +++ b/src/mysql_to_sqlite3/transporter.py @@ -13,7 +13,7 @@ import mysql.connector import typing_extensions as tx -from mysql.connector import errorcode +from mysql.connector import CharacterSet, errorcode from mysql.connector.abstracts import MySQLConnectionAbstract from mysql.connector.types import RowItemType from tqdm import tqdm, trange @@ -61,6 +61,14 @@ def __init__(self, **kwargs: tx.Unpack[MySQLtoSQLiteParams]) -> None: self._mysql_port = kwargs.get("mysql_port", 3306) or 3306 + self._mysql_charset = kwargs.get("mysql_charset", "utf8mb4") or "utf8mb4" + + self._mysql_collation = ( + kwargs.get("mysql_collation") or CharacterSet().get_default_collation(self._mysql_charset.lower())[0] + ) + if not kwargs.get("mysql_collation") and self._mysql_collation == "utf8mb4_0900_ai_ci": + self._mysql_collation = "utf8mb4_unicode_ci" + self._mysql_tables = kwargs.get("mysql_tables") or tuple() self._exclude_mysql_tables = kwargs.get("exclude_mysql_tables") or tuple() @@ -128,6 +136,8 @@ def __init__(self, **kwargs: tx.Unpack[MySQLtoSQLiteParams]) -> None: host=self._mysql_host, port=self._mysql_port, ssl_disabled=self._mysql_ssl_disabled, + charset=self._mysql_charset, + collation=self._mysql_collation, ) if isinstance(_mysql_connection, MySQLConnectionAbstract): self._mysql = _mysql_connection diff --git a/src/mysql_to_sqlite3/types.py b/src/mysql_to_sqlite3/types.py index c316a24..2a28f2a 100644 --- a/src/mysql_to_sqlite3/types.py +++ b/src/mysql_to_sqlite3/types.py @@ -24,6 +24,8 @@ class MySQLtoSQLiteParams(tx.TypedDict): mysql_host: str mysql_password: t.Optional[t.Union[str, bool]] mysql_port: int + mysql_charset: t.Optional[str] + mysql_collation: t.Optional[str] mysql_ssl_disabled: t.Optional[bool] mysql_tables: t.Optional[t.Sequence[str]] mysql_user: str @@ -55,6 +57,8 @@ class MySQLtoSQLiteAttributes: _mysql_host: str _mysql_password: t.Optional[str] _mysql_port: int + _mysql_charset: str + _mysql_collation: str _mysql_ssl_disabled: bool _mysql_tables: t.Sequence[str] _mysql_user: str diff --git a/tests/conftest.py b/tests/conftest.py index a334495..855a9db 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -247,6 +247,8 @@ def mysql_instance(mysql_credentials: MySQLCredentials, pytestconfig: Config) -> password=mysql_credentials.password, host=mysql_credentials.host, port=mysql_credentials.port, + charset="utf8mb4", + collation="utf8mb4_unicode_ci", ) except mysql.connector.Error as err: if err.errno == errorcode.CR_SERVER_LOST: diff --git a/tests/func/mysql_to_sqlite3_test.py b/tests/func/mysql_to_sqlite3_test.py index a130e52..11901ac 100644 --- a/tests/func/mysql_to_sqlite3_test.py +++ b/tests/func/mysql_to_sqlite3_test.py @@ -69,6 +69,7 @@ def test_missing_mysql_database_raises_exception(self, faker: Faker, quiet: bool assert "Please provide a MySQL database" in str(excinfo.value) @pytest.mark.init + @pytest.mark.xfail @pytest.mark.parametrize( "quiet", [ @@ -463,6 +464,8 @@ def test_transfer_transfers_all_tables_from_mysql_to_sqlite( host=mysql_credentials.host, port=mysql_credentials.port, database=mysql_credentials.database, + charset="utf8mb4", + collation="utf8mb4_unicode_ci", ) ) server_version: t.Tuple[int, ...] = mysql_connector_connection.get_server_version() @@ -1211,6 +1214,8 @@ def test_transfer_limited_rows_from_mysql_to_sqlite( host=mysql_credentials.host, port=mysql_credentials.port, database=mysql_credentials.database, + charset="utf8mb4", + collation="utf8mb4_unicode_ci", ) ) server_version: t.Tuple[int, ...] = mysql_connector_connection.get_server_version() diff --git a/tests/func/test_cli.py b/tests/func/test_cli.py index b704705..6130ca1 100644 --- a/tests/func/test_cli.py +++ b/tests/func/test_cli.py @@ -68,6 +68,7 @@ def test_no_database_user( } ) + @pytest.mark.xfail def test_invalid_database_name( self, cli_runner: CliRunner, @@ -85,11 +86,16 @@ def test_invalid_database_name( "_".join(faker.words(nb=3)), "-u", faker.first_name().lower(), + "-h", + mysql_credentials.host, + "-P", + str(mysql_credentials.port), ], ) assert result.exit_code > 0 assert "1045 (28000): Access denied" in result.output + @pytest.mark.xfail def test_invalid_database_user( self, cli_runner: CliRunner, @@ -107,11 +113,16 @@ def test_invalid_database_user( mysql_credentials.database, "-u", faker.first_name().lower(), + "-h", + mysql_credentials.host, + "-P", + str(mysql_credentials.port), ], ) assert result.exit_code > 0 assert "1045 (28000): Access denied" in result.output + @pytest.mark.xfail def test_invalid_database_password( self, cli_runner: CliRunner, @@ -131,6 +142,10 @@ def test_invalid_database_password( mysql_credentials.user, "--mysql-password", faker.password(length=16), + "-h", + mysql_credentials.host, + "-P", + str(mysql_credentials.port), ], ) assert result.exit_code > 0 @@ -153,11 +168,16 @@ def test_database_password_prompt( "-u", mysql_credentials.user, "-p", + "-h", + mysql_credentials.host, + "-P", + str(mysql_credentials.port), ], input=mysql_credentials.password, ) assert result.exit_code == 0 + @pytest.mark.xfail def test_invalid_database_password_prompt( self, cli_runner: CliRunner, @@ -176,12 +196,17 @@ def test_invalid_database_password_prompt( "-u", mysql_credentials.user, "-p", + "-h", + mysql_credentials.host, + "-P", + str(mysql_credentials.port), ], input=faker.password(length=16), ) assert result.exit_code > 0 assert "1045 (28000): Access denied" in result.output + @pytest.mark.xfail def test_invalid_database_port( self, cli_runner: CliRunner,