Skip to content

Commit

Permalink
Merge pull request #393 from roedoejet/dev.ej/revamp-schema-versioning
Browse files Browse the repository at this point in the history
Revamp schema versioning and dev version trigger
  • Loading branch information
joanise authored Sep 12, 2024
2 parents 54d4e18 + 208a8e0 commit 5631210
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 49 deletions.
1 change: 1 addition & 0 deletions .SETUPTOOLS_SCM_PRETEND_VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2.1
3 changes: 2 additions & 1 deletion .github/workflows/matrix-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
shell: bash
run: |
python -m pip install --upgrade pip
pip install -e .[test]
SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION` pip install -e .[test]
- name: Run tests
run: python run_tests.py dev
3 changes: 2 additions & 1 deletion .github/workflows/pythonpublish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ name: Publish g2p to PyPI and create a GitHub release
on:
push:
tags:
- v[0-9]+.**
# We publish only actual releases, not dev, alpha or beta versions
- v[0-9]+.[0-9]+.[0-9]+

jobs:
tests:
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@ on:
- push
- workflow_call

# Since we don't checkout the full history, set a default version so
# certain tests (pep440, update_schema) will still function. NOTE:
# This **must** match the Major.Minor version of the JSON schema file
# in g2p/mappings/.schema!!!
# Since we don't check out the full history, we use a default version so that certain
# tests (pep440, update_schema) will still function. The fake version is set in the file
# .SETUPTOOLS_SCM_PRETEND_VERSION

env:
SETUPTOOLS_SCM_PRETEND_VERSION: "2.1"
G2P_STUDIO_DEBUG: 1

jobs:
Expand All @@ -25,7 +24,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[test]
SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION` pip install -e .[test]
pip install pip-licenses
if pip-licenses | grep -v 'Artistic License' | grep -v LGPL | grep GNU; then echo 'Please avoid introducing *GPL dependencies'; false; fi
pip install coverage
Expand Down Expand Up @@ -92,9 +91,10 @@ jobs:
# Note: this is where we also test that the g2p library still works on 3.7
python-version: "3.7"
- name: Install dependencies
shell: bash
run: |
python -m pip install --upgrade pip
pip install -e .[test]
SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION` pip install -e .[test]
- name: Run tests on Windows
run: python run_tests.py dev
- name: Make sure the CLI outputs utf8 on Windows
Expand All @@ -121,7 +121,7 @@ jobs:
run: pip install -r requirements.txt
- name: Install all test dependencies
run: |
pip install -e .[test]
SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION` pip install -e .[test]
python -m playwright install --with-deps chromium
- name: Overwrite dependencies and g2p, the Heroku way, to replicate the production env
run: |
Expand Down
2 changes: 1 addition & 1 deletion bin/post_compile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/sh

# Heroku has no understanding of dynamic versioning so we have to hack this to make things work
export SETUPTOOLS_SCM_PRETEND_VERSION=2.0
export SETUPTOOLS_SCM_PRETEND_VERSION=`cat .SETUPTOOLS_SCM_PRETEND_VERSION`
# Also it expects to run your app in place, but doesn't actually do this for you because it's old
pip install -e .
69 changes: 51 additions & 18 deletions g2p/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
import re
import sys
from pathlib import Path
from textwrap import dedent
from typing import List, Tuple

import click

import g2p._version
from g2p import make_g2p, make_tokenizer
from g2p._version import VERSION
from g2p.constants import (
DISTANCE_METRICS,
LANGS_DIR,
Expand Down Expand Up @@ -104,7 +105,7 @@ def parse_from_or_to_lang_spec(lang_spec):
return mappings


@click.version_option(version=VERSION, prog_name="g2p")
@click.version_option(version=g2p._version.VERSION, prog_name="g2p")
@click.group(context_settings=CONTEXT_SETTINGS)
def cli():
"""Management script for G2P"""
Expand Down Expand Up @@ -700,34 +701,66 @@ def update(in_dir, out_dir):
def update_schema(out_dir):
"""Generate a schema for the model configuration.
This should only be done once for each Minor version.
This should be done every time the model changes in a way that affects the schema.
Changes to the schema should result in a minor version bump.
But not every minor or major version bump requires a schema update.
"""
# Defer expensive imports
from g2p.mappings import MappingConfig

# We should not be changing the schema for patches, so only include major/minor version
MAJOR_MINOR_VERSION = ".".join(VERSION.split(".")[:2])
# We shall not change the schema for patches, so only include major/minor version
(major, minor, *_rest) = g2p._version.version_tuple
major_minor = f"{major}.{minor}"

# Determine path
if out_dir is None:
schema_path = (
Path(LANGS_DIR) / f"../.schema/g2p-config-schema-{MAJOR_MINOR_VERSION}.json"
)
out_dir = Path(LANGS_DIR).parent / ".schema"
else:
schema_path = Path(out_dir) / f"g2p-config-schema-{MAJOR_MINOR_VERSION}.json"
out_dir = Path(out_dir)
schema_path = out_dir / f"g2p-config-schema-{major_minor}.json"

# Generate schema
if schema_path.exists():
raise FileExistsError(
f"Sorry a schema already exists for version {MAJOR_MINOR_VERSION}. "
"Please bump the minor version number and generate the schema again."
)
json_schema = MappingConfig.model_json_schema()
# Add explicit schema dialect for SchemaStore
# Note that pydantic actually targets
# Add an explicit schema dialect for SchemaStore (NOTE(review): pydantic itself appears to target draft 2020-12 - confirm that declaring draft-07 is intended)
json_schema["$schema"] = "http://json-schema.org/draft-07/schema#"
with open(schema_path, "w") as f:
json.dump(json_schema, f, indent=2)

if schema_path.exists():
with open(schema_path, encoding="utf8") as f:
old_schema = json.load(f)
if old_schema == json_schema:
print(f"Schema {schema_path} is already up to date.")
else:
print(
dedent(
f"""
Schema {schema_path}
exists for version {major_minor} but is not up to date. If it was already published to
the SchemaStore (see
https://github.com/SchemaStore/schemastore/blob/master/src/api/json/catalog.json)
then you must bump the minor or major version number of g2p and generate the
schema again. If not and you want to overwrite it, please delete the existing
schema and try again.
"""
),
file=sys.stderr,
)
raise click.UsageError("Schema already exists but is not up to date.")
else:
prev_schema_files = sorted(out_dir.glob("g2p-config-schema-*.json"))
if prev_schema_files:
with open(prev_schema_files[-1], encoding="utf8") as f:
old_schema = json.load(f)
else:
old_schema = None
if old_schema == json_schema:
print(f"Schema {prev_schema_files[-1]}")
print(
f"is still up to date. No need to generate a new schema for version {major_minor}."
)
else:
with open(schema_path, "w", encoding="ascii") as f:
json.dump(json_schema, f, indent=2)
print(f"Wrote {schema_path}.")


@click.argument("path", type=click.Path(exists=True, file_okay=True, dir_okay=False))
Expand Down
83 changes: 65 additions & 18 deletions g2p/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
import re
import shutil
import tempfile
from contextlib import contextmanager
from pathlib import Path
from unittest import TestCase, main

import jsonschema
import yaml
from click.testing import CliRunner

from g2p._version import VERSION
import g2p._version
from g2p.cli import (
convert,
doctor,
Expand All @@ -34,6 +35,29 @@
from g2p.tests.public.data import DATA_DIR, load_public_test_data


def set_g2p_version(version_tuple, version_string=None):
    """Overwrite the version attributes of the g2p._version module.

    Args:
        version_tuple: iterable of version parts; stored as a tuple.
        version_string: version string to store. When None, it is built by
            joining the str() of each part of version_tuple with ".".
    """
    version_module = g2p._version
    if version_string is None:
        version_string = ".".join(map(str, version_tuple))
    # The three string aliases all carry the same value.
    for alias in ("VERSION", "__version__", "version"):
        setattr(version_module, alias, version_string)
    # Both tuple aliases share a single tuple object.
    frozen_parts = tuple(version_tuple)
    for alias in ("__version_tuple__", "version_tuple"):
        setattr(version_module, alias, frozen_parts)


@contextmanager
def monkey_patch_g2p_version(increment_tuple):
    """Temporarily bump the g2p version by the given per-part increments.

    Args:
        increment_tuple: increments added, position by position, to
            g2p._version.version_tuple; e.g. (0, 1) bumps the second part
            by one. The current tuple is padded with zeros when it is
            shorter than increment_tuple.

    The version string and tuple saved on entry are restored on exit, even
    when the body of the ``with`` block raises, so that a failing assertion
    cannot leak a patched version into later tests.
    """
    saved_version = g2p._version.VERSION
    saved_version_tuple = g2p._version.version_tuple
    incremented_version = list(saved_version_tuple)
    # Pad so that every position named in increment_tuple exists.
    while len(incremented_version) < len(increment_tuple):
        incremented_version.append(0)
    for part, increment in enumerate(increment_tuple):
        incremented_version[part] += increment
    set_g2p_version(incremented_version)
    try:
        yield
    finally:
        # Always undo the patch: g2p._version is module-level global state.
        set_g2p_version(saved_version_tuple, saved_version)


class CliTest(TestCase):
"""Test suite for the g2p Command Line Interface"""

Expand Down Expand Up @@ -96,31 +120,54 @@ def test_update(self):
result = self.runner.invoke(update, ["-i", bad_langs_dir, "-o", tmpdir])
self.assertEqual(result.exit_code, 0)

def test_schema_ci_version(self):
    """Check that a schema file exists for the current major.minor version.

    The version may be a fake one (see .github/workflows/tests.yml).
    """
    major_minor = ".".join(VERSION.split(".")[:2])
    schema_dir = Path(__file__).parent.parent / "mappings" / ".schema"
    schema_file = schema_dir / f"g2p-config-schema-{major_minor}.json"
    self.assertTrue(schema_file.exists())

def test_update_schema(self):
# It's an error for the currently saved schema to be out of date
result = self.runner.invoke(update_schema)
self.assertNotEqual(result.exit_code, 0)
self.assertIn("FileExistsError", str(result))
self.assertEqual(result.exit_code, 0)
self.assertIn("up to date", result.output)

with tempfile.TemporaryDirectory() as tmpdir:
# Exercise writing a new schema to disk even if up to date
result = self.runner.invoke(update_schema, ["-o", tmpdir])
MAJOR_MINOR_VERSION = ".".join(VERSION.split(".")[:2])
self.assertEqual(result.exit_code, 0)
self.assertIn("Wrote", result.output)

# Reload the written schema for further unit tests
(major, minor, *_rest) = g2p._version.version_tuple
major_minor = f"{major}.{minor}"
with open(
Path(tmpdir) / f"g2p-config-schema-{MAJOR_MINOR_VERSION}.json",
Path(tmpdir) / f"g2p-config-schema-{major_minor}.json",
encoding="utf8",
) as f:
schema = json.load(f)

# A second run will necessarily already be up to date even if the patch is bumped
with monkey_patch_g2p_version((0, 0, +1)):
result_rerun = self.runner.invoke(update_schema, ["-o", tmpdir])
self.assertEqual(result_rerun.exit_code, 0)
self.assertIn("already up to date", result_rerun.output)

# Monkey patch the version to test a previous version still being up to date
with monkey_patch_g2p_version((+0, +1)):
result_new = self.runner.invoke(update_schema, ["-o", tmpdir])
self.assertEqual(result_new.exit_code, 0)
self.assertIn("still up to date", result_new.output)

# Monkey patch the version and the model to require a schema update
with monkey_patch_g2p_version((+1, +0)):
saved_doc = MappingConfig.__doc__
MappingConfig.__doc__ = "Changed docstring"
result_update = self.runner.invoke(update_schema, ["-o", tmpdir])
MappingConfig.__doc__ = saved_doc
self.assertEqual(result_update.exit_code, 0)
self.assertIn("Wrote", result_update.output)

# Require a schema update when it's already written: that's an error
with monkey_patch_g2p_version((+1, +0)):
result_bad_update = self.runner.invoke(update_schema, ["-o", tmpdir])
self.assertNotEqual(result_bad_update.exit_code, 0)
self.assertIn("but is not up to date", result_bad_update.output)

# Validate all configurations against the current schema, quietly unless there's an error:
for config in Path(LANGS_DIR).glob("**/config-g2p.yaml"):
with open(config, encoding="utf8") as f:
Expand Down
21 changes: 20 additions & 1 deletion g2p/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
import os
import re
from collections import defaultdict
from pathlib import Path
from unittest import TestCase, main

import yaml
from pep440 import is_canonical

import g2p
from g2p import get_arpabet_langs
from g2p._version import VERSION
from g2p._version import VERSION, version_tuple
from g2p.exceptions import IncorrectFileType, RecursionError
from g2p.log import LOGGER
from g2p.mappings import Mapping, utils
Expand Down Expand Up @@ -304,6 +306,23 @@ def test_version_is_pep440_compliant(self):
main_version, _, _ = VERSION.partition("+")
self.assertTrue(is_canonical(main_version))

def test_scm_pretend_version_is_up_to_date(self):
    """.SETUPTOOLS_SCM_PRETEND_VERSION matches the major.minor of g2p's version.

    The file is looked up two directories above g2p/__init__.py. When it
    does not exist the test passes trivially.
    """
    filename = Path(g2p.__file__).parent.parent / ".SETUPTOOLS_SCM_PRETEND_VERSION"
    # Keep the try body minimal: only reading the file may legitimately fail.
    try:
        pretend_version = filename.read_text(encoding="utf8").strip()
    except FileNotFoundError:
        # This is fine, the file is only used in development
        return
    (major, minor, *_rest) = version_tuple
    major_minor = f"{major}.{minor}"
    self.assertEqual(
        major_minor,
        pretend_version,
        "Mismatch between .SETUPTOOLS_SCM_PRETEND_VERSION and the version setuptools_scm determined dynamically. Try: 1) fetch recent tags from GitHub, 2) rerun \"pip install -e .\", 3) if you're working on the next major or minor release, update .SETUPTOOLS_SCM_PRETEND_VERSION to match the dynamic version's major.minor.",
    )


if __name__ == "__main__":
main()
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dependencies = [
"panphon>=0.19",
"panphon<0.21; python_version<'3.9'",
"panphon<0.21; platform_system=='Windows'",
"pydantic>=2.4",
"pydantic>=2.4, <2.9", # pydantic 2.9.0 changes our schema
"pyyaml>=5.2",
"regex",
"text_unidecode",
Expand Down Expand Up @@ -89,6 +89,8 @@ Homepage = "https://github.com/roedoejet/g2p"
packages = ["g2p"]
include-package-data = true

[tool.setuptools_scm]

[tool.hatch.version]
source = "vcs"

Expand Down

0 comments on commit 5631210

Please sign in to comment.