Skip to content

Commit

Permalink
refactor: switch to pydantic 2
Browse files Browse the repository at this point in the history
BREAKING CHANGE: requires Python 3.7 or later
  • Loading branch information
roedoejet committed Sep 1, 2023
1 parent eec8e82 commit 1d8e4fb
Show file tree
Hide file tree
Showing 87 changed files with 698 additions and 741 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/matrix-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-20.04, windows-latest, macos-latest]
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
python-version: ["3.7", "3.8", "3.9", "3.10"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
Expand Down
19 changes: 0 additions & 19 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,6 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false # too many upload errors to keep "true"

test-36:
# This test job exercises only the g2p library code with Python 3.6, since
# that is the only part we still keep compatible with 3.6
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.6"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -e .
- name: Run tests
run: |
./run_tests.py dev
test-on-windows:
# Make sure stuff stays compatible with Windows by testing there too.
runs-on: windows-latest
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ mappings:
type: mapping
authors:
- <YourNameHere>
mapping: <FilenameOfMapping>
rules: <FilenameOfMapping>
<<: *shared
```
Expand All @@ -241,7 +241,7 @@ mappings:
type: mapping
authors:
- Aidan Pine
mapping: dan_to_ipa.csv
rules: dan_to_ipa.csv
abbreviations: dan_abbs.csv
rule_ordering: as-written
case_sensitive: false
Expand Down
10 changes: 4 additions & 6 deletions g2p/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from g2p.mappings import Mapping
from g2p.mappings.langs import LANGS, LANGS_NETWORK
from g2p.mappings.tokenizer import Tokenizer, make_tokenizer
from g2p.mappings.utils import _MappingModelDefinition
from g2p.transducer import CompositeTransducer, TokenizingTransducer, Transducer

_g2p_cache: Dict[
Expand Down Expand Up @@ -106,7 +105,7 @@ def make_g2p( # noqa: C901
# Find all mappings needed
mappings_needed = []
for lang1, lang2 in zip(path[:-1], path[1:]):
mapping = Mapping(in_lang=lang1, out_lang=lang2)
mapping = Mapping.find_mapping(in_lang=lang1, out_lang=lang2)
LOGGER.debug(
f"Adding mapping between {lang1} and {lang2} to composite transducer."
)
Expand Down Expand Up @@ -181,11 +180,10 @@ def get_arpabet_langs():
for _, v in LANGS.items():
for mapping in v["mappings"]:
# add mapping to names hash table
config: _MappingModelDefinition = mapping
full_lang_names[config.in_lang] = config.language_name
full_lang_names[mapping["in_lang"]] = mapping["language_name"]
# add input id to all available langs list
if config.in_lang not in langs_available:
langs_available.append(config.in_lang)
if mapping["in_lang"] not in langs_available:
langs_available.append(mapping["in_lang"])

# get the key from all networks in g2p module that have a path to 'eng-arpabet',
# which is needed for the readalongs
Expand Down
5 changes: 3 additions & 2 deletions g2p/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from g2p import make_g2p
from g2p.exceptions import InvalidLanguageCode, NoPath
from g2p.log import LOGGER
from g2p.mappings.langs import LANGS_NETWORK, MAPPINGS_AVAILABLE
from g2p.mappings import MAPPINGS_AVAILABLE
from g2p.mappings.langs import LANGS_NETWORK
from g2p.static import __file__ as static_file


Expand All @@ -36,7 +37,7 @@ class Langs(Resource):
def __init__(self):
# TODO: exclude parent dir and maybe null values too
self.AVAILABLE_MAPPINGS = [
json.loads(mapping.json())
json.loads(mapping.model_dump_json())
for mapping in sorted(MAPPINGS_AVAILABLE, key=lambda x: x.in_lang)
]
self.parser = reqparse.RequestParser()
Expand Down
6 changes: 3 additions & 3 deletions g2p/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def convert(message):
mapping_args["abbreviations"] = flatten_abbreviations_format(
mapping["abbreviations"]
)
mapping_args["mapping"] = mapping["mapping"]
mapping_args["rules"] = mapping["rules"]
mappings_obj = Mapping(**mapping_args)
transducer = Transducer(mappings_obj)
transducers.append(transducer)
Expand Down Expand Up @@ -187,7 +187,7 @@ def change_table(message):
out_lang="custom",
type="mapping",
norm_form="NFC",
).dict()
).model_dump()
kwargs["include"] = False
emit(
"table response",
Expand All @@ -211,7 +211,7 @@ def change_table(message):
{
"mappings": x.plain_mapping(),
"abbs": expand_abbreviations_format(x.abbreviations),
"kwargs": json.loads(x.mapping_config.json()),
"kwargs": json.loads(x.model_dump_json()),
}
for x in mappings
],
Expand Down
41 changes: 23 additions & 18 deletions g2p/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from g2p.app import APP
from g2p.exceptions import InvalidLanguageCode, MappingMissing, NoPath
from g2p.log import LOGGER
from g2p.mappings import Mapping
from g2p.mappings import MAPPINGS_AVAILABLE, Mapping, Rule
from g2p.mappings.create_fallback_mapping import (
DUMMY_INVENTORY,
align_to_dummy_fallback,
Expand All @@ -34,7 +34,6 @@
LANGS_DIR,
LANGS_NETWORK,
LANGS_PKL_NAME,
MAPPINGS_AVAILABLE,
NETWORK_PKL_NAME,
reload_db,
)
Expand All @@ -43,7 +42,7 @@
check_ipa_known_segs,
network_to_echart,
)
from g2p.mappings.utils import is_ipa, is_xsampa, load_mapping_from_path, normalize
from g2p.mappings.utils import is_ipa, is_xsampa, normalize
from g2p.static import __file__ as static_file
from g2p.transducer import Transducer

Expand Down Expand Up @@ -82,7 +81,7 @@ def parse_from_or_to_lang_spec(lang_spec):

if out_lang:
try:
mapping = Mapping(in_lang=in_lang, out_lang=out_lang)
mapping = Mapping.find_mapping(in_lang=in_lang, out_lang=out_lang)
except MappingMissing as e:
raise click.BadParameter(
f'Cannot find mapping {in_lang}->{out_lang} for --from or --to spec "{lang_spec}": {e}'
Expand Down Expand Up @@ -112,14 +111,14 @@ def parse_from_or_to_lang_spec(lang_spec):
"supported with the full in-lang_to_out-lang[[in]|[out]] syntax."
)
if in_lang == "eng":
mapping = Mapping(in_lang="eng-ipa", out_lang="eng-arpabet")
mapping = Mapping.find_mapping(in_lang="eng-ipa", out_lang="eng-arpabet")
in_or_out = "in"
return [(mapping, in_or_out)]
else:
out_lang = in_lang + "-ipa"
# check_ipa_known_segs([out_lang]) # this outputs a lot of spurious noise...
mappings = [
(Mapping(in_lang=m.in_lang, out_lang=m.out_lang), "out")
(Mapping.find_mapping(in_lang=m.in_lang, out_lang=m.out_lang), "out")
for m in MAPPINGS_AVAILABLE
if m.out_lang == out_lang and not is_ipa(m.in_lang)
]
Expand Down Expand Up @@ -366,7 +365,9 @@ def generate_mapping( # noqa: C901
source_mappings = []
for in_lang in in_langs:
try:
source_mapping = Mapping(in_lang=in_lang, out_lang=out_lang)
source_mapping = Mapping.find_mapping(
in_lang=in_lang, out_lang=out_lang
)
except MappingMissing as e:
raise click.BadParameter(
f'Cannot find IPA mapping from "{in_lang}" to "{out_lang}": {e}',
Expand All @@ -376,7 +377,7 @@ def generate_mapping( # noqa: C901

if ipa:
check_ipa_known_segs([f"{in_lang}-ipa"])
eng_ipa = Mapping(in_lang="eng-ipa", out_lang="eng-arpabet")
eng_ipa = Mapping.find_mapping(in_lang="eng-ipa", out_lang="eng-arpabet")
click.echo(f"Writing English IPA mapping for {out_lang} to file")
new_mapping = create_mapping(source_mappings[0], eng_ipa, distance=distance)
for m in source_mappings[1:]:
Expand Down Expand Up @@ -418,11 +419,11 @@ def generate_mapping( # noqa: C901

for from_mapping, in_or_out in from_mappings:
LOGGER.info(
f"From mapping: {from_mapping.mapping_config.in_lang}_to_{from_mapping.mapping_config.out_lang}[{in_or_out}]"
f"From mapping: {from_mapping.in_lang}_to_{from_mapping.out_lang}[{in_or_out}]"
)
for to_mapping, in_or_out in to_mappings:
LOGGER.info(
f"To mapping: {to_mapping.mapping_config.in_lang}_to_{to_mapping.mapping_config.out_lang}[{in_or_out}]"
f"To mapping: {to_mapping.in_lang}_to_{to_mapping.out_lang}[{in_or_out}]"
)

new_mapping = create_multi_mapping(
Expand Down Expand Up @@ -526,9 +527,9 @@ def convert( # noqa: C901
data["mappings"][index]["out_lang"],
)
)
data["mappings"][index] = load_mapping_from_path(config, index)
data["mappings"][index] = Mapping.load_mapping_from_path(config, index)
else:
mapping = load_mapping_from_path(config)
mapping = Mapping.load_mapping_from_path(config)
data["mappings"] = [mapping]
mappings_legal_pairs.append((mapping.in_lang, mapping.out_lang))
for pair in mappings_legal_pairs:
Expand Down Expand Up @@ -686,6 +687,7 @@ def scan(lang, path):
Displays the set of un-mapped characters in a document.
Accounts for case sensitivity in the configuration.
"""
# breakpoint()
# Check input lang exists
if lang not in LANGS_NETWORK.nodes:
raise click.UsageError(f"'{lang}' is not a valid value for 'LANG'")
Expand All @@ -703,8 +705,10 @@ def scan(lang, path):
# Get input chars in mapping
mapped_chars = set()
for lang_mapping in mappings:
for x in lang_mapping.mapping:
mapped_chars.add(normalize(x["in"], "NFD"))
assert isinstance(lang_mapping, Mapping)
for x in lang_mapping.rules:
assert isinstance(x, Rule)
mapped_chars.add(normalize(x.in_char, "NFD"))
# Find unmapped chars
filter_chars = " \n"
mapped_string = "".join(mapped_chars)
Expand Down Expand Up @@ -754,7 +758,7 @@ def show_mappings(lang1, lang2, verbose, csv):

elif lang1 is not None:
mappings = [
Mapping(in_lang=m.in_lang, out_lang=m.out_lang)
Mapping.find_mapping(in_lang=m.in_lang, out_lang=m.out_lang)
for m in MAPPINGS_AVAILABLE
if m.in_lang == lang1 or m.out_lang == lang1
]
Expand All @@ -765,14 +769,15 @@ def show_mappings(lang1, lang2, verbose, csv):

else:
mappings = (
Mapping(in_lang=m.in_lang, out_lang=m.out_lang) for m in MAPPINGS_AVAILABLE
Mapping.find_mapping(in_lang=m.in_lang, out_lang=m.out_lang)
for m in MAPPINGS_AVAILABLE
)

file_type = "csv" if csv else "json"
if verbose:
for m in mappings:
json.dump(
json.loads(m.mapping_config.json()),
json.loads(m.model_dump_json()),
sys.stdout,
indent=4,
ensure_ascii=False,
Expand All @@ -783,4 +788,4 @@ def show_mappings(lang1, lang2, verbose, csv):
print()
else:
for i, m in enumerate(mappings):
print(f"{i+1}: {m.mapping_config.in_lang}{m.mapping_config.out_lang}")
print(f"{i+1}: {m.in_lang}{m.out_lang}")
9 changes: 9 additions & 0 deletions g2p/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ def __str__(self):
)


class MappingNotInitializedProperlyError(CommandLineError):
    """Error signalling that a Mapping object was not properly initialized."""

    def __init__(self, msg="Your Mapping object was not properly initialized"):
        # Keep the message on the instance so __str__ can render it.
        self.msg = msg
        # NOTE(review): the exception passes itself as the base-class
        # argument, matching the sibling IncorrectFileType; kept as-is.
        super().__init__(self)

    def __str__(self):
        """Return the stored message as rendered by ``self.render``."""
        return self.render(self.msg)


class IncorrectFileType(CommandLineError):
def __init__(self, msg):
super().__init__(self)
Expand Down
Loading

0 comments on commit 1d8e4fb

Please sign in to comment.