Skip to content

Commit

Permalink
Merge pull request #356 from roedoejet/dev.fix-354
Browse files Browse the repository at this point in the history
Correct some issues with `g2p generate-mapping`
  • Loading branch information
dhdaines authored Apr 3, 2024
2 parents 43d3d37 + ea04262 commit 232ec0f
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 17 deletions.
24 changes: 16 additions & 8 deletions g2p/mappings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ class Mapping(_MappingModelDefinition):
"""Class for lookup tables"""

def model_post_init(self, *_args, **_kwargs) -> None:
"""After the model is constructed, we process the model specs by applying all the configuration to the rules (ie prevent feeding, unicode normalization etc..)"""
"""After the model is constructed, we process the model specs by
applying all the configuration to the rules (ie prevent feeding,
unicode normalization etc..)"""
if self.type == MAPPING_TYPE.mapping or self.type is None:
# load abbreviations from path
if self.abbreviations_path is not None and not self.abbreviations:
Expand Down Expand Up @@ -81,17 +83,18 @@ def find_mapping(

@staticmethod
def find_mapping_by_id(map_id: str) -> "Mapping":
"""Find the mapping with a given ID, i.e., the "id" found in the mapping, like in the "panphon_preprocessor" mapping."""
"""Find the mapping with a given ID, i.e., the "id" found in the
mapping, like in the "panphon_preprocessor" mapping."""
for mapping in MAPPINGS_AVAILABLE:
if mapping.id == map_id:
return deepcopy(mapping)
raise exceptions.MappingMissing(map_id, None)

@staticmethod
def load_mapping_from_path(path_to_mapping_config: Union[str, Path], index=0):
"""Loads a mapping from a path, if there is more than one mapping, then it loads based on the int
provided to the 'index' argument. Default is 0.
"""
"""Loads a mapping from a path, if there is more than one mapping,
then it loads based on the int provided to the 'index'
argument. Default is 0."""
mapping_config = MappingConfig.load_mapping_config_from_path(
path_to_mapping_config
)
Expand Down Expand Up @@ -122,13 +125,17 @@ def index(self, item):
"""Find the location of an item in self"""
return self.rules.index(item)

def inventory(self, in_or_out: str = "in"):
def inventory(self, in_or_out: str = "in", non_empty: bool = False):
"""Return just inputs or outputs as inventory of mapping"""
if in_or_out == "in":
in_or_out = "rule_input"
if in_or_out == "out":
in_or_out = "rule_output"
return [getattr(x, in_or_out) for x in self.rules]
inv = [getattr(x, in_or_out) for x in self.rules]
if non_empty:
return [sym for sym in inv if sym != ""]
else:
return inv

def plain_mapping(self):
"""Return the plain mapping for displaying or saving to disk.
Expand Down Expand Up @@ -274,7 +281,8 @@ def rule_to_regex(self, rule: Union[Rule, dict]) -> Union[Pattern, None]:
)
raise exceptions.MalformedMapping(
f"Your regex in mapping between {in_lang} and {out_lang} is malformed. "
f"Do you have un-escaped regex characters in your input {inp}, contexts {rule.context_before}, {rule.context_after}?"
f"Do you have un-escaped regex characters in your input {inp}, "
f"contexts {rule.context_before}, {rule.context_after}?"
) from e
return rule_regex

Expand Down
8 changes: 4 additions & 4 deletions g2p/mappings/create_ipa_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,17 +190,17 @@ def create_mapping(
)
l1_is_xsampa, l2_is_xsampa = is_xsampa(map_1_name), is_xsampa(map_2_name)
rules = align_inventories(
mapping_1.inventory(mapping_1_io),
mapping_2.inventory(mapping_2_io),
mapping_1.inventory(mapping_1_io, non_empty=True),
mapping_2.inventory(mapping_2_io, non_empty=True),
l1_is_xsampa,
l2_is_xsampa,
distance=distance,
quiet=quiet,
)

# Initialize mapping with input language parameters (as_is,
# case_sensitive, prevent_feeding, etc)
config = mapping_1.model_copy().model_dump()
# case_sensitive, prevent_feeding, etc) but *not* the same filename!
config = mapping_1.model_copy().model_dump(exclude={"rules_path"})
config = {
**config,
"authors": [f"Generated {dt.datetime.now()}"],
Expand Down
41 changes: 36 additions & 5 deletions g2p/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
update_schema,
)
from g2p.log import LOGGER
from g2p.mappings import MappingConfig
from g2p.mappings.langs import (
LANGS_DIR,
LANGS_FILE_NAME,
Expand Down Expand Up @@ -281,14 +282,44 @@ def test_convert_option_tl(self):
result = self.runner.invoke(convert, "--tok-lang fra e\\'i oji oji-ipa")
self.assertIn("eː'i", result.stdout)

def test_generate_mapping_config(self):
"""Ensure that generate-mapping creates valid configuration."""
# The underlying create_mapping() function is tested in
# test_create_mapping.py, and align_to_dummy_fallback() in
# test_fallback.py, with less expensive inputs than our real
# g2p mappings, and with predictable results. However, we do
# need to ensure that it creates/updates a correct
# configuration, so we test that here.
with tempfile.TemporaryDirectory() as tmpdir:
results = self.runner.invoke(
generate_mapping, ["--ipa", "atj", "--out-dir", tmpdir]
)
self.assertEqual(results.exit_code, 0)
rulespath = os.path.join(tmpdir, "atj-ipa_to_eng-ipa.json")
self.assertTrue(os.path.exists(rulespath))
confpath = os.path.join(tmpdir, "config-g2p.yaml")
config = MappingConfig.load_mapping_config_from_path(confpath)
self.assertEqual(len(config.mappings), 1)
self.assertEqual(config.mappings[0].rules_path, Path(rulespath))
# Run it again, should get the same result
results = self.runner.invoke(
generate_mapping, ["--ipa", "atj", "--out-dir", tmpdir]
)
self.assertEqual(results.exit_code, 0)
config = MappingConfig.load_mapping_config_from_path(confpath)
self.assertEqual(len(config.mappings), 1)
self.assertEqual(config.mappings[0].rules_path, Path(rulespath))
# Run it with a different language, should get more config
results = self.runner.invoke(
generate_mapping, ["--ipa", "alq", "--out-dir", tmpdir]
)
self.assertEqual(results.exit_code, 0)
config = MappingConfig.load_mapping_config_from_path(confpath)
self.assertEqual(len(config.mappings), 2)

def test_generate_mapping_errors(self):
"""Exercise various error situations with the g2p generate-mapping CLI command"""

# We don't exercise valid calls to generate_mapping here. The underlying
# create_mapping() function is tested in test_create_mapping.py, and
# align_to_dummy_fallback() in test_fallback.py, with less expensive
# inputs than our real g2p mappings, and with predictable results.

results = self.runner.invoke(generate_mapping)
self.assertNotEqual(results.exit_code, 0)
self.assertIn("Nothing to do", results.output)
Expand Down
20 changes: 20 additions & 0 deletions g2p/tests/test_create_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
Test all Mappings
"""

import io
from contextlib import redirect_stderr
from unittest import TestCase, main

from g2p.log import LOGGER
Expand Down Expand Up @@ -155,6 +157,24 @@ def test_distances(self):
set_of_mappings.add(tuple(rule.rule_output for rule in mapping.rules))
self.assertGreater(len(set_of_mappings), 3)

def test_deletion_mapping(self):
"""Ensure that deletion rules do not lead to spurious warnings."""
src_mappings = [
{"in": "foo", "out": ""},
{"in": "ᐃ", "out": "i"},
{"in": "ᐅ", "out": "u"},
{"in": "ᐊ", "out": "a"},
]
src_mapping = Mapping(rules=src_mappings, in_lang="crj", out_lang="crj-ipa")
log_output = io.StringIO()
with redirect_stderr(log_output):
mapping = create_mapping(src_mapping, self.target_mapping)
self.assertFalse("WARNING" in log_output.getvalue())
transducer = Transducer(mapping)
self.assertEqual(transducer("a").output_string, "ɑ")
self.assertEqual(transducer("i").output_string, "i")
self.assertEqual(transducer("u").output_string, "u")


if __name__ == "__main__":
main()

0 comments on commit 232ec0f

Please sign in to comment.