Skip to content

Commit

Permalink
fix: do not try to generate mappings for empty outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Apr 3, 2024
1 parent d9a07e5 commit ea04262
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 10 deletions.
24 changes: 16 additions & 8 deletions g2p/mappings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ class Mapping(_MappingModelDefinition):
"""Class for lookup tables"""

def model_post_init(self, *_args, **_kwargs) -> None:
"""After the model is constructed, we process the model specs by applying all the configuration to the rules (ie prevent feeding, unicode normalization etc..)"""
"""After the model is constructed, we process the model specs by
applying all the configuration to the rules (ie prevent feeding,
unicode normalization etc..)"""
if self.type == MAPPING_TYPE.mapping or self.type is None:
# load abbreviations from path
if self.abbreviations_path is not None and not self.abbreviations:
Expand Down Expand Up @@ -81,17 +83,18 @@ def find_mapping(

@staticmethod
def find_mapping_by_id(map_id: str) -> "Mapping":
    """Find the mapping with a given ID, i.e., the "id" found in the
    mapping, like in the "panphon_preprocessor" mapping.

    Args:
        map_id: value compared against the ``id`` attribute of each entry
            in ``MAPPINGS_AVAILABLE``.

    Returns:
        A deep copy of the first mapping whose ``id`` equals ``map_id``, so
        the caller can modify it without touching the shared entry.

    Raises:
        exceptions.MappingMissing: if no available mapping has that ID.
    """
    for mapping in MAPPINGS_AVAILABLE:
        if mapping.id == map_id:
            return deepcopy(mapping)
    # Fell through the whole list without a match.
    raise exceptions.MappingMissing(map_id, None)

@staticmethod
def load_mapping_from_path(path_to_mapping_config: Union[str, Path], index=0):
"""Loads a mapping from a path, if there is more than one mapping, then it loads based on the int
provided to the 'index' argument. Default is 0.
"""
"""Loads a mapping from a path, if there is more than one mapping,
then it loads based on the int provided to the 'index'
argument. Default is 0."""
mapping_config = MappingConfig.load_mapping_config_from_path(
path_to_mapping_config
)
Expand Down Expand Up @@ -122,13 +125,17 @@ def index(self, item):
"""Find the location of an item in self"""
return self.rules.index(item)

def inventory(self, in_or_out: str = "in", non_empty: bool = False):
    """Return just the inputs or the outputs of the rules as an inventory.

    Args:
        in_or_out: ``"in"`` selects each rule's ``rule_input`` and ``"out"``
            selects each rule's ``rule_output``. Any other value is used
            verbatim as the attribute name read from every rule.
        non_empty: when true, entries equal to the empty string (for
            example the output side of a deletion rule) are left out.

    Returns:
        A list of the selected attribute of each rule in ``self.rules``, in
        rule order; shorter than ``self.rules`` only when ``non_empty``
        removed entries.
    """
    # Translate the short aliases without rebinding the caller-facing
    # parameter; unknown values pass through unchanged.
    attr_name = {"in": "rule_input", "out": "rule_output"}.get(in_or_out, in_or_out)
    inv = [getattr(rule, attr_name) for rule in self.rules]
    if non_empty:
        return [sym for sym in inv if sym != ""]
    return inv

def plain_mapping(self):
"""Return the plain mapping for displaying or saving to disk.
Expand Down Expand Up @@ -274,7 +281,8 @@ def rule_to_regex(self, rule: Union[Rule, dict]) -> Union[Pattern, None]:
)
raise exceptions.MalformedMapping(
f"Your regex in mapping between {in_lang} and {out_lang} is malformed. "
f"Do you have un-escaped regex characters in your input {inp}, contexts {rule.context_before}, {rule.context_after}?"
f"Do you have un-escaped regex characters in your input {inp}, "
f"contexts {rule.context_before}, {rule.context_after}?"
) from e
return rule_regex

Expand Down
4 changes: 2 additions & 2 deletions g2p/mappings/create_ipa_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ def create_mapping(
)
l1_is_xsampa, l2_is_xsampa = is_xsampa(map_1_name), is_xsampa(map_2_name)
rules = align_inventories(
mapping_1.inventory(mapping_1_io),
mapping_2.inventory(mapping_2_io),
mapping_1.inventory(mapping_1_io, non_empty=True),
mapping_2.inventory(mapping_2_io, non_empty=True),
l1_is_xsampa,
l2_is_xsampa,
distance=distance,
Expand Down
20 changes: 20 additions & 0 deletions g2p/tests/test_create_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
Test all Mappings
"""

import io
from contextlib import redirect_stderr
from unittest import TestCase, main

from g2p.log import LOGGER
Expand Down Expand Up @@ -155,6 +157,24 @@ def test_distances(self):
set_of_mappings.add(tuple(rule.rule_output for rule in mapping.rules))
self.assertGreater(len(set_of_mappings), 3)

def test_deletion_mapping(self):
    """Ensure that deletion rules do not lead to spurious warnings.

    The source mapping contains one rule whose output is the empty string
    (a deletion). Building a mapping from it to ``self.target_mapping``
    must not write any "WARNING" text to stderr, and the resulting
    mapping must still transduce the remaining symbols.
    """
    src_mappings = [
        {"in": "foo", "out": ""},  # deletion rule: empty output
        {"in": "ᐃ", "out": "i"},
        {"in": "ᐅ", "out": "u"},
        {"in": "ᐊ", "out": "a"},
    ]
    src_mapping = Mapping(rules=src_mappings, in_lang="crj", out_lang="crj-ipa")
    # Capture whatever is written to sys.stderr while the mapping is built.
    # NOTE(review): presumes the logger emits warnings on stderr - confirm.
    log_output = io.StringIO()
    with redirect_stderr(log_output):
        mapping = create_mapping(src_mapping, self.target_mapping)
    # assertNotIn shows the captured text on failure, unlike assertFalse.
    self.assertNotIn("WARNING", log_output.getvalue())
    transducer = Transducer(mapping)
    self.assertEqual(transducer("a").output_string, "ɑ")
    self.assertEqual(transducer("i").output_string, "i")
    self.assertEqual(transducer("u").output_string, "u")


if __name__ == "__main__":
main()

0 comments on commit ea04262

Please sign in to comment.