diff --git a/g2p/mappings/__init__.py b/g2p/mappings/__init__.py index 068c4e25..6006c611 100644 --- a/g2p/mappings/__init__.py +++ b/g2p/mappings/__init__.py @@ -44,7 +44,9 @@ class Mapping(_MappingModelDefinition): """Class for lookup tables""" def model_post_init(self, *_args, **_kwargs) -> None: - """After the model is constructed, we process the model specs by applying all the configuration to the rules (ie prevent feeding, unicode normalization etc..)""" + """After the model is constructed, we process the model specs by + applying all the configuration to the rules (ie prevent feeding, + unicode normalization etc..)""" if self.type == MAPPING_TYPE.mapping or self.type is None: # load abbreviations from path if self.abbreviations_path is not None and not self.abbreviations: @@ -81,7 +83,8 @@ def find_mapping( @staticmethod def find_mapping_by_id(map_id: str) -> "Mapping": - """Find the mapping with a given ID, i.e., the "id" found in the mapping, like in the "panphon_preprocessor" mapping.""" + """Find the mapping with a given ID, i.e., the "id" found in the + mapping, like in the "panphon_preprocessor" mapping.""" for mapping in MAPPINGS_AVAILABLE: if mapping.id == map_id: return deepcopy(mapping) @@ -89,9 +92,9 @@ def find_mapping_by_id(map_id: str) -> "Mapping": @staticmethod def load_mapping_from_path(path_to_mapping_config: Union[str, Path], index=0): - """Loads a mapping from a path, if there is more than one mapping, then it loads based on the int - provided to the 'index' argument. Default is 0. - """ + """Loads a mapping from a path, if there is more than one mapping, + then it loads based on the int provided to the 'index' + argument. Default is 0.""" mapping_config = MappingConfig.load_mapping_config_from_path( path_to_mapping_config ) @@ -122,13 +125,17 @@ def index(self, item): """Find the location of an item in self""" return self.rules.index(item) - def inventory(self, in_or_out: str = "in"): + def inventory(self, in_or_out: str = "in", non_empty: bool = False): """Return just inputs or outputs as inventory of mapping""" if in_or_out == "in": in_or_out = "rule_input" if in_or_out == "out": in_or_out = "rule_output" - return [getattr(x, in_or_out) for x in self.rules] + inv = [getattr(x, in_or_out) for x in self.rules] + if non_empty: + return [sym for sym in inv if sym != ""] + else: + return inv def plain_mapping(self): """Return the plain mapping for displaying or saving to disk. @@ -274,7 +281,8 @@ def rule_to_regex(self, rule: Union[Rule, dict]) -> Union[Pattern, None]: ) raise exceptions.MalformedMapping( f"Your regex in mapping between {in_lang} and {out_lang} is malformed. " - f"Do you have un-escaped regex characters in your input {inp}, contexts {rule.context_before}, {rule.context_after}?" + f"Do you have un-escaped regex characters in your input {inp}, " + f"contexts {rule.context_before}, {rule.context_after}?" ) from e return rule_regex diff --git a/g2p/mappings/create_ipa_mapping.py b/g2p/mappings/create_ipa_mapping.py index 237ce232..6e8213cb 100644 --- a/g2p/mappings/create_ipa_mapping.py +++ b/g2p/mappings/create_ipa_mapping.py @@ -190,8 +190,8 @@ def create_mapping( ) l1_is_xsampa, l2_is_xsampa = is_xsampa(map_1_name), is_xsampa(map_2_name) rules = align_inventories( - mapping_1.inventory(mapping_1_io), - mapping_2.inventory(mapping_2_io), + mapping_1.inventory(mapping_1_io, non_empty=True), + mapping_2.inventory(mapping_2_io, non_empty=True), l1_is_xsampa, l2_is_xsampa, distance=distance, diff --git a/g2p/tests/test_create_mapping.py b/g2p/tests/test_create_mapping.py index daae0a48..4e9ad740 100755 --- a/g2p/tests/test_create_mapping.py +++ b/g2p/tests/test_create_mapping.py @@ -4,6 +4,8 @@ Test all Mappings """ +import io +from contextlib import redirect_stderr from unittest import TestCase, main from g2p.log import LOGGER @@ -155,6 +157,24 @@ def test_distances(self): set_of_mappings.add(tuple(rule.rule_output for rule in mapping.rules)) self.assertGreater(len(set_of_mappings), 3) + def test_deletion_mapping(self): + """Ensure that deletion rules do not lead to spurious warnings.""" + src_mappings = [ + {"in": "foo", "out": ""}, + {"in": "ᐃ", "out": "i"}, + {"in": "ᐅ", "out": "u"}, + {"in": "ᐊ", "out": "a"}, + ] + src_mapping = Mapping(rules=src_mappings, in_lang="crj", out_lang="crj-ipa") + log_output = io.StringIO() + with redirect_stderr(log_output): + mapping = create_mapping(src_mapping, self.target_mapping) + self.assertFalse("WARNING" in log_output.getvalue()) + transducer = Transducer(mapping) + self.assertEqual(transducer("a").output_string, "ɑ") + self.assertEqual(transducer("i").output_string, "i") + self.assertEqual(transducer("u").output_string, "u") + if __name__ == "__main__": main()