Skip to content

Commit

Permalink
Add & update transformers & tests (WiP)
Browse files Browse the repository at this point in the history
  • Loading branch information
khoidt committed Nov 5, 2024
1 parent 60a809f commit 9f75e7f
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 62 deletions.
73 changes: 39 additions & 34 deletions ebl/atf_importer/domain/legacy_atf_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,24 @@ def transform(self, tree: Tree) -> Tree:
result = super().transform(tree)
return result if result else tree

def _transform_children(self, children):
def _transform_children(self, children: Sequence[Tree]):
index_correction = 0
for index, child in enumerate(children):
self._enter_node(index)
if self.is_classes_break_at(self.get_ancestors()):
result = child
elif isinstance(child, Tree):
result = self._transform_tree(child)
elif self.__visit_tokens__ and isinstance(child, Token):
result = self._call_userfunc_token(child)
else:
result = child

self._enter_node(index - index_correction)
result = self._get_child_result(child)
self._exit_node()
if result is not Discard:
self._exit_node()
yield result
else:
self._exit_node()

def _get_child_result(self, child: Tree) -> Tree:
    """Return the result of processing a single child node.

    The child is handed back untouched when ``is_classes_break_at``
    reports a break for the current ancestor chain. Otherwise a ``Tree``
    child is passed to ``_transform_tree`` and, if ``__visit_tokens__``
    is set, a ``Token`` child is passed to ``_call_userfunc_token``.
    Any other child is returned as is.

    NOTE(review): the annotations say ``Tree``, but the body explicitly
    handles ``Token`` children as well.
    """
    ancestors = self.get_ancestors()
    if not self.is_classes_break_at(ancestors):
        if isinstance(child, Tree):
            return self._transform_tree(child)
        if self.__visit_tokens__ and isinstance(child, Token):
            return self._call_userfunc_token(child)
    # Either the break condition holds or no handler applies.
    return child

def _enter_node(self, index: int = 0) -> None:
self.current_path.append(index)
Expand All @@ -60,7 +61,8 @@ def get_ancestors(self) -> Sequence:
tree = self.current_tree
ancestors = [tree.data]
for parent_index in self.current_path[:-1]:
ancestors.append(tree.children[parent_index].data)
ancestor = tree.children[parent_index]
ancestors.append(ancestor.data)
tree = tree.children[parent_index]
return ancestors

Expand All @@ -69,8 +71,6 @@ def is_classes_break_at(self, node_classes: Sequence[str]) -> bool:


class HalfBracketsTransformer(LegacyTransformer):
# ToDo: Check if works

def __init__(self, **kwargs):
super().__init__(**kwargs)
self.open = False
Expand All @@ -80,40 +80,36 @@ def clear(self):
self.open = False

@v_args(inline=True)
def ebl_atf_text_line__LEGACY_OPEN_HALF_BRACKET(self, bracket: str) -> str:
print("! bbbbbb", bracket)
input()
def ebl_atf_text_line__open_legacy_damage(self, bracket: str) -> Discard:
self.legacy_found = True
self.open = True
return ""
return Discard

@v_args(inline=True)
def ebl_atf_text_line__LEGACY_CLOSE_HALF_BRACKET(self, bracket: str) -> str:
print("! bbbbbb", bracket)
input()
def ebl_atf_text_line__close_legacy_damage(self, bracket: str) -> Discard:
self.legacy_found = True
self.open = False
return ""
return Discard

@v_args(inline=True)
def ebl_atf_text_line__flags(self, flags: str):
print("! bbbbbb", flags)
input()
return flags + "#" if self.open else flags
def ebl_atf_text_line__flags(self, *flags) -> Tree:
damage_flag = Token("ebl_atf_text_line__DAMAGE", "#") if self.open else None
_flags = [flag for flag in [*flags, damage_flag] if flag]
return Tree("ebl_atf_text_line__flags", _flags if _flags else [])


class OraccJoinerTransformer(LegacyTransformer):
@v_args(inline=True)
def ebl_atf_text_line__LEGACY_ORACC_JOINER(self, bracket: str) -> str:
print("!!!!!!!!!!!!!!!!!!!! LEGACY_ORACC_JOINER")
self.legacy_found = True
return "-"
def ebl_atf_text_line__joiner(self, joiner: Token) -> Tree:
if joiner.type == "ebl_atf_text_line__LEGACY_ORACC_JOINER":
self.legacy_found = True
return Tree("ebl_atf_text_line__joiner", [Token("MINUS", "-")])
return Tree("ebl_atf_text_line__joiner", [joiner])


class OraccSpecialTransformer(LegacyTransformer):
@v_args(inline=True)
def ebl_atf_text_line__LEGACY_ORACC_DISH_DIVIDER(self, child: str) -> Tree:
print("!!!!!!!!!!!!!!!!!!!! LEGACY_ORACC_DISH_DIVIDER")
self.legacy_found = True
return Tree(
"ebl_atf_text_line__logogram_name_part",
Expand Down Expand Up @@ -194,3 +190,12 @@ def _set_sub_index(self, sub_index: str) -> None:
"ebl_atf_text_line__sub_index",
[sub_index_value],
)


class UncertainSignTransformer(LegacyTransformer):
    """Transformer for ``ebl_atf_text_line__legacy_uncertain_sign`` nodes."""

    @v_args(inline=True)
    def ebl_atf_text_line__legacy_uncertain_sign(
        self,
        prefix: Tree,
        sign: Tree,
    ) -> Tree:
        """Replace the node with its ``sign`` child.

        ``prefix`` is received but not used, so it is absent from the
        result. ``legacy_found`` is set to record that this callback ran.
        """
        self.legacy_found = True
        return sign
14 changes: 9 additions & 5 deletions ebl/atf_importer/domain/legacy_atf_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
HalfBracketsTransformer,
OraccJoinerTransformer,
OraccSpecialTransformer,
UncertainSignTransformer,
)
from ebl.atf_importer.domain.legacy_atf_transformers import LegacyTransformer

Expand All @@ -18,8 +19,9 @@


index_and_accented_transformer = (AccentedIndexTransformer(), "all_children")
half_brackets_transformer = (HalfBracketsTransformer(), "first_child")
oracc_joiner_transformer = (OraccJoinerTransformer(), "first_child")
uncertain_sign_transformer = (UncertainSignTransformer(), "all_children")
half_brackets_transformer = (HalfBracketsTransformer(), "all_children")
oracc_joiner_transformer = (OraccJoinerTransformer(), "all_children")
oracc_special_transformer = (OraccSpecialTransformer(), "first_child")


Expand All @@ -34,9 +36,11 @@ class LegacyAtfVisitor(Visitor):
],
"surrogate": [index_and_accented_transformer],
"grapheme": [index_and_accented_transformer],
"_parts_pattern": [half_brackets_transformer],
"_parts_pattern_gloss": [half_brackets_transformer],
"LEGACY_ORACC_JOINER": [oracc_joiner_transformer],
"word": [
uncertain_sign_transformer,
oracc_joiner_transformer,
],
"text": [half_brackets_transformer],
}

def __init__(self):
Expand Down
61 changes: 41 additions & 20 deletions ebl/tests/atf_importer/test_atf_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@
import json
from ebl.atf_importer.domain.atf_preprocessor import AtfPreprocessor

PROBLEMATIC_TEXT_LINES = [
(
"8. KAR <:> e-ṭe-ri :* KAR : e-ke-mu : LUGAL ina di-bi-ri : LUGAL ina "
"ud-da-a-ta",
"8. KAR < : > e-ṭe-ri :* KAR : e-ke-mu : LUGAL ina di-bi-ri : LUGAL ina "
"ud-da-a-ta",
),
# ToDo: All transformers should be tested

PARSE_AND_TRANSFORM_LEGACY = [
("1. ⸢16?! 15! 12⸣ 17", "1. 16?!# 15!# 12# 17"),
("1. $BAD.$É $ME", "1. BAD.E₂ ME"),
(
"14. [...] x (x) še-e-hu $BAD $É $ME : ina GAŠAN-ia₅ {d}SUEN {"
"d}INANA--<É>.AN.NA",
"14. [...] x (x) še-e-hu BAD E₂ ME : ina GAŠAN-ia₅ {d}SUEN {"
"d}INANA-<E₂>.AN.NA",
"1. {d}INANA--<É>.AN.NA",
"1. {d}INANA-<E₂>.AN.NA",
),
(
"1. ŠÚ ù ŠÚ<(šumma)> |ŠÚ+ŠÚ|",
Expand All @@ -27,19 +23,40 @@
"1. [*] * *-*",
"1. [DIŠ] DIŠ DIŠ-DIŠ",
),
(
"1. <:>",
"1. < : >",
),
]

PROBLEMATIC_TEXT_LINES = [
(
"1. [*] AN#.GE₆ GAR-ma U₄ ŠÚ{+up} * AN.GE₆ GAR-ma {d}IŠKUR KA-šú ŠUB{"
"+di} * AN.GE₆",
"1. [DIŠ] AN#.GE₆ GAR-ma U₄ ŠU₂{+up} DIŠ AN.GE₆ GAR-ma {d}IŠKUR KA-šu₂ "
"ŠUB{+di} DIŠ AN.GE₆",
),
(
"8. KAR <:> e-ṭe-ri :* KAR : e-ke-mu : LUGAL ina di-bi-ri : LUGAL ina "
"ud-da-a-ta",
"8. KAR < : > e-ṭe-ri :* KAR : e-ke-mu : LUGAL ina di-bi-ri : LUGAL ina "
"ud-da-a-ta",
),
(
"14. [...] x (x) še-e-hu $BAD $É $ME : ina GAŠAN-ia₅ {d}SUEN {"
"d}INANA--<É>.AN.NA",
"14. [...] x (x) še-e-hu BAD E₂ ME : ina GAŠAN-ia₅ {d}SUEN {"
"d}INANA-<E₂>.AN.NA",
),
]


FOLLOWING_SIGN_IS_NOT_A_LOGOGRAM = (
"5'. [...] x [...] x-šu₂? : kal : nap-ha-ri : $WA-wa-ru : ia-ar₂-ru",
"5'. [...] x [...] x-šu₂? : kal : nap-ha-ri : WA-wa-ru : ia-ar₂-ru",
)
FOLLOWING_SIGN_IS_NOT_A_LOGOGRAM = [
(
"5'. [...] x [...] x-šu₂? : kal : nap-ha-ri : $WA-wa-ru : ia-ar₂-ru",
"5'. [...] x [...] x-šu₂? : kal : nap-ha-ri : WA-wa-ru : ia-ar₂-ru",
)
]

LEGACY_GRAMMAR_SIGNS = [
(
Expand All @@ -53,19 +70,23 @@

@pytest.mark.parametrize(
"legacy_line,ebl_line",
[*PROBLEMATIC_TEXT_LINES, FOLLOWING_SIGN_IS_NOT_A_LOGOGRAM, *LEGACY_GRAMMAR_SIGNS],
[
*PARSE_AND_TRANSFORM_LEGACY,
*PROBLEMATIC_TEXT_LINES,
*FOLLOWING_SIGN_IS_NOT_A_LOGOGRAM,
*LEGACY_GRAMMAR_SIGNS,
],
)
def test_text_lines(legacy_line, ebl_line):
# ToDo: fix
atf_preprocessor = AtfPreprocessor("../logs", 0)
legacy_tree = atf_preprocessor.ebl_parser.parse(legacy_line)
legacy_tree = atf_preprocessor.transform_legacy_atf(legacy_tree)
expected_tree = atf_preprocessor.ebl_parser.parse(ebl_line)
print("RESULT:\n", legacy_tree.pretty())
print("EXPECTED:\n", expected_tree.pretty())
# print("RESULT:\n", legacy_tree) # .pretty())
# print("EXPECTED:\n", expected_tree) # .pretty())
# input() <- With `task test`: "OSError: pytest: reading from stdin while output is captured!"

assert legacy_tree == expected_tree
input()


lemma_lines = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,14 @@ _head: _have_both
_tail: _WORD_SEPARATOR _omit_right* _have_both?
| _WORD_SEPARATOR? _omit_left+
| _WORD_SEPARATOR _require_both
| _WORD_SEPARATOR? _require_both_optional_ws

_require_both: commentary_protocol
| divider_variant
| divider
| line_break
| line_break

_require_both_optional_ws: divider_variant
| divider

_have_both: tabulation
| column_token
| erasure
Expand Down

0 comments on commit 9f75e7f

Please sign in to comment.