From 2615771ccb11abdf7a40b0c177547307597a133b Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Mon, 25 Nov 2024 16:02:18 +0200 Subject: [PATCH] blockContainer --- bluebell/akn.peg | 27 +++- bluebell/akn.py | 240 ++++++++++++++++---------------- bluebell/akn_text.xsl | 16 +++ bluebell/types.py | 10 +- tests/roundtrip/act-escapes.txt | 2 + tests/roundtrip/act.txt | 12 ++ tests/test_blocks.py | 40 +++--- tests/test_fuzzing.py | 2 +- 8 files changed, 192 insertions(+), 157 deletions(-) diff --git a/bluebell/akn.peg b/bluebell/akn.peg index dddd936..fcebf93 100644 --- a/bluebell/akn.peg +++ b/bluebell/akn.peg @@ -257,7 +257,7 @@ grammar akn nested_block_element <- indent content:block_element+ dedent - block_elements <- block_list / bullet_list / generic_block / table / longtitle / footnote / block_quote / p / line + block_elements <- block_list / bullet_list / table / longtitle / footnote / block_quote / blocks / p / line # ------------------------------------------------------------------------------ # Speech blocks for debates @@ -277,11 +277,6 @@ grammar akn # Actual block elements # ------------------------------------------------------------------------------ - generic_block <- 'BLOCK' attrs:block_attrs? eol - indent - content:block_element+ - dedent - longtitle <- 'LONGTITLE' body:(space content:inline+)? eol subheading <- 'SUBHEADING' body:(space content:inline+)? eol @@ -297,6 +292,26 @@ grammar akn # could start a line. line <- !dedent content:inline+ eol + # ------------------------------------------------------------------------------ + # Block container, which can contain other blocks + # + # Note: the intro and wrapUp contain blocks, which actually means they're not necessary because the blockContainer + # can contain any block. So we just don't use them. + # + # BLOCKS + # intro + # + # ITEMS + # ITEM 1 + # + # wrapUp + + # block container element, for grouping blocks + blocks <- 'BLOCKS' attrs:block_attrs? eol + indent + content:block_element+ + dedent + # ------------------------------------------------------------------------------ # Block lists diff --git a/bluebell/akn.py b/bluebell/akn.py index eb65bef..092ef17 100644 --- a/bluebell/akn.py +++ b/bluebell/akn.py @@ -468,69 +468,69 @@ def __init__(self, text, offset, elements): class TreeNode63(TreeNode): def __init__(self, text, offset, elements): super(TreeNode63, self).__init__(text, offset, elements) - self.attrs = elements[1] + self.body = elements[1] self.eol = elements[2] - self.indent = elements[3] - self.content = elements[4] - self.dedent = elements[5] class TreeNode64(TreeNode): def __init__(self, text, offset, elements): super(TreeNode64, self).__init__(text, offset, elements) - self.body = elements[1] - self.eol = elements[2] + self.space = elements[0] + self.content = elements[1] class TreeNode65(TreeNode): def __init__(self, text, offset, elements): super(TreeNode65, self).__init__(text, offset, elements) - self.space = elements[0] - self.content = elements[1] + self.body = elements[1] + self.eol = elements[2] class TreeNode66(TreeNode): def __init__(self, text, offset, elements): super(TreeNode66, self).__init__(text, offset, elements) - self.body = elements[1] - self.eol = elements[2] + self.space = elements[0] + self.content = elements[1] class TreeNode67(TreeNode): def __init__(self, text, offset, elements): super(TreeNode67, self).__init__(text, offset, elements) - self.space = elements[0] - self.content = elements[1] + self.body = elements[1] + self.eol = elements[2] class TreeNode68(TreeNode): def __init__(self, text, offset, elements): super(TreeNode68, self).__init__(text, offset, elements) - self.body = elements[1] - self.eol = elements[2] + self.space = elements[0] + self.content = elements[1] class TreeNode69(TreeNode): def __init__(self, text, offset, elements): super(TreeNode69, self).__init__(text, offset, elements) - self.space = elements[0] - self.content = elements[1] + self.attrs = elements[1] + self.space = elements[2] + self.content = elements[3] + self.eol = elements[4] class TreeNode70(TreeNode): def __init__(self, text, offset, elements): super(TreeNode70, self).__init__(text, offset, elements) - self.attrs = elements[1] - self.space = elements[2] - self.content = elements[3] - self.eol = elements[4] + self.content = elements[1] + self.eol = elements[2] class TreeNode71(TreeNode): def __init__(self, text, offset, elements): super(TreeNode71, self).__init__(text, offset, elements) - self.content = elements[1] + self.attrs = elements[1] self.eol = elements[2] + self.indent = elements[3] + self.content = elements[4] + self.dedent = elements[5] class TreeNode72(TreeNode): @@ -5042,19 +5042,19 @@ def _read_block_elements(self): address0 = self._read_bullet_list() if address0 is FAILURE: self._offset = index1 - address0 = self._read_generic_block() + address0 = self._read_table() if address0 is FAILURE: self._offset = index1 - address0 = self._read_table() + address0 = self._read_longtitle() if address0 is FAILURE: self._offset = index1 - address0 = self._read_longtitle() + address0 = self._read_footnote() if address0 is FAILURE: self._offset = index1 - address0 = self._read_footnote() + address0 = self._read_block_quote() if address0 is FAILURE: self._offset = index1 - address0 = self._read_block_quote() + address0 = self._read_blocks() if address0 is FAILURE: self._offset = index1 address0 = self._read_p() @@ -5284,92 +5284,6 @@ def _read_speech_block_name(self): self._cache['speech_block_name'][index0] = (address0, self._offset) return address0 - def _read_generic_block(self): - address0, index0 = FAILURE, self._offset - cached = self._cache['generic_block'].get(index0) - if cached: - self._offset = cached[1] - return cached[0] - index1, elements0 = self._offset, [] - address1 = FAILURE - chunk0, max0 = None, self._offset + 5 - if max0 <= self._input_size: - chunk0 = self._input[self._offset:max0] - if chunk0 == 'BLOCK': - address1 = TreeNode(self._input[self._offset:self._offset + 5], self._offset, []) - self._offset = self._offset + 5 - else: - address1 = FAILURE - if self._offset > self._failure: - self._failure = self._offset - self._expected = [] - if self._offset == self._failure: - self._expected.append('\'BLOCK\'') - if address1 is not FAILURE: - elements0.append(address1) - address2 = FAILURE - index2 = self._offset - address2 = self._read_block_attrs() - if address2 is FAILURE: - address2 = TreeNode(self._input[index2:index2], index2, []) - self._offset = index2 - if address2 is not FAILURE: - elements0.append(address2) - address3 = FAILURE - address3 = self._read_eol() - if address3 is not FAILURE: - elements0.append(address3) - address4 = FAILURE - address4 = self._read_indent() - if address4 is not FAILURE: - elements0.append(address4) - address5 = FAILURE - remaining0, index3, elements1, address6 = 1, self._offset, [], True - while address6 is not FAILURE: - address6 = self._read_block_element() - if address6 is not FAILURE: - elements1.append(address6) - remaining0 -= 1 - if remaining0 <= 0: - address5 = TreeNode(self._input[index3:self._offset], index3, elements1) - self._offset = self._offset - else: - address5 = FAILURE - if address5 is not FAILURE: - elements0.append(address5) - address7 = FAILURE - address7 = self._read_dedent() - if address7 is not FAILURE: - elements0.append(address7) - else: - elements0 = None - self._offset = index1 - else: - elements0 = None - self._offset = index1 - else: - elements0 = None - self._offset = index1 - else: - elements0 = None - self._offset = index1 - else: - elements0 = None - self._offset = index1 - else: - elements0 = None - self._offset = index1 - if elements0 is None: - address0 = FAILURE - else: - address0 = TreeNode63(self._input[index1:self._offset], index1, elements0) - self._offset = self._offset - if address0 is not FAILURE: - cls0 = type(address0) - address0.__class__ = type(cls0.__name__ + 'GenericBlock', (cls0, self._types.GenericBlock), {}) - self._cache['generic_block'][index0] = (address0, self._offset) - return address0 - def _read_longtitle(self): address0, index0 = FAILURE, self._offset cached = self._cache['longtitle'].get(index0) @@ -5423,7 +5337,7 @@ def _read_longtitle(self): if elements1 is None: address2 = FAILURE else: - address2 = TreeNode65(self._input[index3:self._offset], index3, elements1) + address2 = TreeNode64(self._input[index3:self._offset], index3, elements1) self._offset = self._offset if address2 is FAILURE: address2 = TreeNode(self._input[index2:index2], index2, []) @@ -5446,7 +5360,7 @@ def _read_longtitle(self): if elements0 is None: address0 = FAILURE else: - address0 = TreeNode64(self._input[index1:self._offset], index1, elements0) + address0 = TreeNode63(self._input[index1:self._offset], index1, elements0) self._offset = self._offset if address0 is not FAILURE: cls0 = type(address0) @@ -5507,7 +5421,7 @@ def _read_subheading(self): if elements1 is None: address2 = FAILURE else: - address2 = TreeNode67(self._input[index3:self._offset], index3, elements1) + address2 = TreeNode66(self._input[index3:self._offset], index3, elements1) self._offset = self._offset if address2 is FAILURE: address2 = TreeNode(self._input[index2:index2], index2, []) @@ -5530,7 +5444,7 @@ def _read_subheading(self): if elements0 is None: address0 = FAILURE else: - address0 = TreeNode66(self._input[index1:self._offset], index1, elements0) + address0 = TreeNode65(self._input[index1:self._offset], index1, elements0) self._offset = self._offset if address0 is not FAILURE: cls0 = type(address0) @@ -5591,7 +5505,7 @@ def _read_crossheading(self): if elements1 is None: address2 = FAILURE else: - address2 = TreeNode69(self._input[index3:self._offset], index3, elements1) + address2 = TreeNode68(self._input[index3:self._offset], index3, elements1) self._offset = self._offset if address2 is FAILURE: address2 = TreeNode(self._input[index2:index2], index2, []) @@ -5614,7 +5528,7 @@ def _read_crossheading(self): if elements0 is None: address0 = FAILURE else: - address0 = TreeNode68(self._input[index1:self._offset], index1, elements0) + address0 = TreeNode67(self._input[index1:self._offset], index1, elements0) self._offset = self._offset if address0 is not FAILURE: cls0 = type(address0) @@ -5693,7 +5607,7 @@ def _read_p(self): if elements0 is None: address0 = FAILURE else: - address0 = TreeNode70(self._input[index1:self._offset], index1, elements0) + address0 = TreeNode69(self._input[index1:self._offset], index1, elements0) self._offset = self._offset if address0 is not FAILURE: cls0 = type(address0) @@ -5749,7 +5663,7 @@ def _read_line(self): if elements0 is None: address0 = FAILURE else: - address0 = TreeNode71(self._input[index1:self._offset], index1, elements0) + address0 = TreeNode70(self._input[index1:self._offset], index1, elements0) self._offset = self._offset if address0 is not FAILURE: cls0 = type(address0) @@ -5757,6 +5671,92 @@ def _read_line(self): self._cache['line'][index0] = (address0, self._offset) return address0 + def _read_blocks(self): + address0, index0 = FAILURE, self._offset + cached = self._cache['blocks'].get(index0) + if cached: + self._offset = cached[1] + return cached[0] + index1, elements0 = self._offset, [] + address1 = FAILURE + chunk0, max0 = None, self._offset + 6 + if max0 <= self._input_size: + chunk0 = self._input[self._offset:max0] + if chunk0 == 'BLOCKS': + address1 = TreeNode(self._input[self._offset:self._offset + 6], self._offset, []) + self._offset = self._offset + 6 + else: + address1 = FAILURE + if self._offset > self._failure: + self._failure = self._offset + self._expected = [] + if self._offset == self._failure: + self._expected.append('\'BLOCKS\'') + if address1 is not FAILURE: + elements0.append(address1) + address2 = FAILURE + index2 = self._offset + address2 = self._read_block_attrs() + if address2 is FAILURE: + address2 = TreeNode(self._input[index2:index2], index2, []) + self._offset = index2 + if address2 is not FAILURE: + elements0.append(address2) + address3 = FAILURE + address3 = self._read_eol() + if address3 is not FAILURE: + elements0.append(address3) + address4 = FAILURE + address4 = self._read_indent() + if address4 is not FAILURE: + elements0.append(address4) + address5 = FAILURE + remaining0, index3, elements1, address6 = 1, self._offset, [], True + while address6 is not FAILURE: + address6 = self._read_block_element() + if address6 is not FAILURE: + elements1.append(address6) + remaining0 -= 1 + if remaining0 <= 0: + address5 = TreeNode(self._input[index3:self._offset], index3, elements1) + self._offset = self._offset + else: + address5 = FAILURE + if address5 is not FAILURE: + elements0.append(address5) + address7 = FAILURE + address7 = self._read_dedent() + if address7 is not FAILURE: + elements0.append(address7) + else: + elements0 = None + self._offset = index1 + else: + elements0 = None + self._offset = index1 + else: + elements0 = None + self._offset = index1 + else: + elements0 = None + self._offset = index1 + else: + elements0 = None + self._offset = index1 + else: + elements0 = None + self._offset = index1 + if elements0 is None: + address0 = FAILURE + else: + address0 = TreeNode71(self._input[index1:self._offset], index1, elements0) + self._offset = self._offset + if address0 is not FAILURE: + cls0 = type(address0) + address0.__class__ = type(cls0.__name__ + 'BlockContainer', (cls0, self._types.BlockContainer), {}) + self._cache['blocks'][index0] = (address0, self._offset) + return address0 + def _read_block_list(self): address0, index0 = FAILURE, self._offset cached = self._cache['block_list'].get(index0) diff --git a/bluebell/akn_text.xsl b/bluebell/akn_text.xsl index e7c8a6e..04fcdd6 100644 --- a/bluebell/akn_text.xsl +++ b/bluebell/akn_text.xsl @@ -261,6 +261,7 @@ starts-with($text, 'ART') or starts-with($text, 'ARTICLE') or starts-with($text, 'ATTACHMENT') or + starts-with($text, 'BLOCKS') or starts-with($text, 'BLOCKLIST') or starts-with($text, 'BOOK') or starts-with($text, 'BULLETS') or @@ -625,6 +626,21 @@ + + 0 + + + + + BLOCKS + + + + + + + + diff --git a/bluebell/types.py b/bluebell/types.py index e380e3c..29a21a3 100644 --- a/bluebell/types.py +++ b/bluebell/types.py @@ -497,7 +497,7 @@ def to_dict(self): } -class GenericBlock: +class BlockContainer: def to_dict(self): kids = [] @@ -508,16 +508,12 @@ def to_dict(self): info = { 'type': 'block', - 'name': 'block', + 'name': 'blockContainer', 'children': kids, - 'attribs': {}, } if self.attrs.text: - info['attribs'].update(self.attrs.to_dict()) - - if not info['attribs'].get('name'): - info['attribs']['name'] = 'block' + info['attribs'] = self.attrs.to_dict() return info diff --git a/tests/roundtrip/act-escapes.txt b/tests/roundtrip/act-escapes.txt index 16385be..76bcb6d 100644 --- a/tests/roundtrip/act-escapes.txt +++ b/tests/roundtrip/act-escapes.txt @@ -104,6 +104,8 @@ CHAP 1 - Heading \ATTACHMENT + \BLOCKS + CHAP 2 - Speech \SPEECH diff --git a/tests/roundtrip/act.txt b/tests/roundtrip/act.txt index d66a5d9..7978ca8 100644 --- a/tests/roundtrip/act.txt +++ b/tests/roundtrip/act.txt @@ -63,6 +63,18 @@ CHAP 1 - Heading TC cell 2 + BLOCKS + some block text + + ITEMS + ITEM 1 + item 1 + + ITEM 2 + item 2 + + tail + text in the middle SEC 2 diff --git a/tests/test_blocks.py b/tests/test_blocks.py index 05e8971..39e9a32 100644 --- a/tests/test_blocks.py +++ b/tests/test_blocks.py @@ -420,7 +420,7 @@ def test_generic_block(self): tree = self.parse(""" PART A - BLOCK.cls{a b} + BLOCKS.cls{a b} foo ITEMS @@ -429,14 +429,11 @@ def test_generic_block(self): end - BLOCK{name my-block} + BLOCKS foo bar - BLOCK{name } - bad name - - BLOCK + BLOCKS tail """, 'hier_element_block') @@ -445,28 +442,25 @@ def test_generic_block(self): self.assertEqual(""" A - -

foo

- - + +

foo

+ + bar -

+

- + baz -

+

-

end

-
- -

foo

-

bar

-
- -

bad name

-
-

BLOCK

+

end

+
+ +

foo

+

bar

+
+

BLOCKS

tail

diff --git a/tests/test_fuzzing.py b/tests/test_fuzzing.py index 5fabda5..3106189 100644 --- a/tests/test_fuzzing.py +++ b/tests/test_fuzzing.py @@ -20,7 +20,7 @@ class FuzzingTestCase(ParserSupport, TestCase): ARGUMENTS ATTACHMENT BACKGROUND - BLOCK + BLOCKS BODY BULLETS *