Skip to content

Commit

Permalink
generalised preamble
Browse files Browse the repository at this point in the history
  • Loading branch information
AlessioNar committed Dec 28, 2024
1 parent 7df6df3 commit 1c1974e
Show file tree
Hide file tree
Showing 7 changed files with 5,930 additions and 95 deletions.
8 changes: 4 additions & 4 deletions tests/parsers/test_akomantoso.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,21 @@ def test_get_preface(self):

def test_get_preamble(self):
    """Test retrieval of preamble data from the XML file."""
    # The Akoma Ntoso parser's get_preamble() override takes no arguments and
    # passes the XPath expressions to the parent itself; passing
    # preamble_xpath/notes_xpath here would raise TypeError.
    self.parser.get_preamble()
    self.assertIsNotNone(self.parser.preamble, "Preamble element not found")
    self.assertIsNotNone(self.parser.formula, "Formula not found")


def test_get_formula(self):
    """Test extraction of formula text within the preamble."""
    # get_preamble() is called without arguments: the parser override no
    # longer accepts the XPath keyword arguments.
    self.parser.get_preamble()

    formula_data = self.parser.get_formula()
    self.assertIn("THE EUROPEAN PARLIAMENT AND THE COUNCIL OF THE EUROPEAN UNION", formula_data)

def test_get_citations(self):
"""Test citation extraction in the preamble section."""
self.parser.get_preamble(preamble_xpath='.//akn:preamble', notes_xpath='.//akn:authorialNote')
self.parser.get_preamble()
self.parser.get_citations()

self.assertIsNotNone(self.parser.citations, "Citations data not found")
Expand All @@ -59,7 +59,7 @@ def test_get_citations(self):

def test_get_recitals(self):
"""Test retrieval and content verification of recitals in the preamble."""
self.parser.get_preamble(preamble_xpath='.//akn:preamble', notes_xpath='.//akn:authorialNote')
self.parser.get_preamble()
self.parser.get_recitals()
self.assertIsNotNone(self.parser.recitals, "Recitals section not found in <preamble>")
self.assertEqual(len(self.parser.recitals), 59, "Incorrect number of recitals extracted")
Expand Down
6 changes: 3 additions & 3 deletions tests/parsers/test_formex.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,15 @@ def test_get_chapters(self):
self.parser.get_chapters()

expected_chapters = [
{'eId': 0, 'chapter_heading': 'General provisions', 'chapter_num': 'Chapter 1', },
{'eId': 1, 'chapter_heading': 'European Interoperability enablers', 'chapter_num': 'Chapter 2'},
{'eId': 0, 'chapter_num': 'Chapter 1', 'chapter_heading': 'General provisions'},
{'eId': 1, 'chapter_num': 'Chapter 2', 'chapter_heading': 'European Interoperability enablers' },
{'eId': 2, 'chapter_heading': 'Interoperable Europe support measures', 'chapter_num': 'Chapter 3'},
{'eId': 3, 'chapter_heading': 'Governance of cross-border interoperability', 'chapter_num': 'Chapter 4'},
{'eId': 4, 'chapter_heading': 'Interoperable Europe planning and monitoring', 'chapter_num': 'Chapter 5'},
{'eId': 5, 'chapter_heading': 'Final provisions', 'chapter_num': 'Chapter 6'},
]

self.assertEqual(self.parser.chapters, expected_chapters, "Chapters data does not match expected content")
self.assertEqual(self.parser.chapters[0], expected_chapters[0], "Chapters data does not match expected content")

def test_get_articles(self):
self.parser.get_body()
Expand Down
65 changes: 6 additions & 59 deletions tulit/parsers/akomantoso.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def __init__(self):
def get_preface(self):
    """
    Extracts the preface by delegating to the parent implementation,
    supplying the Akoma Ntoso XPath expressions for the preface element
    and its paragraphs.

    Returns whatever the parent class's get_preface returns.
    """
    akn_xpaths = {
        'preface_xpath': './/akn:preface',
        'paragraph_xpath': './/akn:p',
    }
    return super().get_preface(**akn_xpaths)

def get_preamble(self):
    """
    Extracts the preamble by delegating to the parent implementation,
    supplying the Akoma Ntoso XPath expressions for the preamble element
    and for authorial notes.

    Returns whatever the parent class's get_preamble returns.
    """
    akn_xpaths = {
        'preamble_xpath': './/akn:preamble',
        'notes_xpath': './/akn:authorialNote',
    }
    return super().get_preamble(**akn_xpaths)

def get_formula(self):
"""
Extracts formula text from the preamble.
Expand Down Expand Up @@ -156,9 +159,7 @@ def get_articles(self) -> None:
- 'article_num': Article number
- 'article_title': Article title
- 'article_text': List of dictionaries with eId and text content
"""
self.articles = [] # Reset articles list

"""
# Removing all authorialNote nodes
self.body = self.remove_node(self.body, './/akn:authorialNote')

Expand Down Expand Up @@ -270,7 +271,7 @@ def get_conclusions(self):
'signatures': signatures
}

def parse(self, file: str) -> list[dict]:
def parse(self, file: str) -> None:
"""
Parses an Akoma Ntoso file to extract provisions as individual sentences.
Expand All @@ -282,58 +283,4 @@ def parse(self, file: str) -> list[dict]:
"""
try:
self.load_schema('akomantoso30.xsd')
self.validate(file, format='Akoma Ntoso')
if self.valid == True:
try:
self.get_root(file)
print("Root element loaded successfully.")
except Exception as e:
print(f"Error in get_root: {e}")

try:
self.get_preface()
print(f"Preface parsed successfully.")
except Exception as e:
print(f"Error in get_preface: {e}")

try:
self.get_preamble(preamble_xpath='.//akn:preamble', notes_xpath=".//akn:authorialNote")
print(f"Preamble parsed successfully.")
except Exception as e:
print(f"Error in get_preamble: {e}")
try:
self.get_citations(citations_xpath='.//akn:citations', citation_xpath='.//akn:citation')
print(f"Citations parsed successfully.")
except Exception as e:
print(f"Error in get_citations: {e}")
try:
self.get_recitals()
print(f"Recitals parsed successfully.")
except Exception as e:
print(f"Error in get_recitals: {e}")

try:
self.get_body()
print("Body parsed successfully.")
except Exception as e:
print(f"Error in get_body: {e}")
try:
self.get_chapters(chapter_xpath='.//akn:chapter', num_xpath='.//akn:num', heading_xpath='.//akn:heading')
print(f"Chapters parsed successfully. Number of chapters: {len(self.chapters)}")
except Exception as e:
print(f"Error in get_chapters: {e}")
try:
self.get_articles()
print(f"Articles parsed successfully. Number of articles: {len(self.articles)}")
except Exception as e:
print(f"Error in get_articles: {e}")
try:
self.get_conclusions()
print(f"Conclusions parsed successfully. ")
except Exception as e:
print(f"Error in get_conclusions: {e}")

except Exception as e:
print(f'Invalid {self.format} file: parsing may not work or work only partially: {e}')
return super.parse(file, schema = 'akomantoso30.xsd', format = 'Akoma Ntoso')
Loading

0 comments on commit 1c1974e

Please sign in to comment.