Moved body_xpath selection into the parser subclasses

AlessioNar · Dec 27, 2024 · 88a5a5c
1 parent c618eb0
commit 88a5a5c
Show file tree

Hide file tree

Showing 5 changed files with 18 additions and 24 deletions.
diff --git a/tests/parsers/test_akomantoso.py b/tests/parsers/test_akomantoso.py
@@ -84,12 +84,12 @@ def test_get_act(self):
 
     def test_get_body(self):
         """Test retrieval of the body element."""
-        self.parser.get_body(body_xpath='.//akn:body')
+        self.parser.get_body()
         self.assertIsInstance(self.parser.body, etree._Element, "Body element should be an etree._Element")
 
     def test_get_chapters(self):
         """Test retrieval and content of chapter headings."""
-        self.parser.get_body(body_xpath='.//akn:body')
+        self.parser.get_body()
         self.parser.get_chapters()
 
         expected_chapters = [
@@ -105,7 +105,7 @@ def test_get_chapters(self):
 
     def test_get_articles(self):
         """Test retrieval of articles within the body."""
-        self.parser.get_body(body_xpath='.//akn:body')
+        self.parser.get_body()
         self.parser.get_articles()
 
         self.assertEqual(len(self.parser.articles), 31, "Incorrect number of articles extracted")

diff --git a/tests/parsers/test_formex.py b/tests/parsers/test_formex.py
@@ -83,14 +83,14 @@ def test_get_recitals(self):
         self.assertEqual(self.parser.recitals, recitals)      
 
     def test_get_body(self):
-        self.parser.get_body(body_xpath='.//ENACTING.TERMS')
+        self.parser.get_body()
         self.assertIsNotNone(self.parser.body, "Body element should not be None")    
 
     def test_get_chapters(self):
         """Test retrieval and content of chapter headings."""
         self.parser = Formex4Parser()
         self.parser.get_root(iopa)
-        self.parser.get_body(body_xpath='.//ENACTING.TERMS')
+        self.parser.get_body()
         self.parser.get_chapters()
 
         expected_chapters = [
@@ -105,7 +105,7 @@ def test_get_chapters(self):
         self.assertEqual(self.parser.chapters, expected_chapters, "Chapters data does not match expected content")
 
     def test_get_articles(self):
-        self.parser.get_body(body_xpath='.//ENACTING.TERMS')
+        self.parser.get_body()
         self.parser.get_articles()
 
         # Expected articles based on sample data in XML file

diff --git a/tulit/parsers/akomantoso.py b/tulit/parsers/akomantoso.py
@@ -23,8 +23,6 @@ def __init__(self):
         super().__init__()
 
         self.act = None
-        self.debug_info = {}
-
 
         # Define the namespace mapping
         self.namespaces = {
@@ -84,7 +82,6 @@ def get_recitals(self):
         """
 
         def extract_intro(recitals_section):
-            # Intro - different implementation
             recitals_intro = recitals_section.find('.//akn:intro', namespaces=self.namespaces)
             intro_eId = recitals_intro.get('eId')
             intro_text = ''.join(p.text.strip() for p in recitals_intro.findall('.//akn:p', namespaces=self.namespaces) if p.text)
@@ -117,6 +114,9 @@ def get_act(self) -> None:
         if self.act is None:
             # Fallback: try without namespace
             self.act = self.root.find('.//act')
+
+    def get_body(self):
+        return super().get_body('.//akn:body')
 
     def get_chapters(self) -> None:
         """
@@ -273,15 +273,12 @@ def parse(self, file: str) -> list[dict]:
 
         This method sequentially calls various parsing functions to extract metadata,
         preface, preamble, body, chapters, articles, and conclusions from the XML file.
-        It logs errors encountered during parsing and provides debug information about
-        the structure of the document.
 
         Args:
             file (str): The path to the Akoma Ntoso XML file.
 
 
         """
-        debug_info = {}
         try:
             self.load_schema('akomantoso30.xsd')
             self.validate(file, format='Akoma Ntoso')
@@ -294,14 +291,12 @@ def parse(self, file: str) -> list[dict]:
 
                 try:
                     self.get_metadata()
-                    debug_info['meta'] = self.metadata if hasattr(self, 'metadata') else "Meta not parsed."
                     print("Metadata parsed successfully.")
                 except Exception as e:
                     print(f"Error in get_meta: {e}")
 
                 try:
                     self.get_preface(preface_xpath='.//akn:preface', paragraph_xpath='akn:p')
-                    debug_info['preface'] = self.preface if hasattr(self, 'preface') else 0
                     print(f"Preface parsed successfully.")
                 except Exception as e:
                     print(f"Error in get_preface: {e}")
@@ -316,31 +311,28 @@ def parse(self, file: str) -> list[dict]:
                 except Exception as e:
                     print(f"Error in get_citations: {e}")
                 try:
-                    self.get_body(body_xpath='.//akn:body')
+                    self.get_body()
                     print("Body parsed successfully.")
                 except Exception as e:
                     print(f"Error in get_body: {e}")
 
                 try:
                     self.get_chapters(chapter_xpath='.//akn:chapter', num_xpath='.//akn:num', heading_xpath='.//akn:heading')
-                    debug_info['chapters'] = len(self.chapters) if hasattr(self, 'chapters') else 0
-                    print(f"Chapters parsed successfully. Number of chapters: {debug_info['chapters']}")
+                    print(f"Chapters parsed successfully. Number of chapters: {len(self.chapters)}")
                 except Exception as e:
                     print(f"Error in get_chapters: {e}")
 
                 try:
                     self.get_articles()
-                    debug_info['articles'] = len(self.articles) if hasattr(self, 'articles') else 0
-                    print(f"Articles parsed successfully. Number of articles: {debug_info['articles']}")
+                    print(f"Articles parsed successfully. Number of articles: {len(self.articles)}")
                 except Exception as e:
                     print(f"Error in get_articles: {e}")
 
                 try:
-                    self.get_conclusions()
-                    debug_info['conclusions'] = self.conclusions if hasattr(self, 'conclusions') else "Conclusions not parsed."
+                    self.get_conclusions()                    
                     print(f"Conclusions parsed successfully. ")
                 except Exception as e:
                     print(f"Error in get_conclusions: {e}")
 
         except Exception as e:
-            print(f'Invalid Akoma Ntoso file: parsing may not work or work only partially: {e}')
+            print(f'Invalid {self.format} file: parsing may not work or work only partially: {e}')
diff --git a/tulit/parsers/formex.py b/tulit/parsers/formex.py
@@ -93,6 +93,9 @@ def extract_eId(recital):
             extract_eId=extract_eId
         )
 
+    def get_body(self):
+        return super().get_body('.//ENACTING.TERMS')
+
     def get_chapters(self) -> None:
         """
         Extracts chapter information from the document.
@@ -160,6 +163,6 @@ def parse(self, file):
         self.get_metadata()
         self.get_preface(preface_xpath='.//TITLE', paragraph_xpath='.//P')
         self.get_preamble(preamble_xpath='.//PREAMBLE', notes_xpath='.//NOTE')
-        self.get_body(body_xpath='.//ENACTING.TERMS')
+        self.get_body()
         self.get_chapters()
         self.get_articles()
diff --git a/tulit/parsers/parser.py b/tulit/parsers/parser.py
@@ -330,7 +330,6 @@ def get_recitals(self, recitals_xpath, recital_xpath, text_xpath, extract_intro=
 
         self.recitals = recitals
 
-    ### Enacting terms block
     def get_body(self, body_xpath) -> None:
         """
         Extracts the body element from the document.