Harmonised methods between formex and akn

AlessioNar · Dec 14, 2024 · 26b559a · 26b559a
1 parent 95558f0
commit 26b559a
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 41 deletions.
diff --git a/tests/parsers/test_formex.py b/tests/parsers/test_formex.py
@@ -4,19 +4,26 @@
 
 import os 
 
-DATA_DIR = os.path.join(os.path.dirname(__file__), "../data/formex")
+DATA_DIR = os.path.join(os.path.dirname(__file__), "..\\data\\formex")
+file_path = os.path.join(DATA_DIR, "L_2011334EN.01002501.xml")
 
 class TestFormex4Parser(unittest.TestCase):
     def setUp(self):
-        self.formex_parser = Formex4Parser()
+        self.maxDiff = None  # Allow full diff if needed        
+        self.parser = Formex4Parser()
+        self.parser.get_root(file_path)
 
-    def test_parse_metadata(self):
+    def test_get_root(self):
+        """Test parsing and root element retrieval from the Formex file."""
+        self.assertTrue(os.path.exists(file_path), f"Test file not found at {file_path}")
+        self.assertIsNotNone(self.parser.root, "Root element should not be None")
+
+    def test_get_metadata(self):
         self.maxDiff = None  # Allow the full diff to be displayed
-        file_path = os.path.join(DATA_DIR, "L_2011334EN.01002501.xml")
 
-        self.formex_parser.load_xml(file_path)
+        self.parser.load_xml(file_path)
 
-        result = self.formex_parser.get_metadata()
+        result = self.parser.get_metadata()
         expected = {
             "file": "L_2011334EN.01002501.doc.xml",
             "collection": "L",
@@ -35,34 +42,23 @@ def test_parse_metadata(self):
         }
         self.assertEqual(result, expected)
 
-    def test_parse_title(self):
+    def test_get_preface(self):
         self.maxDiff = None  # Allow full diff if needed
-        file_path = os.path.join(DATA_DIR, "L_2011334EN.01002501.xml")
-
-        # Parse the XML tree and pass the root to _parse_title
-        with open(file_path, 'r', encoding='utf-8') as f:
-            tree = ET.parse(f)
-            root = tree.getroot()
-
-        result = self.formex_parser.get_title(root)
+
+
+        result = self.parser.get_preface()
         expected = (
             "Commission Implementing Regulation (EU) No 1319/2011 of 15 December 2011 "
             "fixing representative prices in the poultrymeat and egg sectors and for egg "
             "albumin, and amending Regulation (EC) No 1484/95"
         )
-        self.assertEqual(result, expected)
+        self.assertEqual(self.parser.preface, expected)
 
-    def test_parse_preamble(self):
+    def test_get_preamble(self):
         """Test parsing the preamble section with quotations and numbered considerations in Formex4Parser."""
         self.maxDiff = None  # Allow full diff if needed
-        file_path = os.path.join(DATA_DIR, "L_2011334EN.01002501.xml")
-
-        # Parse the XML tree and pass the root to _parse_preamble
-        with open(file_path, 'r', encoding='utf-8') as f:
-            tree = ET.parse(f)
-            root = tree.getroot()
 
-        result = self.formex_parser.get_preamble(root)
+        result = self.parser.get_preamble()
 
         # Expected preamble structure
         # @todo - see main function
@@ -84,18 +80,14 @@ def test_parse_preamble(self):
         }
 
         self.assertEqual(result, expected)
-
-
-
-    def test_parse_articles(self):
-        self.maxDiff = None  # Allow full diff if needed
-        file_path = os.path.join(DATA_DIR, "L_2011334EN.01002501.xml")
-
-        self.formex_parser.load_xml(file_path)
-        self.formex_parser.get_body()
-
-        self.formex_parser.get_articles()
+
+    def test_get_body(self):
+        self.parser.get_body()
+        self.assertIsNotNone(self.parser.body, "Body element should not be None")    
 
+    def test_get_articles(self):
+        self.parser.get_body()
+        self.parser.get_articles()
 
         # Expected articles based on sample data in XML file
         expected = [
@@ -111,7 +103,7 @@ def test_parse_articles(self):
             }
         ]
 
-        self.assertEqual(self.formex_parser.articles, expected)
+        self.assertEqual(self.parser.articles, expected)
 
 # Run the tests
 if __name__ == "__main__":

diff --git a/ulit/parsers/formex.py b/ulit/parsers/formex.py
@@ -20,6 +20,12 @@ def __init__(self):
         
         """
         # Define the namespace mapping
+        self.root = None
+        self.namespaces = {}
+
+        self.preface = None
+        self.metadata = {}
+
         self.namespaces = FMX_NAMESPACES
 
 
@@ -67,7 +73,7 @@ def get_metadata(self):
 
         return metadata
 
-    def get_title(self, root):
+    def get_preface(self):
         """
         Extracts title information from the TITLE section.
 
@@ -77,17 +83,18 @@ def get_title(self, root):
         Returns:
         str: Concatenated title text.
         """
-        title_element = root.find('TITLE')
+        title_element = self.root.find('TITLE')
         title_text = ""
 
         if title_element is not None:
             for paragraph in title_element.iter('P'):
                 paragraph_text = "".join(paragraph.itertext()).strip()
                 title_text += paragraph_text + " "
+        self.preface = title_text.strip()
 
-        return title_text.strip()
+        return self.preface
 
-    def get_preamble(self, root):
+    def get_preamble(self):
         """
         Extracts the preamble section, including initial statements and considerations.
 
@@ -98,7 +105,7 @@ def get_preamble(self, root):
             dict: Preamble details, including quotations and considerations.
         """
         preamble_data = {"initial_statement": None, "quotations": [], "consid_init": None, "considerations": [], "preamble_final": None}
-        preamble = root.find('PREAMBLE')
+        preamble = self.root.find('PREAMBLE')
 
         if preamble is not None:
             # Initial statement
@@ -119,6 +126,8 @@ def get_preamble(self, root):
                 text = text.replace('\n', '').replace('\t', '').replace('\r', '')  # remove newline and tab characters
                 text = re.sub(' +', ' ', text)  # replace multiple spaces with a single space
                 preamble_data["quotations"].append(text)
+
+            self.citations = preamble_data['quotations']
 
             preamble_data["consid_init"] = preamble.findtext('.//GR.CONSID/GR.CONSID.INIT')
 
@@ -183,5 +192,7 @@ def parse(self, file):
         dict: Parsed data containing metadata, title, preamble, and articles.
         """
         self.load_xml(file)
+        self.get_preface()
+        self.get_preamble()
         self.get_body()
         self.get_articles()