Skip to content

Commit

Permalink
Created all common functions needed for version 0.1.0 of the package
Browse files Browse the repository at this point in the history
  • Loading branch information
AlessioNar committed Dec 28, 2024
1 parent 864ae70 commit 3c2f862
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 15 deletions.
11 changes: 8 additions & 3 deletions tulit/parsers/akomantoso.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ def get_formula(self):
Returns None if no formula is found.
"""
formula = self.preamble.find('.//akn:formula', namespaces=self.namespaces)
# Extract text from <p> within <formula>
formula_text = ' '.join(p.text.strip() for p in formula.findall('akn:p', namespaces=self.namespaces) if p.text)
if formula is None:
return None

# Extract text from <p> within <formula>
formula_text = ' '.join(p.text.strip() for p in formula.findall('akn:p', namespaces=self.namespaces) if p.text)
return formula_text
self.formula = formula_text
return self.formula


def get_citations(self) -> list:
"""
Expand Down Expand Up @@ -105,6 +107,9 @@ def extract_eId(recital):
extract_eId=extract_eId,

)

def get_preamble_final(self):
    """
    Forward the call to the parent class's ``get_preamble_final``.

    No format-specific behaviour is added here; the parent's result is
    handed back unchanged.

    Returns
    -------
    Whatever the parent implementation returns.
    """
    parent = super()
    return parent.get_preamble_final()

### Act block
def get_act(self) -> None:
Expand Down
13 changes: 12 additions & 1 deletion tulit/parsers/formex.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,12 @@ def get_formula(self):
str
Formula text from the preamble.
"""
self.formula = self.preamble.findtext('PREAMBLE.INIT')
formula = self.preamble.findtext('PREAMBLE.INIT')

if formula is None:
return None

self.formula = formula

return self.formula

Expand Down Expand Up @@ -99,6 +104,9 @@ def extract_eId(recital):
extract_eId=extract_eId
)

def get_preamble_final(self):
    """
    Forward the call to the parent class's ``get_preamble_final``.

    No format-specific behaviour is added here; the parent's result is
    handed back unchanged.

    Returns
    -------
    Whatever the parent implementation returns.
    """
    parent = super()
    return parent.get_preamble_final()

def get_body(self):
    """
    Call the parent class's ``get_body`` with a fixed XPath.

    The XPath ``.//ENACTING.TERMS`` is passed as the single positional
    argument and the parent's result is handed back unchanged.

    Returns
    -------
    Whatever the parent implementation returns.
    """
    body_xpath = './/ENACTING.TERMS'
    return super().get_body(body_xpath)

Expand Down Expand Up @@ -162,6 +170,9 @@ def get_articles(self):
self.articles.append(article_data)
else:
print('No enacting terms XML tag has been found')

def get_conclusions(self):
    """
    Forward the call to the parent class's ``get_conclusions``.

    No format-specific behaviour is added here; the parent's result is
    handed back unchanged.

    Returns
    -------
    Whatever the parent implementation returns.
    """
    parent = super()
    return parent.get_conclusions()


def parse(self, file):
Expand Down
35 changes: 24 additions & 11 deletions tulit/parsers/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from abc import ABC, abstractmethod
from abc import ABC
from lxml import etree
import os
import re
Expand Down Expand Up @@ -146,9 +146,9 @@ def validate(self, file: str, format: str) -> bool:
print(f"{file} is not a valid {format} file. Validation errors: {e}")
self.valid = False
self.validation_errors = e.error_log
#except Exception as e:
# print(f"An error occurred during validation: {e}")
# self.valid = False
except Exception as e:
print(f"An error occurred during validation: {e}")
self.valid = False

def remove_node(self, tree, node):
"""
Expand Down Expand Up @@ -231,7 +231,10 @@ def get_preface(self, preface_xpath, paragraph_xpath) -> None:
paragraph_text = ''.join(p.itertext()).strip()
paragraphs.append(paragraph_text)

self.preface = ' '.join(paragraphs)
# Join all paragraphs into a single string and remove duplicate spaces or newlines
self.preface = ' '.join(paragraphs).replace('\n', '').replace('\t', '').replace('\r', '')
self.preface = re.sub(' +', ' ', self.preface)


def get_preamble(self, preamble_xpath, notes_xpath) -> None:
"""
Expand All @@ -253,7 +256,6 @@ def get_preamble(self, preamble_xpath, notes_xpath) -> None:

if self.preamble is not None:
self.preamble = self.remove_node(self.preamble, notes_xpath)
self.formula = self.get_formula()
#preamble_data["preamble_final"] = self.preamble.findtext('PREAMBLE.FINAL')

def get_formula(self):
Expand Down Expand Up @@ -331,6 +333,9 @@ def get_recitals(self, recitals_xpath, recital_xpath, text_xpath, extract_intro=

self.recitals = recitals

def get_preamble_final(self):
    """
    Placeholder with no behaviour of its own.

    This implementation performs no work and touches no attributes.

    Returns
    -------
    None
    """
    return None

def get_body(self, body_xpath) -> None:
"""
Extracts the body element from the document.
Expand Down Expand Up @@ -422,6 +427,9 @@ def get_articles(self, article_xpath, extract_eId=None) -> None:
def get_subdivisions(self, subdivision_xpath, extract_eId=None) -> None:
    """
    Placeholder with no behaviour of its own.

    Both arguments are accepted and ignored; nothing is read or stored.

    Parameters
    ----------
    subdivision_xpath
        Unused by this implementation.
    extract_eId : optional
        Unused by this implementation. Defaults to None.

    Returns
    -------
    None
    """
    return None

def get_conclusions(self):
    """
    Placeholder with no behaviour of its own.

    This implementation performs no work and touches no attributes.

    Returns
    -------
    None
    """
    return None

def parse(self, file: str, schema, format) -> None:
"""
Parses an Akoma Ntoso file to extract provisions as individual sentences.
Expand Down Expand Up @@ -453,29 +461,34 @@ def parse(self, file: str, schema, format) -> None:

try:
self.get_preface()
print(f"Preface parsed successfully.")
print(f"Preface parsed successfully. Preface: {self.preface}")
except Exception as e:
print(f"Error in get_preface: {e}")

try:
self.get_preamble()
print(f"Preamble parsed successfully.")
print(f"Preamble element found.")
except Exception as e:
print(f"Error in get_preamble: {e}")
try:
self.get_formula()
print(f"Formula parsed successfully.")
except Exception as e:
print(f"Error in get_formula: {e}")
try:
self.get_citations()
print(f"Citations parsed successfully.")
print(f"Citations parsed successfully. Number of citations: {len(self.citations)}")
except Exception as e:
print(f"Error in get_citations: {e}")
try:
self.get_recitals()
print(f"Recitals parsed successfully.")
print(f"Recitals parsed successfully. Number of recitals: {len(self.recitals)}")
except Exception as e:
print(f"Error in get_recitals: {e}")

try:
self.get_body()
print("Body parsed successfully.")
print("Body element found.")
except Exception as e:
print(f"Error in get_body: {e}")
try:
Expand Down

0 comments on commit 3c2f862

Please sign in to comment.