Skip to content

Commit

Permalink
testing with requirements.txt file
Browse files Browse the repository at this point in the history
  • Loading branch information
AlessioNar committed Dec 23, 2024
1 parent 5e79e41 commit b7e7db5
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 94 deletions.
3 changes: 2 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
sphinx==7.1.2
sphinx-rtd-theme==1.3.0rc1
sphinx-rtd-theme==1.3.0rc1
tulit
5 changes: 5 additions & 0 deletions docs/source/parsers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ Parsers

This package contains modules for parsing various types of legal documents. Below are the details for each module.

.. automodule:: parsers.parser
:members:
:undoc-members:
:show-inheritance:

.. automodule:: parsers.formex
:members:
:undoc-members:
Expand Down
34 changes: 0 additions & 34 deletions tests/parsers/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,8 @@
import unittest
from tulit.parsers.parser import validate_xml
import os

# Directory holding the EU Akoma Ntoso sample documents, resolved relative to
# this test module. Each component is passed to os.path.join separately so the
# path is valid on every platform rather than only where '\\' is the separator.
file_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'akn', 'eu')


class TestXMLValidator(unittest.TestCase):
    """Fixture paths for validating an XML document against an XSD schema.

    All test cases below are commented out, so this class currently only
    prepares the paths in ``setUp``.
    """

    def setUp(self):
        """Set up test paths."""

        # Update these paths to match your actual file locations
        self.valid_xml = os.path.join(file_path, "32014L0092.akn")
        #self.invalid_xml = "path/to/invalid.xml"
        # Joined component by component for the same portability reason as
        # file_path above.
        self.xsd_file = os.path.join(
            os.path.dirname(__file__),
            "..",
            "metadata",
            "schemas",
            "akomantoso30.xsd",
        )

    # def test_valid_xml(self):
    #     """Test validation with valid XML file."""
    #     is_valid, error = validate_xml(self.valid_xml, self.xsd_file)
    #     self.assertTrue(is_valid)
    #     self.assertIsNone(error)

    # def test_invalid_xml(self):
    #     """Test validation with invalid XML file."""
    #     is_valid, error = validate_xml_against_xsd(self.invalid_xml, self.xsd_file)
    #     self.assertFalse(is_valid)
    #     self.assertIsNotNone(error)

    # def test_nonexistent_xml_file(self):
    #     """Test validation with non-existent XML file."""
    #     is_valid, error = validate_xml("data/nonexistent.xml", self.xsd_file)
    #     self.assertFalse(is_valid)
    #     self.assertIsNotNone(error)

    # def test_nonexistent_xsd_file(self):
    #     """Test validation with non-existent XSD schema file."""
    #     is_valid, error = validate_xml(self.valid_xml, "nonexistent.xsd")
    #     self.assertFalse(is_valid)
    #     self.assertIsNotNone(error)


if __name__ == '__main__':
    unittest.main(verbosity=2)
2 changes: 1 addition & 1 deletion tulit/parsers/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def get_lists(self, parent_id: str, container):

def get_articles(self):
"""
Extracts articles from the HTML. Each <div> with an id starting with 'art_' is treated as an article (eId).
Extracts articles from the HTML. Each <div> with an id starting with "art" is treated as an article (eId).
Subsequent subdivisions are processed based on the closest parent with an id.
Returns:
Expand Down
59 changes: 1 addition & 58 deletions tulit/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,61 +65,4 @@ def remove_node(self, tree, node):
# If no siblings, add the tail text to the parent's text
parent.text = (parent.text or '') + tail_text

return tree

def validate_xml(xml_path: str, xsd_path: str):
    """
    Validate an XML file against an XSD schema.

    Schemas referenced by the main XSD are looked up by file name in the
    directory that contains ``xsd_path``. Failures are logged with
    ``logging.error`` and reported through the return value instead of being
    raised.

    Args:
        xml_path (str): Path to the XML file to validate
        xsd_path (str): Path to the XSD schema file

    Returns:
        Tuple[bool, Union[str, None]]: A tuple containing:
            - bool: True if validation successful, False otherwise
            - Union[str, None]: Error message if validation failed, None if successful
    """
    try:
        # Create XML parser with schema location resolution
        parser = etree.XMLParser(remove_blank_text=True)

        # Create a custom resolver to handle relative paths
        class LocalResolver(etree.Resolver):
            def resolve(self, url, pubid, context):
                # Get the directory of the main XSD file
                schema_dir = os.path.dirname(os.path.abspath(xsd_path))
                # Construct full path to the imported schema; only the file
                # name of the requested URL is used, so any directory part
                # of the reference is ignored.
                schema_path = os.path.join(schema_dir, os.path.basename(url))

                if os.path.exists(schema_path):
                    return self.resolve_filename(schema_path, context)
                # No local copy: returning None declines the request.
                return None

        # Add the resolver to the parser
        parser.resolvers.add(LocalResolver())

        # Parse and validate
        xmlschema_doc = etree.parse(xsd_path, parser)
        xmlschema = etree.XMLSchema(xmlschema_doc)
        xml_doc = etree.parse(xml_path, parser)

        # assertValid raises etree.DocumentInvalid when validation fails.
        xmlschema.assertValid(xml_doc)
        # Return the same (bool, message) shape as the failure paths so
        # callers can always unpack the result.
        return True, None

    except etree.XMLSyntaxError as e:
        error_msg = f"XML Syntax Error: {str(e)}"
        logging.error(error_msg)
        return False, error_msg

    except etree.DocumentInvalid as e:
        error_msg = f"Schema Validation Error: {str(e)}"
        logging.error(error_msg)
        return False, error_msg

    except Exception as e:
        # Boundary catch-all: anything else (e.g. an unreadable file) is
        # reported to the caller rather than propagated.
        error_msg = f"Unexpected Error: {str(e)}"
        logging.error(error_msg)
        return False, error_msg


return tree

0 comments on commit b7e7db5

Please sign in to comment.