Skip to content

Commit

Permalink
Merge pull request #12 from AlessioNar:development
Browse files Browse the repository at this point in the history
Merging development to main
  • Loading branch information
AlessioNar authored Dec 23, 2024
2 parents db7873d + 88a1535 commit 881c484
Show file tree
Hide file tree
Showing 18 changed files with 707 additions and 343 deletions.
Binary file added .coverage
Binary file not shown.
94 changes: 93 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ beautifulsoup4 = "^4.12.3"
lxml = "^5.3.0"
chardet = "^5.2.0"
pytest = "^8.3.3"
coverage = "^7.6.9"
pytest-cov = "^6.0.0"


[build-system]
Expand Down
85 changes: 85 additions & 0 deletions tests/download/test_cellar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import unittest
import json
from ulit.download.cellar import CellarDownloader
import os
from unittest.mock import patch, Mock
import requests
import io

class TestCellarDownloader(unittest.TestCase):
    """Unit tests for ``CellarDownloader``.

    Covers downloading documents from stored query results, extracting
    Cellar IDs from those results, building request URLs, and fetching
    content (both the successful path and the request-exception path).
    """

    # Stored query results used as input by the download and ID-extraction tests.
    QUERY_RESULTS_PATH = './tests/metadata/query_results/query_results.json'
    # Identifier of the work whose documents appear in the expected values.
    WORK_ID = 'e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04'
    # Resource URL of the first document of that work.
    DOC_1_URL = 'http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04/DOC_1'

    def setUp(self):
        """Create a fresh downloader pointing at the test data and log directories."""
        self.maxDiff = None
        self.downloader = CellarDownloader(download_dir='./tests/data', log_dir='./tests/logs')

    def _load_query_results(self):
        """Return the parsed JSON query results stored at ``QUERY_RESULTS_PATH``."""
        with open(self.QUERY_RESULTS_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)

    def test_download_documents(self):
        """``download`` returns the paths of the four documents listed in the fixture."""
        results = self._load_query_results()

        # NOTE(review): nothing is patched in this test, so it presumably
        # performs real requests and writes under ./tests/data -- confirm.
        document_paths = self.downloader.download(results, format='fmx4')

        # Build the expected paths with the platform separator so the
        # comparison is not tied to Windows-style backslashes.
        work_dir = os.path.join('tests', 'data', self.WORK_ID)
        expected = [os.path.join(work_dir, f'DOC_{i}.xml') for i in range(1, 5)]

        self.assertEqual(document_paths, expected)

    def test_get_cellar_ids_from_json_results(self):
        """``get_cellar_ids_from_json_results`` extracts the four document IDs for 'fmx4'."""
        cellar_results = self._load_query_results()

        # Test for formex format
        extracted_ids = self.downloader.get_cellar_ids_from_json_results(cellar_results, 'fmx4')
        # Cellar IDs always use a forward slash; they are identifiers, not file paths.
        expected = [f'{self.WORK_ID}/DOC_{i}' for i in range(1, 5)]

        self.assertEqual(extracted_ids, expected)

    def test_build_request_url(self):
        """``build_request_url`` maps a ``cellar`` parameter to its resource URL."""
        params = {'cellar': f'{self.WORK_ID}/DOC_1'}
        actual_url = self.downloader.build_request_url(params)
        self.assertEqual(actual_url, self.DOC_1_URL)

    @patch('ulit.download.download.requests.request')
    def test_fetch_content(self, mock_request):
        """``fetch_content`` issues one GET with the expected headers and returns the response."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.raise_for_status.return_value = None
        mock_request.return_value = mock_response

        url = self.DOC_1_URL
        response = self.downloader.fetch_content(url)

        # Check that the request was made with the correct URL and headers
        headers = {
            'Accept': "*, application/zip, application/zip;mtype=fmx4, application/xml;mtype=fmx4, application/xhtml+xml, text/html, text/html;type=simplified, application/msword, text/plain, application/xml, application/xml;notice=object",
            'Accept-Language': "eng",
            'Content-Type': "application/x-www-form-urlencoded",
            'Host': "publications.europa.eu",
        }
        mock_request.assert_called_once_with("GET", url, headers=headers)

        # Check that the response is as expected
        self.assertEqual(response, mock_response)

    @patch('ulit.download.download.requests.request')
    def test_fetch_content_request_exception(self, mock_request):
        """``fetch_content`` returns ``None`` when the request raises ``RequestException``."""
        # Mock request to raise a RequestException
        mock_request.side_effect = requests.RequestException("Error sending GET request")

        response = self.downloader.fetch_content(self.DOC_1_URL)

        # Check that the response is None when an exception is raised
        self.assertIsNone(response)



# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
67 changes: 67 additions & 0 deletions tests/download/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import unittest
from unittest.mock import patch, Mock
import os
from ulit.download.download import DocumentDownloader


class TestDocumentDownloader(unittest.TestCase):
    """Unit tests for ``DocumentDownloader``.

    Covers the content-type to file-extension lookup, zip extraction
    (with ``zipfile.ZipFile`` patched), and response handling for both
    zip and non-zip content types.
    """

    def setUp(self):
        """Create a fresh downloader pointing at the test data and log directories."""
        self.maxDiff = None
        self.downloader = DocumentDownloader(download_dir='./tests/data', log_dir='./tests/logs')

    def test_get_extension_from_content_type(self):
        """Each listed content type maps to its expected file extension."""
        content_type_mapping = {
            'text/html': 'html',
            'application/json': 'json',
            'application/xml': 'xml',
            'text/plain': 'txt',
            'application/zip': 'zip',
        }
        for content_type, expected_extension in content_type_mapping.items():
            # subTest reports every failing content type instead of stopping
            # at the first mismatch.
            with self.subTest(content_type=content_type):
                actual_extension = self.downloader.get_extension_from_content_type(content_type)
                self.assertEqual(actual_extension, expected_extension)

    @patch('ulit.download.download.zipfile.ZipFile')
    def test_extract_zip(self, mock_zipfile):
        """``extract_zip`` opens the response bytes as a zip and extracts into the folder."""
        # Mock zipfile object
        mock_zip = Mock()
        mock_zipfile.return_value = mock_zip

        response = Mock()
        response.content = b'fake zip content'

        folder_path = './downloads/test_folder'
        self.downloader.extract_zip(response, folder_path)

        # Check that the zipfile was opened and extracted; the first positional
        # argument must expose getvalue() returning the response bytes.
        args, _ = mock_zipfile.call_args
        self.assertEqual(args[0].getvalue(), response.content)
        mock_zip.extractall.assert_called_once_with(folder_path)

    @patch('ulit.download.download.DocumentDownloader.extract_zip')
    @patch('ulit.download.download.os.makedirs')
    def test_handle_response(self, mock_makedirs, mock_extract_zip):
        """``handle_response`` extracts zip content and returns a path for either content type.

        ``os.makedirs`` is patched out; ``mock_makedirs`` is accepted only
        because the decorator injects it.
        """
        # Mock response object
        response = Mock()
        response.headers = {'Content-Type': 'application/zip'}
        response.content = b'fake zip content'

        # NOTE(review): the ID uses a Windows-style backslash separator; kept
        # as-is because changing it could alter where the non-zip branch
        # writes on other platforms -- confirm intended behavior.
        cellar_id = 'e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04\\DOC_1'
        target_path = os.path.join(self.downloader.download_dir, cellar_id)

        # Test handling zip content
        result = self.downloader.handle_response(response, cellar_id)
        mock_extract_zip.assert_called_once_with(response, target_path)
        self.assertEqual(result, target_path)

        # Test handling non-zip content
        response.headers = {'Content-Type': 'application/xml'}
        mock_extract_zip.reset_mock()
        result = self.downloader.handle_response(response, cellar_id)
        expected_file_path = os.path.normpath(f"{target_path}.xml")
        self.assertEqual(os.path.normpath(result), expected_file_path)
        # After the reset, a non-zip response must not trigger zip extraction.
        mock_extract_zip.assert_not_called()



# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading

0 comments on commit 881c484

Please sign in to comment.