-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improved stability and reliability of the package.
- Loading branch information
1 parent
2fa8014
commit 73d7df6
Showing
14 changed files
with
340 additions
and
7,245 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
**/__pycache__/* | ||
tests/logs | ||
tests/logs | ||
tests/data |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,8 +4,8 @@ version = "0.0.1" | |
description = "ULIT - a Universal Legal Informatics Toolkit, is set of legal informatics utilities collected in a Python package that focuses on the retrieval of legal data and metadata from official sources in the EU, and their transformation in pythonic data structures" | ||
|
||
[tool.poetry] | ||
name = "op_cellar" | ||
version = "0.0.3" | ||
name = "ulit" | ||
version = "0.0.1" | ||
description = "ULIT, a Universal Legal Informatics Toolkit, is a set of legal informatics utilities collected in a Python package that focuses on the retrieval of legal data and metadata from official sources in the EU, and their transformation into Pythonic data structures" | ||
authors = ["AlessioNar <[email protected]>"] | ||
license = "EUPL 1.2" | ||
|
This file was deleted.
Oops, something went wrong.
3,448 changes: 0 additions & 3,448 deletions
3,448
tests/data/html/c008bcb6-e7ec-11ee-9ea8-01aa75ed71a1.0006.03/DOC_1.xhtml
This file was deleted.
Oops, something went wrong.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Select the items attached to the English "xhtml" manifestations of the work
# whose CELEX identifier is substituted for the placeholder in the owl:sameAs
# triple below. At most 10 rows are returned, ordered by item URI.
PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
# owl: must be declared explicitly; only some endpoints predefine it.
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX purl: <http://purl.org/dc/elements/1.1/>

# Projection variables are whitespace-separated: the SPARQL 1.1 grammar does
# not allow commas between them.
SELECT DISTINCT ?cellarURIs ?manif ?format ?expr
WHERE {
    ?work owl:sameAs <http://publications.europa.eu/resource/celex/{CELEX}> .
    ?expr cdm:expression_belongs_to_work ?work ;
          cdm:expression_uses_language ?lang .
    ?lang purl:identifier ?langCode .
    ?manif cdm:manifestation_manifests_expression ?expr ;
           cdm:manifestation_type ?format .
    ?cellarURIs cdm:item_belongs_to_manifestation ?manif .

    # Compare on the lexical form so typed and plain literals both match.
    FILTER(str(?format)="xhtml" && str(?langCode)="ENG")
}
ORDER BY ?cellarURIs
LIMIT 10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
{ | ||
"head": { | ||
"link": [], | ||
"vars": [ | ||
"cellarURIs", | ||
"manif", | ||
"format", | ||
"expr" | ||
] | ||
}, | ||
"results": { | ||
"distinct": false, | ||
"ordered": true, | ||
"bindings": [ | ||
{ | ||
"cellarURIs": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04/DOC_1" | ||
}, | ||
"manif": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04" | ||
}, | ||
"format": { | ||
"type": "typed-literal", | ||
"datatype": "http://www.w3.org/2001/XMLSchema#string", | ||
"value": "fmx4" | ||
}, | ||
"expr": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006" | ||
} | ||
}, | ||
{ | ||
"cellarURIs": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04/DOC_2" | ||
}, | ||
"manif": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04" | ||
}, | ||
"format": { | ||
"type": "typed-literal", | ||
"datatype": "http://www.w3.org/2001/XMLSchema#string", | ||
"value": "fmx4" | ||
}, | ||
"expr": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006" | ||
} | ||
}, | ||
{ | ||
"cellarURIs": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04/DOC_3" | ||
}, | ||
"manif": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04" | ||
}, | ||
"format": { | ||
"type": "typed-literal", | ||
"datatype": "http://www.w3.org/2001/XMLSchema#string", | ||
"value": "fmx4" | ||
}, | ||
"expr": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006" | ||
} | ||
}, | ||
{ | ||
"cellarURIs": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04/DOC_4" | ||
}, | ||
"manif": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006.04" | ||
}, | ||
"format": { | ||
"type": "typed-literal", | ||
"datatype": "http://www.w3.org/2001/XMLSchema#string", | ||
"value": "fmx4" | ||
}, | ||
"expr": { | ||
"type": "uri", | ||
"value": "http://publications.europa.eu/resource/cellar/e115172d-3ab3-4b14-b0a4-dfdcc9871793.0006" | ||
} | ||
} | ||
] | ||
} | ||
} |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import json | ||
import logging | ||
import os | ||
from download import download_documents | ||
from sparql import send_sparql_query | ||
from parsers.html import HTMLParser | ||
from parsers.formex import Formex4Parser | ||
|
||
def _first_entry(directory):
    """Return the path of the alphabetically first entry in *directory*.

    Raises:
        IndexError: if *directory* contains no entries.
    """
    # os.listdir() yields entries in arbitrary order; sort so the same file
    # is picked on every run and on every platform.
    entries = sorted(os.listdir(directory))
    if not entries:
        raise IndexError(f"No files found in {directory}")
    # Join onto the directory itself: splitting on '/' and re-joining would
    # silently drop the leading slash of an absolute path.
    return os.path.join(directory, entries[0])


def main():
    """Run the Formex query, download the documents and parse the first one.

    Steps:
        1. Send the SPARQL query stored in ``formex_query.rq``.
        2. Dump the results to JSON and read them back from that file.
        3. Download the ``fmx4`` documents referenced by the results.
        4. Parse the alphabetically first file of the first downloaded
           directory with ``Formex4Parser`` and print its articles.

    Raises:
        IndexError: if nothing was downloaded or the first directory is empty.
        Exception: any other error is logged and re-raised unchanged.
    """
    # Configure logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    results_file = './tests/metadata/query_results/query_results.json'

    try:
        # Send SPARQL query
        logger.info("Executing SPARQL query")
        results = send_sparql_query(
            './tests/metadata/queries/formex_query.rq',
            celex='32008R1137',
        )

        # Save query results to JSON
        with open(results_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=4)
        logger.info(f"Results dumped in {results_file}")

        # Load query results back from the file that was just written, so the
        # download step works on exactly what is stored on disk.
        with open(results_file, 'r', encoding="utf-8") as f:
            results = json.load(f)

        # Download documents
        logger.info("Downloading documents")
        downloaded_document_paths = download_documents(
            results,
            './tests/data/formex',
            log_dir='./tests/logs',
            format='fmx4',
        )
        logger.info(f'{len(downloaded_document_paths)} documents downloaded in {downloaded_document_paths}')

        # Fail with a clear message instead of a bare "list index out of range".
        if not downloaded_document_paths:
            raise IndexError("No documents were downloaded; nothing to parse")

        file_path = _first_entry(downloaded_document_paths[0])

        print(f'Parsing {file_path}')
        parser = Formex4Parser()
        parser.parse(file_path)
        print(parser.articles)

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        raise


if __name__ == "__main__":
    main()
Oops, something went wrong.