Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix - Refactor JSON template rendering and improve error handling (ISO19139-GeoDCAT-AP) #58

Merged
merged 4 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
21 changes: 21 additions & 0 deletions ckan-pycsw/Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,27 @@ ENV PYCSW_DEV_PORT=5678
ENV TIMEOUT=300
ENV SSL_UNVERIFIED_MODE=False

# PYCSW Catalog configuration
# NOTE(review): CKAN_SYSADMIN_EMAIL and CKAN_URL are assumed to be defined by an
# earlier ENV/ARG instruction; if they are not, the values below expand to empty
# strings at build time -- confirm against the rest of this Dockerfile.
ENV CSW_IDENTIFICATION_TITLE="Sample Geospatial Catalogue" \
    CSW_IDENTIFICATION_ABSTRACT="OGC CSW server powered by pycsw" \
    CSW_PROVIDER_NAME="ckan-docker development team" \
    CSW_PROVIDER_URL="https://github.com/mjanez/ckan-docker" \
    CSW_CONTACT_NAME="ckan-docker development team" \
    CSW_CONTACT_POSITION="Site Administrator" \
    CSW_CONTACT_ADDRESS="ckan-docker development team" \
    CSW_CONTACT_CITY="Madrid" \
    CSW_CONTACT_STATE_OR_PROVINCE="Madrid" \
    CSW_CONTACT_POSTAL_CODE="28001" \
    CSW_CONTACT_COUNTRY="Spain" \
    CSW_CONTACT_EMAIL=${CKAN_SYSADMIN_EMAIL} \
    CSW_CONTACT_URL=${CKAN_URL} \
    CSW_INSPIRE_DATE="2024-01-01" \
    CSW_INSPIRE_GEMET_KEYWORDS="Utility and governmental services" \
    CSW_INSPIRE_CONFORMITY="notEvaluated" \
    CSW_INSPIRE_CONTACT_EMAIL=${CKAN_SYSADMIN_EMAIL} \
    CSW_INSPIRE_TEMP_EXTENT="2024-01-01/2024-12-31"

# Set in its own instruction: variable substitution inside a single ENV
# instruction uses the values from *before* that instruction, so referencing
# CSW_CONTACT_NAME in the instruction that defines it would expand to empty.
ENV CSW_INSPIRE_CONTACT_NAME=${CSW_CONTACT_NAME}

WORKDIR ${APP_DIR}

# Update files if needed
Expand Down
20 changes: 10 additions & 10 deletions ckan2pycsw/ckan2pycsw.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from schemas.pygeometa.iso19139_inspire import ISO19139_inspireOutputSchema

# debug
import ptvsd
#import ptvsd

# Env vars
TZ = os.environ.get("TZ", "TZ")
Expand Down Expand Up @@ -231,14 +231,14 @@ def run_tasks():
logging.error(f"{log_module}:ckan2pycsw | Error starting gunicorn: {e}")

if __name__ == "__main__":
if str(DEV_MODE).lower() == "true":
# Allow other computers to attach to ptvsd at this IP address and port.
ptvsd.enable_attach(address=("0.0.0.0", PYCSW_DEV_PORT), redirect_output=True)

# Pause the program until a remote debugger is attached
ptvsd.wait_for_attach()
main()
# Launch a cronjob
else:
# if str(DEV_MODE).lower() == "true":
# # Allow other computers to attach to ptvsd at this IP address and port.
# ptvsd.enable_attach(address=("0.0.0.0", PYCSW_DEV_PORT), redirect_output=True)

# # Pause the program until a remote debugger is attached
# ptvsd.wait_for_attach()
# main()
# # Launch a cronjob
# else:
run_tasks()
run_scheduler()
19 changes: 15 additions & 4 deletions ckan2pycsw/model/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,18 @@ def render_j2_template(mcf: dict, schema_type: str, url: str = None, template_di
mcf = update_object_lists(mcf)

try:
# Render the template and directly attempt to correct and deserialize the JSON string
mcf_dict = json.loads(re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', template.render(record=mcf)), strict=False)
# Render the template
rendered_template = template.render(record=mcf)
# Clean trailing commas
cleaned_template = clean_trailing_commas(rendered_template)
# Escape backslashes
escaped_template = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', cleaned_template)
# Deserialize the JSON string
mcf_dict = json.loads(escaped_template, strict=False)
except json.JSONDecodeError as e:
LOGGER.error("Error deserializing the template output: %s", e)
# Optionally: Save the problematic output for debugging
LOGGER.error("Problematic output: %s", template.render(record=mcf))
LOGGER.error("Problematic output: %s", rendered_template)
raise

return mcf_dict
Expand Down Expand Up @@ -753,4 +759,9 @@ def get_localized_dataset_value(multilang_value, default_language, languages=Non
if language in multilang_value:
localized_value[language] = multilang_value[language]

return localized_value
return localized_value

# Matches either a complete double-quoted string literal (left untouched) or a
# comma followed only by whitespace and a closing brace/bracket (captured in
# group 1). DOTALL lets an escaped or raw newline inside a string be consumed.
_TRAILING_COMMA_RE = re.compile(r'"(?:\\.|[^"\\])*"|,\s*([}\]])', re.DOTALL)


def clean_trailing_commas(json_string: str) -> str:
    """Remove trailing commas before a closing ``}`` or ``]``.

    A trailing comma is a comma followed only by whitespace and then a
    closing brace or bracket; the comma and that whitespace are dropped and
    the closing character is kept. Commas inside double-quoted string
    literals are left alone, so values such as ``"a, ]"`` are not altered.

    Args:
        json_string: JSON-like text, possibly containing trailing commas.

    Returns:
        The text with trailing commas removed.
    """
    def _drop_comma(match):
        closer = match.group(1)
        # Group 1 is unset when the match is a string literal: keep it verbatim.
        return match.group(0) if closer is None else closer

    return _TRAILING_COMMA_RE.sub(_drop_comma, json_string)
132 changes: 87 additions & 45 deletions ckan2pycsw/schemas/ckan/iso19139_geodcatap/main.j2
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
{# render_field: emits one JSON member of the form "field_name": "field_value".
   The value is always wrapped in double quotes, so it is output as a JSON string.
   NOTE(review): no escaping filter is applied here; a value containing a double
   quote would presumably produce invalid JSON -- confirm how the environment
   handles escaping. #}
{% macro render_field(field_name, field_value) %}
"{{ field_name }}": "{{ field_value }}"
{% endmacro %}

{# render_object: emits a JSON object from `fields`, an iterable of pairs where
   item [0] is the key and item [1] is the value. Each pair is rendered through
   render_field, and a comma is written after every pair except the last. #}
{% macro render_object(fields) %}
{
{%- for field in fields %}
{{ render_field(field[0], field[1]) }}{% if not loop.last %},{% endif %}
{%- endfor %}
}
{% endmacro %}

{
{# INSPIRE ISO19139 Metadata Schema #}
{% set language_iso19115 = record['language']|get_mapping_value_from_yaml_list(input_field="uri", output_field='iso_639_2', codelist="language",mappings_folder=mappings_folder + "/ckan_geodcatap") %}
Expand All @@ -8,36 +20,52 @@
{% set dcat_type = record['dcat_type'].rsplit('/', 1)[-1] %}
"mcf": {"version": 1.0},
"metadata": {
{%- set metadata_fields = [] %}

{% if record['identifier'] %}
"identifier": "{{ record['identifier'] }}",
{%- set _ = metadata_fields.append(('identifier', record['identifier'])) %}
{% else %}
"identifier": "{{ record['id'] }}",
{%- set _ = metadata_fields.append(('identifier', record['id'])) %}
{% endif %}
"language": "{{ language_2code }}",

{%- set _ = metadata_fields.append(('language', language_2code)) %}

{% if language_alternate %}
"language_alternate": "{{ language_alternate }}",
{%- set _ = metadata_fields.append(('language_alternate', language_alternate)) %}
{% endif %}
"charset": "UTF-8",

{%- set _ = metadata_fields.append(('charset', 'UTF-8')) %}

{% if record['source'] %}
"parentidentifier": "{{ record['source'].rsplit('/', 1)[-1] }}",
{%- set parent_id = record['source'].rsplit('/', 1)[-1] %}
{%- set _ = metadata_fields.append(('parentidentifier', parent_id)) %}
{% endif %}
"datestamp": "{{ record['metadata_modified']|normalize_datetime }}",
"dataseturi": "{{ url }}",

{%- set _ = metadata_fields.append(('datestamp', record['metadata_modified']|normalize_datetime)) %}
{%- set _ = metadata_fields.append(('dataseturi', url)) %}

{% if dcat_type == 'service' %}
{% if "catalog" in record['title'].lower() or "csw" in record['title'].lower() %}
"servicetype": "discovery",
{%- set _ = metadata_fields.append(('servicetype', 'discovery')) %}
{% elif "wfs" in record['title'].lower() or "descarg" in record['title'].lower() %}
"servicetype": "download",
{%- set _ = metadata_fields.append(('servicetype', 'download')) %}
{% elif "wms" in record['title'].lower() or "wmts" in record['title'].lower() or "wcs" in record['title'].lower() or "map" in record['title'].lower() %}
"servicetype": "view",
{%- set _ = metadata_fields.append(('servicetype', 'view')) %}
{% else %}
"servicetype": "other",
{%- set _ = metadata_fields.append(('servicetype', 'other')) %}
{% endif %}
{% endif %}
"hierarchylevel": {
"value": "{{ dcat_type }}",
"uri": "{{ record['dcat_type'] }}"
}

{%- set hierarchylevel = {
"value": dcat_type,
"uri": record['dcat_type']
} %}
{%- set _ = metadata_fields.append(('hierarchylevel', hierarchylevel)) %}

{# Render all metadata fields with appropriate commas #}
{%- for field in metadata_fields %}
"{{ field[0] }}": {% if field[1] is mapping or field[1] is iterable and field[1] is not string %}{{ field[1] | tojson }}{% else %}"{{ field[1] }}" {% endif %}{% if not loop.last %},{% endif %}
{%- endfor %}
},
"spatial": {
{% if record['reference_system'] is defined %}
Expand Down Expand Up @@ -313,37 +341,51 @@
"maintenancefrequency": "continual"
},
"contact": {
{% if record['publisher_name'] is defined %}
"publisher": {
{% if record['publisher_name'] %}
"organization": "{{ record['publisher_name'] }}",
{% endif %}
{% if record['publisher_email'] %}
"email": "{{ record['publisher_email'] }}",
{% endif %}
{% if record['publisher_url'] %}
"url": "{{ record['publisher_url'] }}"
{% endif %}
},
{# Initialize a list for storing contact fields #}
{%- set contact_fields = [] %}

{# Publisher #}
{% if record['publisher_name'] or record['publisher_email'] or record['publisher_url'] %}
{%- set publisher = {} %}
{% if record['publisher_name'] %}
{%- set _ = publisher.update({'organization': record['publisher_name']}) %}
{% endif %}
{% if record['publisher_email'] %}
{%- set _ = publisher.update({'email': record['publisher_email']}) %}
{% endif %}
{% if record['publisher_url'] %}
{%- set _ = publisher.update({'url': record['publisher_url']}) %}
{% endif %}
{%- set _ = contact_fields.append(('publisher', publisher)) %}
{% endif %}
{% if record['author_name'] is defined %}
"author": {
{% if record['author_name'] %}
"individualname": "{{ record['author_name'] }}",
{% endif %}
{% if record['author_email'] %}
"email": "{{ record['author_email'] }}",
{% endif %}
{% if record['author_url'] %}
"url": "{{ record['author_url'] }}"
{% endif %}
},

{# Author #}
{% if record['author_name'] or record['author_email'] or record['author_url'] %}
{%- set author = {} %}
{% if record['author_name'] %}
{%- set _ = author.update({'individualname': record['author_name']}) %}
{% endif %}
{% if record['author_email'] %}
{%- set _ = author.update({'email': record['author_email']}) %}
{% endif %}
{% if record['author_url'] %}
{%- set _ = author.update({'url': record['author_url']}) %}
{% endif %}
{%- set _ = contact_fields.append(('author', author)) %}
{% endif %}
"pointOfContact": {
"organization": "{{ record['contact_name'] }}",
"email": "{{ record['contact_email'] }}",
"url": "{{ record['contact_url'] }}"
}

{# Point of Contact (if always to be included) #}
{%- set pointOfContact = {
"organization": record['contact_name'],
"email": record['contact_email'],
"url": record['contact_url']
} %}
{%- set _ = contact_fields.append(('pointOfContact', pointOfContact)) %}

{# Render all contact fields with appropriate commas #}
{%- for field in contact_fields %}
"{{ field[0] }}": {{ render_object(field[1].items()) }}{% if not loop.last %},{% endif %}
{%- endfor %}
},
"distribution": {
{% for resource in record['resources'] %}
Expand Down
Loading