From 12c9d8b26b7b162acefb0a17b6bbfffe1328c0c7 Mon Sep 17 00:00:00 2001 From: Sarah Hilse Date: Fri, 29 Sep 2023 14:52:40 +0200 Subject: [PATCH] feat: hardening format/media type mapping for rdf graph --- ckanext/dcatapchharvest/dcat_helpers.py | 4 ++++ ckanext/dcatapchharvest/profiles.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/dcat_helpers.py b/ckanext/dcatapchharvest/dcat_helpers.py index c01aab0..3e96650 100644 --- a/ckanext/dcatapchharvest/dcat_helpers.py +++ b/ckanext/dcatapchharvest/dcat_helpers.py @@ -252,6 +252,10 @@ def get_iana_media_type_values(): records = root.findall('.//ns:record', media_types_namespaces) media_type_values = {} for record in records: + if record.find('ns:file', media_types_namespaces) is None: + continue + if record.find('ns:name', media_types_namespaces) is None: + continue name = record.find('ns:name', media_types_namespaces).text file_value = record.find('ns:file', media_types_namespaces).text media_type_values[name] = media_types_namespaces['ns']+'/'+file_value diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py index f717067..6a5840c 100644 --- a/ckanext/dcatapchharvest/profiles.py +++ b/ckanext/dcatapchharvest/profiles.py @@ -893,6 +893,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa g.add((distribution, FOAF.page, doc)) # Format + format_uri = None if resource_dict.get('format'): for key, value in valid_formats.items(): if resource_dict.get('format') == key: @@ -904,7 +905,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa )) # Set Media Type value if format does not match - if not format_uri and resource_dict.get('media_type'): + if format_uri is None and resource_dict.get('media_type'): for key, value in valid_media_types.items(): if resource_dict.get('media_type') == key: media_type_uri = URIRef(value)