diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py index 5a62e41..c3f29d0 100644 --- a/ckanext/dcatapchharvest/profiles.py +++ b/ckanext/dcatapchharvest/profiles.py @@ -796,7 +796,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa ('status', ADMS.status, None, Literal), ('coverage', DCT.coverage, None, Literal), ('identifier', DCT.identifier, None, Literal), - ('media_type', DCAT.mediaType, None, Literal), ('spatial', DCT.spatial, None, Literal), ] @@ -892,7 +891,8 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa g.add((doc, RDF.type, FOAF.Document)) g.add((distribution, FOAF.page, doc)) - # Format + # Format and Media Type Case 1: + # Format: Set Format value if format matches EU vocabulary format_uri = None if resource_dict.get('format'): for key, value in valid_formats.items(): @@ -903,8 +903,20 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa DCT['format'], format_uri )) + # Media Type: Set Format value if format matches EU vocabulary + # and media type is not set + if format_uri and resource_dict.get('media_type') is None: + g.add(( + distribution, + DCT['media_type'], + format_uri + )) - # Set Media Type value if format does not match + # Format and Media Type Case 2: + # Set Media Type and Format value + # if format does not match EU vocabulary + # but media type matches IANA vocabulary + media_type_uri = None if format_uri is None and resource_dict.get('media_type'): for key, value in valid_media_types.items(): if resource_dict.get('media_type') == key: @@ -914,6 +926,31 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa DCT['format'], media_type_uri )) + g.add(( + distribution, + DCT['media_type'], + media_type_uri + )) + + # Format and Media Type Case 3: + # Set Media Type and Format value + # if format does not match EU vocabulary + # but format matches IANA vocabulary + if format_uri is None and media_type_uri is None: + if 
resource_dict.get('format'): + for key, value in valid_media_types.items(): + if resource_dict.get('format') == key: + media_type_uri_by_format = URIRef(value) + g.add(( + distribution, + DCT['format'], + media_type_uri_by_format + )) + g.add(( + distribution, + DCT['media_type'], + media_type_uri_by_format + )) # Mime-Type if resource_dict.get('mimetype'):