Skip to content

Commit

Permalink
Merge pull request #102 from opendata-swiss/feat/improve-format-mapping
Browse files Browse the repository at this point in the history
Feat/improve format mapping
  • Loading branch information
bellisk authored Apr 8, 2024
2 parents cd90fec + 5e3d1a9 commit 21747d5
Show file tree
Hide file tree
Showing 9 changed files with 504 additions and 131 deletions.
47 changes: 38 additions & 9 deletions ckanext/dcatapchharvest/dcat_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,15 +237,38 @@ def get_pagination(catalog_graph):


def get_format_values():
    """Generate a dict that maps format keys to EU file-type URIs.

    The keys are intended to match our standardised formats (the keys in
    https://github.com/opendata-swiss/ckanext-switzerland-ng/blob/master/ckanext/switzerland/helpers/format_mapping.yaml)  # noqa
    once those have been converted to lowercase, with non-letter
    characters replaced by '_'. The values are URIs in the EU file-type
    vocabulary (http://publications.europa.eu/resource/authority/file-type).

    Keys read from formats.xml are the lowercased last path segment of
    each subject URI in that file. A handful of formats whose key does not
    match the last segment of the vocabulary URI are added explicitly.

    :returns: dict mapping a lowercase format key to a file-type URI
    """
    g = Graph()
    for prefix, namespace in format_namespaces.items():
        g.bind(prefix, namespace)

    # Parse the bundled copy of the EU file-type vocabulary exactly once.
    formats_file = os.path.join(__location__, 'formats.xml')
    g.parse(formats_file, format='xml')

    format_values = {}
    # A subject may be yielded more than once; re-assigning the same key
    # with the same value is harmless.
    for format_uri_ref in g.subjects():
        format_extension = format_uri_ref.split('/')[-1].lower()
        format_values[format_extension] = format_uri_ref

    # Add special cases that aren't so easy to map.
    # NOTE: these values are plain strings, unlike the values taken from
    # the graph above.
    format_values.update(
        {
            "api": "http://publications.europa.eu/resource/authority/file-type/REST",  # noqa
            "esri_ascii_grid": "http://publications.europa.eu/resource/authority/file-type/GRID_ASCII",  # noqa
            "sparql": "http://publications.europa.eu/resource/authority/file-type/SPARQLQ",  # noqa
            "wcs": "http://publications.europa.eu/resource/authority/file-type/WCS_SRVC",  # noqa
            "wfs": "http://publications.europa.eu/resource/authority/file-type/WFS_SRVC",  # noqa
            "wms": "http://publications.europa.eu/resource/authority/file-type/WMS_SRVC",  # noqa
            "wmts": "http://publications.europa.eu/resource/authority/file-type/WMTS_SRVC",  # noqa
            "worldfile": "http://publications.europa.eu/resource/authority/file-type/WORLD"  # noqa
        }
    )

    return format_values


Expand All @@ -267,12 +290,18 @@ def get_iana_media_type_values():
registry_type = registry.get('id')
records = registry.findall('.//ns:record', media_types_namespaces)
for record in records:
if record.find('ns:file', media_types_namespaces) is None:
continue
if record.find('ns:name', media_types_namespaces) is None:
continue
name = record.find('ns:name', media_types_namespaces).text.lower()
file_value = record.find('ns:file', media_types_namespaces).text
name = record.find(
'ns:name', media_types_namespaces
).text.lower()

if record.find('ns:file', media_types_namespaces) is not None:
uri_suffix = record.find(
'ns:file', media_types_namespaces
).text
else:
uri_suffix = registry_type + '/' + name

media_type_values[registry_type + '/' + name] = \
media_types_namespaces['ns'] + '/' + file_value
media_types_namespaces['ns'] + '/media-types/' + uri_suffix

return media_type_values
29 changes: 23 additions & 6 deletions ckanext/dcatapchharvest/formats.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
Expand Down Expand Up @@ -35,15 +36,15 @@
<rdfs:label xml:lang="en">File type</rdfs:label>
<owl:imports rdf:resource="http://publications.europa.eu/ontology/euvoc"/>
<rdfs:comment rdf:datatype="http://www.w3.org/2000/01/rdf-schema#Literal">File type</rdfs:comment>
<owl:versionInfo>20230614-0</owl:versionInfo>
<owl:versionInfo>20240313-0</owl:versionInfo>
<skos:prefLabel xml:lang="en">File type</skos:prefLabel>
<dcterms:title xml:lang="en">File type</dcterms:title>
<dcterms:identifier>http://publications.europa.eu/resource/authority/file-type</dcterms:identifier>
<ns5:prefLabel rdf:nodeID="b152773119"/>
<ns5:prefLabel rdf:nodeID="b172825506"/>
<ns6:prefLabel xml:lang="en">File type</ns6:prefLabel>
<ns6:table.id>file-type</ns6:table.id>
<ns6:table.version.number>20230614-0</ns6:table.version.number>
<owl:versionIRI rdf:resource="http://publications.europa.eu/resource/authority/file-type/20230614-0"/>
<ns6:table.version.number>20240313-0</ns6:table.version.number>
<owl:versionIRI rdf:resource="http://publications.europa.eu/resource/authority/file-type/20240313-0"/>
</rdf:Description>
<rdf:Description rdf:about="http://publications.europa.eu/resource/authority/file-type/ARC">
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
Expand Down Expand Up @@ -721,6 +722,10 @@
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
</rdf:Description>
<rdf:Description rdf:about="http://publications.europa.eu/resource/authority/file-type/GPX">
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
</rdf:Description>
<rdf:Description rdf:about="http://publications.europa.eu/resource/authority/file-type/ETSI_XML">
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
Expand Down Expand Up @@ -753,6 +758,10 @@
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
</rdf:Description>
<rdf:Description rdf:about="http://publications.europa.eu/resource/authority/file-type/ETSI_TSL">
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
</rdf:Description>
<rdf:Description rdf:about="http://publications.europa.eu/resource/authority/file-type/GEOTIFF">
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
Expand Down Expand Up @@ -833,4 +842,12 @@
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
</rdf:Description>
</rdf:RDF>
<rdf:Description rdf:about="http://publications.europa.eu/resource/authority/file-type/DWCA">
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
</rdf:Description>
<rdf:Description rdf:about="http://publications.europa.eu/resource/authority/file-type/MATHML">
<skos:inScheme rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
<skos:topConceptOf rdf:resource="http://publications.europa.eu/resource/authority/file-type"/>
</rdf:Description>
</rdf:RDF>
Loading

0 comments on commit 21747d5

Please sign in to comment.