Skip to content

Commit

Permalink
Merge pull request #78 from opendata-swiss/feat/harvest-and-export-resource-documentation-field
Browse files (browse the repository at this point in the history)

feat: Harvest and export distribution field 'documentation'
  • Loading branch information
bellisk authored Sep 19, 2023
2 parents 82ada5d + dace014 commit 8944ced
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 3 deletions.
13 changes: 12 additions & 1 deletion ckanext/dcatapchharvest/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,11 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
resource_dict.get('format')):
resource_dict['media_type'] = resource_dict['format']

# Documentation
resource_dict['documentation'] = self._object_value_list(
distribution, FOAF.page
)

# Timestamp fields
for key, predicate in (
('issued', DCT.issued),
Expand Down Expand Up @@ -778,7 +783,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa

# Lists
items = [
('documentation', FOAF.page, None, Literal),
('language', DCT.language, None, Literal),
('conforms_to', DCT.conformsTo, None, Literal),
]
Expand Down Expand Up @@ -810,6 +814,13 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa
elif download_url:
g.add((distribution, DCAT.accessURL, URIRef(download_url)))

# Documentation
documentation = resource_dict.get('documentation', [])
for link in documentation:
doc = URIRef(link)
g.add((doc, RDF.type, FOAF.Document))
g.add((distribution, FOAF.page, doc))

# Format
if resource_dict.get('format'):
g.add((
Expand Down
6 changes: 6 additions & 0 deletions ckanext/dcatapchharvest/tests/fixtures/1901.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
<dct:identifier>346265-fr@bundesamt-fur-statistik-bfs</dct:identifier>
<dct:format>HTML</dct:format>
<dct:rights>NonCommercialAllowed-CommercialWithPermission-ReferenceRequired</dct:rights>
<foaf:page>
<foaf:Document rdf:about="https://example.com/documentation-distribution-1"/>
</foaf:page>
<foaf:page>
<foaf:Document rdf:about="https://example.com/documentation-distribution-2"/>
</foaf:page>
</dcat:Distribution>
</dcat:distribution>
<dct:language>de</dct:language>
Expand Down
14 changes: 12 additions & 2 deletions ckanext/dcatapchharvest/tests/fixtures/dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,18 @@
"it": ""
},
"documentation": [
"http://dataset.info.org/doc1",
"http://dataset.info.org/doc2"
"https://example.com/documentation-dataset-1",
"https://example.com/documentation-dataset-2"
],
"resources": [
{
"id": "e2c50e70-67ad-4f86-bb1b-3f93867eadaa",
"package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
"documentation": [
"https://example.com/documentation-resource-1",
"https://example.com/documentation-resource-2"
]
}
],
"extras": [
{
Expand Down
6 changes: 6 additions & 0 deletions ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ def test_dataset_all_fields(self):
eq_(resource['url'], u'https://www.bfs.admin.ch/asset/fr/hs-b-00.01-jb-1901')
assert 'download_url' not in resource, "download_url not available on resource"

# Lists
eq_(
sorted(resource['documentation']),
['https://example.com/documentation-distribution-1', 'https://example.com/documentation-distribution-2']
)

# Distribution URI
eq_(resource['uri'], u'https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585/resource/c8ec6ca0-6923-4cf3-92f2-95a10e6f8e25')

Expand Down
8 changes: 8 additions & 0 deletions ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ def test_graph_from_dataset(self):
for value in values:
assert self._triple(g, dataset_ref, item[1], item[2](value))

# Resources
eq_(len([t for t in g.triples((dataset_ref, DCAT.distribution, None))]), len(dataset["resources"]))
for resource_dict in dataset.get("resources", []):
distribution = URIRef(dh.resource_uri(resource_dict))
assert self._triple(g, distribution, RDF.type, DCAT.Distribution)
for link in resource_dict.get("documentation", []):
assert self._triple(g, distribution, FOAF.page, URIRef(link))

def test_graph_from_dataset_uri(self):
"""Tests that datasets (resources) with a uri from the test system
have that uri changed to reference the prod system when they are output
Expand Down

0 comments on commit 8944ced

Please sign in to comment.