Skip to content

Commit

Permalink
Merge pull request #74 from opendata-swiss/feat/harvest-and-export-documentation-field
Browse files Browse the repository at this point in the history

feat: Harvest dataset field 'documentation'
  • Loading branch information
bellisk authored Sep 18, 2023
2 parents be003c4 + f9114be commit 82ada5d
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 7 deletions.
14 changes: 12 additions & 2 deletions ckanext/dcatapchharvest/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,11 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
dataset_uri = dh.dataset_uri(dataset_dict, dataset_ref)
dataset_dict['extras'].append({'key': 'uri', 'value': dataset_uri})

# Documentation
dataset_dict['documentation'] = self._object_value_list(
dataset_ref, FOAF.page
)

# Resources
for distribution in self._distributions(dataset_ref):
resource_dict = {
Expand Down Expand Up @@ -645,7 +650,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa
('theme', DCAT.theme, None, URIRef),
('conforms_to', DCT.conformsTo, None, Literal),
('alternate_identifier', ADMS.identifier, None, Literal),
('documentation', FOAF.page, None, Literal),
('has_version', DCT.hasVersion, None, Literal),
('is_version_of', DCT.isVersionOf, None, Literal),
('source', DCT.source, None, Literal),
Expand Down Expand Up @@ -725,6 +729,13 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa
)
g.add((dataset_ref, DCT.temporal, temporal_extent))

# Documentation
documentation = dataset_dict.get('documentation', [])
for link in documentation:
doc = URIRef(link)
g.add((doc, RDF.type, FOAF.Document))
g.add((dataset_ref, FOAF.page, doc))

# Themes
groups = self._get_dataset_value(dataset_dict, 'groups', [])
for group_name in groups:
Expand Down Expand Up @@ -1066,7 +1077,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):

# Lists
items = [
("documentation", FOAF.page, None, Literal),
("language", DCT.language, None, Literal),
("conforms_to", DCT.conformsTo, None, Literal),
]
Expand Down
6 changes: 6 additions & 0 deletions ckanext/dcatapchharvest/tests/fixtures/1901.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,11 @@
<dcat:keyword xml:lang="it">basi-statistiche-e-presentazioni-generali</dcat:keyword>
<dct:title xml:lang="fr">Annuaire statistique de la Suisse 1901</dct:title>
<dcat:keyword xml:lang="de">statistische-grundlagen-und-ubersichten</dcat:keyword>
<foaf:page>
<foaf:Document rdf:about="https://example.com/documentation-dataset-1"/>
</foaf:page>
<foaf:page>
<foaf:Document rdf:about="https://example.com/documentation-dataset-2"/>
</foaf:page>
</dcat:Dataset>
</rdf:RDF>
8 changes: 4 additions & 4 deletions ckanext/dcatapchharvest/tests/fixtures/dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@
"en": "English Description",
"it": ""
},
"documentation": [
"http://dataset.info.org/doc1",
"http://dataset.info.org/doc2"
],
"extras": [
{
"key": "alternate_identifier",
Expand Down Expand Up @@ -126,10 +130,6 @@
"key": "access_rights",
"value": "public"
},
{
"key": "documentation",
"value": "[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]"
},
{
"key": "provenance",
"value": "Some statement about provenance"
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ def test_dataset_all_fields(self):
# Lists
eq_(sorted(dataset['language']), [u'de', u'fr'])
eq_(sorted(dataset['groups']), [{'name': u'statistical-basis'}])
eq_(
sorted(dataset['documentation']),
['https://example.com/documentation-dataset-1', 'https://example.com/documentation-dataset-2']
)

# Dataset URI
eq_(extras['uri'], u'https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585')
Expand Down
6 changes: 5 additions & 1 deletion ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,14 @@ def test_graph_from_dataset(self):
for keyword in keywords:
assert self._triple(g, dataset_ref, DCAT.keyword, Literal(keyword, lang=key))

# Documentation
eq_(len([t for t in g.triples((dataset_ref, FOAF.page, None))]), 2)
for documentation_link in dataset['documentation']:
assert self._triple(g, dataset_ref, FOAF.page, URIRef(documentation_link))

# List
for item in [
('language', DCT.language, Literal),
# ('documentation', FOAF.page, URIRef, FOAF.Document),
]:
values = json.loads(extras[item[0]])
eq_(len([t for t in g.triples((dataset_ref, item[1], None))]), len(values))
Expand Down

0 comments on commit 82ada5d

Please sign in to comment.