From 81a86a00eaeda85073d7d883a98fdb9745085035 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 09:18:50 +0200 Subject: [PATCH 1/5] feat: Harvest dataset field 'documentation' --- ckanext/dcatapchharvest/profiles.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py index edc1064..c8940c3 100644 --- a/ckanext/dcatapchharvest/profiles.py +++ b/ckanext/dcatapchharvest/profiles.py @@ -487,6 +487,11 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa dataset_uri = dh.dataset_uri(dataset_dict, dataset_ref) dataset_dict['extras'].append({'key': 'uri', 'value': dataset_uri}) + # Documentation + dataset_dict['documentation'] = self._object_value_list( + dataset_ref, FOAF.page + ) + # Resources for distribution in self._distributions(dataset_ref): resource_dict = { From cda611f3c77492dbea602aa2b755f1e8c48b345c Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 13:57:24 +0200 Subject: [PATCH 2/5] feat: Map DCAT-AP CH v2 documentation property --- ckanext/dcatapchharvest/profiles.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py index c8940c3..ef1db55 100644 --- a/ckanext/dcatapchharvest/profiles.py +++ b/ckanext/dcatapchharvest/profiles.py @@ -650,7 +650,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa ('theme', DCAT.theme, None, URIRef), ('conforms_to', DCT.conformsTo, None, Literal), ('alternate_identifier', ADMS.identifier, None, Literal), - ('documentation', FOAF.page, None, Literal), ('has_version', DCT.hasVersion, None, Literal), ('is_version_of', DCT.isVersionOf, None, Literal), ('source', DCT.source, None, Literal), @@ -730,6 +729,13 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa ) g.add((dataset_ref, DCT.temporal, temporal_extent)) + # Documentation + documentation = dataset_dict.get('documentation', []) + for 
link in documentation: + doc = URIRef(link) + g.add((doc, RDF.type, FOAF.Document)) + g.add((dataset_ref, FOAF.page, doc)) + # Themes groups = self._get_dataset_value(dataset_dict, 'groups', []) for group_name in groups: @@ -1071,7 +1077,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # Lists items = [ - ("documentation", FOAF.page, None, Literal), ("language", DCT.language, None, Literal), ("conforms_to", DCT.conformsTo, None, Literal), ] From 4e1dcb0fec8145b03fe0527696bf113add1bd5df Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 14:02:15 +0200 Subject: [PATCH 3/5] tests: Test dataset documentation mapping --- ckanext/dcatapchharvest/tests/fixtures/1901.xml | 6 ++++++ ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/ckanext/dcatapchharvest/tests/fixtures/1901.xml b/ckanext/dcatapchharvest/tests/fixtures/1901.xml index d554c28..bc573b8 100644 --- a/ckanext/dcatapchharvest/tests/fixtures/1901.xml +++ b/ckanext/dcatapchharvest/tests/fixtures/1901.xml @@ -55,5 +55,11 @@ basi-statistiche-e-presentazioni-generali Annuaire statistique de la Suisse 1901 statistische-grundlagen-und-ubersichten + + + + + + diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py index 5b66c2e..0db6ac6 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py @@ -86,6 +86,10 @@ def test_dataset_all_fields(self): # Lists eq_(sorted(dataset['language']), [u'de', u'fr']) eq_(sorted(dataset['groups']), [{'name': u'statistical-basis'}]) + eq_( + sorted(dataset['documentation']), + ['https://example.com/documentation-dataset-1', 'https://example.com/documentation-dataset-2'] + ) # Dataset URI eq_(extras['uri'], u'https://opendata.swiss/dataset/7451e012-64b2-4bbc-af20-a0e2bc61b585') From 0347e5e61c7440017e202fd8fb7c3f59371447c4 Mon Sep 17 00:00:00 2001 From: Rae Knowler 
Date: Wed, 13 Sep 2023 17:34:37 +0200 Subject: [PATCH 4/5] tests: Update fixture --- ckanext/dcatapchharvest/tests/fixtures/dataset.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckanext/dcatapchharvest/tests/fixtures/dataset.json b/ckanext/dcatapchharvest/tests/fixtures/dataset.json index 3391fde..665e46a 100644 --- a/ckanext/dcatapchharvest/tests/fixtures/dataset.json +++ b/ckanext/dcatapchharvest/tests/fixtures/dataset.json @@ -93,6 +93,10 @@ "en": "English Description", "it": "" }, + "documentation": [ + "http://dataset.info.org/doc1", + "http://dataset.info.org/doc2" + ], "extras": [ { "key": "alternate_identifier", @@ -126,10 +130,6 @@ "key": "access_rights", "value": "public" }, - { - "key": "documentation", - "value": "[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]" - }, { "key": "provenance", "value": "Some statement about provenance" From f9114be1772d675009e86ed8934864783209cf0d Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 17:35:13 +0200 Subject: [PATCH 5/5] tests: Test serialization of dataset documentation field --- ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py index ea7b0fe..30ee88a 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py @@ -56,10 +56,14 @@ def test_graph_from_dataset(self): for keyword in keywords: assert self._triple(g, dataset_ref, DCAT.keyword, Literal(keyword, lang=key)) + # Documentation + eq_(len([t for t in g.triples((dataset_ref, FOAF.page, None))]), 2) + for documentation_link in dataset['documentation']: + assert self._triple(g, dataset_ref, FOAF.page, URIRef(documentation_link)) + # List for item in [ ('language', DCT.language, Literal), - # ('documentation', FOAF.page, URIRef, 
FOAF.Document), ]: values = json.loads(extras[item[0]]) eq_(len([t for t in g.triples((dataset_ref, item[1], None))]), len(values))