From dcd66edde00ad8b1fe7d5f447586a81d674fc7f5 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Mon, 2 Oct 2023 16:43:03 +0200 Subject: [PATCH 1/3] feat: Map qualified_relations onto dataset --- ckanext/dcatapchharvest/profiles.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py index 67d2ef7..5f36e9f 100644 --- a/ckanext/dcatapchharvest/profiles.py +++ b/ckanext/dcatapchharvest/profiles.py @@ -215,6 +215,17 @@ def _relations(self, subject, predicate): return relations + def _qualified_relations(self, subject): + qualified_relations = [] + + for relation_node in self.g.objects(subject, DCAT.qualifiedRelation): + qualified_relations.append({ + "relation": self._object_value(relation_node, DCT.relation), + "role": self._object_value(relation_node, DCAT.hadRole), + }) + + return qualified_relations + def _license_rights_name(self, subject, predicate): for node in self.g.objects(subject, predicate): # DCAT-AP CH v1: the license as a literal (should be @@ -407,6 +418,7 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa dataset_dict['resources'] = [] dataset_dict['relations'] = [] dataset_dict['see_alsos'] = [] + dataset_dict['qualified_relations'] = [] # Basic fields for key, predicate in ( @@ -496,6 +508,10 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa for see_also in see_alsos: dataset_dict['see_alsos'].append({'dataset_identifier': see_also}) + dataset_dict["qualified_relations"] = self._qualified_relations( + dataset_ref + ) + # Dataset URI dataset_uri = dh.dataset_uri(dataset_dict, dataset_ref) dataset_dict['extras'].append({'key': 'uri', 'value': dataset_uri}) From 065f7bc468f944d3e9d0598b405a91455c5ece32 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Mon, 2 Oct 2023 17:35:13 +0200 Subject: [PATCH 2/3] feat: Export qualified_relations to RDF graph --- ckanext/dcatapchharvest/profiles.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py index 5f36e9f..77cc84e 100644 --- a/ckanext/dcatapchharvest/profiles.py +++ b/ckanext/dcatapchharvest/profiles.py @@ -722,7 +722,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa if dataset_dict.get('see_alsos'): references = dataset_dict.get('see_alsos') for reference in references: - # we only excpect dicts here + # we only expect dicts here if not isinstance(reference, dict): continue reference_identifier = reference.get('dataset_identifier') @@ -733,6 +733,32 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa Literal(reference_identifier) )) + if dataset_dict.get("qualified_relations"): + for reference in dataset_dict["qualified_relations"]: + if not reference.get("relation"): + continue + + qualified_relation = BNode() + g.add((qualified_relation, RDF.type, DCAT.Relationship)) + g.add(( + qualified_relation, + DCT.relation, + URIRef(reference["relation"]) + )) + + if reference.get("role"): + g.add(( + qualified_relation, + DCAT.hadRole, + URIRef(reference["role"]) + )) + + g.add(( + dataset_ref, + DCAT.qualifiedRelation, + qualified_relation + )) + # Contact details if dataset_dict.get('contact_points'): contact_points = self._get_dataset_value(dataset_dict, 'contact_points') # noqa From 4311987dc687c0fe79ba670183702b9237b8ce6b Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Tue, 3 Oct 2023 12:13:34 +0200 Subject: [PATCH 3/3] tests: Add test for qualified_relations --- .../dcatapchharvest/tests/fixtures/1901.xml | 12 +++++++++ .../tests/test_dcatap_ch_parse.py | 27 +++++++++++++++---- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/ckanext/dcatapchharvest/tests/fixtures/1901.xml b/ckanext/dcatapchharvest/tests/fixtures/1901.xml index 95affb2..63bfa81 100644 --- a/ckanext/dcatapchharvest/tests/fixtures/1901.xml +++ b/ckanext/dcatapchharvest/tests/fixtures/1901.xml @@ -31,6 +31,18 @@ de 4682791@bundesamt-fur-statistik-bfs + + + + + + + + + + + + 1900-12-31T00:00:00 bases-statistiques-et-generalites publikation diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py index 018dd12..53c5900 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py @@ -18,23 +18,23 @@ class TestSwissDCATAPProfileParsing(BaseParseTest): def test_rights_license(self): - + contents = self._get_file_contents('dataset-rights.xml') p = RDFParser(profiles=['swiss_dcat_ap']) p.parse(contents) - + datasets = [d for d in p.datasets()] - + # Dataset eq_(len(datasets), 1) dataset = datasets[0] - + # Resources eq_(len(dataset['resources']), 1) resource = dataset['resources'][0] eq_(resource['rights'], u'NonCommercialAllowed-CommercialAllowed-ReferenceRequired') eq_(resource['license'], u'NonCommercialAllowed-CommercialWithPermission-ReferenceRequired') - + def test_dataset_all_fields(self): contents = self._get_file_contents('1901.xml') @@ -101,6 +101,23 @@ def test_dataset_all_fields(self): see_also = dataset['see_alsos'][0] eq_(see_also['dataset_identifier'], u'4682791@bundesamt-fur-statistik-bfs') + # Qualified relations + qualified_relations = sorted(dataset["qualified_relations"]) + eq_( + qualified_relations[0], + { + "relation": "http://example.org/Original987", + "role": "http://www.iana.org/assignments/relation/original" + } + ) + eq_( + qualified_relations[1], + { + "relation": "http://example.org/Related486", + "role": "http://www.iana.org/assignments/relation/related" + } + ) + # Lists eq_(sorted(dataset['language']), [u'de', u'fr']) eq_(sorted(dataset['groups']), [{'name': u'statistical-basis'}])