From 8a45d7517bcc43c2fadf016accfc741da8b98165 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 14:02:34 +0200 Subject: [PATCH 01/11] style: Clean up unneeded import --- ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py b/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py index 67ec814..e685156 100644 --- a/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py +++ b/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py @@ -2,7 +2,6 @@ import os import json -from datetime import datetime import nose From 5e6de62bc4d67bd9e51642b20b9e550391fc19fb Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 14:37:16 +0200 Subject: [PATCH 02/11] tests: Clean up base test classes --- .../dcatapchharvest/tests/base_test_classes.py | 16 ++++++++++++++++ .../tests/test_parse_dcatap_conformant_rdf.py | 12 +----------- .../tests/test_parse_deprecated_rdf.py | 10 +--------- .../tests/test_swiss_dcatap_profile_parse.py | 18 +----------------- 4 files changed, 19 insertions(+), 37 deletions(-) create mode 100644 ckanext/dcatapchharvest/tests/base_test_classes.py diff --git a/ckanext/dcatapchharvest/tests/base_test_classes.py b/ckanext/dcatapchharvest/tests/base_test_classes.py new file mode 100644 index 0000000..5db1b20 --- /dev/null +++ b/ckanext/dcatapchharvest/tests/base_test_classes.py @@ -0,0 +1,16 @@ +import os + + +class BaseParseTest(object): + def _extras(self, dataset): + extras = {} + for extra in dataset.get('extras'): + extras[extra['key']] = extra['value'] + return extras + + def _get_file_contents(self, file_name): + path = os.path.join(os.path.dirname(__file__), + 'fixtures', + file_name) + with open(path, 'r') as f: + return f.read() diff --git a/ckanext/dcatapchharvest/tests/test_parse_dcatap_conformant_rdf.py b/ckanext/dcatapchharvest/tests/test_parse_dcatap_conformant_rdf.py index 71d58d5..9bde474 100644 --- a/ckanext/dcatapchharvest/tests/test_parse_dcatap_conformant_rdf.py +++ b/ckanext/dcatapchharvest/tests/test_parse_dcatap_conformant_rdf.py @@ -1,23 +1,13 @@ # -*- coding: utf-8 -*- - -import os import nose import json from ckanext.dcat.processors import RDFParser +from ckanext.dcatapchharvest.tests.base_test_classes import BaseParseTest eq_ = nose.tools.eq_ assert_true = nose.tools.assert_true -class BaseParseTest(object): - def _get_file_contents(self, file_name): - path = os.path.join(os.path.dirname(__file__), - 'fixtures', - file_name) - with open(path, 'r') as f: - return f.read() - - class ConformantProfileParseTest(BaseParseTest): def test_dcatap_conformant_landing_page_import(self): contents = self._get_file_contents('conformant/dataset-landing-page.xml') diff --git a/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py b/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py index 5239b54..f79c0b1 100644 --- a/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py +++ b/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py @@ -4,20 +4,12 @@ import nose import json from ckanext.dcat.processors import RDFParser +from ckanext.dcatapchharvest.tests.base_test_classes import BaseParseTest eq_ = nose.tools.eq_ assert_true = nose.tools.assert_true -class BaseParseTest(object): - def _get_file_contents(self, file_name): - path = os.path.join(os.path.dirname(__file__), - 'fixtures', - file_name) - with open(path, 'r') as f: - return f.read() - - class DeprecatedProfileParseTest(BaseParseTest): def test_deprecated_landing_page_import(self): contents = self._get_file_contents('deprecated/dataset-landing-page.xml') diff --git a/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py b/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py index e685156..5b66c2e 100644 --- a/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py +++ b/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import os import json import nose @@ -10,27 +9,12 @@ from ckanext.dcat.processors import RDFParser from ckanext.dcatapchharvest.profiles import (DCAT, DCT) +from ckanext.dcatapchharvest.tests.base_test_classes import BaseParseTest eq_ = nose.tools.eq_ assert_true = nose.tools.assert_true -class BaseParseTest(object): - - def _extras(self, dataset): - extras = {} - for extra in dataset.get('extras'): - extras[extra['key']] = extra['value'] - return extras - - def _get_file_contents(self, file_name): - path = os.path.join(os.path.dirname(__file__), - 'fixtures', - file_name) - with open(path, 'r') as f: - return f.read() - - class TestSwissDCATAPProfileParsing(BaseParseTest): def test_dataset_all_fields(self): From 9ee60ad4b629046cbfb8c23bec1283775c1f0c81 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 14:40:19 +0200 Subject: [PATCH 03/11] tests: Copy BaseSerializeTest from ckanext-dcat --- .../tests/base_test_classes.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ckanext/dcatapchharvest/tests/base_test_classes.py b/ckanext/dcatapchharvest/tests/base_test_classes.py index 5db1b20..e46b005 100644 --- a/ckanext/dcatapchharvest/tests/base_test_classes.py +++ b/ckanext/dcatapchharvest/tests/base_test_classes.py @@ -1,5 +1,7 @@ import os +from rdflib import URIRef, BNode, Literal + class BaseParseTest(object): def _extras(self, dataset): @@ -14,3 +16,25 @@ def _get_file_contents(self, file_name): file_name) with open(path, 'r') as f: return f.read() + + +class BaseSerializeTest(object): + def _extras(self, dataset): + extras = {} + for extra in dataset.get('extras'): + extras[extra['key']] = extra['value'] + return extras + + def _triples(self, graph, subject, predicate, _object, data_type=None): + + if not (isinstance(_object, URIRef) or isinstance(_object, BNode) or _object is None): + if data_type: + _object = Literal(_object, datatype=data_type) + else: + _object = Literal(_object) + triples = [t for t in graph.triples((subject, predicate, _object))] + return triples + + def _triple(self, graph, subject, predicate, _object, data_type=None): + triples = self._triples(graph, subject, predicate, _object, data_type) + return triples[0] if triples else None From 31ee69c4aac60d55c8dd8027564d7a11a59b360a Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 14:41:54 +0200 Subject: [PATCH 04/11] tests: Make helpers available in BaseSerializeTest too --- ckanext/dcatapchharvest/tests/base_test_classes.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ckanext/dcatapchharvest/tests/base_test_classes.py b/ckanext/dcatapchharvest/tests/base_test_classes.py index e46b005..d64bbcb 100644 --- a/ckanext/dcatapchharvest/tests/base_test_classes.py +++ b/ckanext/dcatapchharvest/tests/base_test_classes.py @@ -18,12 +18,7 @@ def _get_file_contents(self, file_name): return f.read() -class BaseSerializeTest(object): - def _extras(self, dataset): - extras = {} - for extra in dataset.get('extras'): - extras[extra['key']] = extra['value'] - return extras +class BaseSerializeTest(BaseParseTest): def _triples(self, graph, subject, predicate, _object, data_type=None): From d98ae6ff212a65b83e96f551f90af9f3dbee30d6 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 14:47:23 +0200 Subject: [PATCH 05/11] tests: Use our BaseSerializeTest for swiss_schemaorg test --- .../tests/fixtures/dataset-test-uri.json | 16 ++ .../tests/fixtures/dataset.json | 162 ++++++++++++++++++ .../test_swiss_schemaorg_profile_serialize.py | 147 +--------------- 3 files changed, 185 insertions(+), 140 deletions(-) create mode 100644 ckanext/dcatapchharvest/tests/fixtures/dataset-test-uri.json create mode 100644 ckanext/dcatapchharvest/tests/fixtures/dataset.json diff --git a/ckanext/dcatapchharvest/tests/fixtures/dataset-test-uri.json b/ckanext/dcatapchharvest/tests/fixtures/dataset-test-uri.json new file mode 100644 index 0000000..5151c3b --- /dev/null +++ b/ckanext/dcatapchharvest/tests/fixtures/dataset-test-uri.json @@ -0,0 +1,16 @@ +{ + "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6", + "name": "test-dataset", + "title": "Test DCAT dataset", + "uri": "https://test.example.com/dataset/foo", + "version": "1.0b", + "metadata_created": "2015-06-26T15:21:09.034694", + "metadata_modified": "2015-06-26T15:21:09.075774", + "resources": [ + { + "uri": "https://test.example.com/dataset/foo/resource/fxx", + "id": "5f2be71f-636c-4d3f-aac1-50830b97f853", + "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6" + } + ] +} diff --git a/ckanext/dcatapchharvest/tests/fixtures/dataset.json b/ckanext/dcatapchharvest/tests/fixtures/dataset.json new file mode 100644 index 0000000..3391fde --- /dev/null +++ b/ckanext/dcatapchharvest/tests/fixtures/dataset.json @@ -0,0 +1,162 @@ +{ + "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6", + "name": "test-dataset", + "title": "Test DCAT dataset", + "url": "http://example.com/ds1", + "version": "1.0b", + "issued": "2015-06-26T15:21:09.034694", + "keywords": { + "fr": [], + "de": [ + "alter", + "sozialhilfe" + ], + "en": [ + "age" + ], + "it": [] + }, + "groups": [ + { + "display_name": { + "fr": "Economie nationale", + "de": "Volkswirtschaft", + "en": "National economy", + "it": "Economia" + }, + "description": { + "fr": "", + "de": "", + "en": "some descriptiom", + "it": "" + }, + "image_display_url": "", + "title": { + "fr": "Economie nationale", + "de": "Volkswirtschaft", + "en": "National economy", + "it": "Economia" + }, + "id": "5389c3f2-2f64-436b-9fac-2d1fc342f7b5", + "name": "national-economy" + }, + { + "display_name": { + "fr": "Education, science", + "de": "Bildung, Wissenschaft", + "en": "Education and science", + "it": "Formazione e scienza" + }, + "description": { + "fr": "", + "de": "", + "en": "", + "it": "" + }, + "image_display_url": "", + "title": { + "fr": "Education, science", + "de": "Bildung, Wissenschaft", + "en": "Education and science", + "it": "Formazione e scienza" + }, + "id": "afcb4a2a-b4b0-4d7c-984a-9078e964be49", + "name": "education" + }, + { + "display_name": { + "fr": "Finances", + "de": "Finanzen", + "en": "Finances", + "it": "Finanze" + }, + "description": { + "fr": "", + "de": "", + "en": "", + "it": "" + }, + "image_display_url": "", + "title": { + "fr": "Finances", + "de": "Finanzen", + "en": "Finances", + "it": "Finanze" + }, + "id": "79cbe120-e9c6-4249-b934-58ca980606d7", + "name": "finances" + } + ], + "description": { + "fr": "", + "de": "Deutsche Beschreibung", + "en": "English Description", + "it": "" + }, + "extras": [ + { + "key": "alternate_identifier", + "value": "[\"xyz\", \"abc\"]" + }, + { + "key": "identifier", + "value": "26be5452-fc5c-11e7-8450-fea9aa178066" + }, + { + "key": "version_notes", + "value": "This is a beta version" + }, + { + "key": "frequency", + "value": "monthly" + }, + { + "key": "language", + "value": "[\"en\"]" + }, + { + "key": "theme", + "value": "[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]" + }, + { + "key": "conforms_to", + "value": "[\"Standard 1\", \"Standard 2\"]" + }, + { + "key": "access_rights", + "value": "public" + }, + { + "key": "documentation", + "value": "[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]" + }, + { + "key": "provenance", + "value": "Some statement about provenance" + }, + { + "key": "dcat_type", + "value": "test-type" + }, + { + "key": "related_resource", + "value": "[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]" + }, + { + "key": "has_version", + "value": "[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]" + }, + { + "key": "is_version_of", + "value": "[\"https://data.some.org/catalog/datasets/original-dataset\"]" + }, + { + "key": "source", + "value": "[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]" + }, + { + "key": "sample", + "value": "[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]" + } + ] +} diff --git a/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_profile_serialize.py b/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_profile_serialize.py index 744e244..8a04b42 100644 --- a/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_profile_serialize.py +++ b/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_profile_serialize.py @@ -12,7 +12,7 @@ from rdflib import URIRef import ckanext.dcatapchharvest.dcat_helpers as dh -from ckanext.dcat.tests.test_euro_dcatap_profile_serialize import BaseSerializeTest +from ckanext.dcatapchharvest.tests.base_test_classes import BaseSerializeTest eq_ = nose.tools.eq_ assert_true = nose.tools.assert_true @@ -22,131 +22,9 @@ class TestSchemaOrgProfileSerializeDataset(BaseSerializeTest): def test_graph_from_dataset(self): - dataset = { - 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', - 'name': 'test-dataset', - 'title': 'Test DCAT dataset', - 'url': 'http://example.com/ds1', - 'version': '1.0b', - 'issued': '2015-06-26T15:21:09.034694', - 'keywords': - { - 'fr': [], - 'de': [ - 'alter', - 'sozialhilfe' - ], - 'en': [ - 'age' - ], - 'it': [] - }, - 'groups': [ - { - 'display_name': - { - 'fr': 'Economie nationale', - 'de': 'Volkswirtschaft', - 'en': 'National economy', - 'it': 'Economia' - }, - 'description': - { - 'fr': '', - 'de': '', - 'en': 'some descriptiom' - '', - 'it': '' - }, - 'image_display_url': '', - 'title': - { - 'fr': 'Economie nationale', - 'de': 'Volkswirtschaft', - 'en': 'National economy', - 'it': 'Economia' - }, - 'id': '5389c3f2-2f64-436b-9fac-2d1fc342f7b5', - 'name': 'national-economy' - }, - { - 'display_name': - { - 'fr': 'Education, science', - 'de': 'Bildung, Wissenschaft', - 'en': 'Education and science', - 'it': 'Formazione e scienza' - }, - 'description': - { - 'fr': '', - 'de': '', - 'en': '', - 'it': '' - }, - 'image_display_url': '', - 'title': - { - 'fr': 'Education, science', - 'de': 'Bildung, Wissenschaft', - 'en': 'Education and science', - 'it': 'Formazione e scienza' - }, - 'id': 'afcb4a2a-b4b0-4d7c-984a-9078e964be49', - 'name': 'education' - }, - { - 'display_name': - { - 'fr': 'Finances', - 'de': 'Finanzen', - 'en': 'Finances', - 'it': 'Finanze' - }, - 'description': - { - 'fr': '', - 'de': '', - 'en': '', - 'it': '' - }, - 'image_display_url': '', - 'title': - { - 'fr': 'Finances', - 'de': 'Finanzen', - 'en': 'Finances', - 'it': 'Finanze' - }, - 'id': '79cbe120-e9c6-4249-b934-58ca980606d7', - 'name': 'finances' - } - ], - 'description': { - 'fr': '', - 'de': 'Deutsche Beschreibung', - 'en': 'English Description', - 'it': '' - }, - 'extras': [ - {'key': 'alternate_identifier', 'value': '[\"xyz\", \"abc\"]'}, - {'key': 'identifier', 'value': '26be5452-fc5c-11e7-8450-fea9aa178066'}, - {'key': 'version_notes', 'value': 'This is a beta version'}, - {'key': 'frequency', 'value': 'monthly'}, - {'key': 'language', 'value': '[\"en\"]'}, - {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'}, - {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'}, - {'key': 'access_rights', 'value': 'public'}, - {'key': 'documentation', 'value': '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'}, - {'key': 'provenance', 'value': 'Some statement about provenance'}, - {'key': 'dcat_type', 'value': 'test-type'}, - {'key': 'related_resource', 'value': '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'}, - {'key': 'has_version', 'value': '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'}, - {'key': 'is_version_of', 'value': '[\"https://data.some.org/catalog/datasets/original-dataset\"]'}, - {'key': 'source', 'value': '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'}, - {'key': 'sample', 'value': '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'}, - ] - } + dataset = json.loads( + self._get_file_contents('dataset.json') + ) extras = self._extras(dataset) s = RDFSerializer(profiles=['swiss_schemaorg']) @@ -191,20 +69,9 @@ def test_graph_from_dataset_uri(self): """"Tests that datasets (resources) with a uri from the test system have that uri changed to reference the prod system when they are output as a graph""" - dataset = { - 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', - 'name': 'test-dataset', - 'title': 'Test DCAT dataset', - 'uri': 'https://test.example.com/dataset/foo', - 'version': '1.0b', - 'metadata_created': '2015-06-26T15:21:09.034694', - 'metadata_modified': '2015-06-26T15:21:09.075774', - 'resources': [ - {'uri': 'https://test.example.com/dataset/foo/resource/fxx', - 'id': '5f2be71f-636c-4d3f-aac1-50830b97f853', - 'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6'}, - ] - } + dataset = json.loads( + self._get_file_contents('dataset-test-uri.json') + ) s = RDFSerializer(profiles=['swiss_schemaorg']) g = s.g From d32a5e98c1b924b400d1d2601bf6ebeea7d378ac Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 14:54:34 +0200 Subject: [PATCH 06/11] tests: Add tests for DCAT-AP CH serialization --- .../tests/test_dcatap_ch_profile_serialize.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py new file mode 100644 index 0000000..5827ebf --- /dev/null +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py @@ -0,0 +1,98 @@ +import json +import os + +import nose + +from rdflib import Literal +from rdflib.namespace import RDF + +from ckanext.dcat import utils +from ckanext.dcat.processors import RDFSerializer +from ckanext.dcat.profiles import FOAF, SCHEMA + +from rdflib import URIRef +import ckanext.dcatapchharvest.dcat_helpers as dh + +from ckanext.dcatapchharvest.tests.base_test_classes import BaseSerializeTest + +import logging +log = logging.getLogger(__name__) + +eq_ = nose.tools.eq_ +assert_true = nose.tools.assert_true + + +class TestSchemaOrgProfileSerializeDataset(BaseSerializeTest): + + def test_graph_from_dataset(self): + + dataset = json.loads( + self._get_file_contents('dataset.json') + ) + extras = self._extras(dataset) + + s = RDFSerializer(profiles=['swiss_schemaorg']) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + eq_(unicode(dataset_ref), utils.dataset_uri(dataset)) + + # Basic fields + assert self._triple(g, dataset_ref, RDF.type, SCHEMA.Dataset) + assert self._triple(g, dataset_ref, SCHEMA.name, dataset['title']) + assert self._triple(g, dataset_ref, SCHEMA.version, dataset['version']) + assert self._triple(g, dataset_ref, SCHEMA.identifier, extras['identifier']) + + # Dates + assert self._triple(g, dataset_ref, SCHEMA.datePublished, dataset['issued']) + assert len(list(g.objects(dataset_ref, SCHEMA.dateModified))) == 0 + + for key, value in dataset['description'].iteritems(): + if dataset['description'].get(key): + assert self._triple(g, dataset_ref, SCHEMA.description, Literal(value, lang=key)) + eq_(len([t for t in g.triples((dataset_ref, SCHEMA.description, None))]), 2) + + # Tags + eq_(len([t for t in g.triples((dataset_ref, SCHEMA.keywords, None))]), 3) + for key, keywords in dataset['keywords'].iteritems(): + if dataset['keywords'].get(key): + for keyword in keywords: + assert self._triple(g, dataset_ref, SCHEMA.keywords, Literal(keyword, lang=key)) + + # List + for item in [ + ('language', SCHEMA.inLanguage, Literal), + # ('documentation', FOAF.page, URIRef, FOAF.Document), + ]: + values = json.loads(extras[item[0]]) + log.warning(values) + eq_(len([t for t in g.triples((dataset_ref, item[1], None))]), len(values)) + for value in values: + assert self._triple(g, dataset_ref, item[1], item[2](value)) + + def test_graph_from_dataset_uri(self): + """"Tests that datasets (resources) with a uri from the test system + have that uri changed to reference the prod system when they are output as a graph""" + + dataset = json.loads( + self._get_file_contents('dataset-test-uri.json') + ) + + s = RDFSerializer(profiles=['swiss_dcat_ap']) + g = s.g + dataset_ref = s.graph_from_dataset(dataset) + + # Change dataset uri that includes a test url + dataset_uri = dh.dataset_uri(dataset, dataset_ref) + dataset_ref_changed = URIRef(dataset_uri) + + # Test that the distribution is present in the graph with the new resource uri + for resource_dict in dataset.get("resources", []): + distribution = URIRef(dh.resource_uri(resource_dict)) + + # Basic fields + assert self._triple(g, dataset_ref_changed, RDF.type, SCHEMA.Dataset) + assert self._triple(g, dataset_ref_changed, SCHEMA.name, dataset['title']) + assert self._triple(g, dataset_ref_changed, SCHEMA.version, dataset['version']) + assert self._triple(g, distribution, RDF.type, SCHEMA.Distribution) From 5f019b24a9df9cdbfc6777d680b08b08d6d25c29 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 15:21:52 +0200 Subject: [PATCH 07/11] tests: Remove unused imports --- .../dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py | 1 - ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py | 1 - 2 files changed, 2 deletions(-) diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py index 5827ebf..cf36e57 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py @@ -1,5 +1,4 @@ import json -import os import nose diff --git a/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py b/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py index f79c0b1..6c90fab 100644 --- a/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py +++ b/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import os import nose import json from ckanext.dcat.processors import RDFParser From b61cc4baeb059975de2b95d318025bbd2c281118 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 15:26:22 +0200 Subject: [PATCH 08/11] tests: Rename files for ease of use --- ...test_swiss_dcatap_profile_parse.py => test_dcatap_ch_parse.py} | 0 ...p_conformant_rdf.py => test_dcatap_ch_parse_conformant_rdf.py} | 0 ...e_deprecated_rdf.py => test_dcatap_ch_parse_deprecated_rdf.py} | 0 ...dcatap_ch_profile_serialize.py => test_dcatap_ch_serialize.py} | 0 ...org_profile_serialize.py => test_swiss_schemaorg_serialize.py} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename ckanext/dcatapchharvest/tests/{test_swiss_dcatap_profile_parse.py => test_dcatap_ch_parse.py} (100%) rename ckanext/dcatapchharvest/tests/{test_parse_dcatap_conformant_rdf.py => test_dcatap_ch_parse_conformant_rdf.py} (100%) rename ckanext/dcatapchharvest/tests/{test_parse_deprecated_rdf.py => test_dcatap_ch_parse_deprecated_rdf.py} (100%) rename ckanext/dcatapchharvest/tests/{test_dcatap_ch_profile_serialize.py => test_dcatap_ch_serialize.py} (100%) rename ckanext/dcatapchharvest/tests/{test_swiss_schemaorg_profile_serialize.py => test_swiss_schemaorg_serialize.py} (100%) diff --git a/ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py similarity index 100% rename from ckanext/dcatapchharvest/tests/test_swiss_dcatap_profile_parse.py rename to ckanext/dcatapchharvest/tests/test_dcatap_ch_parse.py diff --git a/ckanext/dcatapchharvest/tests/test_parse_dcatap_conformant_rdf.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_conformant_rdf.py similarity index 100% rename from ckanext/dcatapchharvest/tests/test_parse_dcatap_conformant_rdf.py rename to ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_conformant_rdf.py diff --git a/ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_deprecated_rdf.py similarity index 100% rename from ckanext/dcatapchharvest/tests/test_parse_deprecated_rdf.py rename to ckanext/dcatapchharvest/tests/test_dcatap_ch_parse_deprecated_rdf.py diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py similarity index 100% rename from ckanext/dcatapchharvest/tests/test_dcatap_ch_profile_serialize.py rename to ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py diff --git a/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_profile_serialize.py b/ckanext/dcatapchharvest/tests/test_swiss_schemaorg_serialize.py similarity index 100% rename from ckanext/dcatapchharvest/tests/test_swiss_schemaorg_profile_serialize.py rename to ckanext/dcatapchharvest/tests/test_swiss_schemaorg_serialize.py From fbdf7f7bca6729739f6c7e2f177b459e83e33e3e Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 15:27:37 +0200 Subject: [PATCH 09/11] fix: Add default value for dataset's groups Prevents an error if we try to iterate over None. --- ckanext/dcatapchharvest/profiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py index 530b388..edc1064 100644 --- a/ckanext/dcatapchharvest/profiles.py +++ b/ckanext/dcatapchharvest/profiles.py @@ -726,7 +726,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa g.add((dataset_ref, DCT.temporal, temporal_extent)) # Themes - groups = self._get_dataset_value(dataset_dict, 'groups') + groups = self._get_dataset_value(dataset_dict, 'groups', []) for group_name in groups: ogdch_theme_ref = URIRef(CHTHEMES_URI + group_name.get('name')) eu_theme_ref_list = eu_theme_mapping.get(ogdch_theme_ref) From 808cbaa61f8f4bade42a579e7c8c99c656660b56 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 15:46:16 +0200 Subject: [PATCH 10/11] tests: Fix test for DCAT-AP CH serialization --- .../tests/test_dcatap_ch_serialize.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py index cf36e57..4175dd7 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py @@ -7,7 +7,7 @@ from ckanext.dcat import utils from ckanext.dcat.processors import RDFSerializer -from ckanext.dcat.profiles import FOAF, SCHEMA +from ckanext.dcat.profiles import DCAT, DCT, FOAF, OWL, SCHEMA from rdflib import URIRef import ckanext.dcatapchharvest.dcat_helpers as dh @@ -71,8 +71,10 @@ def test_graph_from_dataset(self): assert self._triple(g, dataset_ref, item[1], item[2](value)) def test_graph_from_dataset_uri(self): - """"Tests that datasets (resources) with a uri from the test system - have that uri changed to reference the prod system when they are output as a graph""" + """Tests that datasets (resources) with a uri from the test system + have that uri changed to reference the prod system when they are output + as a graph + """ dataset = json.loads( self._get_file_contents('dataset-test-uri.json') @@ -91,7 +93,7 @@ def test_graph_from_dataset_uri(self): distribution = URIRef(dh.resource_uri(resource_dict)) # Basic fields - assert self._triple(g, dataset_ref_changed, RDF.type, SCHEMA.Dataset) - assert self._triple(g, dataset_ref_changed, SCHEMA.name, dataset['title']) - assert self._triple(g, dataset_ref_changed, SCHEMA.version, dataset['version']) - assert self._triple(g, distribution, RDF.type, SCHEMA.Distribution) + assert self._triple(g, dataset_ref_changed, RDF.type, DCAT.Dataset) + assert self._triple(g, dataset_ref_changed, DCT.title, dataset['title']) + assert self._triple(g, dataset_ref_changed, OWL.versionInfo, dataset['version']) + assert self._triple(g, distribution, RDF.type, DCAT.Distribution) From 27e7a770e328e9a22ed9a04ad1456c9b7332cd10 Mon Sep 17 00:00:00 2001 From: Rae Knowler Date: Wed, 13 Sep 2023 16:30:27 +0200 Subject: [PATCH 11/11] tests: Adjust TestDCATAPCHProfileSerializeDataset --- .../tests/test_dcatap_ch_serialize.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py index 4175dd7..ea7b0fe 100644 --- a/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py +++ b/ckanext/dcatapchharvest/tests/test_dcatap_ch_serialize.py @@ -7,21 +7,18 @@ from ckanext.dcat import utils from ckanext.dcat.processors import RDFSerializer -from ckanext.dcat.profiles import DCAT, DCT, FOAF, OWL, SCHEMA +from ckanext.dcat.profiles import DCAT, DCT, FOAF, OWL, SCHEMA, XSD from rdflib import URIRef import ckanext.dcatapchharvest.dcat_helpers as dh from ckanext.dcatapchharvest.tests.base_test_classes import BaseSerializeTest -import logging -log = logging.getLogger(__name__) - eq_ = nose.tools.eq_ assert_true = nose.tools.assert_true -class TestSchemaOrgProfileSerializeDataset(BaseSerializeTest): +class TestDCATAPCHProfileSerializeDataset(BaseSerializeTest): def test_graph_from_dataset(self): @@ -30,7 +27,7 @@ def test_graph_from_dataset(self): ) extras = self._extras(dataset) - s = RDFSerializer(profiles=['swiss_schemaorg']) + s = RDFSerializer(profiles=['swiss_dcat_ap']) g = s.g dataset_ref = s.graph_from_dataset(dataset) @@ -38,34 +35,33 @@ def test_graph_from_dataset(self): eq_(unicode(dataset_ref), utils.dataset_uri(dataset)) # Basic fields - assert self._triple(g, dataset_ref, RDF.type, SCHEMA.Dataset) - assert self._triple(g, dataset_ref, SCHEMA.name, dataset['title']) - assert self._triple(g, dataset_ref, SCHEMA.version, dataset['version']) - assert self._triple(g, dataset_ref, SCHEMA.identifier, extras['identifier']) + assert self._triple(g, dataset_ref, RDF.type, DCAT.Dataset) + assert self._triple(g, dataset_ref, DCT.title, dataset['title']) + assert self._triple(g, dataset_ref, OWL.versionInfo, dataset['version']) + assert self._triple(g, dataset_ref, DCT.identifier, extras['identifier']) # Dates - assert self._triple(g, dataset_ref, SCHEMA.datePublished, dataset['issued']) - assert len(list(g.objects(dataset_ref, SCHEMA.dateModified))) == 0 + assert self._triple(g, dataset_ref, DCT.issued, dataset['issued'], XSD.dateTime) + assert len(list(g.objects(dataset_ref, DCT.modified))) == 0 for key, value in dataset['description'].iteritems(): if dataset['description'].get(key): - assert self._triple(g, dataset_ref, SCHEMA.description, Literal(value, lang=key)) - eq_(len([t for t in g.triples((dataset_ref, SCHEMA.description, None))]), 2) + assert self._triple(g, dataset_ref, DCT.description, Literal(value, lang=key)) + eq_(len([t for t in g.triples((dataset_ref, DCT.description, None))]), 2) # Tags - eq_(len([t for t in g.triples((dataset_ref, SCHEMA.keywords, None))]), 3) + eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 3) for key, keywords in dataset['keywords'].iteritems(): if dataset['keywords'].get(key): for keyword in keywords: - assert self._triple(g, dataset_ref, SCHEMA.keywords, Literal(keyword, lang=key)) + assert self._triple(g, dataset_ref, DCAT.keyword, Literal(keyword, lang=key)) # List for item in [ - ('language', SCHEMA.inLanguage, Literal), + ('language', DCT.language, Literal), # ('documentation', FOAF.page, URIRef, FOAF.Document), ]: values = json.loads(extras[item[0]]) - log.warning(values) eq_(len([t for t in g.triples((dataset_ref, item[1], None))]), len(values)) for value in values: assert self._triple(g, dataset_ref, item[1], item[2](value))