diff --git a/ckanext/dcat/profiles/croissant.py b/ckanext/dcat/profiles/croissant.py index 17e6abc7..24c757d2 100644 --- a/ckanext/dcat/profiles/croissant.py +++ b/ckanext/dcat/profiles/croissant.py @@ -351,10 +351,24 @@ def _resource_basic_fields_graph(self, resource_ref, resource_dict): items = [ ("name", SCHEMA.name, None, Literal), ("description", SCHEMA.description, None, Literal), - ("hash", SCHEMA.sha256, None, Literal), ] self._add_triples_from_dict(resource_dict, resource_ref, items) + if resource_dict.get("hash"): + predicate = None + if len(resource_dict["hash"]) == 32: + predicate = SCHEMA.md5 + elif len(resource_dict["hash"]) == 64: + predicate = SCHEMA.sha256 + if predicate: + self._add_triple_from_dict( + resource_dict, + resource_ref, + predicate, + "hash", + _type=Literal + ) + def _resource_list_fields_graph(self, resource_ref, resource_dict): items = [ ("same_as", SCHEMA.sameAs, None, Literal), @@ -402,13 +416,16 @@ def _resource_subresources_graph(self, dataset_ref, resource_ref, resource_dict) self.g.add((dataset_ref, SCHEMA.distribution, subresource_ref)) # Note that this is added to the dataset_ref node, not to the resource_ref node self.g.add((subresource_ref, RDF.type, subresource_type_specific)) - items = [ - ("description", SCHEMA.description, None, Literal), - ("format", SCHEMA.encodingFormat, None, Literal), - ] - self._add_triples_from_dict(subresource_dict, subresource_ref, items) + # Basic fields + self._resource_basic_fields_graph(subresource_ref, subresource_dict) + + # Format + self._resource_format_graph(subresource_ref, subresource_dict) + + # URL + self._resource_url_graph(subresource_ref, subresource_dict) - if resource_dict.get("type") == "fileSet": + if subresource_dict.get("type") == "fileSet": items = [ ("includes", CR.includes, None, Literal), ("excludes", CR.excludes, None, Literal), diff --git a/ckanext/dcat/schemas/croissant.yaml b/ckanext/dcat/schemas/croissant.yaml index d9b997c6..0ab69ff3 100644 --- 
a/ckanext/dcat/schemas/croissant.yaml +++ b/ckanext/dcat/schemas/croissant.yaml @@ -390,6 +390,12 @@ resource_fields: required: false preset: resource_format_autocomplete + # schema:md5 or schema:sha256 (by hash length) | schema:Text | ONE + - field_name: hash + label: Hash + help_text: Checksum for the file contents. + required: false + # cr:includes | schema:Text | MANY - field_name: includes label: Includes @@ -411,4 +417,4 @@ resource_fields: # - field_name: contained_in # label: Contained in # help_text: Another FileObject or FileSet that this one is contained in, e.g., in the case of a file extracted from an archive. When this property is present, the contentUrl is evaluated as a relative path within the container object. - # required: false \ No newline at end of file + # required: false diff --git a/ckanext/dcat/tests/profiles/croissant/test_validate.py b/ckanext/dcat/tests/profiles/croissant/test_validate.py index 86952f9e..320ec089 100644 --- a/ckanext/dcat/tests/profiles/croissant/test_validate.py +++ b/ckanext/dcat/tests/profiles/croissant/test_validate.py @@ -10,7 +10,10 @@ from ckanext.dcat.profiles.croissant import JSONLD_CONTEXT from ckanext.dcat.tests.utils import get_file_contents -@pytest.mark.skipif(sys.version_info < (3, 10), reason="croissant is not available in py<3.10") + +@pytest.mark.skipif( + sys.version_info < (3, 10), reason="croissant is not available in py<3.10" +) def test_valid_output(): dataset_dict = json.loads( diff --git a/examples/ckan/ckan_full_dataset_croissant.json b/examples/ckan/ckan_full_dataset_croissant.json index 55e225a9..005be8fd 100644 --- a/examples/ckan/ckan_full_dataset_croissant.json +++ b/examples/ckan/ckan_full_dataset_croissant.json @@ -57,19 +57,27 @@ "description": "Some description", "url": "https://example.com/data.csv", "format": "CSV", - "type": "fileSet", "id_given": "my-custom-resource-id", - "size": 12323, - "hash": 
"b221d9dbb083a7f33428d7c2a3c3198ae925614d70210e28716ccaa7cd4ddb79", "subresources": [ { "type": "fileObject", - "id_given": "my-custom-subresource-id", + "id_given": "my-custom-subresource-id-1", + "url": "https://example.com/data.csv", "description": "Test subresource 1", + "hash": "b221d9dbb083a7f33428d7c2a3c3198ae925614d70210e28716ccaa7cd4ddb79", + "format": "CSV" + }, + { + "type": "fileSet", + "id_given": "my-custom-subresource-id-2", + "description": "Test subresource 2", "format": "CSV", "includes": "**.csv", "excludes": "**.txt" } + ] } ]