From 6d2b2ab140d34b0f872a2e20d1e64457760ed134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20B=C3=BCschelberger?= Date: Wed, 18 Dec 2024 15:15:16 +0100 Subject: [PATCH] add suffix unit test for xlsx parser --- data2rdf/models/base.py | 18 +- .../input/mapping/mapping_suffix.json | 127 +++++++++ .../output/output_pipeline_suffix.ttl | 258 ++++++++++++++++++ tests/abox/xls_pipeline_test/test_pipeline.py | 37 +++ 4 files changed, 432 insertions(+), 8 deletions(-) create mode 100644 tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json create mode 100644 tests/abox/xls_pipeline_test/output/output_pipeline_suffix.ttl diff --git a/data2rdf/models/base.py b/data2rdf/models/base.py index 6ab70c5b..453af997 100644 --- a/data2rdf/models/base.py +++ b/data2rdf/models/base.py @@ -10,8 +10,8 @@ BaseModel, ConfigDict, Field, - ValidationInfo, field_validator, + model_validator, ) from rdflib import Graph @@ -106,16 +106,18 @@ def validate_iri(cls, value: Union[AnyUrl, List[AnyUrl]]) -> AnyUrl: value = [AnyUrl(str(iterable).strip()) for iterable in value] return value - @field_validator("suffix") + @model_validator(mode="after") @classmethod def validate_suffix( - cls, value: Optional[str], info: ValidationInfo - ) -> str: + cls, + self: "BasicSuffixModel", + ) -> "BasicSuffixModel": """Return suffix for individal""" - iri = info.data["iri"] - config = info.data["config"] - if isinstance(iri, list) and value is None: + if isinstance(self.iri, list) and self.suffix is None: raise TypeError("If the iri is a list, the suffix must be set ") - return value or str(iri).split(config.separator)[-1] + self.suffix = ( + self.suffix or str(self.iri).split(self.config.separator)[-1] + ) + return self diff --git a/tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json b/tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json new file mode 100644 index 00000000..99985b20 --- /dev/null +++ b/tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json @@ -0,0 +1,127 @@ +[ + { + "iri": "https://w3id.org/steel/ProcessOntology/Remark", + "key": "Bemerkungen", + "value_location": "UU31", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/WidthChange", + "key": "Breiten\u00e4nderung", + "time_series_start": "E15", + "unit_location": "E14", + "worksheet": "Messdaten" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/TimeStamp", + "key": "Datum", + "value_location": "AD6", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/PercentageElongation", + "key": "Dehnung", + "time_series_start": "Q15", + "unit": "\u00f7", + "worksheet": "Messdaten" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/OriginalGaugeLength", + "key": "Messl\u00e4nge Standardweg", + "unit_location": "P16", + "value_location": "M16", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/SpecimenWidth", + "key": "Probenbreite b", + "unit_location": "P15", + "value_location": "M15", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/SpecimenThickness", + "key": "Probendicke a", + "unit_location": "P14", + "value_location": "M14", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/SpecimenType", + "key": "Probenform", + "value_location": "AE7", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/SampleIdentifier-2", + "key": "Probenkennung 2", + "value_location": "U7", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/ProjectNumber", + "key": "Projekt", + "value_location": "F6", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/Tester", + "key": "Pr\u00fcfer", + "value_location": "U6", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/TestingRate", + "key": "Pr\u00fcfgeschwindigkeit", + "value_location": "J9", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/MachineData", + "key": "Pr\u00fcfmaschine", + "value_location": "I8", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/Temperature", + "key": "Pr\u00fcftemperatur", + "value_location": "U8", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/StandardForce", + "key": "Standardkraft", + "time_series_start": "C15", + "unit_location": "C14", + "worksheet": "Messdaten" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/Extension", + "key": "Standardweg", + "time_series_start": "D15", + "unit_location": "D14", + "worksheet": "Messdaten" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/AbsoluteCrossheadTravel", + "key": "Traversenweg absolut", + "time_series_start": "B15", + "unit_location": "B14", + "worksheet": "Messdaten" + }, + { + "annotation": "https://w3id.org/steel/ProcessOntology", + "iri": "https://w3id.org/steel/ProcessOntology/Material", + "key": "Werkstoff", + "value_location": "H7", + "worksheet": "Protokoll" + }, + { + "iri": "https://w3id.org/steel/ProcessOntology/TestTime", + "key": "Zeit", + "suffix": "Time", + "time_series_start": "A15", + "unit_location": "A14", + "worksheet": "Messdaten" + } +] diff --git a/tests/abox/xls_pipeline_test/output/output_pipeline_suffix.ttl b/tests/abox/xls_pipeline_test/output/output_pipeline_suffix.ttl new file mode 100644 index 00000000..266ec1c8 --- /dev/null +++ b/tests/abox/xls_pipeline_test/output/output_pipeline_suffix.ttl @@ -0,0 +1,258 @@ +@prefix csvw: . +@prefix dcat: . +@prefix dcterms: . +@prefix fileid: . +@prefix foaf: . +@prefix ns1: . +@prefix qudt: . +@prefix rdfs: . +@prefix xsd: . + +fileid:TensileTestExperiment a ns1:Activity ; + ns1:generated fileid:AbsoluteCrossheadTravel, + fileid:Extension, + fileid:Remark, + fileid:StandardForce, + fileid:TimeStamp, + fileid:dataset ; + ns1:hadPlan fileid:TestStandard ; + ns1:used fileid:DisplacementTransducer, + fileid:ForceMeasuringDevice, + fileid:TensileTestSpecimen, + fileid:TensileTestingMachine, + fileid:TestingFacility ; + ns1:wasAssociatedWith fileid:Tester ; + ns1:wasInfluencedBy fileid:ExperimentPreparation . + +fileid:TestingStandard a ns1:Plan . + +fileid:ExperimentPreparation a ns1:Activity ; + ns1:atLocation fileid:TestingLab ; + ns1:generated fileid:OriginalGaugeLength, + fileid:Preload, + fileid:TestingRate ; + ns1:wasAssociatedWith fileid:DisplacementTransducer, + fileid:ForceMeasuringDevice, + fileid:TensileTestSpecimen, + fileid:TensileTestingMachine, + fileid:Tester ; + ns1:wasInfluencedBy fileid:SamplePreparatation . + +fileid:MachineData a ; + rdfs:label "M_1"^^xsd:string . + +fileid:ParallelLength a ns1:Entity ; + ns1:wasAttributedTo fileid:TensileTestSpecimen . + +fileid:PercentageElongation a ; + qudt:hasUnit "http://qudt.org/vocab/unit/FRACTION"^^xsd:anyURI . + +fileid:Preload a ns1:Entity ; + ns1:wasAttributedTo fileid:TensileTestingMachine . + +fileid:Project a ns1:Activity ; + ns1:generated fileid:ProjectName, + fileid:ProjectNumber ; + ns1:wasAssociatedWith fileid:TestingFacility . + +fileid:ProjectName a ns1:Entity . + +fileid:SampleIdentifier-2 a ; + rdfs:label 123456 . + +fileid:SamplePreparatation a ns1:Activity ; + ns1:generated fileid:ParallelLength, + fileid:SpecimenThickness, + fileid:SpecimenType, + fileid:SpecimenWidth ; + ns1:wasAssociatedWith fileid:Material, + fileid:TensileTestSpecimen ; + ns1:wasInfluencedBy fileid:Project . + +fileid:Temperature a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/DEG_C"^^xsd:anyURI ; + qudt:value "25"^^xsd:integer ; + ns1:wasAttributedTo fileid:TestingLab . + +fileid:Time a ; + qudt:hasUnit "http://qudt.org/vocab/unit/SEC"^^xsd:anyURI . + +fileid:WidthChange a ; + qudt:hasUnit "http://qudt.org/vocab/unit/MilliM"^^xsd:anyURI . + +fileid:dataset a dcat:Dataset, + ns1:Entity ; + dcterms:hasPart fileid:tableGroup ; + dcat:distribution [ a dcat:Distribution ; + dcat:accessURL "https://www.example.org/download/"^^xsd:anyURI ; + dcat:mediaType "https://www.iana.org/assignments/media-types/application/vnd.ms-excel"^^xsd:anyURI ] . + +fileid:tableGroup a csvw:TableGroup ; + csvw:table [ a csvw:Table ; + rdfs:label "Time series data" ; + csvw:tableSchema [ a csvw:Schema ; + csvw:column [ a csvw:Column ; + qudt:quantity fileid:WidthChange ; + csvw:titles "Breitenänderung"^^xsd:string ; + foaf:page [ a foaf:Document ; + dcterms:format "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ; + dcterms:identifier "https://www.example.org/download/column-0"^^xsd:anyURI ; + dcterms:type "http://purl.org/dc/terms/Dataset"^^xsd:anyURI ] ], + [ a csvw:Column ; + qudt:quantity fileid:TestTime ; + csvw:titles "Zeit"^^xsd:string ; + foaf:page [ a foaf:Document ; + dcterms:format "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ; + dcterms:identifier "https://www.example.org/download/column-5"^^xsd:anyURI ; + dcterms:type "http://purl.org/dc/terms/Dataset"^^xsd:anyURI ] ], + [ a csvw:Column ; + qudt:quantity fileid:PercentageElongation ; + csvw:titles "Dehnung"^^xsd:string ; + foaf:page [ a foaf:Document ; + dcterms:format "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ; + dcterms:identifier "https://www.example.org/download/column-1"^^xsd:anyURI ; + dcterms:type "http://purl.org/dc/terms/Dataset"^^xsd:anyURI ] ], + [ a csvw:Column ; + qudt:quantity fileid:StandardForce ; + csvw:titles "Standardkraft"^^xsd:string ; + foaf:page [ a foaf:Document ; + dcterms:format "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ; + dcterms:identifier "https://www.example.org/download/column-2"^^xsd:anyURI ; + dcterms:type "http://purl.org/dc/terms/Dataset"^^xsd:anyURI ] ], + [ a csvw:Column ; + qudt:quantity fileid:AbsoluteCrossheadTravel ; + csvw:titles "Traversenweg absolut"^^xsd:string ; + foaf:page [ a foaf:Document ; + dcterms:format "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ; + dcterms:identifier "https://www.example.org/download/column-4"^^xsd:anyURI ; + dcterms:type "http://purl.org/dc/terms/Dataset"^^xsd:anyURI ] ], + [ a csvw:Column ; + qudt:quantity fileid:Extension ; + csvw:titles "Standardweg"^^xsd:string ; + foaf:page [ a foaf:Document ; + dcterms:format "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ; + dcterms:identifier "https://www.example.org/download/column-3"^^xsd:anyURI ; + dcterms:type "http://purl.org/dc/terms/Dataset"^^xsd:anyURI ] ] ] ], + [ a csvw:Table ; + rdfs:label "Metadata" ; + csvw:row [ a csvw:Row ; + csvw:describes fileid:Tester ; + csvw:titles "Prüfer"^^xsd:string ], + [ a csvw:Row ; + csvw:describes fileid:SampleIdentifier-2 ; + csvw:titles "Probenkennung 2"^^xsd:string ], + [ a csvw:Row ; + qudt:quantity fileid:SpecimenThickness ; + csvw:titles "Probendicke a"^^xsd:string ], + [ a csvw:Row ; + qudt:quantity fileid:OriginalGaugeLength ; + csvw:titles "Messlänge Standardweg"^^xsd:string ], + [ a csvw:Row ; + csvw:describes fileid:ProjectNumber ; + csvw:titles "Projekt"^^xsd:string ], + [ a csvw:Row ; + qudt:quantity fileid:TestingRate ; + csvw:titles "Prüfgeschwindigkeit"^^xsd:string ], + [ a csvw:Row ; + qudt:quantity fileid:SpecimenWidth ; + csvw:titles "Probenbreite b"^^xsd:string ], + [ a csvw:Row ; + csvw:describes fileid:Material ; + csvw:titles "Werkstoff"^^xsd:string ], + [ a csvw:Row ; + csvw:describes fileid:MachineData ; + csvw:titles "Prüfmaschine"^^xsd:string ], + [ a csvw:Row ; + csvw:describes fileid:TimeStamp ; + csvw:titles "Datum"^^xsd:string ], + [ a csvw:Row ; + qudt:quantity fileid:Temperature ; + csvw:titles "Prüftemperatur"^^xsd:string ], + [ a csvw:Row ; + csvw:describes fileid:SpecimenType ; + csvw:titles "Probenform"^^xsd:string ] ] . + +fileid:AbsoluteCrossheadTravel a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/MilliM"^^xsd:anyURI ; + ns1:wasDerivedFrom fileid:DisplacementTransducer . + +fileid:Extension a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/MilliM"^^xsd:anyURI ; + ns1:wasDerivedFrom fileid:DisplacementTransducer . + +fileid:Material a ns1:Agent, + , + ; + rdfs:label "Werkstoff_1"^^xsd:string . + +fileid:OriginalGaugeLength a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/MilliM"^^xsd:anyURI ; + qudt:value "15"^^xsd:integer ; + ns1:wasAttributedTo fileid:DisplacementTransducer . + +fileid:ProjectNumber a ns1:Entity, + ; + rdfs:label "Projekt_1"^^xsd:string . + +fileid:SpecimenThickness a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/MilliM"^^xsd:anyURI ; + qudt:value "1.5"^^xsd:float ; + ns1:wasAttributedTo fileid:TensileTestSpecimen . + +fileid:SpecimenType a ns1:Entity, + ; + rdfs:label "Fz 10x20"^^xsd:string ; + ns1:wasAttributedTo fileid:TensileTestSpecimen . + +fileid:SpecimenWidth a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/MilliM"^^xsd:anyURI ; + qudt:value "9.5"^^xsd:float ; + ns1:wasAttributedTo fileid:TensileTestSpecimen . + +fileid:StandardForce a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/KiloN"^^xsd:anyURI ; + ns1:wasDerivedFrom fileid:ForceMeasuringDevice . + +fileid:TestingRate a ns1:Entity, + ; + qudt:hasUnit "http://qudt.org/vocab/unit/MilliM-PER-SEC"^^xsd:anyURI ; + qudt:value "0.02"^^xsd:float ; + ns1:wasAttributedTo fileid:TensileTestingMachine . + +fileid:TimeStamp a ; + rdfs:label "2016-10-11 00:00:00"^^xsd:string . + +fileid:ForceMeasuringDevice a ns1:Agent, + ns1:Entity ; + ns1:atLocation fileid:TestingLab . + +fileid:Tester a ns1:Agent, + ; + rdfs:label "Fe"^^xsd:string ; + ns1:actedOnBehalfOf fileid:TestingFacility ; + ns1:atLocation fileid:TestingLab . + +fileid:TensileTestingMachine a ns1:Agent, + ns1:Entity ; + ns1:atLocation fileid:TestingLab . + +fileid:TestingFacility a ns1:Location, + ns1:Organization . + +fileid:DisplacementTransducer a ns1:Agent, + ns1:Entity ; + ns1:atLocation fileid:TestingLab . + +fileid:TestingLab a ns1:Agent, + ns1:Location ; + ns1:atLocation fileid:TestingFacility . + +fileid:TensileTestSpecimen a ns1:Agent, + ns1:Entity . diff --git a/tests/abox/xls_pipeline_test/test_pipeline.py b/tests/abox/xls_pipeline_test/test_pipeline.py index a2eabd80..5a697f27 100644 --- a/tests/abox/xls_pipeline_test/test_pipeline.py +++ b/tests/abox/xls_pipeline_test/test_pipeline.py @@ -288,3 +288,40 @@ def test_excel_pipeline_inputs(input_kind) -> None: metadata ) assert sort_entries(pipeline.to_dict()) == as_non_dsms_schema(metadata) + + +def test_excel_pipeline_suffix() -> None: + from rdflib import Graph + + from data2rdf.warnings import MappingMissmatchWarning + + from data2rdf import ( # isort:skip + Data2RDF, + Parser, + ) + + raw_data = os.path.join(working_folder, "data", "AFZ1-Fz-S1Q.xlsm") + expected = os.path.join(output_folder, "output_pipeline_suffix.ttl") + + with pytest.warns( + MappingMissmatchWarning, match="Concept with key" + ) as warnings: + pipeline = Data2RDF( + raw_data=raw_data, + mapping=os.path.join(mapping_folder, "mapping_suffix.json"), + parser=Parser.excel, + additional_triples=template, + parser_args={"dropna": True, "unit_from_macro": True}, + ) + + missmatches = [ + warning + for warning in warnings + if warning.category == MappingMissmatchWarning + ] + assert len(missmatches) == 1 + + expected_graph = Graph() + expected_graph.parse(expected) + + assert pipeline.graph.isomorphic(expected_graph)