From 86933aa35ac1d3f81caea9989f14224ea89ef96e Mon Sep 17 00:00:00 2001 From: Caglar Demir Date: Thu, 14 Nov 2024 12:24:56 +0100 Subject: [PATCH] CSV to RDF via OWLAPI is implemented. Overloads of IRI.create() are compressed in a single create with assertion checks --- owlapy/abstracts/abstract_owl_ontology.py | 1 + owlapy/iri.py | 50 +++++-------------- owlapy/owl_ontology.py | 59 +++++++++++++++-------- owlapy/owl_ontology_manager.py | 13 ++++- owlapy/owl_reasoner.py | 2 +- owlapy/util_owl_static_funcs.py | 45 +++++++++++------ tests/test_owl_static_funcs.py | 13 +++-- 7 files changed, 104 insertions(+), 79 deletions(-) diff --git a/owlapy/abstracts/abstract_owl_ontology.py b/owlapy/abstracts/abstract_owl_ontology.py index 1aa6715d..ca281dc0 100644 --- a/owlapy/abstracts/abstract_owl_ontology.py +++ b/owlapy/abstracts/abstract_owl_ontology.py @@ -174,6 +174,7 @@ def remove_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]): """ pass + @abstractmethod def save(self, document_iri: Optional[IRI] = None): """Saves this ontology, using its IRI to determine where/how the ontology should be saved. diff --git a/owlapy/iri.py b/owlapy/iri.py index af6a3238..02d6566c 100644 --- a/owlapy/iri.py +++ b/owlapy/iri.py @@ -49,45 +49,21 @@ def __init__(self, namespace: Union[str, Namespaces], remainder: str=""): self._namespace = sys.intern(namespace) self._remainder = remainder - @overload @staticmethod - def create(namespace: Namespaces, remainder: str) -> 'IRI': - ... - - @overload - @staticmethod - def create(namespace: str, remainder: str) -> 'IRI': - """Creates an IRI by concatenating two strings. The full IRI is an IRI that contains the characters in - namespace + remainder. - - Args: - namespace: The first string. - remainder: The second string. - - Returns: - An IRI whose characters consist of prefix + suffix. - """ - ... - - @overload - @staticmethod - def create(string: str) -> 'IRI': - """Creates an IRI from the specified String. - - Args: - string: The String that specifies the IRI. - - Returns: - The IRI that has the specified string representation. - """ - ... - - @staticmethod - def create(string, remainder=None) -> 'IRI': + def create(iri:str | Namespaces, remainder:str=None) -> 'IRI': + assert isinstance(iri, str) | isinstance(iri, Namespaces), f"Input must be a string or an instance of Namespaces. Currently, {type(iri)}" if remainder is not None: - return IRI(string, remainder) - index = 1 + max(string.rfind("/"), string.rfind(":"), string.rfind("#")) - return IRI(string[0:index], string[index:]) + assert isinstance(remainder,str), f"Reminder must be string. Currently, {type(remainder)}" + return IRI(iri, remainder) + else: + assert isinstance(iri, str) and remainder is None, \ + f"iri must be string if remainder is None. Currently, {type(iri)} and {type(remainder)}" + # Extract reminder from input string + assert "/" in iri, "Input must contain /" + # assert ":" in iri, "Input must contain :" + assert " " not in iri, f"Input must not contain whitespace. Currently:{iri}." + index = 1 + max(iri.rfind("/"), iri.rfind(":"), iri.rfind("#")) + return IRI(iri[0:index], iri[index:]) def __repr__(self): return f"IRI({repr(self._namespace)}, {repr(self._remainder)})" diff --git a/owlapy/owl_ontology.py b/owlapy/owl_ontology.py index 03a2d4af..6be14694 100644 --- a/owlapy/owl_ontology.py +++ b/owlapy/owl_ontology.py @@ -53,7 +53,6 @@ }) - _VERSION_IRI: Final = IRI.create(namespaces.OWL, "versionIRI") _M = TypeVar('_M', bound='OWLOntologyManager') # noqa: F821 @@ -983,20 +982,40 @@ def __init__(self, manager: _SM, path: Union[IRI, str], new: bool = False): self.manager = manager self.path = path self.new = new + + if isinstance(path, IRI): file_path = path.str else: file_path = path if new: # create new ontology if isinstance(path, IRI): - self.owlapi_ontology = manager.get_owlapi_manager().createOntology(Stream.empty(), + self.owlapi_ontology = self.manager.get_owlapi_manager().createOntology(Stream.empty(), owlapi_IRI.create(path.str)) else: raise NotImplementedError("Cant initialize a new ontology using path. Use IRI instead") else: # means we are loading an existing ontology - self.owlapi_ontology = manager.get_owlapi_manager().loadOntologyFromOntologyDocument(File(file_path)) + self.owlapi_ontology = self.manager.get_owlapi_manager().loadOntologyFromOntologyDocument(File(file_path)) self.mapper = OWLAPIMapper(self) + def __eq__(self, other): + if isinstance(other, SyncOntology): + return other.owlapi_ontology.getOntologyID().equals(other.owlapi_ontology.getOntologyID()) + return False + + def __hash__(self): + return int(self.owlapi_ontology.getOntologyID().hashCode()) + + def __repr__(self): + return (f'SyncOntology:' + f'\t|Tbox|={len(self.get_tbox_axioms())}' + f'\t|Abox|={len(self.get_abox_axioms())}' + f'\t|Individuals|={len(self.individuals_in_signature())}' + f'\t|Classes|={len(self.classes_in_signature())}' + f'\t|Object Properties|={len(self.object_properties_in_signature())}' + f'\t|Data Properties|={len(self.data_properties_in_signature())}' + f'\n{self.manager}\tPath:{self.path}\tNew:{self.new}') + def classes_in_signature(self) -> Iterable[OWLClass]: return self.mapper.map_(self.owlapi_ontology.getClassesInSignature()) @@ -1089,24 +1108,22 @@ def remove_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]): else: self.owlapi_ontology.removeAxioms(self.mapper.map_(axiom)) - def __eq__(self, other): - if isinstance(other, SyncOntology): - return other.owlapi_ontology.getOntologyID().equals(other.owlapi_ontology.getOntologyID()) - return False - - def __hash__(self): - return int(self.owlapi_ontology.getOntologyID().hashCode()) - - def __repr__(self): - return (f'SyncOntology:' - f'\t|Tbox|={len(self.get_tbox_axioms())}' - f'\t|Abox|={len(self.get_abox_axioms())}' - f'\t|Individuals|={len(self.individuals_in_signature())}' - f'\t|Classes|={len(self.classes_in_signature())}' - f'\t|Object Properties|={len(self.object_properties_in_signature())}' - f'\t|Data Properties|={len(self.data_properties_in_signature())}' - f'\n{self.manager}\tPath:{self.path}\tNew:{self.new}') - + def save(self, path:str=None, document_iri: Optional[IRI] = None): + """ + https://github.com/phillord/owl-api/blob/b2a5bfb9a0c6730c8ff950776af8f9bf19c78eac/ + contract/src/test/java/org/coode/owlapi/examples/Examples.java#L206 + """ + assert isinstance(path,str), "Path must be a string" + from java.io import File + import org.semanticweb.owlapi.model.IRI + # //Create a file for the new format + file = File(path) + print(f"Saving Ontology into {path}") + if document_iri is None: + document_iri = org.semanticweb.owlapi.model.IRI.create(file.toURI()) + else: + raise NotImplementedError("document_iri must be None for the time being") + self.manager.saveOntology(self.owlapi_ontology, self.manager.getOntologyFormat(self.owlapi_ontology), document_iri) OWLREADY2_FACET_KEYS = MappingProxyType({ OWLFacet.MIN_INCLUSIVE: "min_inclusive", diff --git a/owlapy/owl_ontology_manager.py b/owlapy/owl_ontology_manager.py index 2df6cdd7..f0d03795 100644 --- a/owlapy/owl_ontology_manager.py +++ b/owlapy/owl_ontology_manager.py @@ -1,3 +1,4 @@ +import os.path from typing import Union import jpype @@ -131,11 +132,19 @@ def create_ontology(self, iri: Union[IRI, str]) -> SyncOntology: assert isinstance(iri, IRI), "iri either must be string or an instance of IRI Class" return SyncOntology(self, iri, new=True) - def load_ontology(self, iri: Union[IRI, str]) -> SyncOntology: - return SyncOntology(self, iri, new=False) + def load_ontology(self, path: str) -> SyncOntology: + assert isinstance(path, str), "path either must be string or an instance of IRI Class" + assert os.path.exists(path), "path does not lead to an RDF knowledge graph." + return SyncOntology(self, path, new=False) def get_owlapi_manager(self): return self.owlapi_manager def apply_change(self, change: AbstractOWLOntologyChange): raise NotImplementedError("A change cannot be applied at the moment.") + + def getOntologyFormat(self,*args): + return self.owlapi_manager.getOntologyFormat(*args) + + def saveOntology(self,*args)->None: + self.owlapi_manager.saveOntology(*args) \ No newline at end of file diff --git a/owlapy/owl_reasoner.py b/owlapy/owl_reasoner.py index 5069338f..f48c5f4d 100644 --- a/owlapy/owl_reasoner.py +++ b/owlapy/owl_reasoner.py @@ -1009,7 +1009,7 @@ def __init__(self, ontology: Union[SyncOntology, str], reasoner="HermiT"): # https://owlcs.github.io/owlapi/apidocs_5/org/semanticweb/owlapi/apibinding/OWLManager.html self.manager = SyncOntologyManager() # OWLOntology - self.ontology = self.manager.load_ontology(iri=ontology) + self.ontology = self.manager.load_ontology(ontology) self._owlapi_manager = self.manager.get_owlapi_manager() self._owlapi_ontology = self.ontology.get_owlapi_ontology() diff --git a/owlapy/util_owl_static_funcs.py b/owlapy/util_owl_static_funcs.py index 56da7ab9..34fe67bc 100644 --- a/owlapy/util_owl_static_funcs.py +++ b/owlapy/util_owl_static_funcs.py @@ -1,4 +1,6 @@ -from .owl_ontology import Ontology +import rdflib + +from .owl_ontology import Ontology, SyncOntology from .owl_ontology_manager import OntologyManager from .class_expression import OWLClassExpression, OWLClass from .owl_individual import OWLNamedIndividual @@ -8,6 +10,8 @@ from .owl_literal import OWLLiteral import os from typing import List +from tqdm import tqdm +import pandas as pd def save_owl_class_expressions(expressions: OWLClassExpression | List[OWLClassExpression], path: str = 'predictions', @@ -60,7 +64,7 @@ def save_owl_class_expressions(expressions: OWLClassExpression | List[OWLClassEx ontology.add_axiom(equivalent_classes_axiom) ontology.save(path=path, inplace=False, rdf_format=rdf_format) -def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None): +def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None,rdf_format:str=None): """ Transfroms a CSV file to an RDF Knowledge Graph in RDF/XML format. @@ -68,6 +72,7 @@ def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None): path_csv (str): X path_kg (str): X namespace (str): X + rdf_format(str):X Raises: AssertionError: @@ -85,30 +90,40 @@ def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None): >>> print("Dataset saved as iris_dataset.csv") >>> csv_to_rdf_kg("iris_dataset.csv") """ + from owlapy.owl_ontology_manager import SyncOntologyManager assert path_csv is not None, "path cannot be None" assert os.path.exists(path_csv), f"path **{path_csv}**does not exist." - assert path_kg is not None, "path_kg cannot be None" + assert path_kg is not None, f"path_kg cannot be None.Currently {path_kg}" assert namespace is not None, "namespace cannot be None" assert namespace[:7]=="http://", "First characters of namespace must be 'http://'" - import pandas as pd - # Read the CSV file - df = pd.read_csv(path_csv) + if rdf_format is None: + rdf_format="rdfxml" + else: + assert rdf_format in ["ntriples", "turtle"] + # Initialize an Ontology Manager. - manager = OntologyManager() + manager = SyncOntologyManager() # Create an ontology given an ontology manager. - ontology:Ontology = manager.create_ontology(namespace) + ontology:SyncOntology = manager.create_ontology(namespace) + + # Read the CSV file + df = pd.read_csv(path_csv) # () Iterate over rows - for index, row in df.iterrows(): - print(f"Row {index}:",end="\t") - print(row.to_dict()) - i=OWLNamedIndividual(iri=f"{namespace}#{str(index)}") + for index, row in tqdm(df.iterrows()): + individual=OWLNamedIndividual(f"{namespace}#{str(index)}".replace(" ","_")) for column_name, value in row.to_dict().items(): if isinstance(value, float): - axiom = OWLDataPropertyAssertionAxiom(subject=i, - property_=OWLDataProperty(iri=f"{namespace}#ID_{str(column_name)}"), + # Create an IRI for the predicate + str_property_iri=f"{namespace}#{column_name}".replace(" ","_") + str_property_iri=str_property_iri.replace("(","/") + str_property_iri = str_property_iri.replace(")", "") + + axiom = OWLDataPropertyAssertionAxiom(subject=individual, + property_=OWLDataProperty(iri=str_property_iri), object_=OWLLiteral(value=value)) ontology.add_axiom(axiom) else: raise NotImplementedError(f"How to represent value={value} has not been decided") - ontology.save(path=path_kg, inplace=False) \ No newline at end of file + + ontology.save(path=path_kg) diff --git a/tests/test_owl_static_funcs.py b/tests/test_owl_static_funcs.py index c0626c62..f1bd7771 100644 --- a/tests/test_owl_static_funcs.py +++ b/tests/test_owl_static_funcs.py @@ -9,6 +9,10 @@ import pandas as pd import rdflib +from owlapy.owl_ontology_manager import SyncOntologyManager +from sklearn.datasets import load_iris +import pandas as pd + class TestRunningExamples: def test_readme(self): # Using owl classes to create a complex class expression @@ -28,9 +32,12 @@ def test_readme(self): def test_csv_to_kg(self): data = load_iris() - # Convert to DataFrame df = pd.DataFrame(data.data, columns=data.feature_names) df['target'] = data.target - # Save as CSV df.to_csv("iris_dataset.csv", index=False) - csv_to_rdf_kg("iris_dataset.csv", path_kg="iris_kg.owl", namespace="http://example.com/society") + + assert len(df) == 150 + path_kg = "iris_kg.owl" + csv_to_rdf_kg(path_csv="iris_dataset.csv", path_kg=path_kg, namespace="http://example.com/society") + onto = SyncOntologyManager().load_ontology(path_kg) + assert len(onto.get_abox_axioms()) == 750