Skip to content

Commit

Permalink
CSV to RDF via OWLAPI is implemented. Overloads of IRI.create() are compressed into a single create with assertion checks
Browse files Browse the repository at this point in the history
  • Loading branch information
Demirrr committed Nov 14, 2024
1 parent 0632617 commit 86933aa
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 79 deletions.
1 change: 1 addition & 0 deletions owlapy/abstracts/abstract_owl_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def remove_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]):
"""
pass

@abstractmethod
def save(self, document_iri: Optional[IRI] = None):
"""Saves this ontology, using its IRI to determine where/how the ontology should be
saved.
Expand Down
50 changes: 13 additions & 37 deletions owlapy/iri.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,45 +49,21 @@ def __init__(self, namespace: Union[str, Namespaces], remainder: str=""):
self._namespace = sys.intern(namespace)
self._remainder = remainder

@overload
@staticmethod
def create(namespace: Namespaces, remainder: str) -> 'IRI':
...

@overload
@staticmethod
def create(namespace: str, remainder: str) -> 'IRI':
"""Creates an IRI by concatenating two strings. The full IRI is an IRI that contains the characters in
namespace + remainder.
Args:
namespace: The first string.
remainder: The second string.
Returns:
An IRI whose characters consist of prefix + suffix.
"""
...

@overload
@staticmethod
def create(string: str) -> 'IRI':
"""Creates an IRI from the specified String.
Args:
string: The String that specifies the IRI.
Returns:
The IRI that has the specified string representation.
"""
...

@staticmethod
def create(iri: str | Namespaces, remainder: str | None = None) -> 'IRI':
    """Create an IRI from a namespace/remainder pair or from one full IRI string.

    Args:
        iri: Either the namespace part (a string or a ``Namespaces`` instance,
            used together with ``remainder``) or, when ``remainder`` is None,
            the complete IRI string to be split.
        remainder: The local name that follows the namespace. When omitted,
            ``iri`` is split after its last ``/``, ``:`` or ``#``.

    Returns:
        The IRI built from the two parts.

    Raises:
        AssertionError: If ``iri`` is neither a string nor a ``Namespaces``,
            if ``remainder`` is given but is not a string, or if a full IRI
            string is not a string, lacks a ``/`` or contains a space.
    """
    assert isinstance(iri, (str, Namespaces)), \
        f"Input must be a string or an instance of Namespaces. Currently, {type(iri)}"
    if remainder is not None:
        assert isinstance(remainder, str), f"Reminder must be string. Currently, {type(remainder)}"
        return IRI(iri, remainder)
    # Only a plain string can be split; a Namespaces object needs an explicit remainder.
    assert isinstance(iri, str), \
        f"iri must be string if remainder is None. Currently, {type(iri)} and {type(remainder)}"
    assert "/" in iri, "Input must contain /"
    assert " " not in iri, f"Input must not contain whitespace. Currently:{iri}."
    # The remainder starts right after the last separator character.
    index = 1 + max(iri.rfind("/"), iri.rfind(":"), iri.rfind("#"))
    return IRI(iri[0:index], iri[index:])

def __repr__(self):
return f"IRI({repr(self._namespace)}, {repr(self._remainder)})"
Expand Down
59 changes: 38 additions & 21 deletions owlapy/owl_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
})



_VERSION_IRI: Final = IRI.create(namespaces.OWL, "versionIRI")

_M = TypeVar('_M', bound='OWLOntologyManager') # noqa: F821
Expand Down Expand Up @@ -983,20 +982,40 @@ def __init__(self, manager: _SM, path: Union[IRI, str], new: bool = False):
self.manager = manager
self.path = path
self.new = new


if isinstance(path, IRI):
file_path = path.str
else:
file_path = path
if new: # create new ontology
if isinstance(path, IRI):
self.owlapi_ontology = manager.get_owlapi_manager().createOntology(Stream.empty(),
self.owlapi_ontology = self.manager.get_owlapi_manager().createOntology(Stream.empty(),
owlapi_IRI.create(path.str))
else:
raise NotImplementedError("Cant initialize a new ontology using path. Use IRI instead")
else: # means we are loading an existing ontology
self.owlapi_ontology = manager.get_owlapi_manager().loadOntologyFromOntologyDocument(File(file_path))
self.owlapi_ontology = self.manager.get_owlapi_manager().loadOntologyFromOntologyDocument(File(file_path))
self.mapper = OWLAPIMapper(self)

def __eq__(self, other):
    """Compare two SyncOntology objects by their OWLAPI ontology IDs.

    Returns False for any object that is not a SyncOntology. The ID of
    *this* ontology is compared with the ID of ``other``; comparing
    ``other`` with itself would make every pair of ontologies equal.
    """
    if not isinstance(other, SyncOntology):
        return False
    own_id = self.owlapi_ontology.getOntologyID()
    other_id = other.owlapi_ontology.getOntologyID()
    return bool(own_id.equals(other_id))

def __hash__(self):
    """Hash this ontology by the hash code of its OWLAPI ontology ID."""
    ontology_id = self.owlapi_ontology.getOntologyID()
    return int(ontology_id.hashCode())

def __repr__(self):
    """Summarise the ontology: axiom and signature counts, then manager, path and ``new`` flag."""
    # (label, zero-argument getter) pairs, in the order they appear in the output.
    sections = (
        ("Tbox", self.get_tbox_axioms),
        ("Abox", self.get_abox_axioms),
        ("Individuals", self.individuals_in_signature),
        ("Classes", self.classes_in_signature),
        ("Object Properties", self.object_properties_in_signature),
        ("Data Properties", self.data_properties_in_signature),
    )
    counts = "".join(f"\t|{label}|={len(getter())}" for label, getter in sections)
    return f"SyncOntology:{counts}\n{self.manager}\tPath:{self.path}\tNew:{self.new}"

def classes_in_signature(self) -> Iterable[OWLClass]:
return self.mapper.map_(self.owlapi_ontology.getClassesInSignature())

Expand Down Expand Up @@ -1089,24 +1108,22 @@ def remove_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]):
else:
self.owlapi_ontology.removeAxioms(self.mapper.map_(axiom))

def __eq__(self, other):
    """Compare two SyncOntology objects by their OWLAPI ontology IDs.

    Returns False for any object that is not a SyncOntology. The ID of
    *this* ontology is compared with the ID of ``other``; comparing
    ``other`` with itself would make every pair of ontologies equal.
    """
    if not isinstance(other, SyncOntology):
        return False
    own_id = self.owlapi_ontology.getOntologyID()
    other_id = other.owlapi_ontology.getOntologyID()
    return bool(own_id.equals(other_id))

def __hash__(self):
return int(self.owlapi_ontology.getOntologyID().hashCode())

def __repr__(self):
return (f'SyncOntology:'
f'\t|Tbox|={len(self.get_tbox_axioms())}'
f'\t|Abox|={len(self.get_abox_axioms())}'
f'\t|Individuals|={len(self.individuals_in_signature())}'
f'\t|Classes|={len(self.classes_in_signature())}'
f'\t|Object Properties|={len(self.object_properties_in_signature())}'
f'\t|Data Properties|={len(self.data_properties_in_signature())}'
f'\n{self.manager}\tPath:{self.path}\tNew:{self.new}')

def save(self, path:str=None, document_iri: Optional[IRI] = None):
    """Write this ontology to the file at ``path`` through the ontology manager.

    Modelled on the OWLAPI example at
    https://github.com/phillord/owl-api/blob/b2a5bfb9a0c6730c8ff950776af8f9bf19c78eac/contract/src/test/java/org/coode/owlapi/examples/Examples.java#L206

    Args:
        path: Destination file path; must be a string.
        document_iri: Must be None for now; the document IRI is derived
            from ``path``.

    Raises:
        AssertionError: If ``path`` is not a string.
        NotImplementedError: If ``document_iri`` is not None.
    """
    assert isinstance(path,str), "Path must be a string"
    from java.io import File
    from org.semanticweb.owlapi.model import IRI as OWLAPIDocumentIRI
    # Java file handle for the destination.
    target_file = File(path)
    print(f"Saving Ontology into {path}")
    if document_iri is not None:
        raise NotImplementedError("document_iri must be None for the time being")
    target_iri = OWLAPIDocumentIRI.create(target_file.toURI())
    # Keep the format the manager currently associates with this ontology.
    ontology_format = self.manager.getOntologyFormat(self.owlapi_ontology)
    self.manager.saveOntology(self.owlapi_ontology, ontology_format, target_iri)

OWLREADY2_FACET_KEYS = MappingProxyType({
OWLFacet.MIN_INCLUSIVE: "min_inclusive",
Expand Down
13 changes: 11 additions & 2 deletions owlapy/owl_ontology_manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os.path
from typing import Union

import jpype
Expand Down Expand Up @@ -131,11 +132,19 @@ def create_ontology(self, iri: Union[IRI, str]) -> SyncOntology:
assert isinstance(iri, IRI), "iri either must be string or an instance of IRI Class"
return SyncOntology(self, iri, new=True)

def load_ontology(self, path: Union[IRI, str]) -> SyncOntology:
    """Load an existing ontology document from disk.

    Args:
        path: Location of the ontology document, given as a string or as
            an IRI whose ``str`` value is used as the file path (the same
            resolution SyncOntology applies when it opens the file).

    Returns:
        A SyncOntology bound to this manager, opened with ``new=False``.

    Raises:
        AssertionError: If ``path`` is neither a string nor an IRI, or if
            the resolved file path does not exist.
    """
    if isinstance(path, str):
        file_path = path
    else:
        assert isinstance(path, IRI), "path either must be string or an instance of IRI Class"
        file_path = path.str
    assert os.path.exists(file_path), "path does not lead to an RDF knowledge graph."
    return SyncOntology(self, path, new=False)

def get_owlapi_manager(self):
    """Return the object stored in ``self.owlapi_manager``."""
    owlapi_manager = self.owlapi_manager
    return owlapi_manager

def apply_change(self, change: AbstractOWLOntologyChange):
    """Not supported: always raises NotImplementedError, whatever ``change`` is."""
    message = "A change cannot be applied at the moment."
    raise NotImplementedError(message)

def getOntologyFormat(self,*args):
    """Forward all positional arguments to ``self.owlapi_manager.getOntologyFormat`` and return its result."""
    delegate = self.owlapi_manager
    return delegate.getOntologyFormat(*args)

def saveOntology(self,*args)->None:
    """Forward all positional arguments to ``self.owlapi_manager.saveOntology``; the result is discarded."""
    delegate = self.owlapi_manager
    delegate.saveOntology(*args)
2 changes: 1 addition & 1 deletion owlapy/owl_reasoner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ def __init__(self, ontology: Union[SyncOntology, str], reasoner="HermiT"):
# https://owlcs.github.io/owlapi/apidocs_5/org/semanticweb/owlapi/apibinding/OWLManager.html
self.manager = SyncOntologyManager()
# OWLOntology
self.ontology = self.manager.load_ontology(iri=ontology)
self.ontology = self.manager.load_ontology(ontology)

self._owlapi_manager = self.manager.get_owlapi_manager()
self._owlapi_ontology = self.ontology.get_owlapi_ontology()
Expand Down
45 changes: 30 additions & 15 deletions owlapy/util_owl_static_funcs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from .owl_ontology import Ontology
import rdflib

from .owl_ontology import Ontology, SyncOntology
from .owl_ontology_manager import OntologyManager
from .class_expression import OWLClassExpression, OWLClass
from .owl_individual import OWLNamedIndividual
Expand All @@ -8,6 +10,8 @@
from .owl_literal import OWLLiteral
import os
from typing import List
from tqdm import tqdm
import pandas as pd

def save_owl_class_expressions(expressions: OWLClassExpression | List[OWLClassExpression],
path: str = 'predictions',
Expand Down Expand Up @@ -60,14 +64,15 @@ def save_owl_class_expressions(expressions: OWLClassExpression | List[OWLClassEx
ontology.add_axiom(equivalent_classes_axiom)
ontology.save(path=path, inplace=False, rdf_format=rdf_format)

def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None):
def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None,rdf_format:str=None):
"""
Transfroms a CSV file to an RDF Knowledge Graph in RDF/XML format.
Args:
path_csv (str): X
path_kg (str): X
namespace (str): X
rdf_format(str):X
Raises:
AssertionError:
Expand All @@ -85,30 +90,40 @@ def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None):
>>> print("Dataset saved as iris_dataset.csv")
>>> csv_to_rdf_kg("iris_dataset.csv")
"""
from owlapy.owl_ontology_manager import SyncOntologyManager
assert path_csv is not None, "path cannot be None"
assert os.path.exists(path_csv), f"path **{path_csv}**does not exist."
assert path_kg is not None, "path_kg cannot be None"
assert path_kg is not None, f"path_kg cannot be None.Currently {path_kg}"
assert namespace is not None, "namespace cannot be None"
assert namespace[:7]=="http://", "First characters of namespace must be 'http://'"
import pandas as pd
# Read the CSV file
df = pd.read_csv(path_csv)
if rdf_format is None:
rdf_format="rdfxml"
else:
assert rdf_format in ["ntriples", "turtle"]

# Initialize an Ontology Manager.
manager = OntologyManager()
manager = SyncOntologyManager()
# Create an ontology given an ontology manager.
ontology:Ontology = manager.create_ontology(namespace)
ontology:SyncOntology = manager.create_ontology(namespace)

# Read the CSV file
df = pd.read_csv(path_csv)
# () Iterate over rows
for index, row in df.iterrows():
print(f"Row {index}:",end="\t")
print(row.to_dict())
i=OWLNamedIndividual(iri=f"{namespace}#{str(index)}")
for index, row in tqdm(df.iterrows()):
individual=OWLNamedIndividual(f"{namespace}#{str(index)}".replace(" ","_"))
for column_name, value in row.to_dict().items():
if isinstance(value, float):
axiom = OWLDataPropertyAssertionAxiom(subject=i,
property_=OWLDataProperty(iri=f"{namespace}#ID_{str(column_name)}"),
# Create an IRI for the predicate
str_property_iri=f"{namespace}#{column_name}".replace(" ","_")
str_property_iri=str_property_iri.replace("(","/")
str_property_iri = str_property_iri.replace(")", "")

axiom = OWLDataPropertyAssertionAxiom(subject=individual,
property_=OWLDataProperty(iri=str_property_iri),
object_=OWLLiteral(value=value))
ontology.add_axiom(axiom)

else:
raise NotImplementedError(f"How to represent value={value} has not been decided")
ontology.save(path=path_kg, inplace=False)

ontology.save(path=path_kg)
13 changes: 10 additions & 3 deletions tests/test_owl_static_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
import pandas as pd
import rdflib

from owlapy.owl_ontology_manager import SyncOntologyManager
from sklearn.datasets import load_iris
import pandas as pd

class TestRunningExamples:
def test_readme(self):
# Using owl classes to create a complex class expression
Expand All @@ -28,9 +32,12 @@ def test_readme(self):

def test_csv_to_kg(self):
    """Convert the iris CSV into an RDF knowledge graph and check the reloaded ABox size."""
    data = load_iris()
    # Convert to DataFrame
    df = pd.DataFrame(data.data, columns=data.feature_names)
    df['target'] = data.target
    # Save as CSV
    df.to_csv("iris_dataset.csv", index=False)
    assert len(df) == 150
    path_kg = "iris_kg.owl"
    # Run the conversion exactly once, then reload the written file.
    csv_to_rdf_kg(path_csv="iris_dataset.csv", path_kg=path_kg, namespace="http://example.com/society")
    onto = SyncOntologyManager().load_ontology(path_kg)
    # 150 rows x 5 columns (4 features + target) -> one assertion axiom per cell.
    assert len(onto.get_abox_axioms()) == 750

0 comments on commit 86933aa

Please sign in to comment.