Skip to content

Commit

Permalink
Merge pull request #108 from dice-group/csv_kg
Browse files Browse the repository at this point in the history
From CSV to RDF Knowledge Graph
  • Loading branch information
Demirrr authored Nov 14, 2024
2 parents d951634 + 1447e01 commit 6a76e04
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 70 deletions.
1 change: 1 addition & 0 deletions owlapy/abstracts/abstract_owl_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def remove_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]):
"""
pass

@abstractmethod
def save(self, document_iri: Optional[IRI] = None):
"""Saves this ontology, using its IRI to determine where/how the ontology should be
saved.
Expand Down
52 changes: 14 additions & 38 deletions owlapy/iri.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""OWL IRI"""
import weakref
from abc import ABCMeta
from typing import Final, Union, overload
from typing import Final, Union
from weakref import WeakKeyDictionary

from owlapy import namespaces
Expand Down Expand Up @@ -49,45 +49,21 @@ def __init__(self, namespace: Union[str, Namespaces], remainder: str=""):
self._namespace = sys.intern(namespace)
self._remainder = remainder

@overload
@staticmethod
def create(namespace: Namespaces, remainder: str) -> 'IRI':
...

@overload
@staticmethod
def create(namespace: str, remainder: str) -> 'IRI':
"""Creates an IRI by concatenating two strings. The full IRI is an IRI that contains the characters in
namespace + remainder.
Args:
namespace: The first string.
remainder: The second string.
Returns:
An IRI whose characters consist of prefix + suffix.
"""
...

@overload
@staticmethod
def create(string: str) -> 'IRI':
"""Creates an IRI from the specified String.
Args:
string: The String that specifies the IRI.
Returns:
The IRI that has the specified string representation.
"""
...

@staticmethod
def create(string, remainder=None) -> 'IRI':
def create(iri:str | Namespaces, remainder:str=None) -> 'IRI':
assert isinstance(iri, str) | isinstance(iri, Namespaces), f"Input must be a string or an instance of Namespaces. Currently, {type(iri)}"
if remainder is not None:
return IRI(string, remainder)
index = 1 + max(string.rfind("/"), string.rfind(":"), string.rfind("#"))
return IRI(string[0:index], string[index:])
assert isinstance(remainder,str), f"Reminder must be string. Currently, {type(remainder)}"
return IRI(iri, remainder)
else:
assert isinstance(iri, str) and remainder is None, \
f"iri must be string if remainder is None. Currently, {type(iri)} and {type(remainder)}"
# Extract remainder from input string
assert "/" in iri, "Input must contain /"
# assert ":" in iri, "Input must contain :"
assert " " not in iri, f"Input must not contain whitespace. Currently:{iri}."
index = 1 + max(iri.rfind("/"), iri.rfind(":"), iri.rfind("#"))
return IRI(iri[0:index], iri[index:])

def __repr__(self):
return f"IRI({repr(self._namespace)}, {repr(self._remainder)})"
Expand Down
3 changes: 3 additions & 0 deletions owlapy/owl_axiom.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,9 @@ class OWLDataPropertyAssertionAxiom(OWLPropertyAssertionAxiom[OWLDataPropertyExp

def __init__(self, subject: OWLIndividual, property_: OWLDataPropertyExpression, object_: OWLLiteral,
             annotations: Optional[Iterable['OWLAnnotation']] = None):
    """Build a data property assertion after type-checking its three components.

    Args:
        subject: The individual the assertion is about.
        property_: The data property expression of the assertion.
        object_: The literal that is asserted for the subject.
        annotations: Optional annotations, forwarded unchanged to the parent initializer.

    Raises:
        AssertionError: If subject, property_ or object_ is not an instance of the
            expected class (checked in that order).
    """
    # Fail early with a descriptive message instead of deep inside the parent class.
    assert isinstance(subject, OWLIndividual), \
        f"subject must be an OWLIndividual. Currently, {subject} of {type(subject)}"
    assert isinstance(property_, OWLDataPropertyExpression), \
        f"property_ must be an OWLDataPropertyExpression. Currently, {type(property_)}"
    assert isinstance(object_, OWLLiteral), \
        f"object_ must be an OWLLiteral. Currently, {type(object_)}"
    super().__init__(subject, property_, object_, annotations)


Expand Down
59 changes: 38 additions & 21 deletions owlapy/owl_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
})



_VERSION_IRI: Final = IRI.create(namespaces.OWL, "versionIRI")

_M = TypeVar('_M', bound='OWLOntologyManager') # noqa: F821
Expand Down Expand Up @@ -983,20 +982,40 @@ def __init__(self, manager: _SM, path: Union[IRI, str], new: bool = False):
self.manager = manager
self.path = path
self.new = new


if isinstance(path, IRI):
file_path = path.str
else:
file_path = path
if new: # create new ontology
if isinstance(path, IRI):
self.owlapi_ontology = manager.get_owlapi_manager().createOntology(Stream.empty(),
self.owlapi_ontology = self.manager.get_owlapi_manager().createOntology(Stream.empty(),
owlapi_IRI.create(path.str))
else:
raise NotImplementedError("Cant initialize a new ontology using path. Use IRI instead")
else: # means we are loading an existing ontology
self.owlapi_ontology = manager.get_owlapi_manager().loadOntologyFromOntologyDocument(File(file_path))
self.owlapi_ontology = self.manager.get_owlapi_manager().loadOntologyFromOntologyDocument(File(file_path))
self.mapper = OWLAPIMapper(self)

def __eq__(self, other):
    """Return whether ``other`` is a SyncOntology with the same OWL API ontology ID.

    Args:
        other: Any object.

    Returns:
        False when ``other`` is not a SyncOntology; otherwise the result of comparing
        this ontology's ID with the other ontology's ID via the Java ``equals`` method.
    """
    if not isinstance(other, SyncOntology):
        return False
    # Compare *this* ontology's ID against the other's. Comparing other's ID with itself
    # would make every pair of SyncOntology objects equal.
    return self.owlapi_ontology.getOntologyID().equals(other.owlapi_ontology.getOntologyID())

def __hash__(self):
    """Return the Java ``hashCode`` of the wrapped ontology's ID as a Python int."""
    ontology_id = self.owlapi_ontology.getOntologyID()
    return int(ontology_id.hashCode())

def __repr__(self):
    """Return a tab-separated summary: axiom and signature counts, then manager, path and new flag."""
    # Collect the counts first (same order as they appear in the output).
    n_tbox = len(self.get_tbox_axioms())
    n_abox = len(self.get_abox_axioms())
    n_individuals = len(self.individuals_in_signature())
    n_classes = len(self.classes_in_signature())
    n_object_properties = len(self.object_properties_in_signature())
    n_data_properties = len(self.data_properties_in_signature())
    summary = (f'SyncOntology:'
               f'\t|Tbox|={n_tbox}'
               f'\t|Abox|={n_abox}'
               f'\t|Individuals|={n_individuals}'
               f'\t|Classes|={n_classes}'
               f'\t|Object Properties|={n_object_properties}'
               f'\t|Data Properties|={n_data_properties}')
    return summary + f'\n{self.manager}\tPath:{self.path}\tNew:{self.new}'

def classes_in_signature(self) -> Iterable[OWLClass]:
return self.mapper.map_(self.owlapi_ontology.getClassesInSignature())

Expand Down Expand Up @@ -1089,24 +1108,22 @@ def remove_axiom(self, axiom: Union[OWLAxiom, Iterable[OWLAxiom]]):
else:
self.owlapi_ontology.removeAxioms(self.mapper.map_(axiom))

def __eq__(self, other):
if isinstance(other, SyncOntology):
return other.owlapi_ontology.getOntologyID().equals(other.owlapi_ontology.getOntologyID())
return False

def __hash__(self):
return int(self.owlapi_ontology.getOntologyID().hashCode())

def __repr__(self):
return (f'SyncOntology:'
f'\t|Tbox|={len(self.get_tbox_axioms())}'
f'\t|Abox|={len(self.get_abox_axioms())}'
f'\t|Individuals|={len(self.individuals_in_signature())}'
f'\t|Classes|={len(self.classes_in_signature())}'
f'\t|Object Properties|={len(self.object_properties_in_signature())}'
f'\t|Data Properties|={len(self.data_properties_in_signature())}'
f'\n{self.manager}\tPath:{self.path}\tNew:{self.new}')

def save(self, path: str = None, document_iri: Optional[IRI] = None):
    """Write this ontology to a local file through the OWL API manager.

    Follows the OWL API example at
    https://github.com/phillord/owl-api/blob/b2a5bfb9a0c6730c8ff950776af8f9bf19c78eac/
    contract/src/test/java/org/coode/owlapi/examples/Examples.java#L206

    The ontology is saved in the format the manager reports for it
    (``self.manager.getOntologyFormat``); the target document IRI is derived from ``path``.

    Args:
        path: File-system path of the output document. Required, despite the ``None`` default.
        document_iri: Must be ``None``; a caller-supplied document IRI is not supported yet.

    Raises:
        AssertionError: If ``path`` is not a string.
        NotImplementedError: If ``document_iri`` is not ``None``.
    """
    assert isinstance(path, str), "Path must be a string"
    # Reject the unsupported argument before creating Java objects or announcing a save
    # that is never going to happen.
    if document_iri is not None:
        raise NotImplementedError("document_iri must be None for the time being")

    from java.io import File
    import org.semanticweb.owlapi.model.IRI

    # Create a file handle for the output document and derive its IRI.
    target_file = File(path)
    print(f"Saving Ontology into {path}")
    document_iri = org.semanticweb.owlapi.model.IRI.create(target_file.toURI())
    ontology_format = self.manager.getOntologyFormat(self.owlapi_ontology)
    self.manager.saveOntology(self.owlapi_ontology, ontology_format, document_iri)

OWLREADY2_FACET_KEYS = MappingProxyType({
OWLFacet.MIN_INCLUSIVE: "min_inclusive",
Expand Down
13 changes: 11 additions & 2 deletions owlapy/owl_ontology_manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os.path
from typing import Union

import jpype
Expand Down Expand Up @@ -131,11 +132,19 @@ def create_ontology(self, iri: Union[IRI, str]) -> SyncOntology:
assert isinstance(iri, IRI), "iri either must be string or an instance of IRI Class"
return SyncOntology(self, iri, new=True)

def load_ontology(self, iri: Union[IRI, str]) -> SyncOntology:
return SyncOntology(self, iri, new=False)
def load_ontology(self, path: Union[IRI, str]) -> SyncOntology:
    """Load an existing ontology document from disk.

    Args:
        path: Location of the ontology file, either as a string or as an IRI whose
            string form (``path.str``) is the file location.

    Returns:
        A SyncOntology bound to this manager, created with ``new=False``.

    Raises:
        AssertionError: If ``path`` is neither a string nor an IRI, or if the location
            does not exist on the file system.
    """
    # Accept exactly what the assertion message promises.
    assert isinstance(path, (str, IRI)), "path either must be string or an instance of IRI Class"
    file_path = path.str if isinstance(path, IRI) else path
    assert os.path.exists(file_path), "path does not lead to an RDF knowledge graph."
    return SyncOntology(self, path, new=False)

def get_owlapi_manager(self):
return self.owlapi_manager

def apply_change(self, change: AbstractOWLOntologyChange):
raise NotImplementedError("A change cannot be applied at the moment.")

def getOntologyFormat(self, *args):
    """Forward all positional arguments to the wrapped OWL API manager's ``getOntologyFormat``."""
    owlapi_manager = self.owlapi_manager
    return owlapi_manager.getOntologyFormat(*args)

def saveOntology(self, *args) -> None:
    """Forward all positional arguments to the wrapped OWL API manager's ``saveOntology``."""
    owlapi_manager = self.owlapi_manager
    owlapi_manager.saveOntology(*args)
2 changes: 1 addition & 1 deletion owlapy/owl_reasoner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ def __init__(self, ontology: Union[SyncOntology, str], reasoner="HermiT"):
# https://owlcs.github.io/owlapi/apidocs_5/org/semanticweb/owlapi/apibinding/OWLManager.html
self.manager = SyncOntologyManager()
# OWLOntology
self.ontology = self.manager.load_ontology(iri=ontology)
self.ontology = self.manager.load_ontology(ontology)

self._owlapi_manager = self.manager.get_owlapi_manager()
self._owlapi_ontology = self.ontology.get_owlapi_ontology()
Expand Down
82 changes: 76 additions & 6 deletions owlapy/util_owl_static_funcs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from .owl_ontology import Ontology
from .owl_ontology import Ontology, SyncOntology
from .owl_ontology_manager import OntologyManager
from typing import List
from .class_expression import OWLClassExpression, OWLClass
from .owl_individual import OWLNamedIndividual
from .iri import IRI
from .owl_axiom import OWLEquivalentClassesAxiom
from .owl_axiom import OWLEquivalentClassesAxiom, OWLDataPropertyAssertionAxiom
from .owl_property import OWLDataProperty
from .owl_literal import OWLLiteral
import os
from typing import List
from tqdm import tqdm
import pandas as pd

def save_owl_class_expressions(expressions: OWLClassExpression | List[OWLClassExpression],
path: str = 'predictions',
Expand Down Expand Up @@ -45,13 +51,77 @@ def save_owl_class_expressions(expressions: OWLClassExpression | List[OWLClassEx

namespace= 'https://dice-research.org/predictions#' if namespace is None else namespace
assert "#" == namespace[-1], "namespace must end with #"
# ()
# Initialize an Ontology Manager.
manager = OntologyManager()
# ()
# Create an ontology given an ontology manager.
ontology:Ontology = manager.create_ontology(namespace)
# () Iterate over concepts
for th, i in enumerate(expressions):
cls_a = OWLClass(IRI.create(namespace, str(th)))
equivalent_classes_axiom = OWLEquivalentClassesAxiom([cls_a, i])
ontology.add_axiom(equivalent_classes_axiom)
ontology.save(path=path, inplace=False, rdf_format=rdf_format)
ontology.save(path=path, inplace=False, rdf_format=rdf_format)

def csv_to_rdf_kg(path_csv: str = None, path_kg: str = None, namespace: str = None, rdf_format: str = None):
    """Transform a CSV file into an RDF knowledge graph and save it to ``path_kg``.

    Every CSV row becomes a named individual ``{namespace}#{row index}`` and every cell
    becomes a data property assertion whose property is ``{namespace}#{column name}``
    (spaces replaced by ``_``, ``(`` replaced by ``/``, ``)`` removed).

    Args:
        path_csv (str): Path of an existing CSV file; read with ``pandas.read_csv``.
        path_kg (str): Path of the file the knowledge graph is written to.
        namespace (str): Base IRI for individuals and properties; must start with ``http://``.
        rdf_format (str): ``"rdfxml"`` (used when ``None``), ``"ntriples"`` or ``"turtle"``.
            NOTE(review): the value is only validated here and is not forwarded to
            ``ontology.save``; the written format is whatever the ontology manager
            chooses (presumably RDF/XML, confirm).
    Raises:
        AssertionError: If a required argument is missing, ``path_csv`` does not exist,
            ``namespace`` does not start with ``http://`` or ``rdf_format`` is unknown.
        NotImplementedError: If a cell value is not a float.
    Example:
        >>> from sklearn.datasets import load_iris
        >>> import pandas as pd
        # Load the dataset
        >>> data = load_iris()
        # Convert to DataFrame
        >>> df = pd.DataFrame(data.data, columns=data.feature_names)
        >>> df['target'] = data.target
        # Save as CSV
        >>> df.to_csv("iris_dataset.csv", index=False)
        >>> print("Dataset saved as iris_dataset.csv")
        >>> csv_to_rdf_kg(path_csv="iris_dataset.csv", path_kg="iris_kg.owl", namespace="http://example.com/society")
    """
    from owlapy.owl_ontology_manager import SyncOntologyManager
    assert path_csv is not None, "path cannot be None"
    assert os.path.exists(path_csv), f"path **{path_csv}**does not exist."
    assert path_kg is not None, f"path_kg cannot be None.Currently {path_kg}"
    assert namespace is not None, "namespace cannot be None"
    assert namespace[:7] == "http://", "First characters of namespace must be 'http://'"
    if rdf_format is None:
        rdf_format = "rdfxml"
    else:
        # "rdfxml" is the default, so passing it explicitly must be accepted as well.
        assert rdf_format in ["rdfxml", "ntriples", "turtle"], \
            f"rdf_format must be one of 'rdfxml', 'ntriples', 'turtle'. Currently, {rdf_format}"

    # Initialize an Ontology Manager.
    manager = SyncOntologyManager()
    # Create an ontology given an ontology manager.
    ontology: SyncOntology = manager.create_ontology(namespace)

    # Read the CSV file
    df = pd.read_csv(path_csv)
    # One OWLDataProperty per column, created on first use and reused for all later rows.
    data_properties = {}
    # Iterate over rows; total lets tqdm show real progress for the generator.
    for index, row in tqdm(df.iterrows(), total=len(df)):
        individual = OWLNamedIndividual(f"{namespace}#{str(index)}".replace(" ", "_"))
        for column_name, value in row.to_dict().items():
            if not isinstance(value, float):
                raise NotImplementedError(f"How to represent value={value} has not been decided")

            data_property = data_properties.get(column_name)
            if data_property is None:
                # Create an IRI for the predicate
                str_property_iri = f"{namespace}#{column_name}".replace(" ", "_")
                str_property_iri = str_property_iri.replace("(", "/")
                str_property_iri = str_property_iri.replace(")", "")
                data_property = OWLDataProperty(iri=str_property_iri)
                data_properties[column_name] = data_property

            axiom = OWLDataPropertyAssertionAxiom(subject=individual,
                                                  property_=data_property,
                                                  object_=OWLLiteral(value=value))
            ontology.add_axiom(axiom)

    ontology.save(path=path_kg)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
include_package_data=True,
package_data={'owlapy': ['jar_dependencies/*.jar'],},
install_requires=[
"scikit-learn>=1.5.2",
"pandas>=1.5.0",
"requests>=2.32.3",
"rdflib>=6.0.2",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from owlapy.util_owl_static_funcs import save_owl_class_expressions
from owlapy.util_owl_static_funcs import save_owl_class_expressions, csv_to_rdf_kg
from owlapy.class_expression import OWLClass, OWLObjectIntersectionOf, OWLObjectSomeValuesFrom
from owlapy.owl_property import OWLObjectProperty
from owlapy import owl_expression_to_sparql, owl_expression_to_dl
from owlapy.owl_ontology_manager import OntologyManager
from owlapy.owl_axiom import OWLDeclarationAxiom, OWLClassAssertionAxiom
from owlapy.owl_individual import OWLNamedIndividual, IRI
from sklearn.datasets import load_iris
import pandas as pd
import rdflib

from owlapy.owl_ontology_manager import SyncOntologyManager
from sklearn.datasets import load_iris
import pandas as pd

class TestRunningExamples:
def test_readme(self):
# Using owl classes to create a complex class expression
Expand All @@ -22,4 +28,16 @@ def test_readme(self):
path="owl_class_expressions.owl",
rdf_format= 'rdfxml')
g=rdflib.Graph().parse("owl_class_expressions.owl")
assert len(g)==22
assert len(g)==22

def test_csv_to_kg(self):
    """Write the iris data to CSV, convert it to a knowledge graph, reload it and count ABox axioms."""
    iris = load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    frame['target'] = iris.target
    frame.to_csv("iris_dataset.csv", index=False)

    assert len(frame) == 150
    path_kg = "iris_kg.owl"
    csv_to_rdf_kg(path_csv="iris_dataset.csv", path_kg=path_kg, namespace="http://example.com/society")
    # Reload through a fresh manager so the check runs against what was written to disk.
    reloaded = SyncOntologyManager().load_ontology(path_kg)
    assert len(reloaded.get_abox_axioms()) == 750

0 comments on commit 6a76e04

Please sign in to comment.