Skip to content

Commit

Permalink
Merge pull request #110 from dice-group/csv_kg_categorical
Browse files Browse the repository at this point in the history
A CSV file is mapped to an RDF KG containing only literals
  • Loading branch information
Demirrr authored Nov 14, 2024
2 parents 6a76e04 + f582927 commit a82f76f
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 20 deletions.
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,34 @@ pip3 install owlapy
```shell
# To download RDF knowledge graphs
wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip && unzip KGs.zip
pytest -p no:warnings -x # Running 142 tests ~ 30 secs
pytest -p no:warnings -x # Running 147 tests ~ 35 secs
```

## Examples

### Sklearn to OWL Ontology

<details><summary> Click me! </summary>

```python
from owlapy.owl_ontology_manager import SyncOntologyManager
from owlapy.util_owl_static_funcs import csv_to_rdf_kg
import pandas as pd
from sklearn.datasets import load_iris

# Build a 150-row table: four float feature columns plus the integer class label.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
# Include the target column so that every row contributes five literals.
df['target'] = data.target
df.to_csv("iris_dataset.csv", index=False)
path_kg = "iris_kg.owl"
# Construct an RDF Knowledge Graph from a CSV file
csv_to_rdf_kg(path_csv="iris_dataset.csv", path_kg=path_kg, namespace="http://owlapy.com/iris")
onto = SyncOntologyManager().load_ontology(path_kg)
# One data property assertion per CSV cell: 150 rows x 5 columns = 750 axioms.
assert len(onto.get_abox_axioms()) == 750

```

</details>


### Exploring OWL Ontology

<details><summary> Click me! </summary>
Expand Down
2 changes: 1 addition & 1 deletion owlapy/owl_literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def parse_integer(self) -> int:
Returns:
An integer value that is represented by this literal.
"""
raise ValueError
return int(self._v)

def is_string(self) -> bool:
"""Whether this literal is typed as string."""
Expand Down
30 changes: 15 additions & 15 deletions owlapy/util_owl_static_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,14 @@ def save_owl_class_expressions(expressions: OWLClassExpression | List[OWLClassEx
ontology.add_axiom(equivalent_classes_axiom)
ontology.save(path=path, inplace=False, rdf_format=rdf_format)

def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None,rdf_format:str=None):
def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None):
"""
Transforms a CSV file into an RDF Knowledge Graph in RDF/XML format.
Args:
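path_csv (str): Path of the CSV file to read.
path_kg (str): Path under which the resulting knowledge graph is saved.
namespace (str): Base IRI used to build individual and property IRIs; must start with 'http://'.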
rdf_format(str):X
Raises:
AssertionError:
Expand All @@ -94,10 +93,6 @@ def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None,rdf_form
assert path_kg is not None, f"path_kg cannot be None.Currently {path_kg}"
assert namespace is not None, "namespace cannot be None"
assert namespace[:7]=="http://", "First characters of namespace must be 'http://'"
if rdf_format is None:
rdf_format="rdfxml"
else:
assert rdf_format in ["ntriples", "turtle"]

# Initialize an Ontology Manager.
manager = SyncOntologyManager()
Expand All @@ -106,22 +101,27 @@ def csv_to_rdf_kg(path_csv:str=None,path_kg:str=None,namespace:str=None,rdf_form

# Read the CSV file
df = pd.read_csv(path_csv)

# () Iterate over rows
for index, row in tqdm(df.iterrows()):
for index, row in (tqdm_bar := tqdm(df.iterrows()) ):
individual=OWLNamedIndividual(f"{namespace}#{str(index)}".replace(" ","_"))
tqdm_bar.set_description_str(f"Creating RDF Graph from Row:{index}")
# column_name becomes the predicate (a data property)
# value becomes the literal asserted for that data property
for column_name, value in row.to_dict().items():
if isinstance(value, float):
# Create an IRI for the predicate
str_property_iri=f"{namespace}#{column_name}".replace(" ","_")
str_property_iri=str_property_iri.replace("(","/")
str_property_iri = str_property_iri.replace(")", "")
# Create an IRI for the predicate
str_property_iri = f"{namespace}#{column_name}".replace(" ", "_")
str_property_iri = str_property_iri.replace("(", "/")
str_property_iri = str_property_iri.replace(")", "")

if isinstance(value, float) or isinstance(value, int) or isinstance(value, str):
axiom = OWLDataPropertyAssertionAxiom(subject=individual,
property_=OWLDataProperty(iri=str_property_iri),
object_=OWLLiteral(value=value))
ontology.add_axiom(axiom)

else:
raise NotImplementedError(f"How to represent value={value} has not been decided")

raise NotImplementedError(f"How to represent\n"
f"predicate=**{str_property_iri}**\n"
f"value=**{value}**\n"
f"has not been decided")
ontology.save(path=path_kg)
3 changes: 0 additions & 3 deletions tests/test_owl_static_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import rdflib

from owlapy.owl_ontology_manager import SyncOntologyManager
from sklearn.datasets import load_iris
import pandas as pd

class TestRunningExamples:
def test_readme(self):
Expand All @@ -35,7 +33,6 @@ def test_csv_to_kg(self):
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
df.to_csv("iris_dataset.csv", index=False)

assert len(df) == 150
path_kg = "iris_kg.owl"
csv_to_rdf_kg(path_csv="iris_dataset.csv", path_kg=path_kg, namespace="http://example.com/society")
Expand Down

0 comments on commit a82f76f

Please sign in to comment.