initial commit for owl_neural_reasoners package + example script

Dependencies added: Ontolearn (for the example script) and dicee (for KGE)
dice-group · Dec 4, 2024 · 2c5c4f7 · 2c5c4f7
1 parent 7972d4f
commit 2c5c4f7
Show file tree

Hide file tree

Showing 6 changed files with 754 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -148,4 +148,7 @@ cython_debug/
 .vscode/
 
 # Project related files
-KGs/
+KGs/
+KGs_*
+ALCQHI_Retrieval_Results.csv
+checkpoints/
diff --git a/examples/neural_reasoner_retrieval.py b/examples/neural_reasoner_retrieval.py
@@ -0,0 +1,287 @@
+"""$ python examples/neural_reasoner_retrieval.py --path_kg KGs/Family/father.owl
+
+##################################################
+Description of generated Concepts
+NC denotes the named concepts   |NC|=3
+NNC denotes the negated named concepts  |NNC|=3
+|NC UNION NC|=9
+|NC Intersection NC|=9
+NC* denotes the union of named concepts and negated named concepts      |NC*|=6
+|NC* UNION NC*|=36
+|NC* Intersection NC*|=36
+|exist R* NC*|=12
+|forall R* NC*|=12
+|Max Cardinalities|=36
+|Min Cardinalities|=36
+|exist R* Nominals|=40
+##################################################
+
+Expression: ∃ hasChild⁻.{heinz , stefan , martin} | Jaccard Similarity:1.0000 | F1 :1.0000 | Runtime Benefits:-0.003:   0%|                                                                                 | 0/232 [00:00<?, ?it/s]Invalid IRI detected: Nec8a193431bd4ba6a57485770b54e72b, error: string index out of range
+Expression: male ⊔ (¬female) | Jaccard Similarity:1.0000 | F1 :1.0000 | Runtime Benefits:-0.002: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 232/232 [00:00<00:00, 285.35it/s]
+Type
+OWLClass                    3
+OWLObjectAllValuesFrom     12
+OWLObjectComplementOf       3
+OWLObjectIntersectionOf    45
+OWLObjectMaxCardinality    36
+OWLObjectMinCardinality    36
+OWLObjectSomeValuesFrom    52
+OWLObjectUnionOf           45
+Name: Type, dtype: int64
+                         Jaccard Similarity   F1  Runtime Benefits
+Type
+OWLClass                                1.0  1.0         -0.001484
+OWLObjectAllValuesFrom                  1.0  1.0         -0.004071
+OWLObjectComplementOf                   1.0  1.0         -0.001592
+OWLObjectIntersectionOf                 1.0  1.0         -0.002914
+OWLObjectMaxCardinality                 1.0  1.0          0.000511
+OWLObjectMinCardinality                 1.0  1.0         -0.002798
+OWLObjectSomeValuesFrom                 1.0  1.0         -0.001701
+OWLObjectUnionOf                        1.0  1.0         -0.002939
+
+"""
+from owlapy.owl_neural_reasoners.owl_neural_reasoner import OWLNeuralReasoner
+from ontolearn.knowledge_base import KnowledgeBase
+from ontolearn.triple_store import TripleStore
+from ontolearn.utils import jaccard_similarity, f1_set_similarity, concept_reducer, concept_reducer_properties
+from owlapy.class_expression import (
+    OWLObjectUnionOf,
+    OWLObjectIntersectionOf,
+    OWLObjectSomeValuesFrom,
+    OWLObjectAllValuesFrom,
+    OWLObjectMinCardinality,
+    OWLObjectMaxCardinality,
+    OWLObjectOneOf,
+)
+import time
+from typing import Tuple, Set
+import pandas as pd
+from owlapy import owl_expression_to_dl
+from itertools import chain
+from argparse import ArgumentParser
+import os
+from tqdm import tqdm
+import random
+import itertools
+import ast
+# Set pandas options to ensure full output
+pd.set_option('display.max_rows', None)
+pd.set_option('display.max_columns', None)
+pd.set_option('display.width', None)
+pd.set_option('display.colheader_justify', 'left')
+pd.set_option('display.expand_frame_repr', False)
+
+def execute(args):
+    # (1) Initialize knowledge base.
+    assert os.path.isfile(args.path_kg)
+    if args.endpoint_triple_store:
+        symbolic_kb = TripleStore(url="http://localhost:3030/family")
+    else:
+        symbolic_kb = KnowledgeBase(path=args.path_kg)
+    # (2) Initialize Neural OWL Reasoner.
+    if args.path_kge_model:
+        neural_owl_reasoner = OWLNeuralReasoner(path_neural_embedding=args.path_kge_model, gamma=args.gamma)
+    else:
+        neural_owl_reasoner = OWLNeuralReasoner(path_of_kb=args.path_kg, gamma=args.gamma)
+    # Fix the random seed.
+    random.seed(args.seed)
+    ###################################################################
+    # GENERATE DL CONCEPTS TO EVALUATE RETRIEVAL PERFORMANCES
+    # (3) R: Extract object properties.
+    object_properties = {i for i in symbolic_kb.get_object_properties()}
+
+    # (3.1) Subsample if required.
+    if args.ratio_sample_object_prop and len(object_properties) > 0:
+        object_properties = {i for i in random.sample(population=list(object_properties),
+                                                      k=max(1, int(len(object_properties) * args.ratio_sample_object_prop)))}
+
+    # (4) R⁻: Inverse of object properties.
+    object_properties_inverse = {i.get_inverse_property() for i in object_properties}
+
+    # (5) R*: R UNION R⁻.
+    object_properties_and_inverse = object_properties.union(object_properties_inverse)
+    # (6) NC: Named owl concepts.
+    nc = {i for i in symbolic_kb.get_concepts()}
+
+    if args.ratio_sample_nc and len(nc) > 0:
+        # (6.1) Subsample if required.
+        nc = {i for i in random.sample(population=list(nc), k=max(1, int(len(nc) * args.ratio_sample_nc)))}
+
+    # (7) NC⁻: Complement of NC.
+    nnc = {i.get_object_complement_of() for i in nc}
+
+    # (8) NC*: NC UNION NC⁻.
+    nc_star = nc.union(nnc)
+    # (9) Retrieve 10 random Nominals.
+    if len(symbolic_kb.all_individuals_set())>args.num_nominals:
+        nominals = set(random.sample(symbolic_kb.all_individuals_set(), args.num_nominals))
+    else:
+        nominals = symbolic_kb.all_individuals_set()
+    # (10) All combinations of 3 for Nominals, e.g. {martin, heinz, markus}
+    nominal_combinations = set( OWLObjectOneOf(combination)for combination in itertools.combinations(nominals, 3))
+
+    # (11) NC UNION NC.
+    unions = concept_reducer(nc, opt=OWLObjectUnionOf)
+    # (12) NC INTERSECTION NC.
+    intersections = concept_reducer(nc, opt=OWLObjectIntersectionOf)
+    # (13) NC* UNION NC*.
+    unions_nc_star = concept_reducer(nc_star, opt=OWLObjectUnionOf)
+    # (14) NC* INTERACTION NC*.
+    intersections_nc_star = concept_reducer(nc_star, opt=OWLObjectIntersectionOf)
+    # (15) \exist r. C s.t. C \in NC* and r \in R* .
+    exist_nc_star = concept_reducer_properties(
+        concepts=nc_star,
+        properties=object_properties_and_inverse,
+        cls=OWLObjectSomeValuesFrom,
+    )
+    # (16) \forall r. C s.t. C \in NC* and r \in R* .
+    for_all_nc_star = concept_reducer_properties(
+        concepts=nc_star,
+        properties=object_properties_and_inverse,
+        cls=OWLObjectAllValuesFrom,
+    )
+    # (17) >= n r. C  and =< n r. C, s.t. C \in NC* and r \in R* .
+    min_cardinality_nc_star_1, min_cardinality_nc_star_2, min_cardinality_nc_star_3 = (
+        concept_reducer_properties(
+            concepts=nc_star,
+            properties=object_properties_and_inverse,
+            cls=OWLObjectMinCardinality,
+            cardinality=i,
+        )
+        for i in [1, 2, 3]
+    )
+    max_cardinality_nc_star_1, max_cardinality_nc_star_2, max_cardinality_nc_star_3 = (
+        concept_reducer_properties(
+            concepts=nc_star,
+            properties=object_properties_and_inverse,
+            cls=OWLObjectMaxCardinality,
+            cardinality=i,
+        )
+        for i in [1, 2, 3]
+    )
+    # (18) \exist r. Nominal s.t. Nominal \in Nominals and r \in R* .
+    exist_nominals = concept_reducer_properties(
+        concepts=nominal_combinations,
+        properties=object_properties_and_inverse,
+        cls=OWLObjectSomeValuesFrom,
+    )
+
+    ###################################################################
+
+    # Retrieval Results
+    def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
+        start_time = time.time()
+        return {i.str for i in retriever_func.individuals(c)}, time.time() - start_time
+
+    # () Collect the data.
+    data = []
+    # () Converted to list so that the progress bar works.
+    concepts = list(
+        chain(
+            nc,           # named concepts          (C)
+            nnc,                   # negated named concepts  (\neg C)
+            unions_nc_star,        # A set of Union of named concepts and negat
+            intersections_nc_star, #
+            exist_nc_star,
+            for_all_nc_star,
+            min_cardinality_nc_star_1, min_cardinality_nc_star_1, min_cardinality_nc_star_3,
+            max_cardinality_nc_star_1, max_cardinality_nc_star_2, max_cardinality_nc_star_3,
+            exist_nominals))
+    print("\n")
+    print("#"*50)
+    print("Description of generated Concepts")
+    print(f"NC denotes the named concepts\t|NC|={len(nc)}")
+    print(f"NNC denotes the negated named concepts\t|NNC|={len(nnc)}")
+    print(f"|NC UNION NC|={len(unions)}")
+    print(f"|NC Intersection NC|={len(intersections)}")
+
+    print(f"NC* denotes the union of named concepts and negated named concepts\t|NC*|={len(nc_star)}")
+    print(f"|NC* UNION NC*|={len(unions_nc_star)}")
+    print(f"|NC* Intersection NC*|={len(intersections_nc_star)}")
+    print(f"|exist R* NC*|={len(exist_nc_star)}")
+    print(f"|forall R* NC*|={len(for_all_nc_star)}")
+
+    print(f"|Max Cardinalities|={len(max_cardinality_nc_star_1) + len(max_cardinality_nc_star_2)+ len(max_cardinality_nc_star_3)}")
+    print(f"|Min Cardinalities|={len(min_cardinality_nc_star_1) + len(min_cardinality_nc_star_1)+ len(min_cardinality_nc_star_3)}")
+    print(f"|exist R* Nominals|={len(exist_nominals)}")
+    print("#" * 50,end="\n\n")
+
+
+    # () Shuffled the data so that the progress bar is not influenced by the order of concepts.
+
+    random.shuffle(concepts)
+    # check if csv arleady exists and delete it cause we want to override it
+    if os.path.exists(args.path_report):
+        os.remove(args.path_report)
+    file_exists = False
+    # () Iterate over single OWL Class Expressions in ALCQIHO
+    for expression in (tqdm_bar := tqdm(concepts, position=0, leave=True)):
+        retrieval_y: Set[str]
+        runtime_y: Set[str]
+        # () Retrieve the true set of individuals and elapsed runtime.
+        retrieval_y, runtime_y = concept_retrieval(symbolic_kb, expression)
+        # () Retrieve a set of inferred individuals and elapsed runtime.
+        retrieval_neural_y, runtime_neural_y = concept_retrieval(neural_owl_reasoner, expression)
+        # () Compute the Jaccard similarity.
+        jaccard_sim = jaccard_similarity(retrieval_y, retrieval_neural_y)
+        # () Compute the F1-score.
+        f1_sim = f1_set_similarity(retrieval_y, retrieval_neural_y)
+        # () Store the data.
+        df_row = pd.DataFrame(
+            [{
+                "Expression": owl_expression_to_dl(expression),
+                "Type": type(expression).__name__,
+                "Jaccard Similarity": jaccard_sim,
+                "F1": f1_sim,
+                "Runtime Benefits": runtime_y - runtime_neural_y,
+                "Runtime Neural": runtime_neural_y,
+                "Symbolic_Retrieval": retrieval_y,
+                "Symbolic_Retrieval_Neural": retrieval_neural_y,
+            }])
+        # Append the row to the CSV file
+        df_row.to_csv(args.path_report, mode='a', header=not file_exists, index=False)
+        file_exists = True
+        # () Update the progress bar.
+        tqdm_bar.set_description_str(
+            f"Expression: {owl_expression_to_dl(expression)} | Jaccard Similarity:{jaccard_sim:.4f} | F1 :{f1_sim:.4f} | Runtime Benefits:{runtime_y - runtime_neural_y:.3f}"
+        )
+    # () Read the data into pandas dataframe
+    df = pd.read_csv(args.path_report, index_col=0, converters={'Symbolic_Retrieval': lambda x: ast.literal_eval(x),
+                                                                'Symbolic_Retrieval_Neural': lambda x: ast.literal_eval(x)})
+    # () Assert that the mean Jaccard Similarity meets the threshold
+    assert df["Jaccard Similarity"].mean() >= args.min_jaccard_similarity
+
+    # () Ensure 'Symbolic_Retrieval_Neural' contains sets
+    x = df["Symbolic_Retrieval_Neural"].iloc[0]
+    assert isinstance(x, set)
+
+    # () Extract numerical features
+    numerical_df = df.select_dtypes(include=["number"])
+
+    # () Group by the type of OWL concepts
+    df_g = df.groupby(by="Type")
+    print(df_g["Type"].count())
+
+    # () Compute mean of numerical columns per group
+    mean_df = df_g[numerical_df.columns].mean()
+    print(mean_df)
+    return jaccard_sim, f1_sim
+
+def get_default_arguments():
+    parser = ArgumentParser()
+    parser.add_argument("--path_kg", type=str, default="KGs/Family/family-benchmark_rich_background.owl")
+    parser.add_argument("--path_kge_model", type=str, default=None)
+    parser.add_argument("--endpoint_triple_store", type=str, default=None)
+    parser.add_argument("--gamma", type=float, default=0.9)
+    parser.add_argument("--seed", type=int, default=1)
+    parser.add_argument("--ratio_sample_nc", type=float, default=1, help="To sample OWL Classes.")
+    parser.add_argument("--ratio_sample_object_prop", type=float, default=1, help="To sample OWL Object Properties.")
+    parser.add_argument("--min_jaccard_similarity", type=float, default=0.0, help="Minimum Jaccard similarity to be achieve by the reasoner")
+    parser.add_argument("--num_nominals", type=int, default=10, help="Number of OWL named individuals to be sampled.")
+
+    # H is obtained if the forward chain is applied on KG.
+    parser.add_argument("--path_report", type=str, default="ALCQHI_Retrieval_Results.csv")
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    execute(get_default_arguments())
diff --git a/owlapy/owl_neural_reasoners/__init__.py b/owlapy/owl_neural_reasoners/__init__.py
diff --git a/owlapy/owl_neural_reasoners/abstract.py b/owlapy/owl_neural_reasoners/abstract.py
@@ -0,0 +1,36 @@
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Generator
+from owlapy.class_expression import OWLClassExpression
+
+class AbstractNeuralReasoner(ABC):
+    """Abstract class for Neural Reasoners that operate on OWL Class Expressions using embeddings."""
+
+    @abstractmethod
+    def predict(self, h: str = None, r: str = None, t: str = None) -> List[Tuple[str, float]]:
+        """Predict triples (h, r, t) with a likelihood score."""
+        pass
+
+    @abstractmethod
+    def instances(self, expression: OWLClassExpression, **kwargs) -> Generator:
+        """Retrieve instances of a given OWL class expression."""
+        pass
+
+    @abstractmethod
+    def classes_in_signature(self) -> List:
+        """Retrieve all OWL classes in the knowledge base."""
+        pass
+
+    @abstractmethod
+    def individuals_in_signature(self) -> List:
+        """Retrieve all individuals in the knowledge base."""
+        pass
+
+    @abstractmethod
+    def object_properties_in_signature(self) -> List:
+        """Retrieve all object properties in the knowledge base."""
+        pass
+
+    @abstractmethod
+    def data_properties_in_signature(self) -> List:
+        """Retrieve all data properties in the knowledge base."""
+        pass