Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pathfinder update #246

Merged
merged 2 commits into from
May 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 35 additions & 43 deletions src/utils/metakg/path_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import logging
from controller.metakg import MetaKG
from model import ConsolidatedMetaKGDoc
# import traceback
# import pprint

logger=logging.basicConfig(level=logging.INFO, filename="missing_bte.log")

Expand Down Expand Up @@ -39,22 +37,15 @@ def get_graph(self, query_data=None):
"""
index = ConsolidatedMetaKGDoc.Index.name
predicates = self.predicates

# Create a new directed graph
self.G = nx.DiGraph()

# Scroll through search results with direct call to index
for doc in MetaKG.get_all_via_scan(size=1000, query_data=query_data, index=index):
# Extract subject, object, and predicate from hit
subject = doc["_source"]["subject"]
object = doc["_source"]["object"]
predicate = doc["_source"]["predicate"]
# make list here to give back full results
api = [api_dict for api_dict in doc["_source"]["api"]]
# Add the subject & object to the graph
self.G.add_edge(subject, object)

# Add the predicate with api data to a dict based on the node relation
key = f"{subject}-{object}"
if key not in predicates:
predicates[key] = []
Expand All @@ -67,34 +58,36 @@ def build_edge_results(self, paths_data, data, api_details, source_node, target_
Adds edge details between two nodes to the paths data structure.

Parameters:
- paths_data (dict): The paths data structure being built up.
- data (dict): Data about the edge, including the predicate and APIs.
- api_details (bool): If True, include full API details; otherwise, include minimal API information.
- source_node (str): Identifier for the source node of the edge.
- target_node (str): Identifier for the target node of the edge.
- paths_data: dict
The paths data structure being built up.
- data: dict
Data about the edge, including the predicate and APIs.
- api_details: bool
If True, include full API details; otherwise, include minimal API information.
- source_node: str
Identifier for the source node of the edge.
- target_node: str
Identifier for the target node of the edge.

Returns:
- dict: The updated paths_data structure with the new edge added.
"""

apis = data["api"]
# Case: Give full api results in response
if api_details:
api_content = data["api"]
api_content = apis
else:
if bte:
api_content = [{"api": {"name": item["api"].get("name", None), "smartapi": {"id": item["api"]["smartapi"]["id"]}}, "bte":item["bte"]} for item in apis]
api_content = [{"api": {"name": item.get("name", None), "smartapi": {"id": item["smartapi"]["id"]}}, "bte":item["bte"]} for item in apis]
else:
api_content = [{"api": {"name": item["api"].get("name", None), "smartapi": {"id": item["api"]["smartapi"]["id"]}}} for item in apis]

paths_data["edges"].append(
{
"subject": source_node,
"object": target_node,
"predicate": data["predicate"],
"api": api_content,
}
)
api_content = [{"api": {"name": item.get("name", None), "smartapi": {"id": item["smartapi"]["id"]}}} for item in apis]

paths_data["edges"].append({
"subject": source_node,
"object": target_node,
"predicate": data["predicate"],
"api": api_content,
})

return paths_data

Expand All @@ -103,24 +96,26 @@ def get_paths(self, cutoff=2, api_details=False, predicate_filter=None, bte=Fals
Find all simple paths between expanded subjects and objects in the graph.

Parameters:
- expanded_fields: (dict) The expanded fields containing lists of subjects and objects.
- cutoff: (int, default=2) The maximum length for any path returned.
- api_details: (bool, default=False) If True, includes full details of the 'api' in the result.
- predicate_filter: (list, default=None) A list of predicates to filter the results by.
- cutoff: int (default=2)
The maximum length for any path returned.
- api_details: bool (default=False)
If True, includes full details of the 'api' in the result.
- predicate_filter: list (default=None)
A list of predicates to filter the results by.

Returns:
- all_paths_with_edges: (list of dict) A list containing paths and their edge information for all subject-object pairs.
- all_paths_with_edges: list of dict
A list containing paths and their edge information for all subject-object pairs.
"""

all_paths_with_edges = []

# Convert predicate_filter to a set for faster lookups if it's not None
# Predicate Filter Setup
predicate_filter_set = set(predicate_filter) if predicate_filter else None
# Add predicates from expanded_fields['predicate'] if it exists and is not None
if 'predicate' in self.expanded_fields and self.expanded_fields['predicate']:
predicate_filter_set.update(self.expanded_fields['predicate'])

# Iterate over all combinations of subjects and objects
# Graph iteration over subject-object pairs
for subject in self.expanded_fields["subject"]:
for object in self.expanded_fields["object"]:
try:
Expand All @@ -129,24 +124,21 @@ def get_paths(self, cutoff=2, api_details=False, predicate_filter=None, bte=Fals
raw_paths = nx.all_simple_paths(self.G, source=subject, target=object, cutoff=cutoff)
for path in raw_paths:
paths_data = {"path": path, "edges": []}
edge_added = False # Flag to track if any edge has been added
edge_added = False
for i in range(len(path) - 1):
source_node = path[i]
target_node = path[i + 1]
edge_key = f"{source_node}-{target_node}"
edge_data = self.predicates.get(edge_key, [])

for data in edge_data:
# Case: Filter edges based on predicate
if predicate_filter_set and data["predicate"] not in predicate_filter_set:
continue # Skip this edge
continue
paths_data = self.build_edge_results(paths_data, data, api_details, source_node, target_node, bte)
edge_added = True # Mark that we've added at least one edge
if edge_added: # Only add paths_data if at least one edge was added
edge_added = True
if edge_added:
all_paths_with_edges.append(paths_data)
except Exception as e:
# print(f"Error: {e} {e.args}")
# print(traceback.format_exc())
continue # Explicitly continue to the next subject-object pair
continue

return all_paths_with_edges
Loading