Skip to content

Commit

Permalink
de-duplication of ids
Browse files: browse the repository at this point in the history
  • Loading branch information
joker2411 committed Dec 12, 2024
1 parent 873b8dd commit e1c9a95
Showing 1 changed file with 25 additions and 12 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(
self.counter = 0
self.logger = logger
self.color_map = {}
self.cluster_specific_id_types = None
self.cluster_specific_id_types = set()

def get_edges_data(self, node_id: str) -> pd.DataFrame:
cluster_query_template = """
Expand Down Expand Up @@ -81,9 +81,11 @@ def get_edges_data(self, node_id: str) -> pd.DataFrame:
def create_graph_with_metadata(self, edges):
    """Build an undirected graph whose nodes are ``(id, id_type)`` tuples.

    Keying each node by the pair instead of the bare id keeps two
    identifiers that share the same value but have different types as
    separate nodes. Nodes are never added under the bare id: the
    visualisation code in this file reads ``node[0]`` and ``node[1]``,
    so every node must be a 2-tuple.

    Args:
        edges: Table exposing ``iterrows()`` with the columns ``id1``,
            ``id1_type``, ``id2`` and ``id2_type`` (presumably the
            DataFrame returned by ``get_edges_data`` -- confirm at the
            call site).

    Returns:
        An ``nx.Graph`` with one edge per row; every node carries an
        ``id_type`` attribute equal to the second element of its key.
    """
    G = nx.Graph()
    for _, row in edges.iterrows():
        node1 = (row["id1"], row["id1_type"])
        node2 = (row["id2"], row["id2_type"])
        # add_edge creates missing endpoints, so the attribute writes
        # below always hit an existing node.
        G.add_edge(node1, node2)
        G.nodes[node1]["id_type"] = row["id1_type"]
        G.nodes[node2]["id_type"] = row["id2_type"]
    return G

def compute_graph_metrics(
Expand Down Expand Up @@ -191,13 +193,18 @@ def _visualize_small_graph(self, G: nx.Graph, file_path: str):
for node, attrs in G.nodes(data=True):
color = self._get_node_color(attrs["id_type"], degrees[node], max_degree)
net.add_node(
node,
f"{node[0]}<br>{node[1]}", # complex nodes can't be used as ids in pyvis but they can be used in networkx
label=node[0],
color=color,
title=f"ID: {node}\nID-Type: {attrs['id_type']}\nDegree: {degrees[node]}",
title=f"ID: {node[0]}\nID-Type: {attrs['id_type']}\nDegree: {degrees[node]}",
)

for source, target in G.edges():
net.add_edge(source, target, color="#888888")
net.add_edge(
f"{source[0]}<br>{source[1]}",
f"{target[0]}<br>{target[1]}",
color="#888888",
)

net.set_options(
"""
Expand Down Expand Up @@ -263,18 +270,24 @@ def _visualize_large_graph(self, G: nx.Graph, file_path: str):
color = self._get_node_color(attrs["id_type"], degrees[node], max_degree)
size = 5 + (degrees[node] / max_degree) * 15 # Smaller node sizes
net.add_node(
node,
f"{node[0]}<br>{node[1]}", # complex nodes can't be used as ids in pyvis but they can be used in networkx
label=node[0],
x=int(x),
y=int(y),
physics=False, # Disable physics for pre-positioned nodes
size=size,
color=color,
title=f"ID: {node}\nID-Type: {attrs['id_type']}\nDegree: {degrees[node]}",
title=f"ID: {node[0]}\nID-Type: {attrs['id_type']}\nDegree: {degrees[node]}",
)

print("Adding edges...")
for source, target in G.edges():
net.add_edge(source, target, color="#88888844", width=0.5)
net.add_edge(
f"{source[0]}<br>{source[1]}",
f"{target[0]}<br>{target[1]}",
color="#88888844",
width=0.5,
)

net.set_options(
"""
Expand Down Expand Up @@ -393,11 +406,11 @@ def run(self):
break
print("\n\n")
metrics, G = self._analyse_cluster(user_input)
if metrics is None:
continue
self.cluster_specific_id_types = set(
nx.get_node_attributes(G, "id_type").values()
)
if metrics is None:
continue
cluster_summary = self.get_cluster_summary(metrics)
self.counter += 1
if metrics.get("num_nodes", 0) > 1000:
Expand Down

0 comments on commit e1c9a95

Please sign in to comment.