diff --git a/src/pages/explore_paths.py b/src/pages/explore_paths.py index a62173e..b3c307a 100644 --- a/src/pages/explore_paths.py +++ b/src/pages/explore_paths.py @@ -1,7 +1,6 @@ import dash -from dash import Dash, dash_table, dcc, html +from dash import Dash, html, dcc, Input, Output, Patch, callback, State, ctx, dash_table, dcc, html import dash_bootstrap_components as dbc -from dash.dependencies import Input, Output, State import pandas as pd import plotly.graph_objects as go @@ -12,8 +11,6 @@ from model.Alarms import Alarms from utils.parquet import Parquet -from utils.helpers import timer - def title(): return f"Search & explore" @@ -21,9 +18,18 @@ def title(): def description(q=None): - return f"Explore the 'Path changed' alarms" + return f"Explore the alarms related to traceroute paths" + pq = Parquet() +alarmsInst = Alarms() +# that period should match the one in the layout, +# as well as the range of the cached data + the period on /site page +dateFrom, dateTo = hp.defaultTimeRange(2) +frames, pivotFrames = alarmsInst.loadData(dateFrom, dateTo) +selected_keys = ['path changed between sites', 'path changed', 'ASN path anomalies'] +changeDf = pq.readFile('parquet/prev_next_asn.parquet') +asn_anomalies = pq.readFile('parquet/frames/ASN_path_anomalies.parquet') dash.register_page( __name__, @@ -34,17 +40,20 @@ def description(q=None): def layout(**other_unknown_query_strings): - period = hp.defaultTimeRange(days=3, datesOnly=True) + global frames, pivotFrames, alarmsInst, selected_keys, changeDf + period_to_display = hp.defaultTimeRange(days=2, datesOnly=True) + sitesDropdownData, asnsDropdownData, sankey_fig, dataTables = load_initial_data(selected_keys, changeDf) + heatmap_fig = create_anomalies_heatmap() return dbc.Row([ dbc.Row([ dbc.Col([ html.Div([ html.Div([ html.H1(f"Short term path deviations between sites"), - html.P('The data is based on the alarms of type "path changed"', style={"font-size": "1.2rem"}) + html.P('The plot shows how ASNs were replaced in the period of 2 days. The data is based on the alarms of type "path changed"', style={"font-size": "1.2rem"}) ], className="l-h-3 p-2"), dcc.Loading( - html.Div(id="asn-sankey"), color='#00245A'), + dcc.Graph(figure=sankey_fig, id="asn-sankey"), color='#00245A'), ], className="boxwithshadow page-cont ml-1 p-1") ], xl=6, lg=12, md=12, sm=12, className=" mb-1 flex-grow-1", ), @@ -52,10 +61,10 @@ def layout(**other_unknown_query_strings): html.Div(id="asn-alarms-container", children=[ html.Div([ html.H1(f"ASN path anomalies"), - html.P('The data is based on the alarms of type "ASN path anomalies', style={"font-size": "1.2rem"}) + html.P('The plot shows new ASNs that appeared between two sites. The data is based on the alarms of type "ASN path anomalies"', style={"font-size": "1.2rem"}) ], className="l-h-3 p-2"), dcc.Loading( - html.Div(create_anomalies_heatmap(period), id="asn-alarms-heatmap", style={"max-width": "1000px", "margin": "0 auto"}), + dcc.Graph(figure=heatmap_fig, id="asn-heatmap", style={"max-width": "1000px", "margin": "0 auto"}), color='#00245A') ], className="boxwithshadow page-cont ml-1 p-1") ], xl=6, lg=12, md=12, sm=12, className="mb-1 flex-grow-1") @@ -67,7 +76,7 @@ def layout(**other_unknown_query_strings): html.H1(f"Search the \"Path changed\" alarms", className="l-h-3 pl-2"), html.P( - f'Alarms generated in the period: {period[0]} - {period[1]} ', + f'Alarms generated in the period: {period_to_display[0]} - {period_to_display[1]} ', style={"padding-left": "1.5%", "font-size": "14px"}) ], align="center", className="text-left pair-details rounded-border-1"), ], justify="start", align="center"), @@ -76,14 +85,14 @@ def layout(**other_unknown_query_strings): html.Br(), dbc.Row([ dbc.Col([ - dcc.Dropdown(multi=True, id='paths-sites-dropdown', + dcc.Dropdown(multi=True, id='paths-sites-dropdown', options=sitesDropdownData, placeholder="Search for a site"), ]), ]), html.Br(), dbc.Row([ dbc.Col([ - dcc.Dropdown(multi=True, id='paths-asn-dropdown', + dcc.Dropdown(multi=True, id='paths-asn-dropdown', options=asnsDropdownData, placeholder="Search ASNs"), ]), ]), @@ -104,7 +113,7 @@ def layout(**other_unknown_query_strings): html.Hr(className="my-2"), html.Br(), dcc.Loading( - html.Div(id='paths-results-table'), + html.Div(id='paths-results-table', children=dataTables), style={'height': '0.5rem'}, color='#00245A') ], className="m-2"), ], className="p-2 site boxwithshadow page-cont mb-1 g-0", justify="center", align="center"), @@ -127,95 +136,195 @@ def colorMap(eventTypes): return paletteDict +def load_initial_data(selected_keys, changeDf): + sitesDropdownData = [] + asnsDropdownData = [] + anomalous_asns = [] + dataTables = [] + + print(len(changeDf)) + + for event in sorted(selected_keys): + if event in frames.keys(): + dataTables.append(generate_tables(frames[event], pivotFrames[event], event, alarmsInst)) + + changeDf['jumpedFrom'] = changeDf['jumpedFrom'].fillna(0).astype(int) + changeDf['diff'] = changeDf['diff'].astype(int) + + for event in sorted(selected_keys): + df = pivotFrames[event] + + if 'asn_list' in df.columns: + anomalous_asns = list(df['asn_list'].explode().unique()) + + if len(df) > 0: + dataTables.append(generate_tables(frames[event], df, event, alarmsInst)) + + sortedDf = changeDf[changeDf['jumpedFrom'] > 0].sort_values('count') + asnsDropdownData = list(set(sortedDf['diff'].unique().tolist() + + sortedDf['jumpedFrom'].unique().tolist())) + asnsDropdownData = list(set(asnsDropdownData + anomalous_asns)) if 'anomalous_asns' in locals() else asnsDropdownData + asnsDropdownData = sorted(asnsDropdownData) + + for s in sorted(pivotFrames['path changed'].tag.unique().tolist()): + sitesDropdownData.append({"label": s.upper(), "value": s.upper()}) + + changeDf.loc[changeDf['jumpedFrom'] == 0] = 'No data' + fig = buildSankey([], [], changeDf) + + return [sitesDropdownData, asnsDropdownData, fig, dataTables] + + @dash.callback( - [ - Output("paths-sites-dropdown", "options"), - Output("paths-asn-dropdown", "options"), - Output('asn-sankey', 'children'), + Output('asn-sankey', 'figure'), + Output('asn-heatmap', 'figure'), Output('paths-results-table', 'children'), - Output('asn-alarms-container', 'style') - ], [ Input("search-button", "n_clicks"), - Input("paths-asn-dropdown", "search_value"), - Input("paths-asn-dropdown", "value"), - Input("paths-sites-dropdown", "search_value"), - Input("paths-sites-dropdown", "value"), ], - State("paths-sites-dropdown", "value"), - State("paths-asn-dropdown", "value") + [ + State("paths-asn-dropdown", "value"), + State("paths-sites-dropdown", "value"), + ], + prevent_initial_call=True ) -def update_output(n_clicks, asn, asnState, sites, sitesState, sitesStateValue, asnStateValue): +def update_figures(n_clicks, asnStateValue, sitesStateValue): + if n_clicks is not None: + sitesState = sitesStateValue if sitesStateValue else [] + asnState = asnStateValue if asnStateValue else [] + global changeDf, asn_anomalies - # that period should match the one in the layout, - # as well as the range of the cached data - period = hp.defaultTimeRange(3) + sankey_fig = buildSankey(sitesState, asnState, changeDf) + heatmap_fig = create_anomalies_heatmap(selected_asns=asnState, selected_sites=sitesState) + datatables = create_data_tables(sitesState, asnState) + return sankey_fig, heatmap_fig, datatables - # Load all data initially - if n_clicks is None: - sitesState = [] - asnState = [] - else: - sitesState = [] if sitesStateValue is None else sitesStateValue - asnState = [] if asnStateValue is None else asnStateValue + return dash.no_update + + +def filterASN(df, selected_asns=[], selected_sites=[]): - alarmsInst = Alarms() - frames, pivotFrames = alarmsInst.loadData(period[0], period[1]) + if selected_asns: + s = df.apply(lambda x: pd.Series(x['asn_list']), axis=1).stack().reset_index(level=1, drop=True) + s.name = 'asn' + df = df.join(s) + df = df[df['asn'].isin(selected_asns)] + df = df.drop('asn', axis=1).drop_duplicates(subset=['alarm_id']) + if selected_sites: + df = df[(df['src_netsite'].isin(selected_sites)) | (df['dest_netsite'].isin(selected_sites))] + + return df + + +def create_data_tables(sitesState, asnState): + global selected_keys, pivotFrames dataTables = [] - sitesDropdownData = [] + for event in sorted(selected_keys): + df = pivotFrames[event] + + df = df[df['tag'].isin(sitesState)] if len(sitesState) > 0 else df + if 'diff' in df.columns and len(asnState) > 0: + df = df[df['diff'].isin(asnState)] + elif 'asn' in df.columns and len(asnState) > 0: + df = df[df['asn'].isin(asnState)] + elif 'asn_list' in df.columns and len(asnState) > 0: + df = df[df['asn_list'].isin(asnState)] + + if 'src_site' in df.columns and 'dest_site' in df.columns and len(sitesState) > 0: + df = df[(df['src_site'].isin(sitesState)) | (df['dest_site'].isin(sitesState))] - if 'path changed between sites' in frames.keys() and 'path changed' in frames.keys(): - selected_keys = ['path changed between sites', 'path changed', 'ASN path anomalies'] - frames = {key: frames[key] for key in selected_keys if key in frames} - pivotFrames = {key: pivotFrames[key] for key in selected_keys if key in pivotFrames} + if len(df) > 0: + dataTables.append(generate_tables(frames[event], df, event, alarmsInst)) - df = pivotFrames['path changed between sites'] - scntdf = df[df['tag'] != ''].groupby('tag')[['id']].count().reset_index().rename(columns={'id': 'cnt', 'tag': 'site'}) + if len(dataTables)==0: + dataTables.append(html.P(f'There are no alarms related to the selected criteria', + style={"padding-left": "1.5%", "font-size": "14px"})) - # sites - graphData = scntdf.copy() - graphData = graphData[graphData['site'].isin(sitesState)] + return html.Div(dataTables) - for s in sorted(pivotFrames['path changed'].tag.unique().tolist()): - sitesDropdownData.append({"label": s.upper(), "value": s.upper()}) - # data tables - for event in sorted(['path changed', 'path changed between sites', 'ASN path anomalies']): - df = pivotFrames[event] + +def create_anomalies_heatmap(selected_asns=[], selected_sites=[]): + global asn_anomalies, dateFrom, dateTo + df = asn_anomalies.copy() + df = df[df['to_date'] >= dateFrom] + df = filterASN(df, selected_asns=selected_asns, selected_sites=selected_sites) + + if len(df) > 0: + # Create a summary table with counts and ASN list per IPv6 and IPv4 + heatmap_summary = df.groupby(['src_netsite', 'dest_netsite', 'ipv6']).agg( + asn_details=('asn_list', lambda x: [item for sublist in x for item in set(sublist)]) + ).reset_index() + + # heatmap_summary['asn_details'] = heatmap_summary['asn_details'].apply(lambda x: ', '.join(map(str, x))) + heatmap_summary['asn_details'] = heatmap_summary['asn_details'].apply(lambda x: list(set(x))) + heatmap_summary['count'] = heatmap_summary['asn_details'].apply(len) + + distinct_pairs = heatmap_summary[['src_netsite', 'dest_netsite']].drop_duplicates() + + # Function to create a formatted string with ASN details and count the total unique ASNs + def format_asn_string_and_total_count(group): + ipv4_asns = group[group['ipv6'] == False]['asn_details'].explode().unique().tolist() + ipv6_asns = group[group['ipv6'] == True]['asn_details'].explode().unique().tolist() - df = df[df['tag'].isin(sitesState)] if len(sitesState) > 0 else df - if 'diff' in df.columns and len(asnState) > 0: - df = df[df['diff'].isin(asnState)] - elif 'asn' in df.columns and len(asnState) > 0: - df = df[df['asn'].isin(asnState)] - elif 'asn_list' in df.columns and len(asnState) > 0: - df = df[df['asn_list'].isin(asnState)] - anomalous_asns = list(df['asn_list'].explode().unique()) + # Create formatted ASN strings + formatted_str = "" + if ipv4_asns: + formatted_str += f"IPv4 -> {ipv4_asns}, \n " + if ipv6_asns: + formatted_str += f"IPv6 -> {ipv6_asns}" - if 'src_site' in df.columns and 'dest_site' in df.columns and len(sitesState) > 0: - df = df[(df['src_site'].isin(sitesState)) | (df['dest_site'].isin(sitesState))] + # Calculate total unique ASNs across both versions + total_unique_asns = len(set(ipv4_asns).union(set(ipv6_asns))) - if len(df) > 0: - dataTables.append(generate_tables(frames[event], df, event, alarmsInst)) - - if len(dataTables)==0: - dataTables.append(html.P(f'There are no alarms related to the selected criteria', - style={"padding-left": "1.5%", "font-size": "14px"})) - - dataTables = html.Div(dataTables) + return formatted_str.strip(), total_unique_asns + # Apply function to calculate ASNs and total count + distinct_pairs[['asn_details_str', 'total_count']] = distinct_pairs.apply( + lambda row: format_asn_string_and_total_count( + heatmap_summary[(heatmap_summary['src_netsite'] == row['src_netsite']) & + (heatmap_summary['dest_netsite'] == row['dest_netsite'])] + ), axis=1, result_type='expand' + ) - # graph - changeDf = pq.readFile('parquet/prev_next_asn.parquet') - asnsDropdownData = [] - container_style = {"display": "none"} + heatmap_pivot = distinct_pairs.pivot(index='src_netsite', columns='dest_netsite', values='total_count').fillna(0) - if len(changeDf) == 0: - fig = go.Figure() + # Create a custom data matrix for ASNs grouped by IPv6 for hover display + custom_data = distinct_pairs.pivot(index='src_netsite', columns='dest_netsite', values='asn_details_str').fillna('') + # Create the heatmap using Plotly + fig = px.imshow( + heatmap_pivot, + labels=dict(x="Destination", y="Source", color="Count"), + color_continuous_scale="BuPu", + text_auto=True + ) + + fig.update_traces(customdata=custom_data.values) + fig.update_traces( + hovertemplate="
".join([ + "Source: %{y}", + "Destination: %{x}", + "ASNs: %{customdata}" + ]) + ) + + # Update layout for better appearance + fig.update_layout( + # title="ASN path anomalies summary", + xaxis_title="Destination", + yaxis_title="Source", + xaxis=dict(title=dict(text="Destination", standoff=20, font=dict(size=16))), + height=600, + plot_bgcolor='rgba(0,0,0,0)', + ) + + else: + fig = px.imshow(pd.DataFrame()) fig.update_layout( + plot_bgcolor='rgba(0,0,0,0)', annotations=[ dict( - text="No data available for the selected criteria.", + text="No data available for the selected criteria", showarrow=False, font=dict(size=16), xref="paper", @@ -225,53 +334,9 @@ def update_output(n_clicks, asn, asnState, sites, sitesState, sitesStateValue, a ) ] ) - else: - changeDf['jumpedFrom'] = changeDf['jumpedFrom'].astype(int) - changeDf['diff'] = changeDf['diff'].astype(int) - - sortedDf = changeDf[changeDf['jumpedFrom'] > 0].sort_values('count') - asnsDropdownData = list(set(sortedDf['diff'].unique().tolist() + - sortedDf['jumpedFrom'].unique().tolist())) - asnsDropdownData = list(set(asnsDropdownData + anomalous_asns)) if 'anomalous_asns' in locals() else asnsDropdownData - - changeDf.loc[changeDf['jumpedFrom'] == 0] = 'No data' - fig = buildSankey(sitesState, asnState, changeDf) - container_style = {"display": "block"} - return [sitesDropdownData, asnsDropdownData, dcc.Graph(figure=fig), dataTables, container_style] - - -def create_anomalies_heatmap(period): - df = pq.readFile('parquet/frames/ASN_path_anomalies.parquet') - # df = df[df['to_date'] >= period[0]] - print(period, len(df)) - # Aggregate the number of connections for the heatmap - heatmap_data = df.groupby(['src_netsite', 'dest_netsite'])['asn_count'].sum().reset_index(name='count') - - # Pivot the data to create a matrix - heatmap_pivot = heatmap_data.pivot(index='src_netsite', columns='dest_netsite', values='count').fillna(0) - - # Plot with the custom color scale - fig = px.imshow( - heatmap_pivot, - labels=dict(x="Destination", y="Source", color="Count"), - color_continuous_scale="BuPu", - text_auto=True - ) - - # Update layout for better appearance - fig.update_layout( - xaxis_title="Destination", - yaxis_title="Source", - autosize=True, # Enable responsive autosizing - height=600, # Let the container height define the plot size - width=None, # Let the container width define the plot size - # margin=dict(t=10, b=180, l=70, r=0), - coloraxis_showscale=True - ) - - return dcc.Graph(figure=fig) + return fig # '''Takes the sites from the dropdown list and generates a Dash datatable''' @@ -335,6 +400,8 @@ def addNetworkOwners(df, labels): # ''' Prepares the data for the Sankey diagram''' def data4Sankey(sandf): + print(len(sandf)) + print(sandf) typical = [f't{n}' for n in sandf['jumpedFrom'].unique().tolist()] diff = [f'd{n}' for n in sandf['diff'].unique().tolist()] src = [f'src_{n}' for n in sandf['src_site'].unique().tolist()] @@ -376,7 +443,7 @@ def data4Sankey(sandf): # '''Creates a Sankey diagram''' def buildSankey(sitesState, asnState, df): - + print(len(df), sitesState, asnState) if len(sitesState) > 0 and len(asnState) > 0: df = df[((df['src_site'].isin(sitesState)) | (df['dest_site'].isin(sitesState))) & ((df['jumpedFrom'].isin(asnState)) | @@ -391,52 +458,69 @@ def buildSankey(sitesState, asnState, df): labels, sources, targets, vals, customdata = data4Sankey(df) - fig = go.Figure(data=[go.Sankey( - node=dict( - pad=15, - thickness=20, - line=dict(color="grey", width=0.5), - label=labels, - customdata=customdata, - hovertemplate='%{customdata}', - color="rgb(4, 111, 137)" - ), - link=dict( - # indices correspond to labels - source=sources, - target=targets, - value=vals - ))]) - - for x_coordinate, column_name in enumerate(["Source site", "Previously used ASN", "New ASN", "Desination site"]): - fig.add_annotation( - x=x_coordinate, - y=1.15, - xref="x", - yref="paper", - text=column_name, - showarrow=False, - font=dict( - size=16, + if len(df) > 0: + fig = go.Figure(data=[go.Sankey( + node=dict( + pad=15, + thickness=20, + line=dict(color="grey", width=0.5), + label=labels, + customdata=customdata, + hovertemplate='%{customdata}', + color="rgb(4, 111, 137)" ), - # align="center", - ) + link=dict( + # indices correspond to labels + source=sources, + target=targets, + value=vals + ))]) + + for x_coordinate, column_name in enumerate(["Source site", "Previously used ASN", "New ASN", "Desination site"]): + fig.add_annotation( + x=x_coordinate, + y=1.15, + xref="x", + yref="paper", + text=column_name, + showarrow=False, + font=dict( + size=16, + ), + # align="center", + ) - fig.update_layout( - height=600, - # title_text=f"Short term path deviations between sites", - xaxis={ - 'showgrid': False, # thin lines in the background - 'zeroline': False, # thick line at x=0 - 'visible': False, # numbers below - }, - yaxis={ - 'showgrid': False, # thin lines in the background - 'zeroline': False, # thick line at x=0 - 'visible': False, # numbers below - }, - # margin=dict(b=2, l=0, r=0), - plot_bgcolor='rgba(0,0,0,0)', - font_size=10) + fig.update_layout( + height=600, + # title_text=f"Short term path deviations between sites", + xaxis={ + 'showgrid': False, # thin lines in the background + 'zeroline': False, # thick line at x=0 + 'visible': False, # numbers below + }, + yaxis={ + 'showgrid': False, # thin lines in the background + 'zeroline': False, # thick line at x=0 + 'visible': False, # numbers below + }, + # margin=dict(b=2, l=0, r=0), + plot_bgcolor='rgba(0,0,0,0)', + font_size=10) + else: + fig = go.Figure(data=[go.Sankey()]) + fig.update_layout( + annotations=[ + dict( + text="No data available for the selected criteria or there was no AS number previuosly used at the position of the new ASN.", + showarrow=False, + font=dict(size=16), + xref="paper", + yref="paper", + x=0.5, + y=0.5, + ) + ] + ) return fig +