19
19
import numpy as np
20
20
import matplotlib .pyplot as plt
21
21
22
+ import numpy as np
23
+ import matplotlib .pyplot as plt
24
+
22
25
from ipyfilechooser import FileChooser
23
26
from enum import Enum
24
27
from copy import copy
57
60
SPARQL_EXPLAIN_MODES , OPENCYPHER_EXPLAIN_MODES , GREMLIN_EXPLAIN_MODES , \
58
61
OPENCYPHER_PLAN_CACHE_MODES , OPENCYPHER_DEFAULT_TIMEOUT , OPENCYPHER_STATUS_STATE_MODES , \
59
62
normalize_service_name , NEPTUNE_DB_SERVICE_NAME , NEPTUNE_ANALYTICS_SERVICE_NAME , GRAPH_PG_INFO_METRICS , TRAVERSAL_DIRECTIONS , \
63
+ normalize_service_name , NEPTUNE_DB_SERVICE_NAME , NEPTUNE_ANALYTICS_SERVICE_NAME , GRAPH_PG_INFO_METRICS , TRAVERSAL_DIRECTIONS , \
60
64
GREMLIN_PROTOCOL_FORMATS , DEFAULT_HTTP_PROTOCOL , DEFAULT_WS_PROTOCOL , GRAPHSONV4_UNTYPED , \
61
65
GREMLIN_SERIALIZERS_WS , get_gremlin_serializer_mime , normalize_protocol_name , generate_snapshot_name )
62
66
from graph_notebook .network import SPARQLNetwork
@@ -3928,15 +3932,29 @@ def handle_opencypher_status(self, line, local_ns):
3928
3932
3929
3933
3930
3934
3931
- # %degreeDistribution. Takes traversalDirection, vertexLabels, edgeLabels parameters, and visualizes
3932
- # the degree distribution.
3935
+ # %degreeDistribution magic command.
3936
+ # It obtains the degree distribution of a graph as a visual histogram in the notebook. The histogram simply
3937
+ # shows the number of vertices with a given degree, where degree is shown on the x-axis and the count on y-axis.
3938
+ # It takes traversalDirection [both (default), inbound, outbound], vertexLabels [default is empty list],
3939
+ # edgeLabels parameters [default is empty list], and then gives the histogram for the specified degree
3940
+ # (both/in/out) distribution of the vertices in the graph filtered by the specified vertex labels and edge
3941
+ # labels. Parameters can be given as command-line arguments and/or through the dropdown widgets.
3942
+ # Example usages:
3943
+ # > %degreeDistribution
3944
+ # > %degreeDistribution --traversalDirection inbound
3945
+ # > %degreeDistribution --traversalDirection inbound --vertexLabels airport country
3946
+
3933
3947
# TODO: Error handling
3934
3948
3935
3949
@line_magic
3936
3950
@needs_local_scope
3937
3951
@display_exceptions
3938
3952
@neptune_graph_only
3939
3953
def degreeDistribution (self , line , local_ns : dict = None ):
3954
+ if not self .client .is_analytics_domain ():
3955
+ print ("This command is only supported for Neptune Analytics domains." )
3956
+ return
3957
+
3940
3958
parser = argparse .ArgumentParser ()
3941
3959
3942
3960
# Get the vertexLabels and edgeLabels from graph summary, to be shown in the widgets for selection.
@@ -3950,18 +3968,23 @@ def degreeDistribution(self, line, local_ns: dict = None):
3950
3968
print (f"Error retrieving graph summary: { e } " )
3951
3969
return
3952
3970
3953
- # traversalDirection parameter
3971
+ # traversalDirection: Type of the degree computed:
3972
+ # - inbound: Counts only the incoming edges for each vertex
3973
+ # - outbound: Counts only the outgoing edges for each vertex
3974
+ # - both [default]: Counts both the incoming and outgoing edges for each vertex.
3954
3975
parser .add_argument ('--traversalDirection' , nargs = '?' , type = str .lower , default = 'both' ,
3955
3976
help = f'Type of the degree for which the distribution is shown. Valid inputs: { TRAVERSAL_DIRECTIONS } . '
3956
3977
f'Default: both.' ,
3957
3978
choices = TRAVERSAL_DIRECTIONS )
3958
3979
3959
- # vertexLabels parameter
3980
+ # vertexLabels: List of the vertex labels, space separated, for which the degrees are computed:
3981
+ # - default value is empty list, which means the degrees are computed for any vertex label.
3960
3982
parser .add_argument ('--vertexLabels' , nargs = '*' , default = [],
3961
3983
help = "The vertex labels for which the induced graph is considered and the degree distribution is shown. "
3962
3984
"If not supplied, we will default to using all the vertex labels." )
3963
3985
3964
- # edgeLabels parameter
3986
+ # edgeLabels: List of the edge labels, space separated, for which the degrees are computed:
3987
+ # - default value is empty list, which means the degrees are computed for any edge label.
3965
3988
parser .add_argument ('--edgeLabels' , nargs = '*' , default = [],
3966
3989
help = "The edge labels for which the degree distribution is shown. If not supplied, "
3967
3990
"we will default to using all the edge labels." )
@@ -3973,7 +3996,8 @@ def degreeDistribution(self, line, local_ns: dict = None):
3973
3996
3974
3997
args = parser .parse_args (line .split ())
3975
3998
3976
- # Put the selection specified on the command line, if any; o.w. default is 'both'
3999
+ # If the traversalDirection parameter selection is specified on the command line, it is shown as the default
4000
+ # in the dropdown menu. Otherwise, the default in the dropdown is 'both'
3977
4001
td_val = args .traversalDirection
3978
4002
td_val = td_val .lower () if td_val else 'both'
3979
4003
@@ -3985,7 +4009,9 @@ def degreeDistribution(self, line, local_ns: dict = None):
3985
4009
value = td_val
3986
4010
)
3987
4011
3988
- # Put the vertex label(s) specified on the command line, if any; o.w. default is all the vertex labels (denoted by [])
4012
+ # Existing vertex labels in the graph are shown in the dropdown menu. If any vertex label is specified on
4013
+ # the command line, they are shown to be selected in the dropdown menu. Otherwise, no label is selected
4014
+ # in the dropdown menu, which means any label and all the labels are considered in the computation.
3989
4015
available_vertex_labels = sorted (available_vertex_labels )
3990
4016
selected_vlabels = args .vertexLabels if args .vertexLabels else []
3991
4017
vertex_labels_select = widgets .SelectMultiple (
@@ -3996,7 +4022,9 @@ def degreeDistribution(self, line, local_ns: dict = None):
3996
4022
value = selected_vlabels
3997
4023
)
3998
4024
3999
- # Put the edge label(s) specified on the command line, if any; o.w. default is all the edge labels (denoted by [])
4025
+ # Existing edge labels in the graph are shown in the dropdown menu. If any edge label is specified on
4026
+ # the command line, they are shown to be selected in the dropdown menu. Otherwise, no label is selected
4027
+ # in the dropdown menu, which means any label and all the labels are considered in the computation.
4000
4028
available_edge_labels = sorted (available_edge_labels )
4001
4029
selected_elabels = args .edgeLabels if args .edgeLabels else []
4002
4030
edge_labels_select = widgets .SelectMultiple (
@@ -4024,7 +4052,7 @@ def on_button_clicked(b):
4024
4052
4025
4053
# Call the function with the selected parameters
4026
4054
with output :
4027
- res = self .callDD (td , vlabels , elabels , local_ns )
4055
+ res = self .execute_degree_distribution_query (td , vlabels , elabels , local_ns )
4028
4056
4029
4057
# Retrieve the distribution
4030
4058
pairs = np .array (res ['results' ][0 ]['output' ]['distribution' ])
@@ -4041,7 +4069,7 @@ def on_button_clicked(b):
4041
4069
4042
4070
submit_button .on_click (on_button_clicked )
4043
4071
4044
- def callDD (self , td , vlabels , elabels , local_ns ):
4072
+ def execute_degree_distribution_query (self , td , vlabels , elabels , local_ns ):
4045
4073
query_parts = [f'traversalDirection: "{ td } "' ]
4046
4074
4047
4075
if vlabels :
@@ -4055,8 +4083,7 @@ def callDD (self, td, vlabels, elabels, local_ns):
4055
4083
# Construct the query
4056
4084
line = "CALL neptune.algo.degreeDistribution({" + ", " .join (query_parts ) + "}) YIELD output RETURN output"
4057
4085
4058
- # oc_rebuild_args = (f"{f'--store-to js --silent'}")
4059
- oc_rebuild_args = (f"{ f'--store-to js' } " )
4086
+ oc_rebuild_args = (f"{ f'--store-to js --silent' } " )
4060
4087
4061
4088
self .handle_opencypher_query (oc_rebuild_args , line , local_ns )
4062
4089
@@ -4068,14 +4095,17 @@ def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg,
4068
4095
min_deg = 0
4069
4096
4070
4097
def update_plot (scale_type , bin_type , bin_width , y_max , x_range , show_mindeg , show_maxdeg ):
4071
- marker_size = 50
4072
- alpha = 0.6
4098
+ # Start timing
4099
+ start_time = time .time ()
4100
+
4101
+ alpha = 1
4073
4102
plt .clf ()
4074
4103
4075
4104
# Get zero degree count
4076
4105
zero_idx = np .where (unique_degrees == 0 )[0 ]
4077
4106
zero_degree_count = counts [zero_idx [0 ]] if len (zero_idx ) > 0 else 0
4078
4107
4108
+ isolateds_exist = zero_degree_count > 0
4079
4109
# Get non-zero degrees and counts
4080
4110
mask = unique_degrees > 0
4081
4111
filtered_degrees = unique_degrees [mask ]
@@ -4085,8 +4115,8 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4085
4115
if len (filtered_degrees ) == 0 :
4086
4116
min_deg = 0
4087
4117
else :
4088
- min_deg = np .min (filtered_degrees )
4089
-
4118
+ min_deg = np .min (filtered_degrees )
4119
+
4090
4120
n_bins = 1
4091
4121
# Create histogram only if there are non-zero degree nodes
4092
4122
if len (filtered_degrees ) > 0 :
@@ -4111,20 +4141,32 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4111
4141
label = 'Raw' , color = '#000000' )
4112
4142
4113
4143
# Plot zero degree node count separately
4114
- if zero_degree_count > 0 :
4115
- plt .bar (0 , zero_degree_count , color = 'red' ,
4116
- label = 'Isolated' , alpha = alpha , width = 0.2 )
4144
+ if isolateds_exist :
4145
+ # Use a special x position for zero degree nodes in log scale
4146
+ zero_x_pos = 0.1 if scale_type in ['Log-Log' , 'Log(x)-Linear(y)' ] else 0
4147
+ plt .bar (zero_x_pos , zero_degree_count , color = 'red' ,
4148
+ label = 'Isolated' , alpha = alpha , width = 0.1 if scale_type in ['Log-Log' , 'Log(x)-Linear(y)' ] else 2 )
4117
4149
4118
4150
plt .xlim (x_range [0 ], x_range [1 ])
4119
4151
4152
+ if isolateds_exist :
4153
+ plt .xlim (x_range [0 ], x_range [1 ])
4154
+
4120
4155
# Set scales based on selection
4121
4156
if scale_type == 'Log-Log' :
4122
4157
plt .xscale ('log' )
4123
4158
plt .yscale ('log' )
4124
- plt .xlim (x_range [0 ]+ 1 , x_range [1 ])
4159
+ if isolateds_exist :
4160
+ plt .xlim (0.05 , x_range [1 ])
4161
+ else :
4162
+ plt .xlim (x_range [0 ]+ 0.05 , x_range [1 ])
4163
+
4125
4164
elif scale_type == 'Log(x)-Linear(y)' :
4126
4165
plt .xscale ('log' )
4127
- plt .xlim (x_range [0 ]+ 1 , x_range [1 ])
4166
+ if isolateds_exist :
4167
+ plt .xlim (0.05 , x_range [1 ])
4168
+ else :
4169
+ plt .xlim (x_range [0 ]+ 0.05 , x_range [1 ])
4128
4170
elif scale_type == 'Linear(x)-Log(y)' :
4129
4171
plt .yscale ('log' )
4130
4172
@@ -4143,13 +4185,21 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4143
4185
plt .legend ()
4144
4186
4145
4187
plt .title (f'Degree Distribution' )
4146
-
4188
+
4189
+ # End timing and display
4190
+ end_time = time .time ()
4191
+ runtime = end_time - start_time
4192
+
4147
4193
# Update statistics
4148
4194
with stats_output :
4149
4195
stats_output .clear_output (wait = True )
4150
4196
total_nodes = sum (counts )
4151
4197
total_edges = sum (d * c for d , c in zip (unique_degrees , counts )) // 2
4152
4198
avg_degree = sum (d * c for d , c in zip (unique_degrees , counts )) / total_nodes
4199
+
4200
+ print (f"Render time: { runtime :.3f} seconds" )
4201
+ print (f"--------------------" )
4202
+
4153
4203
print (f"Number of nodes: { total_nodes } " )
4154
4204
print (f"Number of edges: { total_edges } " )
4155
4205
print (f"Number of isolated nodes: { zero_degree_count } " )
@@ -4178,18 +4228,56 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4178
4228
description = 'Binning:'
4179
4229
)
4180
4230
4231
+ # Define a function to update bin_width_widget based on bin_type
4232
+ def update_bin_width_widget (change ):
4233
+ if change ['new' ] == 'Logarithmic' :
4234
+ # For logarithmic binning, use a FloatSlider with smaller values
4235
+ bin_width_widget .min = 1.00
4236
+ bin_width_widget .max = 10.00
4237
+ bin_width_widget .step = 0.01
4238
+ bin_width_widget .value = 1.00
4239
+ bin_width_widget .readout_format = '.2f'
4240
+ bin_width_widget .disabled = False
4241
+ elif change ['new' ] == 'Raw' :
4242
+ # For raw binning, disable the widget
4243
+ bin_width_widget .value = 1
4244
+ bin_width_widget .disabled = True
4245
+ else :
4246
+ # For linear binning, use integer values
4247
+ bin_width_widget .min = 1
4248
+ bin_width_widget .max = (max_deg + 2 )/ 10
4249
+ bin_width_widget .step = 1
4250
+ bin_width_widget .value = 1
4251
+ bin_width_widget .readout_format = 'd'
4252
+ bin_width_widget .disabled = False
4253
+
4254
+ def update_y_max_widget (change ):
4255
+ if bin_widget .value == 'Raw' :
4256
+ # For raw data, use the original max count
4257
+ y_max_widget .max = max_count * 1.1
4258
+ y_max_widget .value = max_count * 1.1
4259
+ elif bin_widget .value == 'Linear' :
4260
+ y_max_widget .max = max_count * bin_width_widget .value * 0.5
4261
+ y_max_widget .value = max_count * bin_width_widget .value * 0.5
4262
+ else : # 'Logarithmic'
4263
+ y_max_widget .max = max_count * (10 ** bin_width_widget .value ) * 0.5
4264
+ y_max_widget .value = max_count * (10 ** bin_width_widget .value ) * 0.5
4265
+
4181
4266
# Bin width widget: float slider, initially spanning [1, (max_deg + 2) / 10] with step 1
4182
- # TODO: if logarithmic binning, a much smaller range makes more sense
4183
- bin_width_widget = widgets .IntSlider (
4267
+ bin_width_widget = widgets .FloatSlider (
4184
4268
value = 1 ,
4185
4269
min = 1 ,
4186
- max = (max_deg + 2 )/ 2 ,
4270
+ max = (max_deg + 2 )/ 10 ,
4187
4271
step = 1 ,
4188
4272
description = 'Bin width:' ,
4189
4273
tooltip = ('For linear binning: actual width\n '
4190
4274
'For log binning: multiplicative factor' )
4191
4275
)
4192
4276
4277
+ # Observe changes to bin_width_widget and bin_widget
4278
+ bin_width_widget .observe (update_y_max_widget , names = 'value' )
4279
+ bin_widget .observe (update_y_max_widget , names = 'value' )
4280
+
4193
4281
# Upper limit for y-axis range, enables zooming (lower limit is always zero)
4194
4282
y_max_widget = widgets .IntSlider (
4195
4283
value = max_count * 1.1 ,
@@ -4203,7 +4291,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4203
4291
x_range_widget = widgets .FloatRangeSlider (
4204
4292
min = 0 ,
4205
4293
max = max_deg * 1.1 + 5 ,
4206
- value = [min , max ],
4294
+ value = [0 , max_deg * 1.1 + 5 ],
4207
4295
step = 1 ,
4208
4296
description = 'x-axis range:' ,
4209
4297
disabled = False ,
0 commit comments