jni · jni · Nov 20, 2023 · Oct 16, 2023 · Oct 16, 2023 · Oct 16, 2023
diff --git a/benchmarks/bench_skan.py b/benchmarks/bench_skan.py
@@ -42,7 +42,7 @@ def bench_suite():
         skel_obj = csr.Skeleton(skeleton)
     times['skeleton object again'] = t_skeleton2[0]
     with timer() as t_summary:
-        summary = csr.summarize(skel_obj)
+        summary = csr.summarize(skel_obj, separator='_')
     times['compute per-skeleton statistics'] = t_summary[0]
     return times
 

diff --git a/doc/examples/complete_analysis.md b/doc/examples/complete_analysis.md
@@ -112,21 +112,21 @@ data['field number'] = data['filename'].apply(field_number)
 Next, we filter the branches by using the [*shape index*](http://scikit-image.org/docs/dev/api/skimage.feature.html#skimage.feature.shape_index). We have used a very simple method to extract skeletons (see [Getting started](../getting_started/getting_started)), which does an acceptable job but creates a lot of false branches. Since the goal of Skan is to analyse skeletons, rather than generate them, we attempt to filter the branches, and measure only those that look like ridges according to the shape index.
 
 ```{code-cell} ipython3
-ridges = ((data['mean-shape-index'] < 0.625) &
-          (data['mean-shape-index'] > 0.125))
+ridges = ((data['mean_shape_index'] < 0.625) &
+          (data['mean_shape_index'] > 0.125))
 ```
 
 For the same reason, we only look at junction-to-junction branches, which are more accurately identified by our method than junction-to-endpoint branches.
 
 ```{code-cell} ipython3
-j2j = data['branch-type'] == 2
+j2j = data['branch_type'] == 2
 datar = data.loc[ridges & j2j].copy()
 ```
 
 Finally, we make a new column of measurements in a more natural scale for our purpose.
 
 ```{code-cell} ipython3
-datar['branch distance (nm)'] = datar['branch-distance'] * 1e9
+datar['branch distance (nm)'] = datar['branch_distance'] * 1e9
 ```
 
 ## 3. Making the figure

diff --git a/doc/examples/visualizing_3d_skeletons.md b/doc/examples/visualizing_3d_skeletons.md
@@ -49,17 +49,17 @@ all_paths = [
 ```
 
 ```{code-cell} ipython3
-paths_table = skan.summarize(skeleton)
+paths_table = skan.summarize(skeleton, separator='_')
 ```
 
 ```{code-cell} ipython3
-paths_table['path-id'] = np.arange(skeleton.n_paths)
+paths_table['path_id'] = np.arange(skeleton.n_paths)
 ```
 
 First, we color by random path ID, showing each path in a distinct color using the matplotlib "tab10" qualitative palette. (Coloring by path ID directly results in "bands" of nearby paths receiving the same color.)
 
 ```{code-cell} ipython3
-paths_table['random-path-id'] = np.random.default_rng().permutation(skeleton.n_paths)
+paths_table['random_path_id'] = np.random.default_rng().permutation(skeleton.n_paths)
 ```
 
 ```{code-cell} ipython3
@@ -70,7 +70,7 @@ skeleton_layer = viewer.add_shapes(
         shape_type='path',
         properties=paths_table,
         edge_width=0.5,
-        edge_color='random-path-id',
+        edge_color='random_path_id',
         edge_colormap='tab10',
 )
 ```
@@ -85,9 +85,9 @@ napari.utils.nbscreenshot(viewer)
 We can also demonstrate that most of these branches are in one skeleton, with a few stragglers around the edges, by coloring by skeleton ID:
 
 ```{code-cell} ipython3
-skeleton_layer.edge_color = 'skeleton-id'
+skeleton_layer.edge_color = 'skeleton_id'
 # for now, we need to set the face color as well
-skeleton_layer.face_color = 'skeleton-id'
+skeleton_layer.face_color = 'skeleton_id'
 ```
 
 ```{code-cell} ipython3
@@ -99,10 +99,10 @@ napari.utils.nbscreenshot(viewer)
 Finally, we can color the paths by a numerical property, such as their length.
 
 ```{code-cell} ipython3
-skeleton_layer.edge_color = 'branch-distance'
+skeleton_layer.edge_color = 'branch_distance'
 skeleton_layer.edge_colormap = 'viridis'
 # for now, we need to set the face color as well
-skeleton_layer.face_color = 'branch-distance'
+skeleton_layer.face_color = 'branch_distance'
 skeleton_layer.face_colormap = 'viridis'
 ```
 

diff --git a/doc/getting_started/getting_started.md b/doc/getting_started/getting_started.md
@@ -136,7 +136,7 @@ Let's go back to the red blood cell image to illustrate this graph.
 
 ```{code-cell} ipython3
 from skan import Skeleton, summarize
-branch_data = summarize(Skeleton(skeleton0, spacing=spacing_nm))
+branch_data = summarize(Skeleton(skeleton0, spacing=spacing_nm), separator='_')
 branch_data.head()
 ```
 
@@ -156,7 +156,7 @@ Next come the coordinates in natural space, the Euclidean distance between the p
 This data table follows the "tidy data" paradigm, with one row per branch, which allows fast  exploration of branch statistics. Here, for example, we plot the distribution of branch lengths according to branch type:
 
 ```{code-cell} ipython3
-branch_data.hist(column='branch-distance', by='branch-type', bins=100);
+branch_data.hist(column='branch_distance', by='branch_type', bins=100);
 ```
 
 We can see that junction-to-junction branches tend to be longer than junction-to-endpoint and junction isolated branches, and that there are no cycles in our dataset.
@@ -165,7 +165,7 @@ We can also represent this visually with the `overlay_euclidean_skeleton`, which
 
 ```{code-cell} ipython3
 draw.overlay_euclidean_skeleton_2d(image0, branch_data,
-                                   skeleton_color_source='branch-type');
+                                   skeleton_color_source='branch_type');
 ```
 
 ## 2. Comparing different skeletons
@@ -194,7 +194,7 @@ def skeletonize(images, spacings_nm):
 
 
 skeletons = skeletonize(images, spacings_nm)
-tables = [summarize(Skeleton(skeleton, spacing=spacing))
+tables = [summarize(Skeleton(skeleton, spacing=spacing), separator='_')
           for skeleton, spacing in zip(skeletons, spacings_nm)]
 
 for filename, dataframe in zip(files, tables):
@@ -210,8 +210,8 @@ Now, however, we have a tidy data table with information about the sample origin
 ```{code-cell} ipython3
 import seaborn as sns
 
-j2j = (table[table['branch-type'] == 2].
-       rename(columns={'branch-distance':
+j2j = (table[table['branch_type'] == 2].
+       rename(columns={'branch_distance':
                        'branch distance (nm)'}))
 per_image = j2j.groupby('filename').median()
 per_image['infected'] = ['infected' if 'inf' in fn else 'normal'

diff --git a/src/skan/csr.py b/src/skan/csr.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import numpy as np
 import pandas as pd
 from scipy import sparse, ndimage as ndi
@@ -705,7 +707,8 @@ def summarize(
         skel: Skeleton,
         *,
         value_is_height: bool = False,
-        find_main_branch: bool = False
+        find_main_branch: bool = False,
+        separator: str | None = None,
         ) -> pd.DataFrame:
     """Compute statistics for every skeleton and branch in ``skel``.
 
@@ -722,56 +725,70 @@ def summarize(
         longest shortest path within a skeleton. This step is very expensive
         as it involves computing the shortest paths between all pairs of branch
         endpoints, so it is off by default.
+    separator : str, optional
+        Some column names are composite, e.g. ``'coord_src_0'``. The separator
+        argument allows users to configure which character is used to separate
+        the components. The default up to version 0.12 is '-', but will change
+        to '_' in version 0.13.
 
     Returns
     -------
     summary : pandas.DataFrame
         A summary of the branches including branch length, mean branch value,
         branch euclidean distance, etc.
     """
+    if separator is None:
+        warnings.warn(
+                "separator in column name will change to _ in version 0.13; "
+                "to silence this warning, use `separator='-'` to maintain "
+                "current behavior and use `separator='_'` to switch to the "
+                "new default behavior.",
+                FutureWarning,
+                )
+        separator = '-'
     summary = {}
     ndim = skel.coordinates.shape[1]
     _, skeleton_ids = csgraph.connected_components(skel.graph, directed=False)
     endpoints_src = skel.paths.indices[skel.paths.indptr[:-1]]
     endpoints_dst = skel.paths.indices[skel.paths.indptr[1:] - 1]
-    summary['skeleton-id'] = skeleton_ids[endpoints_src]
-    summary['node-id-src'] = endpoints_src
-    summary['node-id-dst'] = endpoints_dst
-    summary['branch-distance'] = skel.path_lengths()
+    summary['skeleton_id'] = skeleton_ids[endpoints_src]
+    summary['node_id_src'] = endpoints_src
+    summary['node_id_dst'] = endpoints_dst
+    summary['branch_distance'] = skel.path_lengths()
     deg_src = skel.degrees[endpoints_src]
     deg_dst = skel.degrees[endpoints_dst]
     kind = np.full(deg_src.shape, 2)  # default: junction-to-junction
     kind[(deg_src == 1) | (deg_dst == 1)] = 1  # tip-junction
     kind[(deg_src == 1) & (deg_dst == 1)] = 0  # tip-tip
     kind[endpoints_src == endpoints_dst] = 3  # cycle
-    summary['branch-type'] = kind
-    summary['mean-pixel-value'] = skel.path_means()
-    summary['stdev-pixel-value'] = skel.path_stdev()
+    summary['branch_type'] = kind
+    summary['mean_pixel_value'] = skel.path_means()
+    summary['stdev_pixel_value'] = skel.path_stdev()
     for i in range(ndim):  # keep loops separate for best insertion order
-        summary[f'image-coord-src-{i}'] = skel.coordinates[endpoints_src, i]
+        summary[f'image_coord_src_{i}'] = skel.coordinates[endpoints_src, i]
     for i in range(ndim):
-        summary[f'image-coord-dst-{i}'] = skel.coordinates[endpoints_dst, i]
+        summary[f'image_coord_dst_{i}'] = skel.coordinates[endpoints_dst, i]
     coords_real_src = skel.coordinates[endpoints_src] * skel.spacing
     for i in range(ndim):
-        summary[f'coord-src-{i}'] = coords_real_src[:, i]
+        summary[f'coord_src_{i}'] = coords_real_src[:, i]
     if value_is_height:
         values_src = skel.pixel_values[endpoints_src]
-        summary[f'coord-src-{ndim}'] = values_src
+        summary[f'coord_src_{ndim}'] = values_src
         coords_real_src = np.concatenate(
                 [coords_real_src, values_src[:, np.newaxis]],
                 axis=1,
-                )  # yapf: ignore
+                )
     coords_real_dst = skel.coordinates[endpoints_dst] * skel.spacing
     for i in range(ndim):
-        summary[f'coord-dst-{i}'] = coords_real_dst[:, i]
+        summary[f'coord_dst_{i}'] = coords_real_dst[:, i]
     if value_is_height:
         values_dst = skel.pixel_values[endpoints_dst]
-        summary[f'coord-dst-{ndim}'] = values_dst
+        summary[f'coord_dst_{ndim}'] = values_dst
         coords_real_dst = np.concatenate(
                 [coords_real_dst, values_dst[:, np.newaxis]],
                 axis=1,
-                )  # yapf: ignore
-    summary['euclidean-distance'] = (
+                )
+    summary['euclidean_distance'] = (
             np.sqrt((coords_real_dst - coords_real_src)**2
                     @ np.ones(ndim + int(value_is_height)))
             )
@@ -780,6 +797,7 @@ def summarize(
     if find_main_branch:
         # define main branch as longest shortest path within a single skeleton
         df['main'] = find_main_branches(df)
+    df.rename(columns=lambda s: s.replace('_', separator), inplace=True)
     return df
 
 
@@ -1051,10 +1069,10 @@ def _simplify_graph(skel):
         # don't reduce
         return skel.graph, np.arange(skel.graph.shape[0])
 
-    summary = summarize(skel)
-    src = np.asarray(summary['node-id-src'])
-    dst = np.asarray(summary['node-id-dst'])
-    distance = np.asarray(summary['branch-distance'])
+    summary = summarize(skel, separator='_')
+    src = np.asarray(summary['node_id_src'])
+    dst = np.asarray(summary['node_id_dst'])
+    distance = np.asarray(summary['branch_distance'])
 
     # to reduce the size of simplified graph
     nodes = np.unique(np.append(src, dst))

diff --git a/src/skan/draw.py b/src/skan/draw.py
@@ -100,7 +100,7 @@ def overlay_euclidean_skeleton_2d(
         stats,
         *,
         image_cmap=None,
-        skeleton_color_source='branch-type',
+        skeleton_color_source='branch_type',
         skeleton_colormap='viridis',
         axes=None
         ):
@@ -124,7 +124,7 @@ def overlay_euclidean_skeleton_2d(
 
         - skeleton-id: each individual skeleton (connected component) will
           have a different colour.
-        - branch-type: each branch type (tip-tip, tip-junction,
+        - branch_type: each branch type (tip-tip, tip-junction,
           junction-junction, path-path). This is the default.
         - branch-distance: the curved length of the skeleton branch.
         - euclidean-distance: the straight-line length of the skeleton branch.
@@ -142,8 +142,8 @@ def overlay_euclidean_skeleton_2d(
     image = _normalise_image(image, image_cmap=image_cmap)
     summary = stats
     # transforming from row, col to x, y
-    coords_cols = (['image-coord-src-%i' % i for i in [1, 0]]
-                   + ['image-coord-dst-%i' % i for i in [1, 0]])
+    coords_cols = (['image_coord_src_%i' % i for i in [1, 0]]
+                   + ['image_coord_dst_%i' % i for i in [1, 0]])
     coords = summary[coords_cols].values.reshape((-1, 2, 2))
     if axes is None:
         fig, axes = plt.subplots()

diff --git a/src/skan/napari_skan.py b/src/skan/napari_skan.py
@@ -39,7 +39,7 @@ def labels_to_skeleton_shapes(
     all_paths = [skeleton.path_coordinates(i) for i in range(skeleton.n_paths)]
 
     # option to have main_path = True (or something) changing header
-    paths_table = summarize(skeleton)
+    paths_table = summarize(skeleton, separator='_')
     layer_kwargs = {
             'shape_type': 'path',
             'edge_colormap': 'tab10',

diff --git a/src/skan/pipe.py b/src/skan/pipe.py
@@ -65,13 +65,15 @@ def process_single_image(
             )
     quality = shape_index(image, sigma=pixel_smoothing_radius, mode='reflect')
     skeleton = morphology.skeletonize(thresholded) * quality
-    framedata = csr.summarize(csr.Skeleton(skeleton, spacing=scale))
+    framedata = csr.summarize(
+            csr.Skeleton(skeleton, spacing=scale), separator='_'
+            )
     framedata['squiggle'] = np.log2(
-            framedata['branch-distance'] / framedata['euclidean-distance']
+            framedata['branch_distance'] / framedata['euclidean_distance']
             )
     framedata['scale'] = scale
     framedata.rename(
-            columns={'mean-pixel-value': 'mean-shape-index'},
+            columns={'mean_pixel_value': 'mean_shape_index'},
             inplace=True,
             errors='raise',
             )
@@ -152,9 +154,9 @@ def process_images(
             image_stats['branch density'] = (
                     framedata.shape[0] / image_stats['area']
                     )
-            j2j = framedata[framedata['branch-type'] == 2]
+            j2j = framedata[framedata['branch_type'] == 2]
             image_stats['mean J2J branch distance'] = (
-                    j2j['branch-distance'].mean()
+                    j2j['branch_distance'].mean()
                     )
             image_results.append(image_stats)
             yield filename, image, thresholded, skeleton, framedata

diff --git a/src/skan/summary_utils.py b/src/skan/summary_utils.py
@@ -20,9 +20,9 @@ def find_main_branches(summary: DataFrame) -> np.ndarray:
        skeleton
     """
     is_main = np.zeros(summary.shape[0], dtype=bool)
-    us = summary['node-id-src']
-    vs = summary['node-id-dst']
-    ws = summary['branch-distance']
+    us = summary['node_id_src']
+    vs = summary['node_id_dst']
+    ws = summary['branch_distance']
 
     edge2idx = {(u, v): i for i, (u, v) in enumerate(zip(us, vs))}