Skip to content

Commit

Permalink
feat: Adds Histogram chart migration logic (apache#28780)
Browse files Browse the repository at this point in the history
  • Loading branch information
michael-s-molina authored Jun 5, 2024
1 parent dabb4e0 commit df0b1cb
Show file tree
Hide file tree
Showing 11 changed files with 118 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { PostProcessingFactory } from './types';
export const histogramOperator: PostProcessingFactory<
PostProcessingHistogram
> = (formData, queryObject) => {
const { bins, column, cumulative, groupby, normalize } = formData;
const { bins, column, cumulative, groupby = [], normalize } = formData;
const parsedBins = Number.isNaN(Number(bins)) ? 5 : Number(bins);
const parsedColumn = getColumnLabel(column);
const parsedGroupBy = groupby!.map(getColumnLabel);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ test('matches formData', () => {
});
});

test('sets default groupby', () => {
expect(
histogramOperator({ ...formData, groupby: undefined }, {})?.options
?.groupby,
).toEqual([]);
});

test('defaults to 5 bins', () => {
expect(
histogramOperator(omit(formData, ['bins']) as SqlaFormData, {}),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ export default function buildQuery(formData: HistogramFormData) {
return buildQueryContext(formData, baseQueryObject => [
{
...baseQueryObject,
extras: { where: `${column} IS NOT NULL` },
columns: [...groupby, column],
post_processing: [histogramOperator(formData, baseQueryObject)],
metrics: undefined,
},
]);
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,10 @@ export default function transformProps(
const echartOptions: EChartsOption = {
grid: {
...defaultGrid,
bottom: 30,
left: 30,
right: 30,
left: '5%',
right: '5%',
top: '10%',
bottom: '10%',
},
xAxis: {
data: xAxisData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ const ColumnSelectPopoverTrigger = ({
isTemporal,
onColumnEdit,
popoverLabel,
disabledTabs,
],
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ function DndColumnSelect(props: DndColumnSelectProps) {
closePopover={closePopover}
visible={newColumnPopoverVisible}
isTemporal={isTemporal}
disabledTabs={disabledTabs}
>
<div />
</ColumnSelectPopoverTrigger>
Expand Down
3 changes: 3 additions & 0 deletions superset/cli/viz_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class VizType(str, Enum):
DIST_BAR = "dist_bar"
DUAL_LINE = "dual_line"
HEATMAP = "heatmap"
HISTOGRAM = "histogram"
LINE = "line"
PIVOT_TABLE = "pivot_table"
SUNBURST = "sunburst"
Expand Down Expand Up @@ -85,6 +86,7 @@ def migrate(viz_type: VizType, is_downgrade: bool = False) -> None:
MigrateDistBarChart,
MigrateDualLine,
MigrateHeatmapChart,
MigrateHistogramChart,
MigrateLineChart,
MigratePivotTable,
MigrateSunburst,
Expand All @@ -98,6 +100,7 @@ def migrate(viz_type: VizType, is_downgrade: bool = False) -> None:
VizType.DIST_BAR: MigrateDistBarChart,
VizType.DUAL_LINE: MigrateDualLine,
VizType.HEATMAP: MigrateHeatmapChart,
VizType.HISTOGRAM: MigrateHistogramChart,
VizType.LINE: MigrateLineChart,
VizType.PIVOT_TABLE: MigratePivotTable,
VizType.SUNBURST: MigrateSunburst,
Expand Down
23 changes: 23 additions & 0 deletions superset/migrations/shared/migrate_viz/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,26 @@ class MigrateHeatmapChart(MigrateViz):

def _pre_action(self) -> None:
self.data["legend_type"] = "continuous"


class MigrateHistogramChart(MigrateViz):
source_viz_type = "histogram"
target_viz_type = "histogram_v2"
rename_keys = {
"x_axis_label": "x_axis_title",
"y_axis_label": "y_axis_title",
"normalized": "normalize",
}
remove_keys = {"all_columns_x", "link_length", "queryFields"}

def _pre_action(self) -> None:
all_columns_x = self.data.get("all_columns_x")
if all_columns_x and len(all_columns_x) > 0:
self.data["column"] = all_columns_x[0]

link_length = self.data.get("link_length")
self.data["bins"] = int(link_length) if link_length else 5

groupby = self.data.get("groupby")
if not groupby:
self.data["groupby"] = []
4 changes: 2 additions & 2 deletions superset/utils/pandas_postprocessing/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def histogram(
raise ValueError(f"The column '{column}' must be numeric.")

# calculate the histogram bin edges
bin_edges = np.histogram_bin_edges(df[column], bins=bins)
bin_edges = np.histogram_bin_edges(df[column].dropna(), bins=bins)

# convert the bin edges to strings
bin_edges_str = [
Expand All @@ -62,7 +62,7 @@ def histogram(
]

def hist_values(series: Series) -> np.ndarray:
result = np.histogram(series, bins=bin_edges)[0]
result = np.histogram(series.dropna(), bins=bin_edges)[0]
return result if not cumulative else np.cumsum(result)

if len(groupby) == 0:
Expand Down
52 changes: 52 additions & 0 deletions tests/unit_tests/migrations/viz/histogram_v1_v2_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any

from superset.migrations.shared.migrate_viz import MigrateHistogramChart
from tests.unit_tests.migrations.viz.utils import migrate_and_assert

SOURCE_FORM_DATA: dict[str, Any] = {
"all_columns_x": ["category"],
"adhoc_filters": [],
"cumulative": True,
"linear_color_scheme": "blue",
"link_length": "5",
"normalized": True,
"row_limit": 100,
"viz_type": "histogram",
"x_axis_label": "X",
"y_axis_label": "Y",
}

TARGET_FORM_DATA: dict[str, Any] = {
"adhoc_filters": [],
"bins": 5,
"column": "category",
"cumulative": True,
"form_data_bak": SOURCE_FORM_DATA,
"groupby": [],
"linear_color_scheme": "blue",
"normalize": True,
"row_limit": 100,
"viz_type": "histogram_v2",
"x_axis_title": "X",
"y_axis_title": "Y",
}


def test_migration() -> None:
migrate_and_assert(MigrateHistogramChart, SOURCE_FORM_DATA, TARGET_FORM_DATA)
22 changes: 22 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,25 @@ def test_histogram_with_non_numeric_column():
histogram(data, "b", ["group"], bins)
except ValueError as e:
assert str(e) == "The column 'b' must be numeric."


# test histogram ignore null values
def test_histogram_ignore_null_values():
data_with_null = DataFrame(
{
"group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, None],
"b": [1, 2, 3, 4, 5, 6, 7, 8, 9, None],
}
)
result = histogram(data_with_null, "a", ["group"], bins)
assert result.shape == (2, bins + 1)
assert result.columns.tolist() == [
"group",
"1 - 2",
"2 - 4",
"4 - 5",
"5 - 7",
"7 - 9",
]
assert result.values.tolist() == [["A", 2, 0, 1, 1, 1], ["B", 0, 2, 0, 1, 1]]

0 comments on commit df0b1cb

Please sign in to comment.