Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql/stats: more fully undo addOuterBuckets in stripOuterBuckets #135310

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -3412,3 +3412,203 @@ pstat_allindex_partial {a} 3 3 0
pstat_allindex_partial {b} 3 3 0
pstat_allindex_partial {crdb_internal_idx_expr} 3 3 0
pstat_allindex_partial {d} 3 3 0

# Regression test for #134031. Check that we still have a valid, forecast-able
# histogram after merging partial stats with full stats that have been modified
# by addOuterBucket.

statement ok
CREATE TABLE t134031 (
a INT PRIMARY KEY
) WITH (sql_stats_automatic_collection_enabled = false, sql_stats_histogram_buckets_count = 4)

# These stats were created with the following statements:
#
# INSERT INTO t134031 SELECT 2 * i * i FROM generate_series(0, 1000) s(i);
# CREATE STATISTICS __auto__ FROM t134031;
#
# INSERT INTO t134031 VALUES (2000002);
# SELECT pg_sleep(1);
# CREATE STATISTICS __auto__ FROM t134031;
#
# INSERT INTO t134031 VALUES (2000004);
# SELECT pg_sleep(1);
# CREATE STATISTICS __auto__ FROM t134031;
#
# INSERT INTO t134031 VALUES (3000000);
# SELECT pg_sleep(2);
# CREATE STATISTICS __auto_partial__ FROM t134031 USING EXTREMES;

statement ok
ALTER TABLE t134031 INJECT STATISTICS '[
{
"avg_size": 4,
"columns": [
"a"
],
"created_at": "2024-11-15 17:38:35.191236",
"distinct_count": 1001,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "0"
},
{
"distinct_range": 331.79445036468786,
"num_eq": 1,
"num_range": 332,
"upper_bound": "221778"
},
{
"distinct_range": 332.043798577731,
"num_eq": 1,
"num_range": 332,
"upper_bound": "887112"
},
{
"distinct_range": 333.16175105758106,
"num_eq": 1,
"num_range": 333,
"upper_bound": "2000000"
}
],
"histo_col_type": "INT8",
"histo_version": 3,
"id": 1021122691703046145,
"name": "__auto__",
"null_count": 0,
"row_count": 1001
},
{
"avg_size": 4,
"columns": [
"a"
],
"created_at": "2024-11-15 17:38:36.240863",
"distinct_count": 1002,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "0"
},
{
"distinct_range": 331.7944820909316,
"num_eq": 1,
"num_range": 332,
"upper_bound": "221778"
},
{
"distinct_range": 333.0442332658522,
"num_eq": 1,
"num_range": 333,
"upper_bound": "889778"
},
{
"distinct_range": 333.16128464321633,
"num_eq": 1,
"num_range": 333,
"upper_bound": "2000002"
}
],
"histo_col_type": "INT8",
"histo_version": 3,
"id": 1021122695141097473,
"name": "__auto__",
"null_count": 0,
"row_count": 1002
},
{
"avg_size": 4,
"columns": [
"a"
],
"created_at": "2024-11-15 17:38:37.296779",
"distinct_count": 1003,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 0,
"num_range": 0,
"upper_bound": "-9223372036854775808"
},
{
"distinct_range": 5.684341886080802E-14,
"num_eq": 1,
"num_range": 0,
"upper_bound": "0"
},
{
"distinct_range": 332.7947242432267,
"num_eq": 1,
"num_range": 333,
"upper_bound": "223112"
},
{
"distinct_range": 333.0446396262168,
"num_eq": 1,
"num_range": 333,
"upper_bound": "892448"
},
{
"distinct_range": 333.16063613055655,
"num_eq": 1,
"num_range": 333,
"upper_bound": "2000004"
},
{
"distinct_range": 5.684341886080185E-14,
"num_eq": 0,
"num_range": 0,
"upper_bound": "9223372036854775807"
}
],
"histo_col_type": "INT8",
"histo_version": 3,
"id": 1021122698602217473,
"name": "__auto__",
"null_count": 0,
"row_count": 1003
},
{
"avg_size": 4,
"columns": [
"a"
],
"created_at": "2024-11-15 17:38:39.337031",
"distinct_count": 1,
"full_statistic_id": 1021122698602217473,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "3000000"
}
],
"histo_col_type": "INT8",
"histo_version": 3,
"id": 1021122705285644289,
"name": "__auto_partial__",
"null_count": 0,
"partial_predicate": "(a IS NULL) OR ((a \u003c 0:::INT8) OR (a \u003e 2000004:::INT8))",
"row_count": 1
}
]'

# Make sure the first bucket of both merged and forecast histograms has 0 for
# distinct_range and num_range.
query TT
SELECT stat->>'name', stat->'histo_buckets'->0
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE t134031 WITH MERGE, FORECAST]
)
WHERE stat->'name' <@ '["__merged__", "__forecast__"]'
ORDER BY stat->>'name' DESC
----
__merged__ {"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "0"}
__forecast__ {"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "0"}
6 changes: 5 additions & 1 deletion pkg/sql/stats/merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ func stripOuterBuckets(
endIdx := len(histogram)
if histogram[0].UpperBound.IsMin(ctx, evalCtx) && histogram[0].NumEq == 0 {
startIdx = 1
// Set the first range counts to zero to counteract range counts added by
// addOuterBuckets.
histogram[startIdx].NumRange = 0
histogram[startIdx].DistinctRange = 0
}
if histogram[len(histogram)-1].UpperBound.IsMax(ctx, evalCtx) && histogram[len(histogram)-1].NumEq == 0 {
endIdx = len(histogram) - 1
Expand All @@ -102,7 +106,7 @@ func stripOuterBuckets(
// Histogram (format is: {NumEq, NumRange, DistinctRange, UpperBound}):
// [{1, 0, 0, 2}, {1, 0, 0, 3}, {1, 0, 0, 4}]
//
// Partial Statistic: {row, 8, dist: 4, null: 0, size: 1}
// Partial Statistic: {row: 8, dist: 4, null: 0, size: 1}
// CreatedAt: 2022-01-03
// Histogram: [{2, 0, 0, 0}, {2, 0, 0, 1}, {2, 0, 0, 5}, {2, 0, 0, 6}]
//
Expand Down
Loading