Skip to content

Use NonZeroU64 for num_distinct_values to avoid div-by-zero #1991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
- master
- staging
- dev
- phoebe/hotfix/0.11.1/divide-by-zero
tags:
- 'v*'

Expand Down
19 changes: 15 additions & 4 deletions crates/core/src/db/datastore/locking_tx_datastore/tx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::execution_context::ExecutionContext;
use spacetimedb_primitives::{ColList, TableId};
use spacetimedb_sats::AlgebraicValue;
use spacetimedb_schema::schema::TableSchema;
use std::num::NonZeroU64;
use std::sync::Arc;
use std::{
ops::RangeBounds,
Expand Down Expand Up @@ -68,9 +69,19 @@ impl TxId {

/// The Number of Distinct Values (NDV) for a column or list of columns,
/// if there's an index available on `cols`.
pub(crate) fn num_distinct_values(&self, table_id: TableId, cols: &ColList) -> Option<u64> {
self.committed_state_shared_lock
.get_table(table_id)
.and_then(|t| t.indexes.get(cols).map(|index| index.num_keys() as u64))
///
/// Returns `None` if:
/// - No such table as `table_id` exists.
/// - The table `table_id` does not have an index on exactly the `cols`.
/// - The table `table_id` contains zero rows.
//
// This method must never return 0, as it's used as the divisor in quotients.
// Do not change its return type to a bare `u64`.
pub(crate) fn num_distinct_values(&self, table_id: TableId, cols: &ColList) -> Option<NonZeroU64> {
    // Each lookup exits early with `None` when the table or the index is absent.
    let table = self.committed_state_shared_lock.get_table(table_id)?;
    let index = table.indexes.get(cols)?;
    // `NonZeroU64::new` turns a key count of 0 into `None`,
    // so callers can safely use the result as a divisor.
    NonZeroU64::new(index.num_keys() as u64)
}
}
11 changes: 11 additions & 0 deletions crates/core/src/estimation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ fn row_est(tx: &Tx, src: &SourceExpr, ops: &[Query]) -> u64 {
// We assume a uniform distribution of keys,
// which implies a selectivity = 1 / NDV,
// where NDV stands for Number of Distinct Values.
// We assume that the table exists and has an index on the columns,
// so `index_row_est` will only return 0 if the table is empty.
Query::IndexScan(scan) if scan.is_point() => {
index_row_est(tx, scan.table.table_id, &scan.columns)
}
Expand All @@ -41,6 +43,8 @@ fn row_est(tx: &Tx, src: &SourceExpr, ops: &[Query]) -> u64 {
Query::IndexJoin(join) => {
row_est(tx, &join.probe_side.source, &join.probe_side.query)
.saturating_mul(
// We assume that the table exists and has an index on the columns,
// so `index_row_est` will only return 0 if the table is empty.
index_row_est(tx, src.table_id().unwrap(), &join.index_col.into())
)
}
Expand All @@ -59,8 +63,15 @@ fn row_est(tx: &Tx, src: &SourceExpr, ops: &[Query]) -> u64 {

/// Estimates how many rows a single index probe on `cols` will yield.
/// This estimate does not apply to range scans.
///
/// The result is the table's row count divided (integer division) by the
/// number of distinct values (NDV) reported for the index on `cols`.
///
/// Yields 0 whenever [`Tx::num_distinct_values`] yields `None`:
/// - There is no such table as `table_id`.
/// - The table `table_id` has no index on exactly the `cols`.
/// - The table `table_id` contains 0 rows.
///
/// If `get_row_count` does not produce a count, 0 is used as the dividend.
fn index_row_est(tx: &Tx, table_id: TableId, cols: &ColList) -> u64 {
    match tx.num_distinct_values(table_id, cols) {
        // The divisor is a `NonZeroU64`, so this division can never panic.
        Some(ndv) => {
            let row_count = tx.get_row_count(table_id).unwrap_or(0);
            row_count / ndv
        }
        None => 0,
    }
}

Expand Down
Loading