From ec056829fc16d84f62bab12859776cb863bdd62b Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Tue, 12 Nov 2024 14:11:48 -0500 Subject: [PATCH 1/4] Use `NonZeroU64` for `num_distinct_values` to avoid div-by-zero Also sprinkle in a few comments, while we're here. --- .../db/datastore/locking_tx_datastore/tx.rs | 19 +++++++++++++++---- crates/core/src/estimation.rs | 7 +++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/crates/core/src/db/datastore/locking_tx_datastore/tx.rs b/crates/core/src/db/datastore/locking_tx_datastore/tx.rs index deec7e1e84f..2d76f431510 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/tx.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/tx.rs @@ -9,6 +9,7 @@ use crate::execution_context::ExecutionContext; use spacetimedb_primitives::{ColList, TableId}; use spacetimedb_sats::AlgebraicValue; use spacetimedb_schema::schema::TableSchema; +use std::num::NonZeroU64; use std::sync::Arc; use std::{ ops::RangeBounds, @@ -68,9 +69,19 @@ impl TxId { /// The Number of Distinct Values (NDV) for a column or list of columns, /// if there's an index available on `cols`. - pub(crate) fn num_distinct_values(&self, table_id: TableId, cols: &ColList) -> Option { - self.committed_state_shared_lock - .get_table(table_id) - .and_then(|t| t.indexes.get(cols).map(|index| index.num_keys() as u64)) + /// + /// Returns `None` if: + /// - No such table as `table_id` exists. + /// - The table `table_id` does not have an index on exactly the `cols`. + /// - The table `table_id` contains zero rows. + // + // This method must never return 0, as it's used as the divisor in quotients. + // Do not change its return type to a bare `u64`. + pub(crate) fn num_distinct_values(&self, table_id: TableId, cols: &ColList) -> Option { + self.committed_state_shared_lock.get_table(table_id).and_then(|t| { + t.indexes + .get(cols) + .map(|index| NonZeroU64::new(index.num_keys() as u64)) + }) } } diff --git a/crates/core/src/estimation.rs b/crates/core/src/estimation.rs index 72a6f8c461e..7cd5a12b535 100644 --- a/crates/core/src/estimation.rs +++ b/crates/core/src/estimation.rs @@ -59,8 +59,15 @@ fn row_est(tx: &Tx, src: &SourceExpr, ops: &[Query]) -> u64 { /// The estimated number of rows that an index probe will return. /// Note this method is not applicable to range scans. +/// +/// Returns 0 in any case that [`Tx::num_distinct_values`] would return `None`: +/// - If there is no such table as `table_id`. +/// - If the table `table_id` does not have an index on exactly the `cols`. +/// - If the table `table_id` contains 0 rows. fn index_row_est(tx: &Tx, table_id: TableId, cols: &ColList) -> u64 { tx.num_distinct_values(table_id, cols) + // `num_distinct_values` returns `Option`, + // so this division can never panic. .map_or(0, |ndv| tx.get_row_count(table_id).unwrap_or(0) / ndv) } From ac697777425df83acac49e5b539f8ca4362f8570 Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Tue, 12 Nov 2024 14:18:33 -0500 Subject: [PATCH 2/4] More comments, justifying calls to `index_row_est` --- crates/core/src/estimation.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core/src/estimation.rs b/crates/core/src/estimation.rs index 7cd5a12b535..6a94dca8d07 100644 --- a/crates/core/src/estimation.rs +++ b/crates/core/src/estimation.rs @@ -18,6 +18,8 @@ fn row_est(tx: &Tx, src: &SourceExpr, ops: &[Query]) -> u64 { // We assume a uniform distribution of keys, // which implies a selectivity = 1 / NDV, // where NDV stands for Number of Distinct Values. + // We assume that the table exists and has an index on the columns, + // so `index_row_est` will only return 0 if the table is empty. Query::IndexScan(scan) if scan.is_point() => { index_row_est(tx, scan.table.table_id, &scan.columns) } @@ -41,6 +43,8 @@ fn row_est(tx: &Tx, src: &SourceExpr, ops: &[Query]) -> u64 { Query::IndexJoin(join) => { row_est(tx, &join.probe_side.source, &join.probe_side.query) .saturating_mul( + // We assume that the table exists and has an index on the columns, + // so `index_row_est` will only return 0 if the table is empty. index_row_est(tx, src.table_id().unwrap(), &join.index_col.into()) ) } From 9a9b03d0f218415f85760742e5949ac9e459db23 Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Tue, 12 Nov 2024 14:19:32 -0500 Subject: [PATCH 3/4] Use the correct `Option` combinator --- crates/core/src/db/datastore/locking_tx_datastore/tx.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core/src/db/datastore/locking_tx_datastore/tx.rs b/crates/core/src/db/datastore/locking_tx_datastore/tx.rs index 2d76f431510..3d1f01f0415 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/tx.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/tx.rs @@ -81,7 +81,7 @@ impl TxId { self.committed_state_shared_lock.get_table(table_id).and_then(|t| { t.indexes .get(cols) - .map(|index| NonZeroU64::new(index.num_keys() as u64)) + .and_then(|index| NonZeroU64::new(index.num_keys() as u64)) }) } } From 4864367bd93be00a7c107c5291fd5e201e645e66 Mon Sep 17 00:00:00 2001 From: Kurtis Mullins Date: Thu, 14 Nov 2024 09:22:52 -0500 Subject: [PATCH 4/4] !REMOVE Force build phoebe/hotfix/0.11.1/divide-by-zero --- .github/workflows/docker.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 58bcb8f1951..63d66d94da6 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -6,6 +6,7 @@ on: - master - staging - dev + - phoebe/hotfix/0.11.1/divide-by-zero tags: - 'v*'