Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
ccae8ff
Implement st_haszm using WKBBytesExecutor instead (missing one last e…
petern48 Oct 1, 2025
07aa206
Add note about handling last edge case
petern48 Oct 1, 2025
cf031fb
Minor fix to the comments
petern48 Oct 1, 2025
526fc3b
Fix pre-commit
petern48 Oct 1, 2025
a491d91
Fix cargo clippy
petern48 Oct 2, 2025
cf90bcf
Save progress
petern48 Oct 3, 2025
22a6087
Pull out dimension calculation logic into new wkb_header.rs
petern48 Oct 4, 2025
5c616af
Add MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB fixture
petern48 Oct 4, 2025
207ecb1
Fix dimension calculation to support all collection types and add fix…
petern48 Oct 4, 2025
43009f8
Fix clippy and clean up
petern48 Oct 4, 2025
1078bdd
Remove public byte_order method since it's not needed atm
petern48 Oct 4, 2025
4d4e7e0
Perform all wkb_header operations lazily and cache the values as Opti…
petern48 Oct 4, 2025
dfd6c1a
Add python integration test benches
petern48 Oct 4, 2025
0ef812d
Add tests for wkb_header
petern48 Oct 4, 2025
075d6e6
Apply suggestion from @paleolimbot
petern48 Oct 5, 2025
491b3c7
Remaining clean up
petern48 Oct 5, 2025
7efccc0
Update to method to dimensions plural
petern48 Oct 5, 2025
06501e5
Rename method to try_new
petern48 Oct 5, 2025
1b397fd
Update fixture to be multipoint ((1 2 3)) instead of all zeros
petern48 Oct 5, 2025
9ce9f08
Implement refactor
petern48 Oct 7, 2025
617f9b8
Remove inferred dimension case
petern48 Oct 7, 2025
96311fa
Move logic to st_haszm
petern48 Oct 7, 2025
d1d4fc8
Add empty_geometry_first_coord_dimensions test
petern48 Oct 7, 2025
573f7c8
Add test for size
petern48 Oct 7, 2025
b5984dc
Add tests
petern48 Oct 7, 2025
8c1d2f0
Implement fix for first_xy POLYGON logic
petern48 Oct 7, 2025
dc49aac
clean up
petern48 Oct 7, 2025
89ebb4c
Rename from first_coord_dimensions to first_geom_dimensions and adjus…
petern48 Oct 7, 2025
bdc0fae
Update name of method in st_haszm and update some sedona_internal_err…
petern48 Oct 7, 2025
6b9a6ef
Merge branch 'main' into st_haszm_wkb_bytes
petern48 Oct 23, 2025
3c83ff9
Use SedonaGeometryError and remove sedona and datafusion common depen…
petern48 Oct 23, 2025
597c22e
Fix write_geometry arg after merge
petern48 Oct 23, 2025
55b94ef
Create and use read_u32() helper function
petern48 Oct 23, 2025
b77a762
Move all functions into WKbHeader as methods also rename helper to fi…
petern48 Oct 23, 2025
a2218a9
Catch the error instead of hiding it in first_geom_idx
petern48 Oct 23, 2025
b8f1cc3
Use new WkbBuffer that calculates values by consuming bytes and keepi…
petern48 Oct 23, 2025
4e3e525
Fix st_haszm to map sedona errors
petern48 Oct 25, 2025
92dd670
Fix bug, so dimensions supports EWKB and ISO WKB
petern48 Oct 28, 2025
e4d269a
Clean up
petern48 Oct 28, 2025
c25ac66
Remove parse_dimensions function and rename to read_coord
petern48 Oct 28, 2025
86aeb3c
Create set_offset() function to avoid creating new WkbBuffers
petern48 Oct 28, 2025
92759f2
Add EWKB GEOMETRYCOLLECTION w/ Z, M tests
petern48 Oct 28, 2025
389c34f
Delete unnecessary commented test + small match refactor
petern48 Oct 28, 2025
426a17a
Apply suggestion from @paleolimbot
petern48 Oct 29, 2025
a6902f8
Apply suggestion from @paleolimbot
petern48 Oct 29, 2025
4a8c070
Make incomplete_buffer tests more concise and comprehensive
petern48 Oct 29, 2025
99aeb4b
Merge branch 'st_haszm_wkb_bytes' of github.com:petern48/sedona-db in…
petern48 Oct 29, 2025
5d413de
Avoid final nested WkbBuffer::try_new() call to improve performance
petern48 Oct 29, 2025
2d2c8e5
Fix bug with geom collection nested inside geom collection and add ru…
petern48 Oct 29, 2025
9e10d6b
cleanup rust/sedona-functions/src/st_haszm.rs
petern48 Oct 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions benchmarks/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,42 @@ def queries():

benchmark(queries)

@pytest.mark.parametrize(
    "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
@pytest.mark.parametrize("table", ["collections_simple", "collections_complex"])
def test_st_hasm(self, benchmark, eng, table):
    """Benchmark ``ST_HasM`` over the ``geom1`` column of each parametrized table."""
    engine = self._get_eng(eng)

    def run_query():
        # Callable handed to the benchmark fixture; the SQL text is built on each call.
        engine.execute_and_collect(f"SELECT ST_HasM(geom1) from {table}")

    benchmark(run_query)

@pytest.mark.parametrize(
    "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
@pytest.mark.parametrize("table", ["collections_simple", "collections_complex"])
def test_st_hasz(self, benchmark, eng, table):
    """Benchmark ``ST_HasZ`` over the ``geom1`` column of each parametrized table."""
    engine = self._get_eng(eng)

    def run_query():
        # Callable handed to the benchmark fixture; the SQL text is built on each call.
        engine.execute_and_collect(f"SELECT ST_HasZ(geom1) from {table}")

    benchmark(run_query)

@pytest.mark.parametrize(
"eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
Expand Down
5 changes: 5 additions & 0 deletions python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,13 @@ def test_st_geomfromwkb(eng, geom):
("LINESTRING Z (0 0 0, 1 1 1)", True),
("POLYGON EMPTY", False),
("MULTIPOINT ((0 0), (1 1))", False),
("MULTIPOINT Z ((0 0 0))", True),
("MULTIPOINT ZM ((0 0 0 0))", True),
("GEOMETRYCOLLECTION EMPTY", False),
# Z-dim specified only in the nested geometry
("GEOMETRYCOLLECTION (POINT Z (0 0 0))", True),
# Z-dim specified on both levels
("GEOMETRYCOLLECTION Z (POINT Z (0 0 0))", True),
("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT Z (0 0 0)))", True),
],
)
Expand Down
100 changes: 71 additions & 29 deletions rust/sedona-functions/src/st_haszm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,17 @@
// under the License.
use std::sync::Arc;

use crate::executor::WkbExecutor;
use crate::executor::WkbBytesExecutor;
use arrow_array::builder::BooleanBuilder;
use arrow_schema::DataType;
use datafusion_common::error::Result;
use datafusion_common::{error::Result, DataFusionError};
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility,
};
use geo_traits::GeometryCollectionTrait;
use geo_traits::{Dimensions, GeometryTrait};
use sedona_common::sedona_internal_err;
use geo_traits::Dimensions;
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_geometry::wkb_header::WkbHeader;
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
use wkb::reader::Wkb;

pub fn st_hasz_udf() -> SedonaScalarUDF {
SedonaScalarUDF::new(
Expand Down Expand Up @@ -91,13 +89,13 @@ impl SedonaScalarKernel for STHasZm {
_ => unreachable!(),
};

let executor = WkbExecutor::new(arg_types, args);
let executor = WkbBytesExecutor::new(arg_types, args);
let mut builder = BooleanBuilder::with_capacity(executor.num_iterations());

executor.execute_wkb_void(|maybe_item| {
match maybe_item {
Some(item) => {
builder.append_option(invoke_scalar(&item, dim_index)?);
builder.append_option(invoke_scalar(item, dim_index)?);
}
None => builder.append_null(),
}
Expand All @@ -108,27 +106,34 @@ impl SedonaScalarKernel for STHasZm {
}
}

fn invoke_scalar(item: &Wkb, dim_index: usize) -> Result<Option<bool>> {
match item.as_type() {
geo_traits::GeometryType::GeometryCollection(collection) => {
if collection.num_geometries() == 0 {
Ok(Some(false))
} else {
// PostGIS doesn't allow creating a GeometryCollection with geometries of different dimensions
// so we can just check the dimension of the first one
let first_geom = unsafe { collection.geometry_unchecked(0) };
invoke_scalar(first_geom, dim_index)
}
}
_ => {
let geom_dim = item.dim();
match dim_index {
2 => Ok(Some(matches!(geom_dim, Dimensions::Xyz | Dimensions::Xyzm))),
3 => Ok(Some(matches!(geom_dim, Dimensions::Xym | Dimensions::Xyzm))),
_ => sedona_internal_err!("unexpected dim_index"),
}
}
/// Reports whether the geometry encoded in `buf` has the requested extra dimension.
///
/// `dim_index` selects the dimension to test: `2` checks for Z (`Xyz` or `Xyzm`) and
/// `3` checks for M (`Xym` or `Xyzm`).
///
/// The dimensions reported by `WkbHeader::first_geom_dimensions` are preferred when
/// present; otherwise the top-level header dimensions are used. This covers inputs where
/// the two differ, e.g. `GEOMETRYCOLLECTION (POINT Z (1 2 3))`.
///
/// # Errors
///
/// Returns `DataFusionError::External` when the header cannot be constructed or its
/// dimensions cannot be read, and `DataFusionError::Internal` when `dim_index` is
/// neither `2` nor `3`.
fn invoke_scalar(buf: &[u8], dim_index: usize) -> Result<Option<bool>> {
    let header = WkbHeader::try_new(buf).map_err(|e| DataFusionError::External(Box::new(e)))?;

    // Read the top-level dimensions eagerly so that a malformed header is always reported,
    // even when the first-geometry dimensions end up being the ones used.
    let top_level_dimensions = header
        .dimensions()
        .map_err(|e| DataFusionError::External(Box::new(e)))?;

    let dimensions = header
        .first_geom_dimensions()
        .unwrap_or(top_level_dimensions);

    let has_dimension = match dim_index {
        2 => matches!(dimensions, Dimensions::Xyz | Dimensions::Xyzm),
        3 => matches!(dimensions, Dimensions::Xym | Dimensions::Xyzm),
        // The dispatcher only ever passes 2 or 3; surface anything else as an internal
        // error instead of silently answering `false`.
        _ => {
            return Err(DataFusionError::Internal(format!(
                "unexpected dim_index: {dim_index}"
            )))
        }
    };

    Ok(Some(has_dimension))
}

#[cfg(test)]
Expand All @@ -137,7 +142,9 @@ mod tests {
use datafusion_expr::ScalarUDF;
use rstest::rstest;
use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
use sedona_testing::testers::ScalarUdfTester;
use sedona_testing::{
fixtures::MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB, testers::ScalarUdfTester,
};

use super::*;

Expand Down Expand Up @@ -184,11 +191,19 @@ mod tests {
let result = m_tester.invoke_wkb_scalar(None).unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Null);

// Z-dimension specified only in the nested geometry, but not the geom collection level
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT Z (1 2 3))"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

// Z-dimension specified on both the geom collection and nested geometry level
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z (POINT Z (1 2 3))"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

let result = m_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT M (1 2 3))"))
.unwrap();
Expand All @@ -203,5 +218,32 @@ mod tests {
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY"))
.unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(false)));

// Empty geometry collections with Z or M dimensions
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z EMPTY"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

let result = m_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION M EMPTY"))
.unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));
}

// Runs ST_HasZ and ST_HasM against the MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB fixture:
// Z is expected to be reported and M is not.
#[test]
fn multipoint_with_inferred_z_dimension() {
    let z_tester = ScalarUdfTester::new(st_hasz_udf().into(), vec![WKB_GEOMETRY]);
    let m_tester = ScalarUdfTester::new(st_hasm_udf().into(), vec![WKB_GEOMETRY]);

    let scalar = ScalarValue::Binary(Some(MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB.to_vec()));

    // The value is needed again below, so hand the first tester a copy.
    assert_eq!(
        z_tester.invoke_scalar(scalar.clone()).unwrap(),
        ScalarValue::Boolean(Some(true))
    );
    // Final use: move the value instead of cloning it a second time.
    assert_eq!(
        m_tester.invoke_scalar(scalar).unwrap(),
        ScalarValue::Boolean(Some(false))
    );
}
}
1 change: 1 addition & 0 deletions rust/sedona-geometry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ result_large_err = "allow"
[dev-dependencies]
geo-types = { workspace = true }
rstest = { workspace = true }
sedona-testing = { path = "../sedona-testing" }
serde_json = { workspace = true }
wkt = { workspace = true }

Expand Down
1 change: 1 addition & 0 deletions rust/sedona-geometry/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ pub mod point_count;
pub mod transform;
pub mod types;
pub mod wkb_factory;
pub mod wkb_header;
Loading