@pytest.mark.parametrize(
    "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
@pytest.mark.parametrize(
    "table",
    [
        "collections_simple",
        "collections_complex",
    ],
)
def test_st_hasm(self, benchmark, eng, table):
    """Benchmark ``ST_HasM(geom1)`` against ``table`` on the given engine."""
    engine = self._get_eng(eng)

    def run_hasm():
        # The SQL text is built inside the timed callable, as in the other
        # benchmarks of this class.
        engine.execute_and_collect(f"SELECT ST_HasM(geom1) from {table}")

    benchmark(run_hasm)

@pytest.mark.parametrize(
    "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
@pytest.mark.parametrize(
    "table",
    [
        "collections_simple",
        "collections_complex",
    ],
)
def test_st_hasz(self, benchmark, eng, table):
    """Benchmark ``ST_HasZ(geom1)`` against ``table`` on the given engine."""
    engine = self._get_eng(eng)

    def run_hasz():
        # The SQL text is built inside the timed callable, as in the other
        # benchmarks of this class.
        engine.execute_and_collect(f"SELECT ST_HasZ(geom1) from {table}")

    benchmark(run_hasz)
levels + ("GEOMETRYCOLLECTION Z (POINT Z (0 0 0))", True), ("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT Z (0 0 0)))", True), ], ) diff --git a/rust/sedona-functions/src/st_haszm.rs b/rust/sedona-functions/src/st_haszm.rs index a4a20fff..28320131 100644 --- a/rust/sedona-functions/src/st_haszm.rs +++ b/rust/sedona-functions/src/st_haszm.rs @@ -16,19 +16,17 @@ // under the License. use std::sync::Arc; -use crate::executor::WkbExecutor; +use crate::executor::WkbBytesExecutor; use arrow_array::builder::BooleanBuilder; use arrow_schema::DataType; -use datafusion_common::error::Result; +use datafusion_common::{error::Result, DataFusionError}; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; -use geo_traits::GeometryCollectionTrait; -use geo_traits::{Dimensions, GeometryTrait}; -use sedona_common::sedona_internal_err; +use geo_traits::Dimensions; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::wkb_header::WkbHeader; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; -use wkb::reader::Wkb; pub fn st_hasz_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( @@ -91,13 +89,13 @@ impl SedonaScalarKernel for STHasZm { _ => unreachable!(), }; - let executor = WkbExecutor::new(arg_types, args); + let executor = WkbBytesExecutor::new(arg_types, args); let mut builder = BooleanBuilder::with_capacity(executor.num_iterations()); executor.execute_wkb_void(|maybe_item| { match maybe_item { Some(item) => { - builder.append_option(invoke_scalar(&item, dim_index)?); + builder.append_option(invoke_scalar(item, dim_index)?); } None => builder.append_null(), } @@ -108,27 +106,34 @@ impl SedonaScalarKernel for STHasZm { } } -fn invoke_scalar(item: &Wkb, dim_index: usize) -> Result> { - match item.as_type() { - geo_traits::GeometryType::GeometryCollection(collection) => { - if collection.num_geometries() == 0 { - Ok(Some(false)) - } else { - // PostGIS doesn't allow creating a 
GeometryCollection with geometries of different dimensions - // so we can just check the dimension of the first one - let first_geom = unsafe { collection.geometry_unchecked(0) }; - invoke_scalar(first_geom, dim_index) - } - } - _ => { - let geom_dim = item.dim(); - match dim_index { - 2 => Ok(Some(matches!(geom_dim, Dimensions::Xyz | Dimensions::Xyzm))), - 3 => Ok(Some(matches!(geom_dim, Dimensions::Xym | Dimensions::Xyzm))), - _ => sedona_internal_err!("unexpected dim_index"), - } - } +fn invoke_scalar(buf: &[u8], dim_index: usize) -> Result> { + let header = WkbHeader::try_new(buf).map_err(|e| DataFusionError::External(Box::new(e)))?; + let top_level_dimensions = header + .dimensions() + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + // Infer dimension based on first coordinate dimension for cases where it differs from top-level + // e.g GEOMETRYCOLLECTION (POINT Z (1 2 3)) + let dimensions; + if let Some(first_geom_dimensions) = header.first_geom_dimensions() { + dimensions = first_geom_dimensions; + } else { + dimensions = top_level_dimensions; + } + + if dim_index == 2 { + return Ok(Some(matches!( + dimensions, + Dimensions::Xyz | Dimensions::Xyzm + ))); + } + if dim_index == 3 { + return Ok(Some(matches!( + dimensions, + Dimensions::Xym | Dimensions::Xyzm + ))); } + Ok(Some(false)) } #[cfg(test)] @@ -137,7 +142,9 @@ mod tests { use datafusion_expr::ScalarUDF; use rstest::rstest; use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY}; - use sedona_testing::testers::ScalarUdfTester; + use sedona_testing::{ + fixtures::MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB, testers::ScalarUdfTester, + }; use super::*; @@ -184,11 +191,19 @@ mod tests { let result = m_tester.invoke_wkb_scalar(None).unwrap(); m_tester.assert_scalar_result_equals(result, ScalarValue::Null); + // Z-dimension specified only in the nested geometry, but not the geom collection level let result = z_tester .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT Z (1 2 3))")) .unwrap(); 
/// Checks `ST_HasZ` / `ST_HasM` on the raw-WKB fixture
/// `MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB`: Z is expected to be reported, M is not.
// NOTE(review): judging by its name the fixture declares Z only on the nested points,
// not on the MULTIPOINT itself; confirm against `sedona_testing::fixtures`.
#[test]
fn multipoint_with_inferred_z_dimension() {
    let z_tester = ScalarUdfTester::new(st_hasz_udf().into(), vec![WKB_GEOMETRY]);
    let m_tester = ScalarUdfTester::new(st_hasm_udf().into(), vec![WKB_GEOMETRY]);

    let scalar = ScalarValue::Binary(Some(MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB.to_vec()));
    assert_eq!(
        z_tester.invoke_scalar(scalar.clone()).unwrap(),
        ScalarValue::Boolean(Some(true)),
        "ST_HasZ should be true for the fixture"
    );
    // Last use of `scalar`: move it instead of cloning.
    assert_eq!(
        m_tester.invoke_scalar(scalar).unwrap(),
        ScalarValue::Boolean(Some(false)),
        "ST_HasM should be false for the fixture"
    );
}
{ path = "../sedona-testing" } serde_json = { workspace = true } wkt = { workspace = true } diff --git a/rust/sedona-geometry/src/lib.rs b/rust/sedona-geometry/src/lib.rs index 65cc5936..f189ec7b 100644 --- a/rust/sedona-geometry/src/lib.rs +++ b/rust/sedona-geometry/src/lib.rs @@ -24,3 +24,4 @@ pub mod point_count; pub mod transform; pub mod types; pub mod wkb_factory; +pub mod wkb_header; diff --git a/rust/sedona-geometry/src/wkb_header.rs b/rust/sedona-geometry/src/wkb_header.rs new file mode 100644 index 00000000..22f49a30 --- /dev/null +++ b/rust/sedona-geometry/src/wkb_header.rs @@ -0,0 +1,1012 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use geo_traits::Dimensions; + +use crate::error::SedonaGeometryError; +use crate::types::GeometryTypeId; + +const Z_FLAG_BIT: u32 = 0x80000000; +const M_FLAG_BIT: u32 = 0x40000000; +const SRID_FLAG_BIT: u32 = 0x20000000; + +/// Fast-path WKB header parser +/// Performs operations lazily and caches them after the first computation +#[derive(Debug)] +pub struct WkbHeader { + geometry_type: u32, + // Not applicable for a point + // number of points for a linestring + // number of rings for a polygon + // number of geometries for a MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, or GEOMETRYCOLLECTION + size: u32, + // SRID if given buffer was EWKB. Otherwise, 0. + srid: u32, + // First x,y coordinates for a point. Otherwise (f64::NAN, f64::NAN) if empty + first_xy: (f64, f64), + // Dimensions of the first nested geometry of a collection or None if empty + // For POINT, LINESTRING, POLYGON, returns the dimensions of the geometry + first_geom_dimensions: Option, +} + +impl WkbHeader { + /// Creates a new [WkbHeader] from a buffer + pub fn try_new(buf: &[u8]) -> Result { + let mut wkb_buffer = WkbBuffer::new(buf); + + wkb_buffer.read_endian()?; + + let geometry_type = wkb_buffer.read_u32()?; + + let geometry_type_id = GeometryTypeId::try_from_wkb_id(geometry_type & 0x7)?; + + let mut srid = 0; + // if EWKB + if geometry_type & SRID_FLAG_BIT != 0 { + srid = wkb_buffer.read_u32()?; + } + + let size = if geometry_type_id == GeometryTypeId::Point { + // Dummy value for a point + 1 + } else { + wkb_buffer.read_u32()? 
+ }; + + // Default values for empty geometries + let first_x; + let first_y; + let first_geom_dimensions: Option; + + wkb_buffer.set_offset(0); + + let first_geom_idx = wkb_buffer.first_geom_idx()?; + if let Some(i) = first_geom_idx { + // Reset to first_geom_idx and parse the dimensions + wkb_buffer.set_offset(i); + // Parse dimension + wkb_buffer.read_endian()?; + let code = wkb_buffer.read_u32()?; + first_geom_dimensions = Some(calc_dimensions(code)?); + + // For first_xy_coord, we need to pass the buffer starting from the geometry header + wkb_buffer.set_offset(i); + (first_x, first_y) = wkb_buffer.first_xy_coord()?; + } else { + first_geom_dimensions = None; + first_x = f64::NAN; + first_y = f64::NAN; + } + + Ok(Self { + geometry_type, + srid, + size, + first_xy: (first_x, first_y), + first_geom_dimensions, + }) + } + + /// Returns the [GeometryTypeId] of the WKB by only parsing the header instead of the entire WKB + pub fn geometry_type_id(&self) -> Result { + // Only low 3 bits is for the base type, high bits include additional info + let code = self.geometry_type & 0x7; + + let geometry_type_id = GeometryTypeId::try_from_wkb_id(code)?; + + Ok(geometry_type_id) + } + + /// Returns the size of the geometry + /// + /// - 1 for Points + /// - Number of points for a linestring + /// - Number of rings for a polygon + /// - Number of geometries for a MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, or GEOMETRYCOLLECTION + pub fn size(&self) -> u32 { + self.size + } + + /// Returns the SRID if given buffer was EWKB. Otherwise, 0. + pub fn srid(&self) -> u32 { + self.srid + } + + /// Returns the first x, y coordinates for a point. 
Otherwise (f64::NAN, f64::NAN) if empty + pub fn first_xy(&self) -> (f64, f64) { + self.first_xy + } + + /// Returns the top-level dimension of the WKB + pub fn dimensions(&self) -> Result { + calc_dimensions(self.geometry_type) + } + + /// Returns the dimensions of the first coordinate of the geometry + pub fn first_geom_dimensions(&self) -> Option { + self.first_geom_dimensions + } +} + +// A helper struct for calculating the WKBHeader +struct WkbBuffer<'a> { + buf: &'a [u8], + offset: usize, + remaining: usize, + last_endian: u8, +} + +impl<'a> WkbBuffer<'a> { + fn new(buf: &'a [u8]) -> Self { + Self { + buf, + offset: 0, + remaining: buf.len(), + last_endian: 0, + } + } + + // For MULITPOINT, MULTILINESTRING, MULTIPOLYGON, or GEOMETRYCOLLECTION, returns the index to the first nested + // non-collection geometry (POINT, LINESTRING, or POLYGON), or None if empty + // For POINT, LINESTRING, POLYGON, returns 0 as it already is a non-collection geometry + fn first_geom_idx(&mut self) -> Result, SedonaGeometryError> { + // Record the start of this geometry header so we can return an absolute index + let start_offset = self.offset; + + self.read_endian()?; + let geometry_type = self.read_u32()?; + let geometry_type_id = GeometryTypeId::try_from_wkb_id(geometry_type & 0x7)?; + + match geometry_type_id { + GeometryTypeId::Point | GeometryTypeId::LineString | GeometryTypeId::Polygon => { + // Return absolute offset to the start of this geometry header + Ok(Some(start_offset)) + } + GeometryTypeId::MultiPoint + | GeometryTypeId::MultiLineString + | GeometryTypeId::MultiPolygon + | GeometryTypeId::GeometryCollection => { + if geometry_type & SRID_FLAG_BIT != 0 { + // Skip the SRID + self.read_u32()?; + } + + let num_geometries = self.read_u32()?; + + if num_geometries == 0 { + return Ok(None); + } + + // Recursive call to get first non-collection geometry + self.first_geom_idx() + } + _ => Err(SedonaGeometryError::Invalid(format!( + "Unexpected geometry type: 
{geometry_type_id:?}" + ))), + } + } + + // Given a point, linestring, or polygon, return the first xy coordinate + // If the geometry, is empty, (NaN, NaN) is returned + fn first_xy_coord(&mut self) -> Result<(f64, f64), SedonaGeometryError> { + self.read_endian()?; + let geometry_type = self.read_u32()?; + + let geometry_type_id = GeometryTypeId::try_from_wkb_id(geometry_type & 0x7)?; + + // Skip the SRID if it's present + if geometry_type & SRID_FLAG_BIT != 0 { + self.read_u32()?; + } + + match geometry_type_id { + GeometryTypeId::LineString => { + let size = self.read_u32()?; + if size == 0 { + return Ok((f64::NAN, f64::NAN)); + } + } + GeometryTypeId::Polygon => { + let size = self.read_u32()?; + if size == 0 { + return Ok((f64::NAN, f64::NAN)); + } + let ring0_num_points = self.read_u32()?; + if ring0_num_points == 0 { + return Ok((f64::NAN, f64::NAN)); + } + } + _ => {} + } + + let x = self.read_coord()?; + let y = self.read_coord()?; + Ok((x, y)) + } + + fn read_endian(&mut self) -> Result<(), SedonaGeometryError> { + if self.remaining < 1 { + return Err(SedonaGeometryError::Invalid(format!( + "Invalid WKB: buffer too small. At offset: {}. Need 1 byte.", + self.offset + ))); + } + self.last_endian = self.buf[self.offset]; + self.remaining -= 1; + self.offset += 1; + Ok(()) + } + + fn read_u32(&mut self) -> Result { + if self.remaining < 4 { + return Err(SedonaGeometryError::Invalid(format!( + "Invalid WKB: buffer too small. At offset: {}. 
Need 4 bytes.", + self.offset + ))); + } + + let off = self.offset; + let num = match self.last_endian { + 0 => u32::from_be_bytes([ + self.buf[off], + self.buf[off + 1], + self.buf[off + 2], + self.buf[off + 3], + ]), + 1 => u32::from_le_bytes([ + self.buf[off], + self.buf[off + 1], + self.buf[off + 2], + self.buf[off + 3], + ]), + other => { + return Err(SedonaGeometryError::Invalid(format!( + "Unexpected byte order: {other:?}" + ))) + } + }; + self.remaining -= 4; + self.offset += 4; + Ok(num) + } + + // Given a buffer starting at the coordinate itself, parse the x and y coordinates + fn read_coord(&mut self) -> Result { + if self.remaining < 8 { + return Err(SedonaGeometryError::Invalid(format!( + "Invalid WKB: buffer too small. At offset: {}. Need 8 bytes.", + self.offset + ))); + } + + let buf = &self.buf; + let off = self.offset; + let coord: f64 = match self.last_endian { + 0 => f64::from_be_bytes([ + buf[off], + buf[off + 1], + buf[off + 2], + buf[off + 3], + buf[off + 4], + buf[off + 5], + buf[off + 6], + buf[off + 7], + ]), + 1 => f64::from_le_bytes([ + buf[off], + buf[off + 1], + buf[off + 2], + buf[off + 3], + buf[off + 4], + buf[off + 5], + buf[off + 6], + buf[off + 7], + ]), + other => { + return Err(SedonaGeometryError::Invalid(format!( + "Unexpected byte order: {other:?}" + ))) + } + }; + self.remaining -= 8; + self.offset += 8; + + Ok(coord) + } + + fn set_offset(&mut self, offset: usize) { + self.offset = offset; + self.remaining = self.buf.len() - offset; + } +} + +fn calc_dimensions(code: u32) -> Result { + // Check for EWKB Z and M flags + let hasz = (code & Z_FLAG_BIT) != 0; + let hasm = (code & M_FLAG_BIT) != 0; + + match (hasz, hasm) { + (false, false) => {} + // If either flag is set, this must be EWKB (and not ISO WKB) + (true, false) => return Ok(Dimensions::Xyz), + (false, true) => return Ok(Dimensions::Xym), + (true, true) => return Ok(Dimensions::Xyzm), + } + + // if SRID flag is set, then it must be EWKB with no z or m + if code & 
SRID_FLAG_BIT != 0 { + return Ok(Dimensions::Xy); + } + + // Interpret as ISO WKB + match code / 1000 { + 0 => Ok(Dimensions::Xy), + 1 => Ok(Dimensions::Xyz), + 2 => Ok(Dimensions::Xym), + 3 => Ok(Dimensions::Xyzm), + _ => Err(SedonaGeometryError::Invalid(format!( + "Unexpected code: {:?}", + code + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::str::FromStr; + use wkb::writer::{write_geometry, WriteOptions}; + use wkt::Wkt; + + fn make_wkb(wkt_value: &'static str) -> Vec { + let geom = Wkt::::from_str(wkt_value).unwrap(); + let mut buf: Vec = vec![]; + write_geometry(&mut buf, &geom, &WriteOptions::default()).unwrap(); + buf + } + + #[test] + fn geometry_type_id() { + let wkb = make_wkb("POINT (1 2)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + + let wkb = make_wkb("LINESTRING (1 2, 3 4)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::LineString + ); + + let wkb = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Polygon); + + let wkb = make_wkb("MULTIPOINT ((1 2), (3 4))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::MultiPoint + ); + + let wkb = make_wkb("MULTILINESTRING ((1 2, 3 4))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::MultiLineString + ); + + let wkb = make_wkb("MULTIPOLYGON (((0 0, 0 1, 1 0, 0 0)))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::MultiPolygon + ); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + 
GeometryTypeId::GeometryCollection + ); + + // Some cases with z and m dimensions + let wkb = make_wkb("POINT Z (1 2 3)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + + let wkb = make_wkb("LINESTRING Z (1 2 3, 4 5 6)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::LineString + ); + + let wkb = make_wkb("POLYGON M ((0 0 0, 0 1 0, 1 0 0, 0 0 0))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Polygon); + } + + #[test] + fn size() { + let wkb = make_wkb("LINESTRING (1 2, 3 4)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 2); + + let wkb = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0), (1 1, 1 2, 2 1, 1 1))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 2); + + let wkb = make_wkb("MULTIPOINT ((1 2), (3 4))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 2); + + let wkb = make_wkb("MULTILINESTRING ((1 2, 3 4, 5 6), (7 8, 9 10, 11 12))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 2); + + let wkb = make_wkb("MULTIPOLYGON (((0 0, 0 1, 1 0, 0 0)), ((1 1, 1 2, 2 1, 1 1)))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 2); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 1); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (1 2, 3 4), POLYGON ((0 0, 0 1, 1 0, 0 0)))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 3); + } + + #[test] + fn empty_size() { + let wkb = make_wkb("LINESTRING EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 0); + + let wkb = make_wkb("POLYGON EMPTY"); + let header = 
WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 0); + + let wkb = make_wkb("MULTIPOINT EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 0); + + let wkb = make_wkb("MULTILINESTRING EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 0); + + let wkb = make_wkb("MULTIPOLYGON EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 0); + + let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 0); + + let wkb = make_wkb("GEOMETRYCOLLECTION Z EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.size(), 0); + } + + #[test] + fn ewkb() { + use sedona_testing::fixtures::*; + + // Test POINT with SRID 4326 + let header = WkbHeader::try_new(&POINT_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + + // Test POINT Z with SRID 3857 + let header = WkbHeader::try_new(&POINT_Z_WITH_SRID_3857_EWKB).unwrap(); + assert_eq!(header.srid(), 3857); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz); + + // Test POINT M with SRID 4326 + let header = WkbHeader::try_new(&POINT_M_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xym); + + // Test POINT ZM with SRID 4326 + let header = WkbHeader::try_new(&POINT_ZM_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + 
assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm); + + // Test GEOMETRYCOLLECTION with SRID 4326 + let header = WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + assert_eq!(header.size(), 1); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xy); + + // Test GEOMETRYCOLLECTION Z with SRID 4326 + let header = WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_Z_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + assert_eq!(header.size(), 1); + assert_eq!(header.first_xy(), (1.0, 2.0)); + // Outer dimension specified as Xy, but inner dimension is Xyz + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyz); + + // Test GEOMETRYCOLLECTION M with SRID 4326 + let header = WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_M_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + assert_eq!(header.size(), 1); + assert_eq!(header.first_xy(), (1.0, 2.0)); + // Outer dimension specified as Xy, but inner dimension is Xym + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xym); + + // Test GEOMETRYCOLLECTION ZM with SRID 4326 + let header = WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_ZM_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + assert_eq!(header.size(), 1); + 
assert_eq!(header.first_xy(), (1.0, 2.0)); + // Outer dimension specified as Xy, but inner dimension is Xyzm + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm); + } + + #[test] + fn srid_linestring() { + use sedona_testing::fixtures::*; + + let header = WkbHeader::try_new(&LINESTRING_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::LineString + ); + assert_eq!(header.size(), 2); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + } + + #[test] + fn srid_polygon() { + use sedona_testing::fixtures::*; + + let header = WkbHeader::try_new(&POLYGON_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Polygon); + assert_eq!(header.size(), 1); + assert_eq!(header.first_xy(), (0.0, 0.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + } + + #[test] + fn multipoint_with_srid() { + use sedona_testing::fixtures::*; + + let header = WkbHeader::try_new(&MULTIPOINT_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::MultiPoint + ); + assert_eq!(header.size(), 2); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + } + + #[test] + fn srid_empty_geometries_with_srid() { + use sedona_testing::fixtures::*; + + // Test POINT EMPTY with SRID + let header = WkbHeader::try_new(&POINT_EMPTY_WITH_SRID_4326_EWKB).unwrap(); + assert_eq!(header.srid(), 4326); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + + // Test GEOMETRYCOLLECTION EMPTY with SRID + let header = WkbHeader::try_new(&GEOMETRYCOLLECTION_EMPTY_WITH_SRID_4326_EWKB).unwrap(); + 
assert_eq!(header.srid(), 4326); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + assert_eq!(header.size(), 0); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + assert_eq!(header.first_geom_dimensions(), None); + } + + #[test] + fn srid_no_srid_flag() { + // Test that regular WKB (without SRID flag) returns 0 for SRID + let wkb = make_wkb("POINT (1 2)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.srid(), 0); + } + + #[test] + fn first_xy() { + let wkb = make_wkb("POINT (-5 -2)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (-5.0, -2.0)); + + let wkb = make_wkb("LINESTRING (1 2, 3 4)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (1.0, 2.0)); + + let wkb = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (0.0, 0.0)); + + // Another polygon test since that logic is more complicated + let wkb = make_wkb("POLYGON ((1.5 0.5, 1.5 1.5, 1.5 0.5), (0 0, 0 1, 1 0, 0 0))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (1.5, 0.5)); + + let wkb = make_wkb("MULTIPOINT ((1 2), (3 4))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (1.0, 2.0)); + + let wkb = make_wkb("MULTILINESTRING ((3 4, 1 2), (5 6, 7 8))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (3.0, 4.0)); + + let wkb = make_wkb("MULTIPOLYGON (((-1 -1, 0 1, 1 -1, -1 -1)), ((0 0, 0 1, 1 0, 0 0)))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (-1.0, -1.0)); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (1.0, 2.0)); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (1 2, 3 4), POLYGON ((0 0, 0 1, 1 0, 0 0)))"); + let header = 
WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_xy(), (1.0, 2.0)); + } + + #[test] + fn empty_first_xy() { + let wkb = make_wkb("POINT EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + let (x, y) = header.first_xy(); + assert!(x.is_nan()); + assert!(y.is_nan()); + + let wkb = make_wkb("LINESTRING EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + let (x, y) = header.first_xy(); + assert!(x.is_nan()); + assert!(y.is_nan()); + + let wkb = make_wkb("GEOMETRYCOLLECTION Z EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + let (x, y) = header.first_xy(); + assert!(x.is_nan()); + assert!(y.is_nan()); + } + + #[test] + fn empty_geometry_type_id() { + let wkb = make_wkb("POINT EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + + let wkb = make_wkb("LINESTRING EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::LineString + ); + + let wkb = make_wkb("POLYGON EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Polygon); + + let wkb = make_wkb("MULTIPOINT EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::MultiPoint + ); + + let wkb = make_wkb("MULTILINESTRING EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::MultiLineString + ); + + let wkb = make_wkb("MULTIPOLYGON EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::MultiPolygon + ); + + let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + + // z, m cases + let wkb = make_wkb("POINT Z 
EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + + let wkb = make_wkb("POINT M EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point); + + let wkb = make_wkb("LINESTRING ZM EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::LineString + ); + } + + #[test] + fn dimensions() { + let wkb = make_wkb("POINT (1 2)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + + let wkb = make_wkb("POINT Z (1 2 3)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz); + + let wkb = make_wkb("POINT M (1 2 3)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xym); + + let wkb = make_wkb("POINT ZM (1 2 3 4)"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm); + } + + #[test] + fn empty_geometry_dimensions() { + // POINTs + let wkb = make_wkb("POINT EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + + let wkb = make_wkb("POINT Z EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz); + + let wkb = make_wkb("POINT M EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xym); + + let wkb = make_wkb("POINT ZM EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm); + + // GEOMETRYCOLLECTIONs + let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + + let wkb = 
make_wkb("GEOMETRYCOLLECTION Z EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz); + + let wkb = make_wkb("GEOMETRYCOLLECTION M EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xym); + + let wkb = make_wkb("GEOMETRYCOLLECTION ZM EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm); + } + + #[test] + fn first_geom_dimensions() { + // Top-level dimension is xy, while nested geometry is xyz + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT Z (1 2 3))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyz); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT ZM (1 2 3 4))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT M (1 2 3))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xym); + + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT ZM (1 2 3 4))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm); + } + + #[test] + fn empty_geometry_first_geom_dimensions() { + let wkb = make_wkb("POINT EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions(), Some(Dimensions::Xy)); + + let wkb = make_wkb("LINESTRING EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions(), Some(Dimensions::Xy)); + + let wkb = make_wkb("POLYGON Z EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions(), Some(Dimensions::Xyz)); + + // Empty collections should return None + let wkb = 
make_wkb("MULTIPOINT EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions(), None); + + let wkb = make_wkb("MULTILINESTRING Z EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions(), None); + + let wkb = make_wkb("MULTIPOLYGON M EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions(), None); + + let wkb = make_wkb("GEOMETRYCOLLECTION ZM EMPTY"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!(header.first_geom_dimensions(), None); + } + + #[test] + fn incomplete_buffers() { + // Test various incomplete buffer scenarios to ensure proper error handling + + // Empty buffer + let result = WkbHeader::try_new(&[]); + assert!(result.is_err()); + + // Test truncation of a simple POINT + let wkb = make_wkb("POINT (1 2)"); + for i in 1..wkb.len() - 1 { + assert!( + WkbHeader::try_new(&wkb[0..i]).is_err(), + "0..{} unexpectedly succeeded", + i + ); + } + + // Test truncation of a POINT ZM + // Iterate through all i that is less than the number needed for the first_xy coord + // 1 byte_order + 4 geometry type + 8 x + 8 y + let last_i = 1 + 4 + 8 + 8; + let wkb = make_wkb("POINT ZM (1 2 3 4)"); + for i in 1..last_i { + assert!( + WkbHeader::try_new(&wkb[0..i]).is_err(), + "0..{} unexpectedly succeeded", + i + ); + } + + // Test truncation of a GEOMETRYCOLLECTION with nested geometries + // Iterate through all i that is less than the number needed for the first_xy coord + // 1 byte_order + 4 geometry type + 4 size + 8 x + 8 y + let last_i = 1 + 4 + 4 + 8 + 8; + let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (1 2, 3 4))"); + for i in 1..last_i { + assert!( + WkbHeader::try_new(&wkb[0..i]).is_err(), + "0..{} unexpectedly succeeded", + i + ); + } + } + + #[test] + fn incomplete_ewkb_buffers() { + use sedona_testing::fixtures::*; + // Test incomplete EWKB buffers + + // 1 byte_order + 4 geometry type + 4 srid + 
8 x + 8 y + let wkb = POINT_WITH_SRID_4326_EWKB; + let last_i = 1 + 4 + 4 + 8 + 8; + for i in 1..last_i { + assert!( + WkbHeader::try_new(&wkb[0..i]).is_err(), + "0..{} unexpectedly succeeded", + i + ); + } + + // 1 byte_order + 4 geometry type + 4 srid + 4 size + 1 byte_order + 4 geometry type + 8 x + 8 y + let last_i = 1 + 4 + 4 + 4 + 1 + 4 + 8 + 8; + let wkb = MULTIPOINT_WITH_SRID_4326_EWKB; + for i in 1..last_i { + assert!( + WkbHeader::try_new(&wkb[0..i]).is_err(), + "0..{} unexpectedly succeeded", + i + ); + } + + // 1 byte_order + 4 geometry type + 4 srid + 4 size + 1 byte_order + 4 geometry type + 8 x + 8 y + let last_i = 1 + 4 + 4 + 4 + 1 + 4 + 8 + 8; + let wkb = GEOMETRYCOLLECTION_POINT_ZM_WITH_SRID_4326_EWKB; + for i in 1..last_i { + assert!( + WkbHeader::try_new(&wkb[0..i]).is_err(), + "0..{} unexpectedly succeeded", + i + ); + } + } + + #[test] + fn invalid_byte_order() { + // Test invalid byte order values + let result = WkbHeader::try_new(&[0x02, 0x01, 0x00, 0x00, 0x00]); + assert!(result.is_err()); + + let result = WkbHeader::try_new(&[0xff, 0x01, 0x00, 0x00, 0x00]); + assert!(result.is_err()); + } + + #[test] + fn nested_geometry_collections() { + let wkb = make_wkb("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (1 2)))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + assert_eq!(header.size(), 1); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xy); + + let wkb = make_wkb("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT ZM (1 2 3 4)))"); + let header = WkbHeader::try_new(&wkb).unwrap(); + assert_eq!( + header.geometry_type_id().unwrap(), + GeometryTypeId::GeometryCollection + ); + assert_eq!(header.size(), 1); + assert_eq!(header.first_xy(), (1.0, 2.0)); + assert_eq!(header.dimensions().unwrap(), Dimensions::Xy); + 
assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm); + } +} diff --git a/rust/sedona-testing/src/fixtures.rs b/rust/sedona-testing/src/fixtures.rs index 13011bf8..b4c1bb15 100644 --- a/rust/sedona-testing/src/fixtures.rs +++ b/rust/sedona-testing/src/fixtures.rs @@ -28,6 +28,192 @@ pub const MULTIPOINT_WITH_EMPTY_CHILD_WKB: [u8; 30] = [ 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, ]; +/// A well-known binary blob of MULTIPOINT ((1 2 3)) where outer dimension is specified for xy +/// while inner point's dimension is actually xyz +pub const MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB: [u8; 38] = [ + 0x01, // byte-order + 0x04, 0x00, 0x00, 0x00, // multipoint with xy-dimension specified + 0x01, 0x00, 0x00, 0x00, // 1 point + // nested point geom + 0x01, // byte-order + 0xe9, 0x03, 0x00, 0x00, // point with xyz-dimension specified + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate of point + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate of point + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z-coordinate of point +]; + +/// EWKB for POINT (1 2) with SRID 4326 +/// Little endian, geometry type 1 (POINT) with SRID flag (0x20000000) +pub const POINT_WITH_SRID_4326_EWKB: [u8; 25] = [ + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x20, // geometry type 1 (POINT) with SRID flag (0x20000000) + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate 2.0 +]; + +/// EWKB for POINT Z (1 2 3) with SRID 3857 +/// Little endian, geometry type 1001 (POINT Z) with SRID flag +pub const POINT_Z_WITH_SRID_3857_EWKB: [u8; 33] = [ + 0x01, // byte-order + 0x01, 0x00, 0x00, 0xa0, // geometry type + // 0xe9, 0x03, 0x00, 0x20, // geometry type 1001 (POINT Z) with SRID flag + 0x11, 0x0f, 0x00, 0x00, // SRID 3857 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate 1.0 + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate 2.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z-coordinate 3.0 +]; + +pub const POINT_M_WITH_SRID_4326_EWKB: [u8; 33] = [ + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x60, // geometry type + 0xe6, 0x10, 0x00, 0x00, // SRID + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate 2.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // m-coordinate 3.0 +]; + +/// EWKB for POINT ZM (1 2 3 4) with SRID 4326 +pub const POINT_ZM_WITH_SRID_4326_EWKB: [u8; 41] = [ + 0x01, // byte-order + 0x01, 0x00, 0x00, 0xe0, // geometry type + // 0xb9, 0x0b, 0x00, 0x20, // geometry type 3001 (POINT ZM) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z = 3.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // m = 4.0 +]; + +/// EWKB for LINESTRING (1 2, 3 4) with SRID 4326 +/// Little endian, geometry type 2 (LINESTRING) with SRID flag +pub const LINESTRING_WITH_SRID_4326_EWKB: [u8; 45] = [ + 0x01, // byte-order + 0x02, 0x00, 0x00, 0x20, // geometry type + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x02, 0x00, 0x00, 0x00, // number of points (2) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x1 = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y1 = 2.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // x2 = 3.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // y2 = 4.0 +]; + +/// EWKB for POLYGON ((0 0, 0 1, 1 0, 0 0)) with SRID 4326 +/// Little endian, geometry type 3 (POLYGON) with SRID flag +pub const POLYGON_WITH_SRID_4326_EWKB: [u8; 81] = [ + 0x01, // byte-order + 0x03, 0x00, 0x00, 0x20, // geometry type 3 (POLYGON) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x01, 0x00, 0x00, 0x00, // number of rings (1) + 0x04, 0x00, 0x00, 0x00, 
// number of points in exterior ring (4) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x1 = 0.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y1 = 0.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x2 = 0.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // y2 = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x3 = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y3 = 0.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x4 = 0.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y4 = 0.0 +]; + +/// EWKB for MULTIPOINT ((1 2), (3 4)) with SRID 4326 +/// Little endian, geometry type 4 (MULTIPOINT) with SRID flag +pub const MULTIPOINT_WITH_SRID_4326_EWKB: [u8; 55] = [ + 0x01, // byte-order + 0x04, 0x00, 0x00, 0x20, // geometry type 4 (MULTIPOINT) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x02, 0x00, 0x00, 0x00, // number of points (2) + // First point + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x00, // geometry type 1 (POINT) - no SRID flag + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x1 = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y1 = 2.0 + // Second point + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x00, // geometry type 1 (POINT) - no SRID flag + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // x2 = 3.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // y2 = 4.0 +]; + +/// EWKB for GEOMETRYCOLLECTION (POINT (1 2)) with SRID 4326 +/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag +pub const GEOMETRYCOLLECTION_POINT_WITH_SRID_4326_EWKB: [u8; 34] = [ + 0x01, // byte-order + 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x01, 0x00, 0x00, 0x00, // number of geometries (1) + // Nested POINT + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x00, // geometry type 1 (POINT) - no SRID flag + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 
2.0 +]; + +/// EWKB for GEOMETRYCOLLECTION (POINT Z (1 2 3)) with SRID 4326 +/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag; nested POINT Z (Z flag set) +pub const GEOMETRYCOLLECTION_POINT_Z_WITH_SRID_4326_EWKB: [u8; 42] = [ + 0x01, // byte-order + 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x01, 0x00, 0x00, 0x00, // number of geometries (1) + // Nested POINT Z + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x80, // geometry type 1 (POINT) with Z flag + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z = 3.0 +]; + +/// EWKB for GEOMETRYCOLLECTION (POINT M (1 2 4)) with SRID 4326 +/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag; nested POINT M (M flag set) +pub const GEOMETRYCOLLECTION_POINT_M_WITH_SRID_4326_EWKB: [u8; 42] = [ + 0x01, // byte-order + 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x01, 0x00, 0x00, 0x00, // number of geometries (1) + // Nested POINT M + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x40, // geometry type 1 (POINT) with M flag + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // m = 4.0 +]; + +/// EWKB for GEOMETRYCOLLECTION (POINT ZM (1 2 3 4)) with SRID 4326 +/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag; nested POINT ZM (Z and M flags set) +pub const GEOMETRYCOLLECTION_POINT_ZM_WITH_SRID_4326_EWKB: [u8; 50] = [ + 0x01, // byte-order + 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x01, 0x00, 0x00, 0x00, // number of geometries (1) + // Nested POINT ZM + 0x01, // byte-order + 0x01, 0x00, 0x00, 0xc0, // geometry 
type 1 (POINT) with Z and M flags + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z = 3.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // m = 4.0 +]; + +/// EWKB for POINT EMPTY with SRID 4326 +/// Little endian, geometry type 1 (POINT) with SRID flag +pub const POINT_EMPTY_WITH_SRID_4326_EWKB: [u8; 25] = [ + 0x01, // byte-order + 0x01, 0x00, 0x00, 0x20, // geometry type 1 (POINT) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, // x = NaN + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, // y = NaN +]; + +/// EWKB for GEOMETRYCOLLECTION EMPTY with SRID 4326 +/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag +pub const GEOMETRYCOLLECTION_EMPTY_WITH_SRID_4326_EWKB: [u8; 13] = [ + 0x01, // byte-order + 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID flag + 0xe6, 0x10, 0x00, 0x00, // SRID 4326 + 0x00, 0x00, 0x00, 0x00, // number of geometries (0) +]; + pub fn louisiana() -> LineString where T: WktFloat + Default + FromStr,