Skip to content
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 39 additions & 5 deletions c/sedona-geos/src/st_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ use std::sync::Arc;

use arrow_array::builder::BinaryBuilder;
use arrow_schema::DataType;
use datafusion_common::error::Result;
use datafusion_common::DataFusionError;
use datafusion_common::{error::Result, exec_err};
use datafusion_expr::ColumnarValue;
use geos::{BufferParams, Geom};
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
Expand Down Expand Up @@ -71,10 +71,7 @@ impl SedonaScalarKernel for STBuffer {
distance = Some(f64::try_from(scalar_arg.clone())?);
}
} else {
return Err(DataFusionError::Execution(format!(
"Invalid distance: {:?}",
args[1]
)));
return exec_err!("Invalid distance: {:?}", args[1]);
}

let executor = GeosExecutor::new(arg_types, args);
Expand Down Expand Up @@ -163,4 +160,41 @@ mod tests {
let envelope_result = envelope_tester.invoke_array(buffer_result).unwrap();
assert_array_equal(&envelope_result, &expected_envelope);
}

#[test]
fn test_empty_geometry() {
let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl());
let tester = ScalarUdfTester::new(
udf.into(),
vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)],
);

let input_wkt = vec![
Some("POINT EMPTY"),
Some("LINESTRING EMPTY"),
Some("POLYGON EMPTY"),
Some("MULTIPOINT EMPTY"),
Some("MULTILINESTRING EMPTY"),
Some("MULTIPOLYGON EMPTY"),
Some("GEOMETRYCOLLECTION EMPTY"),
];
let input_dist = 2;

let buffer_result = tester
.invoke_wkb_array_scalar(input_wkt, input_dist)
.unwrap();
let expected: ArrayRef = create_array(
&[
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
],
&WKB_GEOMETRY,
);
assert_array_equal(&buffer_result, &expected);
}
}
25 changes: 24 additions & 1 deletion python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,30 @@ def test_st_buffer(eng, geom, dist, expected_area):
eng.assert_query_result(
f"SELECT ST_Area(ST_Buffer({geom_or_null(geom)}, {val_or_null(dist)}))",
expected_area,
numeric_epsilon=1e-9,
# geos passes with 1e-9, but geo needs it as high as 1e-3
numeric_epsilon=1e-3,
)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "dist", "expected"),
[
("POINT EMPTY", 2.0, "POLYGON EMPTY"),
("LINESTRING EMPTY", 1.5, "POLYGON EMPTY"),
("POLYGON EMPTY", 0.5, "POLYGON EMPTY"),
("MULTIPOINT EMPTY", 1.0, "POLYGON EMPTY"),
("MULTILINESTRING EMPTY", 1.0, "POLYGON EMPTY"),
("MULTIPOLYGON EMPTY", 1.0, "POLYGON EMPTY"),
("GEOMETRYCOLLECTION EMPTY", 1.0, "POLYGON EMPTY"),
],
)
def test_st_buffer_empty(eng, geom, dist, expected):
eng = SedonaDB.create_or_skip()

eng.assert_query_result(
f"SELECT ST_Buffer({geom_or_null(geom)}, {val_or_null(dist)})",
expected,
)


Expand Down
15 changes: 15 additions & 0 deletions rust/sedona-geo/benches/geo-functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,21 @@ fn criterion_benchmark(c: &mut Criterion) {
benchmark::scalar(c, &f, "geo", "st_area", Polygon(10));
benchmark::scalar(c, &f, "geo", "st_area", Polygon(500));

benchmark::scalar(
c,
&f,
"geo",
"st_buffer",
ArrayScalar(Polygon(10), Float64(1.0, 10.0)),
);
benchmark::scalar(
c,
&f,
"geo",
"st_buffer",
ArrayScalar(Polygon(500), Float64(1.0, 10.0)),
);

benchmark::scalar(c, &f, "geo", "st_perimeter", Polygon(10));
benchmark::scalar(c, &f, "geo", "st_perimeter", Polygon(500));

Expand Down
1 change: 1 addition & 0 deletions rust/sedona-geo/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
pub mod centroid;
pub mod register;
mod st_area;
mod st_buffer;
mod st_centroid;
mod st_distance;
mod st_dwithin;
Expand Down
7 changes: 4 additions & 3 deletions rust/sedona-geo/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@ use crate::st_intersection_aggr::st_intersection_aggr_impl;
use crate::st_line_interpolate_point::st_line_interpolate_point_impl;
use crate::st_union_aggr::st_union_aggr_impl;
use crate::{
st_area::st_area_impl, st_centroid::st_centroid_impl, st_distance::st_distance_impl,
st_dwithin::st_dwithin_impl, st_intersects::st_intersects_impl, st_length::st_length_impl,
st_perimeter::st_perimeter_impl,
st_area::st_area_impl, st_buffer::st_buffer_impl, st_centroid::st_centroid_impl,
st_distance::st_distance_impl, st_dwithin::st_dwithin_impl, st_intersects::st_intersects_impl,
st_length::st_length_impl, st_perimeter::st_perimeter_impl,
};

pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
vec![
("st_intersects", st_intersects_impl()),
("st_area", st_area_impl()),
("st_buffer", st_buffer_impl()),
("st_centroid", st_centroid_impl()),
("st_distance", st_distance_impl()),
("st_dwithin", st_dwithin_impl()),
Expand Down
224 changes: 224 additions & 0 deletions rust/sedona-geo/src/st_buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::sync::Arc;

use arrow_array::builder::BinaryBuilder;
use arrow_schema::DataType;
use datafusion_common::{error::Result, exec_err, DataFusionError};
use datafusion_expr::ColumnarValue;
use geo::algorithm::buffer::{Buffer, BufferStyle};
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
use sedona_functions::executor::WkbExecutor;
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
use sedona_schema::{
datatypes::{SedonaType, WKB_GEOMETRY},
matchers::ArgMatcher,
};
use wkb::{
writer::{write_geometry, WriteOptions},
Endianness,
};

/// ST_Centroid() implementation using centroid extraction
pub fn st_buffer_impl() -> ScalarKernelRef {
Arc::new(STBuffer {})
}

#[derive(Debug)]
struct STBuffer {}

impl SedonaScalarKernel for STBuffer {
fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
let matcher = ArgMatcher::new(
vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()],
WKB_GEOMETRY,
);

matcher.match_args(args)
}

fn invoke_batch(
&self,
arg_types: &[SedonaType],
args: &[ColumnarValue],
) -> Result<ColumnarValue> {
// Extract the constant scalar value before looping over the input geometries
let params: Option<BufferStyle<f64>>;
let arg1 = args[1].cast_to(&DataType::Float64, None)?;
if let ColumnarValue::Scalar(scalar_arg) = &arg1 {
if scalar_arg.is_null() {
params = None;
} else {
let distance = f64::try_from(scalar_arg.clone())?;
params = Some(BufferStyle::new(distance));
}
} else {
return exec_err!("Invalid distance: {:?}", args[1]);
}

// let executor = GeoTypesExecutor::new(arg_types, args);
let executor = WkbExecutor::new(arg_types, args);
let mut builder = BinaryBuilder::with_capacity(
executor.num_iterations(),
WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
);
executor.execute_wkb_void(|maybe_wkb| {
match (maybe_wkb, params.clone()) {
(Some(wkb), Some(params)) => {
invoke_scalar(&wkb, params, &mut builder)?;
builder.append_value([]);
}
_ => builder.append_null(),
}

Ok(())
})?;

executor.finish(Arc::new(builder.finish()))
}
}

use wkb::reader::Wkb;
fn invoke_scalar(
wkb: &Wkb,
params: BufferStyle<f64>,
writer: &mut impl std::io::Write,
) -> Result<()> {
use crate::to_geo::item_to_geometry;
use geo_types::Polygon;
use sedona_geometry::is_empty::is_geometry_empty;

// PostGIS returns POLYGON EMPTY for all empty geometries
let is_empty = is_geometry_empty(wkb).map_err(|e| DataFusionError::External(Box::new(e)))?;
if is_empty {
Comment on lines +105 to +107
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was running into an error here with POINT EMPTY since geo apparently doesn't support it.

not_impl_err!(
"geo kernel implementation on {}, {}, or {} not supported",
"MULTIPOINT with EMPTY child",
"POINT EMPTY",
"GEOMETRYCOLLECTION"

My current workaround is to use WKBExecutor here instead of GeoTypesExecutor and use our native is_geometry_empty check.

Wondering if there's a better way we can handle empty points in our item_to_geometry() method. The docstring for geo's try_to_point() function we are in there says returning None represents an empty point. Though returning None is not a safe option, so I can't think of anything better than this workaround atm.

let empty_polygon = Polygon::<f64>::empty();
write_geometry(
writer,
&empty_polygon,
&WriteOptions {
endianness: Endianness::LittleEndian,
},
)
.map_err(|e| DataFusionError::External(Box::new(e)))?;
return Ok(());
}

let geom = item_to_geometry(wkb)?;

let buffer = geom.buffer_with_style(params);

// Convert type to geo::Geometry
let geometry = geo::Geometry::MultiPolygon(buffer);

write_geometry(
writer,
&geometry,
&WriteOptions {
endianness: Endianness::LittleEndian,
},
)
.map_err(|e| DataFusionError::External(Box::new(e)))?;
Ok(())
}

#[cfg(test)]
mod tests {
use arrow_array::ArrayRef;
use datafusion_common::ScalarValue;
use rstest::rstest;
use sedona_expr::scalar_udf::SedonaScalarUDF;
use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
use sedona_testing::compare::assert_array_equal;
use sedona_testing::create::create_array;
use sedona_testing::testers::ScalarUdfTester;

use super::*;

#[rstest]
fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl());
let tester = ScalarUdfTester::new(
udf.into(),
vec![sedona_type.clone(), SedonaType::Arrow(DataType::Float64)],
);
tester.assert_return_type(WKB_GEOMETRY);

// Check the envelope of the buffers
let envelope_udf = sedona_functions::st_envelope::st_envelope_udf();
let envelope_tester = ScalarUdfTester::new(envelope_udf.into(), vec![WKB_GEOMETRY]);

let buffer_result = tester.invoke_scalar_scalar("POINT (1 2)", 2.0).unwrap();
let envelope_result = envelope_tester.invoke_scalar(buffer_result).unwrap();
let expected_envelope = "POLYGON((-1 0, -1 4, 3 4, 3 0, -1 0))";
tester.assert_scalar_result_equals(envelope_result, expected_envelope);

let result = tester
.invoke_scalar_scalar(ScalarValue::Null, ScalarValue::Null)
.unwrap();
assert!(result.is_null());

let input_wkt = vec![None, Some("POINT (0 0)")];
let input_dist = 1;
let expected_envelope: ArrayRef = create_array(
&[None, Some("POLYGON((-1 -1, -1 1, 1 1, 1 -1, -1 -1))")],
&WKB_GEOMETRY,
);
let buffer_result = tester
.invoke_wkb_array_scalar(input_wkt, input_dist)
.unwrap();
let envelope_result = envelope_tester.invoke_array(buffer_result).unwrap();
assert_array_equal(&envelope_result, &expected_envelope);
}

#[test]
fn test_empty_geometry() {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test suite here is the same as the geos buffer test suite plus this new function, which I also copied over to geos st_buffer to be sure it works the same.

let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl());
let tester = ScalarUdfTester::new(
udf.into(),
vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)],
);

let input_wkt = vec![
Some("POINT EMPTY"),
Some("LINESTRING EMPTY"),
Some("POLYGON EMPTY"),
Some("MULTIPOINT EMPTY"),
Some("MULTILINESTRING EMPTY"),
Some("MULTIPOLYGON EMPTY"),
Some("GEOMETRYCOLLECTION EMPTY"),
];
let input_dist = 2;

let buffer_result = tester
.invoke_wkb_array_scalar(input_wkt, input_dist)
.unwrap();
let expected: ArrayRef = create_array(
&[
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
Some("POLYGON EMPTY"),
],
&WKB_GEOMETRY,
);
assert_array_equal(&buffer_result, &expected);
}
}
6 changes: 4 additions & 2 deletions rust/sedona-geo/src/st_centroid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ impl SedonaScalarKernel for STCentroid {
args: &[ColumnarValue],
) -> Result<ColumnarValue> {
let executor = WkbExecutor::new(arg_types, args);
let mut builder =
BinaryBuilder::with_capacity(executor.num_iterations(), WKB_MIN_PROBABLE_BYTES);
let mut builder = BinaryBuilder::with_capacity(
executor.num_iterations(),
WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
);
executor.execute_wkb_void(|maybe_wkb| {
match maybe_wkb {
Some(wkb) => {
Expand Down
Loading