Skip to content

Commit

Permalink
Split out WKB crate (redux) (#856)
Browse files Browse the repository at this point in the history
TODO:

- Fix handling of "maybe_multi" that was removed. Add `from_geometries`
to capacity counters and to geometry builders.
- Restore tests from deleted reading files

Closes #825, closes #843
  • Loading branch information
kylebarron authored Nov 13, 2024
1 parent 157a46c commit 08b448b
Show file tree
Hide file tree
Showing 47 changed files with 460 additions and 2,582 deletions.
14 changes: 12 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 12 additions & 2 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions rust/geoarrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ arrow-ipc = "53"
arrow-schema = "53"
async-stream = { version = "0.3", optional = true }
async-trait = { version = "0.1", optional = true }
byteorder = "1"
bytes = { version = "1.5.0", optional = true }
chrono = { version = "0.4" }
dbase = "0.5.0"
Expand All @@ -72,7 +71,6 @@ half = { version = "2.4.1" }
http-range-client = { version = "0.8", optional = true }
indexmap = { version = "2" }
lexical-core = { version = "0.8.5" }
num_enum = "0.7"
object_store = { version = "0.11", optional = true }
parquet = { version = "53", optional = true, default-features = false, features = [
"arrow",
Expand All @@ -99,6 +97,7 @@ thiserror = "1"
tokio = { version = "1", default-features = false, optional = true }
# wkt = "0.11"
wkt = { git = "https://github.com/georust/wkt", branch = "kyle/geo-traits-writer" }
wkb = { git = "https://github.com/kylebarron/wkb", rev = "7d58a2327fe21cf250dab5ac6860b6cf0fddb838" }


[dev-dependencies]
Expand Down
50 changes: 26 additions & 24 deletions rust/geoarrow/src/algorithm/native/type_id.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::array::*;
use crate::trait_::ArrayAccessor;
use arrow::array::Int16Builder;
use arrow_array::{Int16Array, OffsetSizeTrait};
use arrow_array::Int16Array;
use std::collections::HashSet;

/// Calculation of the geometry types within a GeometryArray
Expand Down Expand Up @@ -115,26 +115,28 @@ impl TypeIds for MixedGeometryArray<2> {
}
}

impl<O: OffsetSizeTrait> TypeIds for WKBArray<O> {
    /// Build an `Int16Array` holding the WKB type id of every element.
    ///
    /// Null elements of the array produce null entries in the output.
    ///
    /// # Panics
    ///
    /// Panics if `wkb_type()` fails for an element or if the resulting id
    /// does not fit in an `i16`.
    fn get_type_ids(&self) -> Int16Array {
        let mut builder = Int16Builder::with_capacity(self.len());
        for maybe_wkb in self.iter() {
            let maybe_id = maybe_wkb.map(|wkb| {
                let raw = u32::from(wkb.wkb_type().unwrap());
                let narrowed: i16 = raw.try_into().unwrap();
                narrowed
            });
            builder.append_option(maybe_id);
        }
        builder.finish()
    }

    /// Collect the distinct WKB type ids of the non-null elements.
    ///
    /// # Panics
    ///
    /// Panics if `wkb_type()` fails for an element or if the resulting id
    /// does not fit in an `i16`.
    fn get_unique_type_ids(&self) -> HashSet<i16> {
        self.iter()
            .flatten()
            .map(|wkb| {
                let raw = u32::from(wkb.wkb_type().unwrap());
                let narrowed: i16 = raw.try_into().unwrap();
                narrowed
            })
            .collect()
    }
}
// Impl removed when `wkb` was refactored into a standalone crate.
//
// impl<O: OffsetSizeTrait> TypeIds for WKBArray<O> {
// fn get_type_ids(&self) -> Int16Array {
// let mut output_array = Int16Builder::with_capacity(self.len());
// self.iter().for_each(|maybe_wkb| {
// output_array.append_option(maybe_wkb.map(|wkb| {
// let type_id = u32::from(wkb.wkb_type().unwrap());
// type_id.try_into().unwrap()
// }))
// });

// output_array.finish()
// }

// fn get_unique_type_ids(&self) -> HashSet<i16> {
// let mut values = HashSet::new();
// self.iter().flatten().for_each(|wkb| {
// let type_id = u32::from(wkb.wkb_type().unwrap());
// values.insert(type_id.try_into().unwrap());
// });

// values
// }
// }
7 changes: 4 additions & 3 deletions rust/geoarrow/src/array/binary/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,10 @@ impl<O: OffsetSizeTrait> WKBArray<O> {
/// Infer the minimal NativeType that this WKBArray can be cast to.
#[allow(dead_code)]
// TODO: is this obsolete with new from_wkb approach that uses downcasting?
pub(crate) fn infer_geo_data_type(&self, coord_type: CoordType) -> Result<NativeType> {
use crate::io::wkb::reader::r#type::infer_geometry_type;
infer_geometry_type(self.iter().flatten(), coord_type)
pub(crate) fn infer_geo_data_type(&self, _coord_type: CoordType) -> Result<NativeType> {
todo!()
// use crate::io::wkb::reader::r#type::infer_geometry_type;
// infer_geometry_type(self.iter().flatten(), coord_type)
}

/// The lengths of each buffer contained in this array.
Expand Down
28 changes: 15 additions & 13 deletions rust/geoarrow/src/array/binary/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@ use std::sync::Arc;
use crate::array::binary::WKBCapacity;
use crate::array::metadata::ArrayMetadata;
use crate::error::{GeoArrowError, Result};
use crate::io::wkb::writer::{
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
write_geometry_collection_as_wkb, write_line_string_as_wkb, write_multi_line_string_as_wkb,
write_multi_point_as_wkb, write_multi_polygon_as_wkb, write_point_as_wkb, write_polygon_as_wkb,
};
use arrow_array::builder::GenericBinaryBuilder;
use arrow_array::OffsetSizeTrait;
use geo_traits::{
GeometryCollectionTrait, GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait,
MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait,
};
use wkb::writer::{
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
write_geometry_collection, write_line_string, write_multi_line_string, write_multi_point,
write_multi_polygon, write_point, write_polygon,
};
use wkb::Endianness;

use super::array::WKBArray;

Expand Down Expand Up @@ -82,7 +83,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
if let Some(geom) = geom {
// TODO: figure out how to write directly to the underlying vec without a copy
let mut buf = Vec::with_capacity(point_wkb_size(geom.dim()));
write_point_as_wkb(&mut buf, geom).unwrap();
write_point(&mut buf, geom, Endianness::LittleEndian).unwrap();
self.0.append_value(&buf)
} else {
self.0.append_null();
Expand All @@ -95,7 +96,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
if let Some(geom) = geom {
// TODO: figure out how to write directly to the underlying vec without a copy
let mut buf = Vec::with_capacity(line_string_wkb_size(geom));
write_line_string_as_wkb(&mut buf, geom).unwrap();
write_line_string(&mut buf, geom, Endianness::LittleEndian).unwrap();
self.0.append_value(&buf)
} else {
self.0.append_null()
Expand All @@ -108,7 +109,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
if let Some(geom) = geom {
// TODO: figure out how to write directly to the underlying vec without a copy
let mut buf = Vec::with_capacity(polygon_wkb_size(geom));
write_polygon_as_wkb(&mut buf, geom).unwrap();
write_polygon(&mut buf, geom, Endianness::LittleEndian).unwrap();
self.0.append_value(&buf)
} else {
self.0.append_null()
Expand All @@ -121,7 +122,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
if let Some(geom) = geom {
// TODO: figure out how to write directly to the underlying vec without a copy
let mut buf = Vec::with_capacity(multi_point_wkb_size(geom));
write_multi_point_as_wkb(&mut buf, geom).unwrap();
write_multi_point(&mut buf, geom, Endianness::LittleEndian).unwrap();
self.0.append_value(&buf)
} else {
self.0.append_null()
Expand All @@ -134,7 +135,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
if let Some(geom) = geom {
// TODO: figure out how to write directly to the underlying vec without a copy
let mut buf = Vec::with_capacity(multi_line_string_wkb_size(geom));
write_multi_line_string_as_wkb(&mut buf, geom).unwrap();
write_multi_line_string(&mut buf, geom, Endianness::LittleEndian).unwrap();
self.0.append_value(&buf)
} else {
self.0.append_null()
Expand All @@ -147,7 +148,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
if let Some(geom) = geom {
// TODO: figure out how to write directly to the underlying vec without a copy
let mut buf = Vec::with_capacity(multi_polygon_wkb_size(geom));
write_multi_polygon_as_wkb(&mut buf, geom).unwrap();
write_multi_polygon(&mut buf, geom, Endianness::LittleEndian).unwrap();
self.0.append_value(&buf)
} else {
self.0.append_null()
Expand All @@ -159,6 +160,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
pub fn push_geometry(&mut self, geom: Option<&impl GeometryTrait<T = f64>>) {
use GeometryType::*;

// TODO: call wkb::write_geometry directly
if let Some(geom) = geom {
match geom.as_type() {
Point(point) => self.push_point(Some(point)),
Expand Down Expand Up @@ -188,7 +190,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
if let Some(geom) = geom {
// TODO: figure out how to write directly to the underlying vec without a copy
let mut buf = Vec::with_capacity(geometry_collection_wkb_size(geom));
write_geometry_collection_as_wkb(&mut buf, geom).unwrap();
write_geometry_collection(&mut buf, geom, Endianness::LittleEndian).unwrap();
self.0.append_value(&buf)
} else {
self.0.append_null()
Expand Down
8 changes: 4 additions & 4 deletions rust/geoarrow/src/array/binary/capacity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ use std::ops::Add;

use arrow_array::OffsetSizeTrait;

use crate::io::wkb::writer::{
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
};
use geo_traits::{
GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait,
MultiPolygonTrait, PointTrait, PolygonTrait,
};
use wkb::writer::{
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
};

/// A counter for the buffer sizes of a [`WKBArray`][crate::array::WKBArray].
///
Expand Down
7 changes: 3 additions & 4 deletions rust/geoarrow/src/array/geometrycollection/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use crate::array::mixed::builder::DEFAULT_PREFER_MULTI;
use crate::array::offset_builder::OffsetsBuilder;
use crate::array::{CoordType, GeometryCollectionArray, MixedGeometryBuilder, WKBArray};
use crate::error::{GeoArrowError, Result};
use crate::io::wkb::reader::WKBGeometry;
use crate::scalar::WKB;
use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow};
use geo_traits::{
Expand Down Expand Up @@ -369,10 +368,10 @@ impl<'a, const D: usize> GeometryCollectionBuilder<D> {
metadata: Arc<ArrayMetadata>,
prefer_multi: bool,
) -> Result<Self> {
let wkb_objects2: Vec<Option<WKBGeometry>> = wkb_objects
let wkb_objects2 = wkb_objects
.iter()
.map(|maybe_wkb| maybe_wkb.as_ref().map(|wkb| wkb.to_wkb_object()))
.collect();
.map(|maybe_wkb| maybe_wkb.as_ref().map(|wkb| wkb.parse()).transpose())
.collect::<Result<Vec<_>>>()?;
Self::from_nullable_geometries(&wkb_objects2, coord_type, metadata, prefer_multi)
}
}
Expand Down
37 changes: 24 additions & 13 deletions rust/geoarrow/src/array/linestring/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use crate::array::{
MultiPointBuilder, SeparatedCoordBufferBuilder, WKBArray,
};
use crate::error::{GeoArrowError, Result};
use crate::io::wkb::reader::WKBLineString;
use crate::scalar::WKB;
use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow};
use arrow_array::{Array, GenericListArray, OffsetSizeTrait};
Expand Down Expand Up @@ -251,6 +250,14 @@ impl<const D: usize> LineStringBuilder<D> {
.unwrap();
}

/// Push every geometry yielded by `geoms` onto this builder, in iteration order.
///
/// Each item is forwarded to `push_geometry`.
///
/// # Errors
///
/// Stops at the first item for which `push_geometry` fails and returns that
/// error; no rollback of earlier pushes is attempted here.
pub fn extend_from_geometry_iter<'a>(
    &mut self,
    geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
) -> Result<()> {
    for maybe_geom in geoms {
        self.push_geometry(maybe_geom)?;
    }
    Ok(())
}

/// Push a raw coordinate to the underlying coordinate array.
///
/// # Safety
Expand Down Expand Up @@ -282,24 +289,28 @@ impl<const D: usize> LineStringBuilder<D> {
Ok(())
}

/// Construct a builder from a slice of optional geometries.
///
/// The slice is walked twice: first to compute a `LineStringCapacity` for
/// the builder, then to push each geometry into it. A `coord_type` of `None`
/// falls back to `CoordType`'s default.
///
/// # Errors
///
/// Returns an error if the capacity computation fails or if pushing any of
/// the geometries fails.
pub fn from_nullable_geometries(
    geoms: &[Option<impl GeometryTrait<T = f64>>],
    coord_type: Option<CoordType>,
    metadata: Arc<ArrayMetadata>,
) -> Result<Self> {
    // First pass: size the buffers up front.
    let capacity = LineStringCapacity::from_geometries(geoms.iter().map(Option::as_ref))?;
    let resolved_coord_type = coord_type.unwrap_or_default();
    let mut builder = Self::with_capacity_and_options(capacity, resolved_coord_type, metadata);

    // Second pass: fill the builder.
    builder.extend_from_geometry_iter(geoms.iter().map(Option::as_ref))?;
    Ok(builder)
}

pub(crate) fn from_wkb<W: OffsetSizeTrait>(
wkb_objects: &[Option<WKB<'_, W>>],
coord_type: Option<CoordType>,
metadata: Arc<ArrayMetadata>,
) -> Result<Self> {
let wkb_objects2: Vec<Option<WKBLineString>> = wkb_objects
let wkb_objects2 = wkb_objects
.iter()
.map(|maybe_wkb| {
maybe_wkb
.as_ref()
.map(|wkb| wkb.to_wkb_object().into_line_string())
})
.collect();
Ok(Self::from_nullable_line_strings(
&wkb_objects2,
coord_type,
metadata,
))
.map(|maybe_wkb| maybe_wkb.as_ref().map(|wkb| wkb.parse()).transpose())
.collect::<Result<Vec<_>>>()?;
Self::from_nullable_geometries(&wkb_objects2, coord_type, metadata)
}
}

Expand Down
Loading

0 comments on commit 08b448b

Please sign in to comment.