Skip to content

Commit

Permalink
metric infra
Browse files Browse the repository at this point in the history
  • Loading branch information
Filimoa committed Dec 5, 2024
1 parent f052723 commit b4e3844
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 16 deletions.
30 changes: 16 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# h3-polars

This is a [Polars](https://docs.pola.rs/) extension that adds support for the [H3 discrete global grid system](https://github.com/uber/h3/), so you can index points and geometries to hexagons in SQL.
This is a [Polars](https://docs.pola.rs/) extension that adds support for the [H3 discrete global grid system](https://github.com/uber/h3/), so you can index points and geometries to hexagons directly in Polars. All credit goes to [h3o](https://github.com/HydroniumLabs/h3o) for doing the heavy lifting.

# Get started

Expand All @@ -22,12 +22,14 @@ SELECT h3_cell_to_latlng(586265647244115967);

# Implemented functions

This extension implements the entire [H3 API](https://h3geo.org/docs/api/indexing). The full list of functions is below.
This extension implements most of the [H3 API](https://h3geo.org/docs/api/indexing). The full list of functions is below.

All functions support H3 indexes specified as `UBIGINT` (`uint64`) or `BIGINT` (`int64`),
All functions support H3 indexes specified as `pl.UInt64` or `pl.Int64`,
but the unsigned one is preferred and is returned when the extension can't detect which
one to use. The unsigned and signed APIs are identical. All functions also support
`VARCHAR` H3 index input and output.
`pl.Utf8` H3 index input and output.

We are unable to support the functions that work with geometries.

### Full list of functions

Expand Down Expand Up @@ -73,16 +75,16 @@ Here's the updated table with an additional column, **Supported**, which indicat
| `cells_to_directed_edge` | Convert an origin/destination pair to directed edge ID ||
| `are_neighbor_cells` | True if the two cell IDs are directly adjacent ||
| `directed_edge_to_boundary_wkt` | Convert directed edge ID to linestring WKT ||
| `h3_get_hexagon_area_avg` | Get average area of a hexagon cell at resolution | 🕥|
| `h3_cell_area` | Get the area of a cell ID | 🕥|
| `h3_get_hexagon_edge_length_avg` | Average hexagon edge length at resolution | 🕥|
| `h3_edge_length` | Get the length of a directed edge ID | 🕥|
| `h3_get_num_cells` | Get the number of cells at a resolution | 🕥|
| `h3_get_res0_cells` | Get all resolution 0 cells | 🕥|
| `h3_get_res0_cells_string` | Get all resolution 0 cells (returns VARCHAR) | 🕥|
| `h3_get_pentagons` | Get all pentagons at a resolution | 🕥|
| `h3_get_pentagons_string` | Get all pentagons at a resolution (returns VARCHAR) | 🕥|
| `h3_great_circle_distance` | Compute the great circle distance between two points (haversine) | 🕥|
| `h3_get_hexagon_area_avg` | Get average area of a hexagon cell at resolution | 🚧|
| `h3_cell_area` | Get the area of a cell ID | 🚧|
| `h3_get_hexagon_edge_length_avg` | Average hexagon edge length at resolution | 🚧|
| `h3_edge_length` | Get the length of a directed edge ID | 🚧|
| `h3_get_num_cells` | Get the number of cells at a resolution | 🚧|
| `h3_get_res0_cells` | Get all resolution 0 cells | 🚧|
| `h3_get_res0_cells_string` | Get all resolution 0 cells (returns VARCHAR) | 🚧|
| `h3_get_pentagons` | Get all pentagons at a resolution | 🚧|
| `h3_get_pentagons_string` | Get all pentagons at a resolution (returns VARCHAR) | 🚧|
| `h3_great_circle_distance` | Compute the great circle distance between two points (haversine) | 🚧|
| `cells_to_multi_polygon_wkt` | Convert a set of cells to multipolygon WKT | 🛑 |
| `polygon_wkt_to_cells` | Convert polygon WKT to a set of cells | 🛑 |
| `cell_to_boundary_wkt` | Convert cell ID to cell boundary | 🛑 |
5 changes: 5 additions & 0 deletions h3_polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,8 @@ def directed_edge_to_boundary(edge: IntoExprColumn) -> pl.Expr:
plugin_path=LIB,
function_name="directed_edge_to_boundary",
)


# ===== Metrics ===== #

# get_hexagon_edge_length, get_hexagon_area, great_circle_distance are not implemented
68 changes: 68 additions & 0 deletions src/engine/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use super::utils::parse_cell_indices;
use h3o::{CellIndex, Resolution};
use polars::prelude::*;
use rayon::prelude::*;

const EARTH_RADIUS_KM: f64 = 6371.007180918475;

pub fn cell_area(cell_series: &Series, unit: &str) -> PolarsResult<Series> {
let cells = parse_cell_indices(cell_series)?;

let areas: Float64Chunked = cells
.into_par_iter()
.map(|cell| {
cell.map(|idx| {
let area_rads2 = idx.area_rads2();
match unit {
"rads^2" => area_rads2,
"km^2" => area_rads2 * EARTH_RADIUS_KM * EARTH_RADIUS_KM,
"m^2" => area_rads2 * EARTH_RADIUS_KM * EARTH_RADIUS_KM * 1_000_000.0,
_ => f64::NAN,
}
})
})
.collect();

Ok(areas.into_series())
}

// Cell counting functions
/// Returns a single-element series holding the number of cells that exist at
/// `resolution`.
///
/// # Errors
///
/// Returns a `ComputeError` with the message `"Invalid resolution"` when
/// `resolution` cannot be converted into a `Resolution`.
pub fn get_num_cells(resolution: u8) -> PolarsResult<Series> {
    let res = Resolution::try_from(resolution)
        .map_err(|_| PolarsError::ComputeError("Invalid resolution".into()))?;

    // Use the validated resolution rather than recomputing 2 + 120 * 7^res
    // from the raw `u8`; this also removes the previously unused binding.
    let num_cells: u64 = res.cell_count();
    Ok(Series::new(PlSmallStr::from(""), &[num_cells]))
}

/// Returns a one-row list series whose single entry contains every cell
/// yielded by `CellIndex::base_cells()`, each converted to `u64`.
pub fn get_res0_cells() -> PolarsResult<Series> {
    // Materialise the raw indexes first so the inner series can borrow them.
    let base_ids: Vec<u64> = CellIndex::base_cells().map(u64::from).collect();
    let inner = Series::new(PlSmallStr::from(""), base_ids.as_slice());

    // A single `Some(series)` item becomes a list column with exactly one row.
    let wrapped: ListChunked = std::iter::once(Some(inner)).collect();

    Ok(wrapped.into_series())
}

/// Returns a one-row list series containing the pentagon cells at
/// `resolution`, as `u64` indexes.
///
/// Each pentagonal base cell is mapped to its `center_child` at the requested
/// resolution; if `center_child` yields `None` the base cell itself is used.
///
/// # Errors
///
/// Returns a `ComputeError` with the message `"Invalid resolution"` when
/// `resolution` cannot be converted into a `Resolution`.
pub fn get_pentagons(resolution: u8) -> PolarsResult<Series> {
    let res = match Resolution::try_from(resolution) {
        Ok(valid) => valid,
        Err(_) => return Err(PolarsError::ComputeError("Invalid resolution".into())),
    };

    // Collect the raw indexes first so the inner series can borrow them.
    let pentagon_ids: Vec<u64> = CellIndex::base_cells()
        .filter(|base| base.is_pentagon())
        .map(|base| {
            let at_res = base.center_child(res).unwrap_or(base);
            u64::from(at_res)
        })
        .collect();
    let inner = Series::new(PlSmallStr::from(""), pentagon_ids.as_slice());

    // A single `Some(series)` item becomes a list column with exactly one row.
    let wrapped: ListChunked = std::iter::once(Some(inner)).collect();

    Ok(wrapped.into_series())
}
3 changes: 1 addition & 2 deletions src/engine/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
// pub mod boundaries;
pub mod edge;
pub mod hierarchy;
pub mod indexing;
pub mod inspection;
pub mod metrics;
pub mod traversal;
pub mod utils;
pub mod vertexes;
// pub mod metrics;
Empty file removed src/engine/regions.rs
Empty file.
Empty file removed src/engine/traversing.rs
Empty file.
62 changes: 62 additions & 0 deletions src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,3 +296,65 @@ fn directed_edge_to_boundary(inputs: &[Series]) -> PolarsResult<Series> {
let edge_series = &inputs[0];
crate::engine::edge::directed_edge_to_boundary(edge_series)
}

// ===== Metrics ===== //

// #[derive(Deserialize)]
// struct UnitKwargs {
// unit: String,
// }

// #[derive(Deserialize)]
// struct ResolutionAndUnitKwargs {
// resolution: u8,
// unit: String,
// }

// #[polars_expr(output_type=Float64)]
// fn get_hexagon_area(inputs: &[Series], kwargs: ResolutionAndUnitKwargs) -> PolarsResult<Series> {
// crate::engine::metrics::get_hexagon_area(kwargs.resolution, &kwargs.unit)
// }

// #[polars_expr(output_type=Float64)]
// fn cell_area(inputs: &[Series], kwargs: UnitKwargs) -> PolarsResult<Series> {
// let cell_series = &inputs[0];
// crate::engine::metrics::cell_area(cell_series, &kwargs.unit)
// }

// #[polars_expr(output_type=Float64)]
// fn get_hexagon_edge_length(
// inputs: &[Series],
// kwargs: ResolutionAndUnitKwargs,
// ) -> PolarsResult<Series> {
// crate::engine::metrics::get_hexagon_edge_length(kwargs.resolution, &kwargs.unit)
// }

// #[polars_expr(output_type=UInt64)]
// fn get_num_cells(inputs: &[Series], kwargs: ResolutionKwargs) -> PolarsResult<Series> {
// crate::engine::metrics::get_num_cells(kwargs.resolution)
// }

// #[polars_expr(output_type_func=list_uint64_dtype)]
// fn get_res0_cells(inputs: &[Series]) -> PolarsResult<Series> {
// crate::engine::metrics::get_res0_cells()
// }

// #[polars_expr(output_type_func=list_uint64_dtype)]
// fn get_pentagons(inputs: &[Series], kwargs: ResolutionKwargs) -> PolarsResult<Series> {
// crate::engine::metrics::get_pentagons(kwargs.resolution)
// }

// #[polars_expr(output_type=Float64)]
// fn great_circle_distance(inputs: &[Series], kwargs: UnitKwargs) -> PolarsResult<Series> {
// let lat1_series = &inputs[0];
// let lng1_series = &inputs[1];
// let lat2_series = &inputs[2];
// let lng2_series = &inputs[3];
// crate::engine::metrics::great_circle_distance(
// lat1_series,
// lng1_series,
// lat2_series,
// lng2_series,
// &kwargs.unit,
// )
// }

0 comments on commit b4e3844

Please sign in to comment.