Skip to content

Commit

Permalink
Upgrade to h3 v4.2.0 (#139)
Browse files Browse the repository at this point in the history
* upgrade to h3 v4.2.0

* add experimental functions

* round

* upgrade ci tools

* upgrade artifact download

* test

* fix test
  • Loading branch information
isaacbrodsky authored Dec 9, 2024
1 parent ee10e9f commit 8b1cf40
Show file tree
Hide file tree
Showing 8 changed files with 344 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/_extension_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
cd duckdb
git checkout ${{ inputs.duckdb_version }}
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}${{startsWith(matrix.duckdb, 'wasm') && '.wasm' || ''}}
path: |
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[![Extension Test](https://github.com/isaacbrodsky/h3-duckdb/actions/workflows/test.yml/badge.svg)](https://github.com/isaacbrodsky/h3-duckdb/actions/workflows/test.yml)
[![DuckDB Version](https://img.shields.io/static/v1?label=duckdb&message=v1.1.3&color=blue)](https://github.com/duckdb/duckdb/releases/tag/v1.1.3)
[![H3 Version](https://img.shields.io/static/v1?label=h3&message=v4.1.0&color=blue)](https://github.com/uber/h3/releases/tag/v4.1.0)
[![H3 Version](https://img.shields.io/static/v1?label=h3&message=v4.2.0&color=blue)](https://github.com/uber/h3/releases/tag/v4.2.0)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)

This is a [DuckDB](https://duckdb.org) extension that adds support for the [H3 discrete global grid system](https://github.com/uber/h3/), so you can index points and geometries to hexagons in SQL.
Expand Down Expand Up @@ -94,6 +94,8 @@ one to use. The unsigned and signed APIs are identical. All functions also suppo
| `h3_cells_to_multi_polygon_wkt` | Convert a set of cells to multipolygon WKT
| `h3_polygon_wkt_to_cells` | Convert polygon WKT to a set of cells
| `h3_polygon_wkt_to_cells_string` | Convert polygon WKT to a set of cells (returns VARCHAR)
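| `h3_polygon_wkt_to_cells_experimental` | Convert polygon WKT to a set of cells using H3's experimental polygon-to-cells algorithm; takes a containment mode (`CONTAINMENT_CENTER`, `CONTAINMENT_FULL`, `CONTAINMENT_OVERLAPPING`, or `CONTAINMENT_OVERLAPPING_BBOX`) as the second argument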
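| `h3_polygon_wkt_to_cells_experimental_string` | Convert polygon WKT to a set of cells using H3's experimental polygon-to-cells algorithm; takes a containment mode (`CONTAINMENT_CENTER`, `CONTAINMENT_FULL`, `CONTAINMENT_OVERLAPPING`, or `CONTAINMENT_OVERLAPPING_BBOX`) as the second argument (returns VARCHAR)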

# Alternative download / install

Expand Down
2 changes: 1 addition & 1 deletion h3
Submodule h3 updated 186 files
211 changes: 211 additions & 0 deletions src/h3_regions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,203 @@ static void PolygonWktToCellsVarcharFunction(DataChunk &args,
});
}

// Scalar function body for the experimental polygon-to-cells call returning
// cell indexes as UBIGINT.
//
// Arguments per row: polygon WKT text, containment mode name, and the INTEGER
// handed to H3 as `res`. The result is a list of the non-null cells H3 wrote.
//
// An empty list is returned when the mode name is unknown, the text does not
// start with POLYGON, the polygon is EMPTY, no '(' follows the keyword, or H3
// reports an error. Malformed hole lists throw InvalidInputException instead.
//
// TODO: Note this function is not fully noexcept -- some invalid WKT strings
// will throw, others will return empty lists.
static void PolygonWktToCellsExperimentalFunction(DataChunk &args, ExpressionState &state,
                                                  Vector &result) {
  TernaryExecutor::Execute<string_t, string_t, int, list_entry_t>(
      args.data[0], args.data[1], args.data[2], result, args.size(),
      [&](string_t input, string_t flagsStr, int res) {
        std::string wkt = input.GetString();

        // Cells for this row are appended after whatever the list already holds.
        const uint64_t listStart = ListVector::GetListSize(result);
        const list_entry_t noCells(listStart, 0);

        // TODO: Make flags easier to work with
        // Position in this table is the integer value handed to H3 as `flags`.
        static const char *const kContainmentModes[] = {
            "CONTAINMENT_CENTER", "CONTAINMENT_FULL", "CONTAINMENT_OVERLAPPING",
            "CONTAINMENT_OVERLAPPING_BBOX"};
        int32_t mode = -1;
        for (int32_t candidate = 0; candidate < 4; candidate++) {
          if (flagsStr == kContainmentModes[candidate]) {
            mode = candidate;
            break;
          }
        }
        if (mode < 0) {
          // Invalid flags input
          return noCells;
        }

        // The text must begin with the POLYGON keyword.
        if (wkt.rfind(POLYGON, 0) != 0) {
          return noCells;
        }

        size_t pos = POLYGON.length();
        pos = whitespace(wkt, pos);

        // "POLYGON EMPTY" yields no cells.
        if (wkt.rfind(EMPTY, pos) == pos) {
          return noCells;
        }

        // Anything other than an opening parenthesis here yields no cells.
        if (wkt[pos] != '(') {
          return noCells;
        }
        ++pos;
        pos = whitespace(wkt, pos);

        // The vertex vectors stay alive in this scope for as long as the
        // polygon is handed to H3 below.
        GeoPolygon polygon;
        auto shellVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
        pos = readGeoLoop(wkt, pos, shellVerts, polygon.geoloop);

        std::vector<GeoLoop> holeLoops;
        std::vector<duckdb::shared_ptr<std::vector<LatLng>>> holeStorage;
        while (pos < wkt.length() && wkt[pos] == ',') {
          ++pos;
          pos = whitespace(wkt, pos);
          if (wkt[pos] != '(') {
            throw InvalidInputException(StringUtil::Format(
                "Invalid WKT: expected a hole loop '(' after ',' at pos %lu",
                pos));
          }
          GeoLoop holeLoop;
          auto holeVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
          pos = readGeoLoop(wkt, pos, holeVerts, holeLoop);
          holeLoops.push_back(holeLoop);
          holeStorage.push_back(holeVerts);
        }
        if (wkt[pos] != ')') {
          throw InvalidInputException(StringUtil::Format(
              "Invalid WKT: expected a hole loop ',' or final ')' at pos %lu",
              pos));
        }

        polygon.numHoles = holeLoops.size();
        polygon.holes = holeLoops.data();

        // Ask H3 for the buffer size first, then fill the buffer.
        int64_t maxCells = 0;
        if (maxPolygonToCellsSizeExperimental(&polygon, res, mode, &maxCells)) {
          return noCells;
        }

        std::vector<H3Index> cells(maxCells);
        if (polygonToCellsExperimental(&polygon, res, mode, maxCells, cells.data())) {
          return noCells;
        }

        // Only non-null slots of the buffer are emitted.
        uint64_t emitted = 0;
        for (H3Index cell : cells) {
          if (cell == H3_NULL) {
            continue;
          }
          ListVector::PushBack(result, Value::UBIGINT(cell));
          emitted++;
        }
        return list_entry_t(listStart, emitted);
      });
}

// Scalar function body for the experimental polygon-to-cells call returning
// cell indexes as lowercase hexadecimal VARCHAR.
//
// Arguments per row: polygon WKT text, containment mode name, and the INTEGER
// handed to H3 as `res`. The result is a list of the non-null cells H3 wrote,
// each formatted with "%llx".
//
// An empty list is returned when the mode name is unknown, the text does not
// start with POLYGON, the polygon is EMPTY, no '(' follows the keyword, or H3
// reports an error. Malformed hole lists throw InvalidInputException instead.
//
// TODO: Note this function is not fully noexcept -- some invalid WKT strings
// will throw, others will return empty lists.
static void PolygonWktToCellsExperimentalVarcharFunction(DataChunk &args,
                                                         ExpressionState &state,
                                                         Vector &result) {
  TernaryExecutor::Execute<string_t, string_t, int, list_entry_t>(
      args.data[0], args.data[1], args.data[2], result, args.size(),
      [&](string_t input, string_t flagsStr, int res) {
        GeoPolygon polygon;
        int32_t flags = 0;

        std::string str = input.GetString();

        // Cells for this row are appended after whatever the list already holds.
        uint64_t offset = ListVector::GetListSize(result);

        // TODO: Make flags easier to work with
        // Map the mode name onto the integer handed to H3 as `flags`.
        if (flagsStr == "CONTAINMENT_CENTER") {
          flags = 0;
        } else if (flagsStr == "CONTAINMENT_FULL") {
          flags = 1;
        } else if (flagsStr == "CONTAINMENT_OVERLAPPING") {
          flags = 2;
        } else if (flagsStr == "CONTAINMENT_OVERLAPPING_BBOX") {
          flags = 3;
        } else {
          // Invalid flags input
          return list_entry_t(offset, 0);
        }

        // The text must begin with the POLYGON keyword.
        if (str.rfind(POLYGON, 0) != 0) {
          return list_entry_t(offset, 0);
        }

        size_t strIndex = POLYGON.length();
        strIndex = whitespace(str, strIndex);

        // "POLYGON EMPTY" yields no cells.
        if (str.rfind(EMPTY, strIndex) == strIndex) {
          return list_entry_t(offset, 0);
        }

        // Anything other than an opening parenthesis here yields no cells.
        if (str[strIndex] != '(') {
          return list_entry_t(offset, 0);
        }
        strIndex++;
        strIndex = whitespace(str, strIndex);

        // The vertex vectors stay alive in this scope for as long as the
        // polygon is handed to H3 below.
        auto outerVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
        strIndex = readGeoLoop(str, strIndex, outerVerts, polygon.geoloop);

        std::vector<GeoLoop> holes;
        std::vector<duckdb::shared_ptr<std::vector<LatLng>>> holesVerts;
        while (strIndex < str.length() && str[strIndex] == ',') {
          strIndex++;
          strIndex = whitespace(str, strIndex);
          if (str[strIndex] == '(') {
            GeoLoop hole;
            auto verts = duckdb::make_shared_ptr<std::vector<LatLng>>();
            strIndex = readGeoLoop(str, strIndex, verts, hole);
            holes.push_back(hole);
            holesVerts.push_back(verts);
          } else {
            throw InvalidInputException(StringUtil::Format(
                "Invalid WKT: expected a hole loop '(' after ',' at pos %lu",
                strIndex));
          }
        }
        if (str[strIndex] != ')') {
          throw InvalidInputException(StringUtil::Format(
              "Invalid WKT: expected a hole loop ',' or final ')' at pos %lu",
              strIndex));
        }

        polygon.numHoles = holes.size();
        polygon.holes = holes.data();

        // Ask H3 for the buffer size first, then fill the buffer.
        int64_t numCells = 0;
        H3Error err = maxPolygonToCellsSizeExperimental(&polygon, res, flags, &numCells);
        if (err) {
          return list_entry_t(offset, 0);
        }

        std::vector<H3Index> out(numCells);
        H3Error err2 = polygonToCellsExperimental(&polygon, res, flags, numCells, out.data());
        if (err2) {
          return list_entry_t(offset, 0);
        }

        // Only non-null slots of the buffer are emitted.
        uint64_t actual = 0;
        for (H3Index outCell : out) {
          if (outCell != H3_NULL) {
            // Build the Value straight from the formatted string. The Value
            // owns its own copy, so no strdup'd buffer is needed (the previous
            // strdup was never freed and leaked once per emitted cell).
            ListVector::PushBack(result, Value(StringUtil::Format("%llx", outCell)));
            actual++;
          }
        }
        return list_entry_t(offset, actual);
      });
}

CreateScalarFunctionInfo H3Functions::GetCellsToMultiPolygonWktFunction() {
ScalarFunctionSet funcs("h3_cells_to_multi_polygon_wkt");
funcs.AddFunction(ScalarFunction(
Expand Down Expand Up @@ -393,4 +590,18 @@ CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsVarcharFunction() {
PolygonWktToCellsVarcharFunction));
}

// Describes the SQL function h3_polygon_wkt_to_cells_experimental:
// (VARCHAR, VARCHAR, INTEGER) -> LIST(UBIGINT), implemented by
// PolygonWktToCellsExperimentalFunction.
CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsExperimentalFunction() {
  ScalarFunction fun("h3_polygon_wkt_to_cells_experimental",
                     {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER},
                     LogicalType::LIST(LogicalType::UBIGINT),
                     PolygonWktToCellsExperimentalFunction);
  return CreateScalarFunctionInfo(fun);
}

// Describes the SQL function h3_polygon_wkt_to_cells_experimental_string:
// (VARCHAR, VARCHAR, INTEGER) -> LIST(VARCHAR), implemented by
// PolygonWktToCellsExperimentalVarcharFunction.
CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsExperimentalVarcharFunction() {
  ScalarFunction fun("h3_polygon_wkt_to_cells_experimental_string",
                     {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER},
                     LogicalType::LIST(LogicalType::VARCHAR),
                     PolygonWktToCellsExperimentalVarcharFunction);
  return CreateScalarFunctionInfo(fun);
}

} // namespace duckdb
4 changes: 4 additions & 0 deletions src/include/h3_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class H3Functions {
functions.push_back(GetCellsToMultiPolygonWktFunction());
functions.push_back(GetPolygonWktToCellsFunction());
functions.push_back(GetPolygonWktToCellsVarcharFunction());
functions.push_back(GetPolygonWktToCellsExperimentalFunction());
functions.push_back(GetPolygonWktToCellsExperimentalVarcharFunction());

return functions;
}
Expand Down Expand Up @@ -168,6 +170,8 @@ class H3Functions {
static CreateScalarFunctionInfo GetCellsToMultiPolygonWktFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsVarcharFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsExperimentalFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsExperimentalVarcharFunction();

static void AddAliases(vector<string> names, CreateScalarFunctionInfo fun,
vector<CreateScalarFunctionInfo> &functions) {
Expand Down
105 changes: 105 additions & 0 deletions test/sql/h3/h3_functions_regions.test
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,108 @@ query I
select h3_polygon_wkt_to_cells_string('POLYGON EMPTY', 9);
----
[]

query I
select length(h3_polygon_wkt_to_cells_experimental('POLYGON', 'CONTAINMENT_CENTER', 9));
----
0

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'AAA', 9);
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 5)
----
[599685771850416127, 599685772924157951, 599685776145383423, 599685777219125247]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 5)
----
[599685771850416127, 599685772924157951, 599685773997899775, 599685775071641599, 599685776145383423, 599685777219125247, 599685784735318015, 599686100415414271, 599686104710381567]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 5)
----
[85283083fffffff, 85283087fffffff, 85283093fffffff, 85283097fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 5)
----
[85283083fffffff, 85283087fffffff, 8528308bfffffff, 8528308ffffffff, 85283093fffffff, 85283097fffffff, 852830b3fffffff, 8528354bfffffff, 8528355bfffffff]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 5)
----
[599685771850416127, 599685772924157951, 599685776145383423, 599685777219125247]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 5)
----
[599685771850416127, 599685772924157951, 599685773997899775, 599685775071641599, 599685776145383423, 599685777219125247, 599685784735318015, 599686100415414271, 599686104710381567]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 6)
----
[604189371075133439, 604189371209351167, 604189372417310719, 604189376175407103, 604189376309624831]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 6)
----
[604189371209351167, 604189376309624831]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 6)
----
[604189370538262527, 604189370672480255, 604189371075133439, 604189371209351167, 604189372148875263, 604189372417310719, 604189374967447551, 604189375235883007, 604189375906971647, 604189376041189375, 604189376175407103, 604189376309624831]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 6)
----
[604189370538262527, 604189370672480255, 604189370940915711, 604189371075133439, 604189371209351167, 604189371343568895, 604189371612004351, 604189372148875263, 604189372283092991, 604189372417310719, 604189374967447551, 604189375235883007, 604189375906971647, 604189376041189375, 604189376175407103, 604189376309624831, 604189376578060287, 604189376712278015]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 6)
----
[862830827ffffff, 86283082fffffff, 862830877ffffff, 862830957ffffff, 86283095fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 6)
----
[86283082fffffff, 86283095fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 6)
----
[862830807ffffff, 86283080fffffff, 862830827ffffff, 86283082fffffff, 862830867ffffff, 862830877ffffff, 86283090fffffff, 86283091fffffff, 862830947ffffff, 86283094fffffff, 862830957ffffff, 86283095fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 6)
----
[862830807ffffff, 86283080fffffff, 86283081fffffff, 862830827ffffff, 86283082fffffff, 862830837ffffff, 862830847ffffff, 862830867ffffff, 86283086fffffff, 862830877ffffff, 86283090fffffff, 86283091fffffff, 862830947ffffff, 86283094fffffff, 862830957ffffff, 86283095fffffff, 86283096fffffff, 862830977ffffff]
Loading

0 comments on commit 8b1cf40

Please sign in to comment.