Skip to content

Commit

Permalink
fix(datatype-parsing): ensure that geospatial types are round trippable through the data type parser (ibis-project#10171)
Browse files Browse the repository at this point in the history

Fixes ibis-project#10170.
  • Loading branch information
cpcloud authored and ncclementi committed Sep 24, 2024
1 parent 2aa51ae commit f34106f
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
27 changes: 20 additions & 7 deletions ibis/expr/datatypes/parse.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import annotations

import ast
import functools
import re
from functools import lru_cache, partial
from operator import methodcaller

import parsy
Expand Down Expand Up @@ -50,7 +50,7 @@ def spaceless_string(*strings: str):


@public
@functools.lru_cache(maxsize=100)
@lru_cache(maxsize=100)
def parse(
text: str, default_decimal_parameters: tuple[int | None, int | None] = (None, None)
) -> dt.DataType:
Expand Down Expand Up @@ -88,12 +88,15 @@ def parse(
geotype = spaceless_string("geography", "geometry")

srid_geotype = SEMICOLON.then(parsy.seq(srid=NUMBER.skip(COLON), geotype=geotype))
geotype_srid = COLON.then(parsy.seq(geotype=geotype, srid=SEMICOLON.then(NUMBER)))
geotype_part = COLON.then(parsy.seq(geotype=geotype))
srid_part = SEMICOLON.then(parsy.seq(srid=NUMBER))

def geotype_parser(typ: type[dt.DataType]) -> dt.DataType:
return spaceless_string(typ.__name__.lower()).then(
(srid_geotype | geotype_part | srid_part).optional(dict()).combine_dict(typ)
(srid_geotype | geotype_srid | geotype_part | srid_part)
.optional(dict())
.combine_dict(typ)
)

primitive = (
Expand All @@ -116,15 +119,25 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType:
"time",
"date",
"null",
).map(functools.partial(getattr, dt))
| spaceless_string("bytes").result(dt.binary)
| geotype.map(dt.GeoSpatial)
).map(partial(getattr, dt))
| geotype_parser(dt.LineString)
| geotype_parser(dt.Polygon)
| geotype_parser(dt.Point)
| geotype_parser(dt.MultiLineString)
| geotype_parser(dt.MultiPolygon)
| geotype_parser(dt.MultiPoint)
| spaceless_string("bytes").result(dt.binary)
| spaceless_string("geospatial:geography").then(
srid_part.optional(dict()).combine_dict(
partial(dt.GeoSpatial, geotype="geography")
)
)
| spaceless_string("geospatial:geometry").then(
srid_part.optional(dict()).combine_dict(
partial(dt.GeoSpatial, geotype="geometry")
)
)
| geotype.map(dt.GeoSpatial)
)

varchar_or_char = (
Expand Down Expand Up @@ -201,7 +214,7 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType:
| map
| struct
| spaceless_string("jsonb", "json", "uuid", "macaddr", "inet").map(
functools.partial(getattr, dt)
partial(getattr, dt)
)
| spaceless_string("int").result(dt.int64)
| spaceless_string("str").result(dt.string)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/datatypes/tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,6 @@ def test_parse_null():


# corresponds to its.all_dtypes() but without:
# - geospatial types, the string representation is different from what the parser expects
# - struct types, the generated struct field names contain special characters

field_names = st.text(
Expand All @@ -286,6 +285,7 @@ def test_parse_null():
| its.struct_dtypes(names=field_names)
| its.array_dtypes(roundtrippable_dtypes)
| its.map_dtypes(roundtrippable_dtypes, roundtrippable_dtypes)
| its.geospatial_dtypes()
)
)

Expand Down

0 comments on commit f34106f

Please sign in to comment.