Skip to content

Commit

Permalink
Data type converter: Resolve data type ID to converter function upfront
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Oct 11, 2022
1 parent 4180496 commit 624faf8
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 17 deletions.
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[tool.mypy]

# Needed until `mypy-0.990` for `ConverterDefinition` in `converter.py`.
# https://github.com/python/mypy/issues/731#issuecomment-1260976955
enable_recursive_aliases = true
51 changes: 36 additions & 15 deletions src/crate/client/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@
from copy import deepcopy
from datetime import datetime
from enum import Enum
from typing import Any, Callable, Dict, Optional, Union, List
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

InputVal = Any
ConverterFunction = Callable[[Optional[InputVal]], Optional[Any]]
ConverterDefinition = Union[ConverterFunction, Tuple[ConverterFunction, "ConverterDefinition"]]
ColTypesDefinition = Union[int, List[Union[int, "ColTypesDefinition"]]]


def _to_ipaddress(value: Optional[str]) -> Optional[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]]:
Expand Down Expand Up @@ -87,7 +90,7 @@ class DataType(Enum):


# Map data type identifier to converter function.
_DEFAULT_CONVERTERS: Dict[DataType, Callable[[Optional[InputVal]], Optional[Any]]] = {
_DEFAULT_CONVERTERS: Dict[DataType, ConverterFunction] = {
DataType.IP: _to_ipaddress,
DataType.TIMESTAMP_WITH_TZ: _to_datetime,
DataType.TIMESTAMP_WITHOUT_TZ: _to_datetime,
Expand All @@ -97,37 +100,55 @@ class DataType(Enum):
class Converter:
def __init__(
self,
mappings: Dict[DataType, Callable[[Optional[InputVal]], Optional[Any]]] = None,
default: Callable[[Optional[InputVal]], Optional[Any]] = _to_default,
mappings: Optional[Dict[DataType, ConverterFunction]] = None,
default: ConverterFunction = _to_default,
) -> None:
self._mappings = mappings or {}
self._default = default

@property
def mappings(self) -> Dict[DataType, Callable[[Optional[InputVal]], Optional[Any]]]:
def mappings(self) -> Dict[DataType, ConverterFunction]:
return self._mappings

def get(self, type_: DataType) -> Callable[[Optional[InputVal]], Optional[Any]]:
def get(self, type_: DataType) -> ConverterFunction:
return self.mappings.get(type_, self._default)

def set(self, type_: DataType, converter: Callable[[Optional[InputVal]], Optional[Any]]) -> None:
def set(self, type_: DataType, converter: ConverterFunction) -> None:
self.mappings[type_] = converter

def convert(self, type_: Union[DataType, int], value: Optional[Any]) -> Optional[Any]:
def convert(self, converter_definition: ConverterDefinition, value: Optional[Any]) -> Optional[Any]:
"""
Convert a single row cell value with given data type. Invoked from `Cursor._convert_rows`.
Convert a single row cell value using given converter definition.
Also works recursively on nested values like `ARRAY` collections.
Invoked from `Cursor._convert_rows`.
"""
if isinstance(type_, List):
type_, inner_type = type_
if DataType(type_) is not DataType.ARRAY:
raise ValueError(f"Data type {type_} is not implemented as collection type")
if isinstance(converter_definition, tuple):
type_, inner_type = converter_definition
if value is None:
result = self.convert(inner_type, None)
else:
result = [self.convert(inner_type, item) for item in value]
else:
converter = self.get(DataType(type_))
result = converter(value)
result = converter_definition(value)
return result

def col_type_to_converter(self, type_: ColTypesDefinition) -> ConverterDefinition:
"""
Resolve integer data type identifier to its corresponding converter function.
Also handles nested definitions with a *list* of data type identifiers on the
right hand side, describing the inner type of `ARRAY` values.
It is important to resolve the converter functions first, in order not to
hog the row loop with redundant lookups to the `mappings` dictionary.
"""
result: ConverterDefinition
if isinstance(type_, list):
type_, inner_type = type_
if DataType(type_) is not DataType.ARRAY:
raise ValueError(f"Data type {type_} is not implemented as collection type")
result = (self.get(DataType(type_)), self.col_type_to_converter(inner_type))
else:
result = self.get(DataType(type_))
return result


Expand Down
12 changes: 10 additions & 2 deletions src/crate/client/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,19 @@ def _convert_rows(self):
"""
assert "col_types" in self._result and self._result["col_types"], \
"Unable to apply type conversion without `col_types` information"

# Resolve `col_types` definition to converter functions. Running the lookup
# redundantly on each row loop iteration would be a huge performance hog.
type_id_list = self._result["col_types"]
converter_definitions = [
self._converter.col_type_to_converter(type_id) for type_id in type_id_list
]

# Process result rows with conversion.
for row in self._result["rows"]:
yield [
self._converter.convert(type_id, value)
for type_id, value in zip(type_id_list, row)
self._converter.convert(converter_definition, value)
for converter_definition, value in zip(converter_definitions, row)
]

@staticmethod
Expand Down

0 comments on commit 624faf8

Please sign in to comment.