Skip to content

Commit

Permalink
Marshalling: Use orjson to improve JSON serialization performance
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Jan 16, 2025
1 parent a2aae9b commit c779801
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 34 deletions.
11 changes: 11 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ Changes for crate
Unreleased
==========

- Switched JSON encoder to use the `orjson`_ library, to improve JSON
marshalling performance. Thanks, @widmogrod.
orjson is fast and in some spots even more correct when compared against
Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson
will serialize to ``bytes`` instead of ``str``. Please also note it
will not deserialize to dataclasses, UUIDs, decimals, etc., or support
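``object_hook``. Within ``crate-python``, it is applied with an encoder
function that adds support for Python's ``Decimal`` type and
freezegun's ``FakeDatetime`` type. Note that ``datetime`` and ``date``
values are now serialized as ISO 8601 strings instead of integer
timestamps in milliseconds since the epoch.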

.. _orjson: https://github.com/ijl/orjson

2024/11/23 1.0.1
================
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def read(path):
packages=find_namespace_packages("src"),
package_dir={"": "src"},
install_requires=[
"orjson<4",
"urllib3",
"verlib2",
],
Expand Down
60 changes: 33 additions & 27 deletions src/crate/client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,21 @@
# software solely pursuant to the terms of the relevant commercial agreement.


import calendar
import heapq
import io
import json
import logging
import os
import re
import socket
import ssl
import threading
import typing as t
from base64 import b64encode
from datetime import date, datetime, timezone
from decimal import Decimal
from time import time
from urllib.parse import urlparse
from uuid import UUID

import orjson
import urllib3
from urllib3 import connection_from_url
from urllib3.connection import HTTPConnection
Expand Down Expand Up @@ -86,25 +84,33 @@ def super_len(o):
return None


class CrateJsonEncoder(json.JSONEncoder):
    """
    JSON encoder with support for a few additional Python types.

    - ``Decimal`` and ``UUID`` values are serialized as strings.
    - ``datetime`` values are serialized as integer milliseconds relative
      to 1970-01-01. Timezone-aware values are measured against the UTC
      epoch, naive values against a naive epoch.
    - ``date`` values are serialized as integer milliseconds, computed
      from ``calendar.timegm`` on the date's time tuple.
    """

    # Reference points for computing the offset of aware and naive values.
    # Two are needed because Python refuses to subtract a naive datetime
    # from an aware one (and vice versa).
    epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc)
    epoch_naive = datetime(1970, 1, 1)

    def default(self, o):
        """
        Return a JSON-serializable representation of ``o``.

        Falls back to the base class implementation, which raises
        ``TypeError``, for any type not handled here.
        """
        if isinstance(o, (Decimal, UUID)):
            return str(o)
        # `datetime` is a subclass of `date`, so it must be tested first.
        if isinstance(o, datetime):
            epoch = self.epoch_naive if o.tzinfo is None else self.epoch_aware
            delta = o - epoch
            return int(
                delta.microseconds / 1000.0
                + (delta.seconds + delta.days * 24 * 3600) * 1000.0
            )
        if isinstance(o, date):
            return calendar.timegm(o.timetuple()) * 1000
        # Use cooperative `super()` instead of naming the base class.
        return super().default(o)
def cratedb_json_encoder(obj: t.Any) -> str:
    """
    Encoder function for orjson, with additional type support.

    Meant to be passed as the ``default`` callback to ``orjson.dumps``.

    - Python's `Decimal` type is converted using ``str()``.
    - Any object providing an ``isoformat()`` method, for example
      freezegun's `FakeDatetime` type, is converted by calling it.

    https://github.com/ijl/orjson#default

    :param obj: The object to convert.
    :return: A string representation of ``obj``.
    :raises TypeError: If ``obj`` is of an unsupported type.
    """
    if isinstance(obj, Decimal):
        return str(obj)
    # Duck-typing on purpose: this also catches date/time look-alikes
    # which are not exact instances of the stdlib types.
    if hasattr(obj, "isoformat"):
        return obj.isoformat()
    # Name the offending type, so the error cause is actionable.
    raise TypeError(f"Type is not JSON serializable: {type(obj).__name__}")


def json_dumps(obj: t.Any) -> bytes:
    """
    Encode `obj` as JSON using `orjson`, returning `bytes`.

    The call passes `cratedb_json_encoder` as orjson's `default`
    callback, and sets the `OPT_SERIALIZE_NUMPY` option.
    https://github.com/ijl/orjson
    """
    options = orjson.OPT_SERIALIZE_NUMPY
    return orjson.dumps(obj, default=cratedb_json_encoder, option=options)


class Server:
Expand Down Expand Up @@ -180,7 +186,7 @@ def close(self):

def _json_from_response(response):
try:
return json.loads(response.data.decode("utf-8"))
return orjson.loads(response.data)
except ValueError as ex:
raise ProgrammingError(
"Invalid server response of content-type '{}':\n{}".format(
Expand Down Expand Up @@ -223,7 +229,7 @@ def _raise_for_status_real(response):
if response.status == 503:
raise ConnectionError(message)
if response.headers.get("content-type", "").startswith("application/json"):
data = json.loads(response.data.decode("utf-8"))
data = orjson.loads(response.data)
error = data.get("error", {})
error_trace = data.get("error_trace", None)
if "results" in data:
Expand Down Expand Up @@ -323,7 +329,7 @@ def _update_pool_kwargs_for_ssl_minimum_version(server, kwargs):
kwargs["ssl_minimum_version"] = ssl.TLSVersion.MINIMUM_SUPPORTED


def _create_sql_payload(stmt, args, bulk_args):
def _create_sql_payload(stmt, args, bulk_args) -> bytes:
if not isinstance(stmt, str):
raise ValueError("stmt is not a string")
if args and bulk_args:
Expand All @@ -334,7 +340,7 @@ def _create_sql_payload(stmt, args, bulk_args):
data["args"] = args
if bulk_args:
data["bulk_args"] = bulk_args
return json.dumps(data, cls=CrateJsonEncoder)
return json_dumps(data)


def _get_socket_opts(
Expand Down
14 changes: 7 additions & 7 deletions tests/client/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@
)
from crate.client.http import (
Client,
CrateJsonEncoder,
_get_socket_opts,
_remove_certs_for_non_https,
json_dumps,
)

REQUEST = "crate.client.http.Server.request"
Expand Down Expand Up @@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request):
# convert string to dict
# because the order of the keys isn't deterministic
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], [1425108700000])
self.assertEqual(data["args"], ["2015-02-28T07:31:40"])
client.close()

@patch(REQUEST, autospec=True)
Expand All @@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request):
day = dt.date(2016, 4, 21)
client.sql("insert into users (dt) values (?)", (day,))
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], [1461196800000])
self.assertEqual(data["args"], ["2016-04-21"])
client.close()

def test_socket_options_contain_keepalive(self):
Expand Down Expand Up @@ -724,10 +724,10 @@ def test_username(self):
class TestCrateJsonEncoder(TestCase):
def test_naive_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123")
result = json.dumps(data, cls=CrateJsonEncoder)
self.assertEqual(result, "1687771440123")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000"')

def test_aware_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00")
result = json.dumps(data, cls=CrateJsonEncoder)
self.assertEqual(result, "1687764240123")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"')

0 comments on commit c779801

Please sign in to comment.