From 18e5ba113e9ae19be6eb2cdff3cec37bd013d8e6 Mon Sep 17 00:00:00 2001
From: smheidrich
Date: Wed, 8 Jan 2025 01:34:28 +0100
Subject: [PATCH] Add test for load() iterable split in UTF-8 char

---
 tests/test_load_iterable.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 tests/test_load_iterable.py

diff --git a/tests/test_load_iterable.py b/tests/test_load_iterable.py
new file mode 100644
index 0000000..f9dbc3f
--- /dev/null
+++ b/tests/test_load_iterable.py
@@ -0,0 +1,26 @@
+"""
+Test compatibility with json-stream's support for giving iterables to `load()`.
+"""
+import json_stream
+import pytest
+
+
+@pytest.mark.parametrize("chunk_size", [1, 2, 3, 4, 10])
+def test_chunk_boundary_inside_utf8_char(chunk_size: int) -> None:
+    """
+    Test that chunk boundaries inside UTF-8 chars are handled correctly.
+
+    Regression test for https://github.com/daggaz/json-stream/issues/59.
+    """
+    inner_str = "——"  # non-ASCII dashes, each multi-byte in UTF-8
+    document_str = f'"{inner_str}"'  # JSON document: one string value
+    document_bytes = document_str.encode("utf-8")
+
+    iterable = (  # lazily yields consecutive chunk_size-byte slices
+        document_bytes[i : i + chunk_size]
+        for i in range(0, len(document_bytes), chunk_size)
+    )
+
+    parsed = json_stream.load(iterable)  # load() is given byte chunks
+
+    assert parsed == inner_str