Skip to content

Commit

Permalink
Add test for load() iterable split in UTF-8 char
Browse files Browse the repository at this point in the history
  • Loading branch information
smheidrich committed Jan 8, 2025
1 parent 163a036 commit 18e5ba1
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions tests/test_load_iterable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Test compatibility with json-stream's support for giving iterables to `load()`.
"""
import json_stream
import pytest


@pytest.mark.parametrize("chunk_size", [1, 2, 3, 4, 10])
def test_chunk_boundary_inside_utf8_char(chunk_size: int) -> None:
    """
    Check that `load()` copes with chunks ending partway through a character.

    The document is a JSON string made of two non-ASCII dash characters,
    which are multi-byte in UTF-8, so for some chunk sizes a single character
    is spread over consecutive chunks of the iterable.

    Regression test for https://github.com/daggaz/json-stream/issues/59.
    """
    inner_str = "——"
    payload = f'"{inner_str}"'.encode("utf-8")

    # Lazily slice the encoded document into pieces of at most `chunk_size`
    # bytes; the final piece may be shorter.
    offsets = range(0, len(payload), chunk_size)
    chunks = (payload[start : start + chunk_size] for start in offsets)

    assert json_stream.load(chunks) == inner_str

0 comments on commit 18e5ba1

Please sign in to comment.