Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle trailing period in partial float mode #156

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 67 additions & 49 deletions crates/jiter-python/tests/test_jiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def test_extracted_value_error():
def test_partial_array():
json = b'["string", true, null, 1, "foo'

with pytest.raises(ValueError, match='EOF while parsing a string at line 1 column 30'):
with pytest.raises(
ValueError, match="EOF while parsing a string at line 1 column 30"
):
jiter.from_json(json, partial_mode=False)

parsed = jiter.from_json(json, partial_mode=True)
Expand All @@ -79,12 +81,12 @@ def test_partial_array():

def test_partial_array_trailing_strings():
json = b'["string", true, null, 1, "foo'
parsed = jiter.from_json(json, partial_mode='trailing-strings')
parsed = jiter.from_json(json, partial_mode="trailing-strings")
assert parsed == ["string", True, None, 1, "foo"]

# test that stopping at every point is ok
for i in range(1, len(json)):
parsed = jiter.from_json(json[:i], partial_mode='trailing-strings')
parsed = jiter.from_json(json[:i], partial_mode="trailing-strings")
assert isinstance(parsed, list)


Expand All @@ -97,7 +99,7 @@ def test_partial_array_first():
jiter.from_json(json)

with pytest.raises(ValueError, match="EOF while parsing a list at line 1 column 1"):
jiter.from_json(json, partial_mode='off')
jiter.from_json(json, partial_mode="off")


def test_partial_object():
Expand All @@ -115,7 +117,7 @@ def test_partial_object_string():
json = b'{"a": 1, "b": 2, "c": "foo'
parsed = jiter.from_json(json, partial_mode=True)
assert parsed == {"a": 1, "b": 2}
parsed = jiter.from_json(json, partial_mode='on')
parsed = jiter.from_json(json, partial_mode="on")
assert parsed == {"a": 1, "b": 2}

# test that stopping at every point is ok
Expand All @@ -130,7 +132,7 @@ def test_partial_object_string():

def test_partial_object_string_trailing_strings():
json = b'{"a": 1, "b": 2, "c": "foo'
parsed = jiter.from_json(json, partial_mode='trailing-strings')
parsed = jiter.from_json(json, partial_mode="trailing-strings")
assert parsed == {"a": 1, "b": 2, "c": "foo"}

# test that stopping at every point is ok
Expand All @@ -139,10 +141,21 @@ def test_partial_object_string_trailing_strings():
assert isinstance(parsed, dict)

json = b'{"title": "Pride and Prejudice", "author": "Jane A'
parsed = jiter.from_json(json, partial_mode='trailing-strings')
parsed = jiter.from_json(json, partial_mode="trailing-strings")
assert parsed == {"title": "Pride and Prejudice", "author": "Jane A"}


def test_partial_float():
    """Partial mode accepts a float that is cut off right after its decimal point.

    The payload ends in ``3.``; with ``partial_mode=True`` the truncated value is
    expected to come back as ``3.0`` alongside the complete entries.
    """
    data = b'{"a": 1.2, "b": 2.3, "c": 3.'
    assert jiter.from_json(data, partial_mode=True) == {"a": 1.2, "b": 2.3, "c": 3.0}

    # every non-empty proper prefix of the payload must still parse to a dict
    for end in range(1, len(data)):
        assert isinstance(jiter.from_json(data[:end], partial_mode=True), dict)
Comment on lines +148 to +156
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only material change in this file (rest is autoformatting, sorry).



def test_partial_nested():
json = b'{"a": 1, "b": 2, "c": [1, 2, {"d": 1, '
parsed = jiter.from_json(json, partial_mode=True)
Expand All @@ -157,14 +170,16 @@ def test_partial_nested():
def test_partial_error():
json = b'["string", true, null, 1, "foo'

with pytest.raises(ValueError, match='EOF while parsing a string at line 1 column 30'):
with pytest.raises(
ValueError, match="EOF while parsing a string at line 1 column 30"
):
jiter.from_json(json, partial_mode=False)

assert jiter.from_json(json, partial_mode=True) == ["string", True, None, 1]

msg = "Invalid partial mode, should be `'off'`, `'on'`, `'trailing-strings'` or a `bool`"
with pytest.raises(ValueError, match=msg):
jiter.from_json(json, partial_mode='wrong')
jiter.from_json(json, partial_mode="wrong")
with pytest.raises(TypeError, match=msg):
jiter.from_json(json, partial_mode=123)

Expand Down Expand Up @@ -215,94 +230,93 @@ def test_unicode_cache():


def test_json_float():
f = jiter.LosslessFloat(b'123.45')
assert str(f) == '123.45'
assert repr(f) == 'LosslessFloat(123.45)'
f = jiter.LosslessFloat(b"123.45")
assert str(f) == "123.45"
assert repr(f) == "LosslessFloat(123.45)"
assert float(f) == 123.45
assert f.as_decimal() == Decimal('123.45')
assert bytes(f) == b'123.45'
assert f.as_decimal() == Decimal("123.45")
assert bytes(f) == b"123.45"


def test_json_float_scientific():
f = jiter.LosslessFloat(b'123e4')
assert str(f) == '123e4'
f = jiter.LosslessFloat(b"123e4")
assert str(f) == "123e4"
assert float(f) == 123e4
assert f.as_decimal() == Decimal('123e4')
assert f.as_decimal() == Decimal("123e4")


def test_json_float_invalid():
with pytest.raises(ValueError, match='trailing characters at line 1 column 6'):
jiter.LosslessFloat(b'123.4x')
with pytest.raises(ValueError, match="trailing characters at line 1 column 6"):
jiter.LosslessFloat(b"123.4x")


def test_lossless_floats():
f = jiter.from_json(b'12.3')
f = jiter.from_json(b"12.3")
assert isinstance(f, float)
assert f == 12.3

f = jiter.from_json(b'12.3', float_mode='float')
f = jiter.from_json(b"12.3", float_mode="float")
assert isinstance(f, float)
assert f == 12.3

f = jiter.from_json(b'12.3', float_mode='lossless-float')
f = jiter.from_json(b"12.3", float_mode="lossless-float")
assert isinstance(f, jiter.LosslessFloat)
assert str(f) == '12.3'
assert str(f) == "12.3"
assert float(f) == 12.3
assert f.as_decimal() == Decimal('12.3')
assert f.as_decimal() == Decimal("12.3")

f = jiter.from_json(b'123.456789123456789e45', float_mode='lossless-float')
f = jiter.from_json(b"123.456789123456789e45", float_mode="lossless-float")
assert isinstance(f, jiter.LosslessFloat)
assert 123e45 < float(f) < 124e45
assert f.as_decimal() == Decimal('1.23456789123456789E+47')
assert bytes(f) == b'123.456789123456789e45'
assert str(f) == '123.456789123456789e45'
assert repr(f) == 'LosslessFloat(123.456789123456789e45)'
assert f.as_decimal() == Decimal("1.23456789123456789E+47")
assert bytes(f) == b"123.456789123456789e45"
assert str(f) == "123.456789123456789e45"
assert repr(f) == "LosslessFloat(123.456789123456789e45)"

f = jiter.from_json(b'123', float_mode='lossless-float')
f = jiter.from_json(b"123", float_mode="lossless-float")
assert isinstance(f, int)
assert f == 123

with pytest.raises(ValueError, match='expected value at line 1 column 1'):
jiter.from_json(b'wrong', float_mode='lossless-float')

with pytest.raises(ValueError, match='trailing characters at line 1 column 2'):
jiter.from_json(b'1wrong', float_mode='lossless-float')
with pytest.raises(ValueError, match="expected value at line 1 column 1"):
jiter.from_json(b"wrong", float_mode="lossless-float")

with pytest.raises(ValueError, match="trailing characters at line 1 column 2"):
jiter.from_json(b"1wrong", float_mode="lossless-float")


def test_decimal_floats():
f = jiter.from_json(b'12.3')
f = jiter.from_json(b"12.3")
assert isinstance(f, float)
assert f == 12.3

f = jiter.from_json(b'12.3', float_mode='decimal')
f = jiter.from_json(b"12.3", float_mode="decimal")
assert isinstance(f, Decimal)
assert f == Decimal('12.3')
assert f == Decimal("12.3")

f = jiter.from_json(b'123.456789123456789e45', float_mode='decimal')
f = jiter.from_json(b"123.456789123456789e45", float_mode="decimal")
assert isinstance(f, Decimal)
assert f == Decimal('1.23456789123456789E+47')
assert f == Decimal("1.23456789123456789E+47")

f = jiter.from_json(b'123', float_mode='decimal')
f = jiter.from_json(b"123", float_mode="decimal")
assert isinstance(f, int)
assert f == 123

with pytest.raises(ValueError, match='expected value at line 1 column 1'):
jiter.from_json(b'wrong', float_mode='decimal')
with pytest.raises(ValueError, match="expected value at line 1 column 1"):
jiter.from_json(b"wrong", float_mode="decimal")

with pytest.raises(ValueError, match='trailing characters at line 1 column 2'):
jiter.from_json(b'1wrong', float_mode='decimal')
with pytest.raises(ValueError, match="trailing characters at line 1 column 2"):
jiter.from_json(b"1wrong", float_mode="decimal")


def test_unicode_roundtrip():
original = ['中文']
original = ["中文"]
json_data = json.dumps(original).encode()
assert jiter.from_json(json_data) == original
assert json.loads(json_data) == original


def test_unicode_roundtrip_ensure_ascii():
original = {'name': '中文'}
original = {"name": "中文"}
json_data = json.dumps(original, ensure_ascii=False).encode()
assert jiter.from_json(json_data, cache_mode=False) == original
assert json.loads(json_data) == original
Expand All @@ -311,8 +325,12 @@ def test_unicode_roundtrip_ensure_ascii():
def test_catch_duplicate_keys():
assert jiter.from_json(b'{"foo": 1, "foo": 2}') == {"foo": 2}

with pytest.raises(ValueError, match='Detected duplicate key "foo" at line 1 column 18'):
with pytest.raises(
ValueError, match='Detected duplicate key "foo" at line 1 column 18'
):
jiter.from_json(b'{"foo": 1, "foo": 2}', catch_duplicate_keys=True)

with pytest.raises(ValueError, match='Detected duplicate key "foo" at line 1 column 28'):
with pytest.raises(
ValueError, match='Detected duplicate key "foo" at line 1 column 28'
):
jiter.from_json(b'{"foo": 1, "bar": 2, "foo": 2}', catch_duplicate_keys=True)
34 changes: 22 additions & 12 deletions crates/jiter/src/jiter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub struct Jiter<'j> {
tape: Tape,
allow_inf_nan: bool,
allow_partial_strings: bool,
allow_partial_float_period: bool,
}

impl Clone for Jiter<'_> {
Expand All @@ -26,6 +27,7 @@ impl Clone for Jiter<'_> {
tape: Tape::default(),
allow_inf_nan: self.allow_inf_nan,
allow_partial_strings: self.allow_partial_strings,
allow_partial_float_period: self.allow_partial_float_period,
}
}
}
Expand All @@ -43,6 +45,7 @@ impl<'j> Jiter<'j> {
tape: Tape::default(),
allow_inf_nan: false,
allow_partial_strings: false,
allow_partial_float_period: false,
}
}

Expand All @@ -56,6 +59,11 @@ impl<'j> Jiter<'j> {
self
}

/// Builder-style setter: switches `allow_partial_float_period` on and returns the updated `Jiter`.
///
/// The flag is off by default and is forwarded to `consume_number` by the number-parsing
/// methods of this type.
///
/// NOTE(review): judging by the name, this presumably lets a float that is cut off right after
/// its decimal point (e.g. `3.`) be accepted when parsing partial input — confirm against the
/// number parser, which is not visible here.
pub fn with_allow_partial_float_period(mut self) -> Self {
self.allow_partial_float_period = true;
self
}

/// Get the current [LinePosition] of the parser.
pub fn current_position(&self) -> LinePosition {
self.parser.current_position()
Expand Down Expand Up @@ -135,7 +143,7 @@ impl<'j> Jiter<'j> {
/// Knowing the next value is a number, parse it.
pub fn known_number(&mut self, peek: Peek) -> JiterResult<NumberAny> {
self.parser
.consume_number::<NumberAny>(peek.into_inner(), self.allow_inf_nan)
.consume_number::<NumberAny>(peek.into_inner(), self.allow_inf_nan, self.allow_partial_float_period)
.map_err(|e| self.maybe_number_error(e, JsonType::Int, peek))
}

Expand All @@ -148,7 +156,7 @@ impl<'j> Jiter<'j> {
/// Knowing the next value is an integer, parse it.
pub fn known_int(&mut self, peek: Peek) -> JiterResult<NumberInt> {
self.parser
.consume_number::<NumberInt>(peek.into_inner(), self.allow_inf_nan)
.consume_number::<NumberInt>(peek.into_inner(), self.allow_inf_nan, self.allow_partial_float_period)
.map_err(|e| {
if e.error_type == JsonErrorType::FloatExpectingInt {
JiterError::wrong_type(JsonType::Int, JsonType::Float, self.parser.index)
Expand All @@ -167,7 +175,7 @@ impl<'j> Jiter<'j> {
/// Knowing the next value is a float, parse it.
pub fn known_float(&mut self, peek: Peek) -> JiterResult<f64> {
self.parser
.consume_number::<NumberFloat>(peek.into_inner(), self.allow_inf_nan)
.consume_number::<NumberFloat>(peek.into_inner(), self.allow_inf_nan, self.allow_partial_float_period)
.map_err(|e| self.maybe_number_error(e, JsonType::Float, peek))
}

Expand All @@ -179,10 +187,11 @@ impl<'j> Jiter<'j> {

/// Knowing the next value is a number, parse it and return bytes from the original JSON data.
fn known_number_bytes(&mut self, peek: Peek) -> JiterResult<&[u8]> {
match self
.parser
.consume_number::<NumberRange>(peek.into_inner(), self.allow_inf_nan)
{
match self.parser.consume_number::<NumberRange>(
peek.into_inner(),
self.allow_inf_nan,
self.allow_partial_float_period,
) {
Ok(numbe_range) => Ok(&self.data[numbe_range.range]),
Err(e) => Err(self.maybe_number_error(e, JsonType::Float, peek)),
}
Expand Down Expand Up @@ -377,11 +386,12 @@ impl<'j> Jiter<'j> {

fn wrong_num(&self, first: u8, expected: JsonType) -> JiterError {
let mut parser2 = self.parser.clone();
let actual = match parser2.consume_number::<NumberAny>(first, self.allow_inf_nan) {
Ok(NumberAny::Int { .. }) => JsonType::Int,
Ok(NumberAny::Float { .. }) => JsonType::Float,
Err(e) => return e.into(),
};
let actual =
match parser2.consume_number::<NumberAny>(first, self.allow_inf_nan, self.allow_partial_float_period) {
Ok(NumberAny::Int { .. }) => JsonType::Int,
Ok(NumberAny::Float { .. }) => JsonType::Float,
Err(e) => return e.into(),
};
JiterError::wrong_type(expected, actual, self.parser.index)
}

Expand Down
Loading
Loading