Skip to content

Commit

Permalink
Improve string value reader and writer error behavior
Browse files Browse the repository at this point in the history
Now when an error occurs, any subsequent read and write attempts will
return an error as well. This might still not fully satisfy the
requirements of `Read::read` and `Write::write`
(see https://users.rust-lang.org/t/error-handling-for-custom-read-read-write-write-implementation/101742)
but it is at least less error-prone than the previous behavior in case
users assume they can continue reading from a `Read` or writing to a
`Write` after an error occurred.
  • Loading branch information
Marcono1234 committed Oct 28, 2023
1 parent 1fddf25 commit d436a6f
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 19 deletions.
87 changes: 71 additions & 16 deletions src/reader/stream_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,20 +225,6 @@ impl<R: Read + Debug> Debug for JsonStreamReader<R> {
}
}

// 4 (max bytes for UTF-8 encoded char) - 1, since at least one byte was already consumed
const STRING_VALUE_READER_BUF_SIZE: usize = 3;

struct StringValueReader<'j, R: Read> {
json_reader: &'j mut JsonStreamReader<R>,
/// Buffer in case multi-byte character is read but caller did not provide large enough buffer
utf8_buf: [u8; STRING_VALUE_READER_BUF_SIZE],
/// Start position within [utf8_buf]
utf8_start_pos: usize,
/// Number of bytes currently in the [utf8_buf]
utf8_count: usize,
reached_end: bool,
}

/// Settings to customize the JSON reader behavior
///
/// These settings are used by [`JsonStreamReader::new_custom`]. To avoid repeating the
Expand Down Expand Up @@ -2112,6 +2098,7 @@ impl<R: Read> JsonReader for JsonStreamReader<R> {
utf8_start_pos: 0,
utf8_count: 0,
reached_end: false,
error: None,
}))
}

Expand Down Expand Up @@ -2339,8 +2326,25 @@ impl<R: Read> JsonReader for JsonStreamReader<R> {
}
}

impl<'j, R: Read> Read for StringValueReader<'j, R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
// - 1, since at least one byte was already consumed
const STRING_VALUE_READER_BUF_SIZE: usize = utf8::MAX_BYTES_PER_CHAR - 1;

struct StringValueReader<'j, R: Read> {
json_reader: &'j mut JsonStreamReader<R>,
/// Buffer in case multi-byte character is read but caller did not provide large enough buffer
utf8_buf: [u8; STRING_VALUE_READER_BUF_SIZE],
/// Start position within [utf8_buf]
utf8_start_pos: usize,
/// Number of bytes currently in the [utf8_buf]
utf8_count: usize,
reached_end: bool,
/// The last error which occurred, and which should be returned for every subsequent `read` call
// `io::Error` does not implement Clone, so this only contains some of its data
error: Option<(ErrorKind, String)>,
}

impl<R: Read> StringValueReader<'_, R> {
fn read_impl(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
if self.reached_end || buf.is_empty() {
return Ok(0);
}
Expand Down Expand Up @@ -2404,6 +2408,26 @@ impl<'j, R: Read> Read for StringValueReader<'j, R> {
Ok(pos)
}
}
impl<R: Read> Read for StringValueReader<'_, R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
if let Some(e) = &self.error {
return Err(IoError::new(e.0, e.1.clone()));
}

let result = self.read_impl(buf);
if let Err(e) = &result {
// Must not store `Interrupted` error as `self.error` and return the error again
// for subsequent calls because that could lead to infinite loops since `Interrupted`
// normally allows retrying
// However, JsonStreamReader implementation is not propagating `Interrupted` error anyway
if e.kind() == ErrorKind::Interrupted {
panic!("Unexpected Interrupted error: {e:?}");
}
self.error = Some((e.kind(), e.to_string()));
}
result
}
}

#[cfg(test)]
mod tests {
Expand Down Expand Up @@ -3065,6 +3089,37 @@ mod tests {
Ok(())
}

#[test]
fn string_reader_repeats_error() -> TestResult {
let mut json_reader = new_reader("\"\\uINVALID\"");
let mut reader = json_reader.next_string_reader()?;

let mut buf = [0_u8; 10];
match reader.read(&mut buf) {
Ok(_) => panic!("Should have failed"),
Err(e) => {
assert_eq!(ErrorKind::Other, e.kind());
assert_eq!(
"JSON syntax error MalformedEscapeSequence at line 0, column 1 at path $",
e.get_ref().unwrap().to_string()
);
}
}

// Subsequent read attemps should fail with the same error
match reader.read(&mut buf) {
Ok(_) => panic!("Should have failed"),
Err(e) => {
assert_eq!(ErrorKind::Other, e.kind());
assert_eq!(
"JSON syntax error MalformedEscapeSequence at line 0, column 1 at path $",
e.get_ref().unwrap().to_string()
);
}
}
Ok(())
}

#[test]
#[should_panic(
expected = "Incorrect reader usage: Cannot peek when string value reader is active"
Expand Down
2 changes: 2 additions & 0 deletions src/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,8 @@ pub trait StringValueWriter: Write {
/// ```
fn write_str(&mut self, s: &str) -> Result<(), IoError> {
// write_all retries on `ErrorKind::Interrupted`, as desired
// this is mostly relevant for custom JsonWriter implementations, for JsonStreamWriter it
// should not matter because it is not expected to return `Interrupted`, see also tests there
self.write_all(s.as_bytes())
}

Expand Down
60 changes: 57 additions & 3 deletions src/writer/stream_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ impl<W: Write> JsonWriter for JsonStreamWriter<W> {
utf8_buf: [0; utf8::MAX_BYTES_PER_CHAR],
utf8_pos: 0,
utf8_expected_len: 0,
error: None,
}))
}
}
Expand Down Expand Up @@ -623,6 +624,9 @@ struct StringValueWriterImpl<'j, W: Write> {
utf8_pos: usize,
/// Expected number of total bytes for the character whose bytes are currently in [utf8_buf]
utf8_expected_len: usize,
/// The last error which occurred, and which should be returned for every subsequent `write` call
// `io::Error` does not implement Clone, so this only contains some of its data
error: Option<(ErrorKind, String)>,
}

fn map_utf8_error(e: Utf8Error) -> IoError {
Expand All @@ -639,8 +643,8 @@ fn decode_utf8_char(bytes: &[u8]) -> Result<&str, IoError> {
}
}

impl<'j, W: Write> Write for StringValueWriterImpl<'j, W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
impl<W: Write> StringValueWriterImpl<'_, W> {
fn write_impl(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if buf.is_empty() {
return Ok(0);
}
Expand Down Expand Up @@ -716,13 +720,37 @@ impl<'j, W: Write> Write for StringValueWriterImpl<'j, W> {
)?;
Ok(buf.len())
}
}
impl<W: Write> Write for StringValueWriterImpl<'_, W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if let Some(e) = &self.error {
return Err(IoError::new(
e.0,
// Adjusts error message to indicate that the error is not related to data provided in `buf`
format!("previous error: {}", e.1.clone()),
));
}

let result = self.write_impl(buf);
if let Err(e) = &result {
// Must not store `Interrupted` error as `self.error` and return the error again
// for subsequent calls because that could lead to infinite loops since `Interrupted`
// normally allows retrying
// However, JsonStreamWriter implementation is not propagating `Interrupted` error anyway
if e.kind() == ErrorKind::Interrupted {
panic!("Unexpected Interrupted error: {e:?}");
}
self.error = Some((e.kind(), e.to_string()));
}
result
}

fn flush(&mut self) -> std::io::Result<()> {
self.json_writer.flush()
}
}

impl<'j, W: Write> StringValueWriter for StringValueWriterImpl<'j, W> {
impl<W: Write> StringValueWriter for StringValueWriterImpl<'_, W> {
// Provides more efficient implementation which benefits from avoided UTF-8 validation
fn write_str(&mut self, s: &str) -> Result<(), IoError> {
if self.utf8_pos > 0 {
Expand Down Expand Up @@ -1165,6 +1193,32 @@ mod tests {
);
}

#[test]
fn string_writer_repeats_error() -> TestResult {
let mut writer = Vec::<u8>::new();
let mut json_writer = JsonStreamWriter::new(&mut writer);

let mut string_writer = json_writer.string_value_writer()?;
// Invalid UTF-8 byte 1111_1000
match string_writer.write_all(b"\xF8") {
Ok(_) => panic!("Should have failed"),
Err(e) => {
assert_eq!(ErrorKind::InvalidData, e.kind());
assert_eq!("invalid UTF-8 data", e.to_string());
}
}

// Subsequent write attemps should fail with the same error
match string_writer.write_all(b"test") {
Ok(_) => panic!("Should have failed"),
Err(e) => {
assert_eq!(ErrorKind::InvalidData, e.kind());
assert_eq!("previous error: invalid UTF-8 data", e.to_string());
}
}
Ok(())
}

#[test]
#[should_panic(
expected = "Incorrect writer usage: Cannot end array when string value writer is still active"
Expand Down

0 comments on commit d436a6f

Please sign in to comment.