Skip to content

Commit

Permalink
Restore panic if pos is greater than input
Browse files Browse the repository at this point in the history
- Restore the previous panicking behavior for errant programs that pass a
  position greater than the input length
- Use safe UTF-8 conversion so that a `&str` is never constructed when the
  pos does not fall on a valid UTF-8 boundary
  • Loading branch information
bluk committed Nov 13, 2023
1 parent 59821e4 commit 82f5767
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 20 deletions.
43 changes: 24 additions & 19 deletions maybe_xml/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ impl<'a> Lexer<'a> {
///
/// If a token is found, the position is also updated to after the token.
///
/// # Panics
///
/// Panics if the `pos` is greater than the input length.
///
/// # Examples
///
/// ```
Expand All @@ -158,23 +162,20 @@ impl<'a> Lexer<'a> {
///```
#[must_use]
pub fn tokenize(&self, pos: &mut usize) -> Option<Token<'a>> {
    // `scan` returns the exclusive end index of the next token, or `None`
    // when no complete token starts at `pos`. When `pos` is greater than
    // the input length it panics via slice indexing, which is the
    // documented `# Panics` contract restored by this change.
    let end = scan(self.input, *pos)?;
    // Safe conversion: if `pos`/`end` do not fall on valid UTF-8
    // boundaries, bail out with `None` instead of fabricating an invalid
    // `&str` (the previous `from_utf8_unchecked` was unsound here).
    // Note `pos` is intentionally left unchanged on this early return.
    let token = Token::from_str(core::str::from_utf8(&self.input[*pos..end]).ok()?);
    // Advance the caller's cursor past the token just produced.
    *pos = end;
    Some(token)
}

/// Constant function which tokenizes the input starting at the given position.
///
/// If a token is found, the position is also updated to after the token.
///
/// # Panics
///
/// Panics if the `pos` is greater than the input length.
///
/// # Examples
///
/// ```
Expand All @@ -198,8 +199,12 @@ impl<'a> Lexer<'a> {
// This is a convoluted but *const* way of getting &self.input[*pos..end]
let (bytes, _) = self.input.split_at(end);
let (_, bytes) = bytes.split_at(pos);
let token = Token::from_str(unsafe { core::str::from_utf8_unchecked(bytes) });
Some(token)
if let Ok(s) = core::str::from_utf8(bytes) {
let token = Token::from_str(s);
Some(token)
} else {
None
}
} else {
None
}
Expand Down Expand Up @@ -463,19 +468,19 @@ mod tests {
}

#[test]
#[should_panic(expected = "out of bounds")]
fn panic_on_pos_greater_than_slice_len() {
    // An empty input has length 0, so position 1 is past the end of the
    // slice; `tokenize` must panic (restored behavior for errant callers)
    // rather than quietly return `None` as the previous revision did.
    let lexer = Lexer::from_str("");
    let mut pos = 1;
    let _ = lexer.tokenize(&mut pos);
}

#[test]
#[should_panic(expected = "out of bounds")]
fn panic_on_pos_greater_than_slice_len_2() {
    // Non-empty input variant: one past the end of "hello" (len 5) must
    // panic with an out-of-bounds slice error, not return `None`.
    let lexer = Lexer::from_str("hello");
    let mut pos = "hello".len() + 1;
    let _ = lexer.tokenize(&mut pos);
}

#[cfg(any(feature = "std", feature = "alloc"))]
Expand Down
2 changes: 1 addition & 1 deletion maybe_xml/src/lexer/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ const fn scan_cdata(input: &[u8]) -> Option<usize> {
#[inline]
#[must_use]
pub(super) const fn scan(input: &[u8], pos: usize) -> Option<usize> {
if input.len() <= pos {
if input.len() == pos {
return None;
}

Expand Down

0 comments on commit 82f5767

Please sign in to comment.