Skip to content

Commit 702b0dc

Browse files
committed
rename unescape_for_errors -> check_for_errors, and improve docs
1 parent 67eadd0 commit 702b0dc

File tree

2 files changed

+48
-15
lines changed

2 files changed

+48
-15
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
- Add `check_raw_str`, `check_raw_byte_str`, `check_raw_c_str`,
44
- Add `unescape_str`, `unescape_byte_str`, `unescape_c_str`,
5-
- Add `unescape_for_errors`,
5+
- Add `check_for_errors`,
66
- Remove: `unescape_unicode` and `unescape_mixed`
77

88
# 0.0.3

src/lib.rs

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
//! Utilities for validating string and char literals and turning them into
2-
//! values they represent.
1+
//! Utilities for validating (raw) string, char, and byte literals and
2+
//! turning escape sequences into the values they represent.
33
44
use std::ffi::CStr;
55
use std::ops::Range;
@@ -8,9 +8,9 @@ use std::str::Chars;
88
#[cfg(test)]
99
mod tests;
1010

11-
/// Errors and warnings that can occur during string unescaping. They mostly
12-
/// relate to malformed escape sequences, but there are a few that are about
13-
/// other problems.
11+
/// Errors and warnings that can occur during string, char, and byte unescaping.
12+
///
13+
/// They mostly relate to malformed escape sequences, but a few concern other problems.
1414
#[derive(Debug, PartialEq, Eq)]
1515
pub enum EscapeError {
1616
/// Expected 1 char, but 0 were found.
@@ -58,7 +58,7 @@ pub enum EscapeError {
5858
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
5959
NonAsciiCharInByte,
6060

61-
// `\0` in a C string literal.
61+
/// `\0` in a C string literal.
6262
NulInCStr,
6363

6464
/// After a line ending with '\', the next line contains whitespace
@@ -79,6 +79,8 @@ impl EscapeError {
7979
}
8080
}
8181

82+
/// Check a raw string literal for validity
83+
///
8284
/// Takes the contents of a raw string literal (without quotes)
8385
/// and produces a sequence of characters or errors,
8486
/// which are returned by invoking `callback`.
@@ -87,6 +89,8 @@ pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char,
8789
str::check_raw(src, callback);
8890
}
8991

92+
/// Check a raw byte string literal for validity
93+
///
9094
/// Takes the contents of a raw byte string literal (without quotes)
9195
/// and produces a sequence of bytes or errors,
9296
/// which are returned by invoking `callback`.
@@ -95,6 +99,8 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
9599
<[u8]>::check_raw(src, callback);
96100
}
97101

102+
/// Check a raw C string literal for validity
103+
///
98104
/// Takes the contents of a raw C string literal (without quotes)
99105
/// and produces a sequence of characters or errors,
100106
/// which are returned by invoking `callback`.
@@ -103,7 +109,7 @@ pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char
103109
CStr::check_raw(src, callback);
104110
}
105111

106-
/// trait for checking raw strings
112+
/// Trait for checking raw string literals for validity
107113
trait CheckRaw {
108114
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
109115
type RawUnit;
@@ -161,6 +167,7 @@ impl CheckRaw for [u8] {
161167
}
162168
}
163169

170+
/// Turn an ASCII char into a byte
164171
fn char2byte(c: char) -> Result<u8, EscapeError> {
165172
// do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte)
166173
if c.is_ascii() {
@@ -182,32 +189,42 @@ impl CheckRaw for CStr {
182189
}
183190
}
184191

192+
/// Unescape a char literal
193+
///
185194
/// Takes the contents of a char literal (without quotes),
186195
/// and returns an unescaped char or an error.
187196
pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
188197
str::unescape_single(&mut src.chars())
189198
}
190199

200+
/// Unescape a byte literal
201+
///
191202
/// Takes the contents of a byte literal (without quotes),
192203
/// and returns an unescaped byte or an error.
193204
pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
194205
<[u8]>::unescape_single(&mut src.chars())
195206
}
196207

208+
/// Unescape a string literal
209+
///
197210
/// Takes the contents of a string literal (without quotes)
198211
/// and produces a sequence of unescaped characters or errors,
199212
/// which are returned by invoking `callback`.
200213
pub fn unescape_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
201214
str::unescape(src, callback)
202215
}
203216

217+
/// Unescape a byte string literal
218+
///
204219
/// Takes the contents of a byte string literal (without quotes)
205220
/// and produces a sequence of unescaped bytes or errors,
206221
/// which are returned by invoking `callback`.
207222
pub fn unescape_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) {
208223
<[u8]>::unescape(src, callback)
209224
}
210225

226+
/// Unescape a C string literal
227+
///
211228
/// Takes the contents of a C string literal (without quotes)
212229
/// and produces a sequence of unescaped `MixedUnit`s or errors,
213230
/// which are returned by invoking `callback`.
@@ -218,6 +235,8 @@ pub fn unescape_c_str(
218235
CStr::unescape(src, callback)
219236
}
220237

238+
/// Enum representing either a char or a byte
239+
///
221240
/// Used for mixed utf8 string literals, i.e. those that allow both unicode
222241
/// chars and high bytes.
223242
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -254,7 +273,7 @@ impl From<u8> for MixedUnit {
254273
}
255274
}
256275

257-
/// trait for unescaping escape sequences in strings
276+
/// Trait for unescaping escape sequences in strings
258277
trait Unescape {
259278
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
260279
type Unit: From<u8>;
@@ -307,7 +326,9 @@ trait Unescape {
307326
}
308327
}
309328

310-
/// Takes the contents of a raw literal (without quotes)
329+
/// Unescape a string literal
330+
///
331+
/// Takes the contents of a string literal (without quotes)
311332
/// and produces a sequence of `Result<Self::Unit, EscapeError>`
312333
/// which are returned via `callback`.
313334
fn unescape(
@@ -340,7 +361,9 @@ trait Unescape {
340361
}
341362
}
342363

343-
/// Parse the character of an ASCII escape (except nul) without the leading backslash.
364+
/// Interpret a non-nul ASCII escape
365+
///
366+
/// Parses the character of an ASCII escape (except nul) without the leading backslash.
344367
fn simple_escape(c: char) -> Result<u8, char> {
345368
// Previous character was '\\', unescape what follows.
346369
Ok(match c {
@@ -354,7 +377,9 @@ fn simple_escape(c: char) -> Result<u8, char> {
354377
})
355378
}
356379

357-
/// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
380+
/// Interpret a hexadecimal escape
381+
///
382+
/// Parses the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
358383
fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError> {
359384
let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
360385
let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
@@ -365,6 +390,8 @@ fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError>
365390
Ok((hi * 16 + lo) as u8)
366391
}
367392

393+
/// Interpret a unicode escape
394+
///
368395
/// Parse the braces with hexadecimal characters (and underscores) part of a unicode escape.
369396
/// This r"{...}" normally comes after r"\u" and cannot start with an underscore.
370397
fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeError> {
@@ -412,6 +439,8 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
412439
}
413440
}
414441

442+
/// Interpret a [string continuation escape](https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes)
443+
///
415444
/// Skip ASCII whitespace, except for the formfeed character
416445
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
417446
/// Warns on unescaped newline and following non-ASCII whitespace.
@@ -513,7 +542,7 @@ impl Unescape for CStr {
513542
}
514543
}
515544

516-
/// What kind of literal do we parse.
545+
/// Enum of the different kinds of literal
517546
#[derive(Debug, Clone, Copy, PartialEq)]
518547
pub enum Mode {
519548
Char,
@@ -552,10 +581,14 @@ impl Mode {
552581
}
553582
}
554583

584+
/// Check a literal only for errors
585+
///
555586
/// Takes the contents of a literal (without quotes)
556-
/// and produces a sequence of errors,
587+
/// and produces a sequence of only errors,
557588
/// which are returned by invoking `error_callback`.
558-
pub fn unescape_for_errors(
589+
///
590+
/// NB: does not produce any output other than errors.
591+
pub fn check_for_errors(
559592
src: &str,
560593
mode: Mode,
561594
mut error_callback: impl FnMut(Range<usize>, EscapeError),

0 commit comments

Comments
 (0)