1
- //! Utilities for validating string and char literals and turning them into
2
- //! values they represent.
1
+ //! Utilities for validating (raw) string, char, and byte literals and
2
+ //! turning escape sequences into the values they represent.
3
3
4
4
use std:: ffi:: CStr ;
5
5
use std:: ops:: Range ;
@@ -8,9 +8,9 @@ use std::str::Chars;
8
8
#[ cfg( test) ]
9
9
mod tests;
10
10
11
- /// Errors and warnings that can occur during string unescaping. They mostly
12
- /// relate to malformed escape sequences, but there are a few that are about
13
- /// other problems.
11
+ /// Errors and warnings that can occur during string, char, and byte unescaping.
12
+ ///
13
+ /// Most of them relate to malformed escape sequences, but a few cover other problems.
14
14
#[ derive( Debug , PartialEq , Eq ) ]
15
15
pub enum EscapeError {
16
16
/// Expected 1 char, but 0 were found.
@@ -58,7 +58,7 @@ pub enum EscapeError {
58
58
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
59
59
NonAsciiCharInByte ,
60
60
61
- // `\0` in a C string literal.
61
+ /// `\0` in a C string literal.
62
62
NulInCStr ,
63
63
64
64
/// After a line ending with '\', the next line contains whitespace
@@ -79,6 +79,8 @@ impl EscapeError {
79
79
}
80
80
}
81
81
82
+ /// Check a raw string literal for validity
83
+ ///
82
84
/// Takes the contents of a raw string literal (without quotes)
83
85
/// and produces a sequence of characters or errors,
84
86
/// which are returned by invoking `callback`.
@@ -87,6 +89,8 @@ pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char,
87
89
str:: check_raw ( src, callback) ;
88
90
}
89
91
92
+ /// Check a raw byte string literal for validity
93
+ ///
90
94
/// Takes the contents of a raw byte string literal (without quotes)
91
95
/// and produces a sequence of bytes or errors,
92
96
/// which are returned by invoking `callback`.
@@ -95,6 +99,8 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
95
99
<[ u8 ] >:: check_raw ( src, callback) ;
96
100
}
97
101
102
+ /// Check a raw C string literal for validity
103
+ ///
98
104
/// Takes the contents of a raw C string literal (without quotes)
99
105
/// and produces a sequence of characters or errors,
100
106
/// which are returned by invoking `callback`.
@@ -103,7 +109,7 @@ pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char
103
109
CStr :: check_raw ( src, callback) ;
104
110
}
105
111
106
- /// trait for checking raw strings
112
+ /// Trait for checking raw string literals for validity
107
113
trait CheckRaw {
108
114
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
109
115
type RawUnit ;
@@ -161,6 +167,7 @@ impl CheckRaw for [u8] {
161
167
}
162
168
}
163
169
170
+ /// Turn an ASCII char into a byte
164
171
fn char2byte ( c : char ) -> Result < u8 , EscapeError > {
165
172
// do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte)
166
173
if c. is_ascii ( ) {
@@ -182,32 +189,42 @@ impl CheckRaw for CStr {
182
189
}
183
190
}
184
191
192
+ /// Unescape a char literal
193
+ ///
185
194
/// Takes the contents of a char literal (without quotes),
186
195
/// and returns the unescaped `char` or an [`EscapeError`].
187
196
pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
188
197
// Hand the literal's `Chars` iterator to the `str` flavour of `unescape_single`.
str:: unescape_single ( & mut src. chars ( ) )
189
198
}
190
199
200
+ /// Unescape a byte literal
201
+ ///
191
202
/// Takes the contents of a byte literal (without quotes),
192
203
/// and returns the unescaped byte (`u8`) or an [`EscapeError`].
193
204
pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
194
205
// Hand the literal's `Chars` iterator to the `[u8]` flavour of `unescape_single`.
<[ u8 ] >:: unescape_single ( & mut src. chars ( ) )
195
206
}
196
207
208
+ /// Unescape a string literal
209
+ ///
197
210
/// Takes the contents of a string literal (without quotes)
198
211
/// and produces a sequence of unescaped characters or errors,
199
212
/// which are returned by invoking `callback`.
///
/// Each `callback` invocation receives a `Range<usize>` together with the
/// `Result<char, EscapeError>` (the range presumably locates the item
/// within `src`; confirm against the `unescape` implementation).
200
213
pub fn unescape_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
201
214
// Forward to the `str` flavour of `unescape`, which drives `callback`.
str:: unescape ( src, callback)
202
215
}
203
216
217
+ /// Unescape a byte string literal
218
+ ///
204
219
/// Takes the contents of a byte string literal (without quotes)
205
220
/// and produces a sequence of unescaped bytes or errors,
206
221
/// which are returned by invoking `callback`.
///
/// Each `callback` invocation receives a `Range<usize>` together with the
/// `Result<u8, EscapeError>` (the range presumably locates the item
/// within `src`; confirm against the `unescape` implementation).
207
222
pub fn unescape_byte_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < u8 , EscapeError > ) ) {
208
223
// Forward to the `[u8]` flavour of `unescape`, which drives `callback`.
<[ u8 ] >:: unescape ( src, callback)
209
224
}
210
225
226
+ /// Unescape a C string literal
227
+ ///
211
228
/// Takes the contents of a C string literal (without quotes)
212
229
/// and produces a sequence of unescaped `MixedUnit`s or errors,
213
230
/// which are returned by invoking `callback`.
@@ -218,6 +235,8 @@ pub fn unescape_c_str(
218
235
CStr :: unescape ( src, callback)
219
236
}
220
237
238
+ /// Enum representing either a char or a byte
239
+ ///
221
240
/// Used for mixed utf8 string literals, i.e. those that allow both unicode
222
241
/// chars and high bytes.
223
242
#[ derive( Copy , Clone , Debug , PartialEq , Eq ) ]
@@ -254,7 +273,7 @@ impl From<u8> for MixedUnit {
254
273
}
255
274
}
256
275
257
- /// trait for unescaping escape sequences in strings
276
+ /// Trait for unescaping escape sequences in strings
258
277
trait Unescape {
259
278
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
260
279
type Unit : From < u8 > ;
@@ -307,7 +326,9 @@ trait Unescape {
307
326
}
308
327
}
309
328
310
- /// Takes the contents of a raw literal (without quotes)
329
+ /// Unescape a string literal
330
+ ///
331
+ /// Takes the contents of a string literal (without quotes)
311
332
/// and produces a sequence of `Result<Self::Unit, EscapeError>`
312
333
/// which are returned via `callback`.
313
334
fn unescape (
@@ -340,7 +361,9 @@ trait Unescape {
340
361
}
341
362
}
342
363
343
- /// Parse the character of an ASCII escape (except nul) without the leading backslash.
364
+ /// Interpret a non-nul ASCII escape
365
+ ///
366
+ /// Parses the character of an ASCII escape (except nul) without the leading backslash.
344
367
fn simple_escape ( c : char ) -> Result < u8 , char > {
345
368
// Previous character was '\\', unescape what follows.
346
369
Ok ( match c {
@@ -354,7 +377,9 @@ fn simple_escape(c: char) -> Result<u8, char> {
354
377
} )
355
378
}
356
379
357
- /// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
380
+ /// Interpret a hexadecimal escape
381
+ ///
382
+ /// Parses the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
358
383
fn hex_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u8 , EscapeError > {
359
384
let hi = chars. next ( ) . ok_or ( EscapeError :: TooShortHexEscape ) ?;
360
385
let hi = hi. to_digit ( 16 ) . ok_or ( EscapeError :: InvalidCharInHexEscape ) ?;
@@ -365,6 +390,8 @@ fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError>
365
390
Ok ( ( hi * 16 + lo) as u8 )
366
391
}
367
392
393
+ /// Interpret a unicode escape
394
+ ///
368
395
/// Parse the braces with hexadecimal characters (and underscores) part of a unicode escape.
369
396
/// This r"{...}" normally comes after r"\u" and cannot start with an underscore.
370
397
fn unicode_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u32 , EscapeError > {
@@ -412,6 +439,8 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
412
439
}
413
440
}
414
441
442
+ /// Interpret a [string continuation escape](https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes)
443
+ ///
415
444
/// Skip ASCII whitespace, except for the formfeed character
416
445
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
417
446
/// Warns on unescaped newline and following non-ASCII whitespace.
@@ -513,7 +542,7 @@ impl Unescape for CStr {
513
542
}
514
543
}
515
544
516
- /// What kind of literal do we parse.
545
+ /// Enum of the different kinds of literal
517
546
#[ derive( Debug , Clone , Copy , PartialEq ) ]
518
547
pub enum Mode {
519
548
Char ,
@@ -552,10 +581,14 @@ impl Mode {
552
581
}
553
582
}
554
583
584
+ /// Check a literal only for errors
585
+ ///
555
586
/// Takes the contents of a literal (without quotes)
556
- /// and produces a sequence of errors,
587
+ /// and produces a sequence of only errors,
557
588
/// which are returned by invoking `error_callback`.
558
- pub fn unescape_for_errors (
589
+ ///
590
+ /// NB Does not produce any output other than errors
591
+ pub fn check_for_errors (
559
592
src : & str ,
560
593
mode : Mode ,
561
594
mut error_callback : impl FnMut ( Range < usize > , EscapeError ) ,
0 commit comments