From 24b5d3afd75a6cdbaca1dc488d43b90f159108cb Mon Sep 17 00:00:00 2001 From: Simon Mazur Date: Thu, 24 Sep 2015 02:03:47 +0300 Subject: [PATCH 1/4] Improve speed of `fmt::Debug` for `str` and `char` fixes #26920 --- src/libcore/char.rs | 10 ++++++++++ src/libcore/fmt/mod.rs | 22 +++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index dfcbfd476bc3f..a697c8c320e30 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -147,6 +147,7 @@ pub trait CharExt { fn to_digit(self, radix: u32) -> Option; fn escape_unicode(self) -> EscapeUnicode; fn escape_default(self) -> EscapeDefault; + fn needs_escape_default(self) -> bool; fn len_utf8(self) -> usize; fn len_utf16(self) -> usize; fn encode_utf8(self, dst: &mut [u8]) -> Option; @@ -194,6 +195,15 @@ impl CharExt for char { EscapeDefault { state: init_state } } + #[inline] + fn needs_escape_default(self) -> bool { + match self { + '\\' | '\'' | '"' => true, + '\x20' ... '\x7e' => false, + _ => true + } + } + #[inline] fn len_utf8(self) -> usize { let code = self as u32; diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index db7e6d3006f3f..cbcb3026a8e4c 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -1310,11 +1310,20 @@ impl Display for bool { #[stable(feature = "rust1", since = "1.0.0")] impl Debug for str { fn fmt(&self, f: &mut Formatter) -> Result { - try!(write!(f, "\"")); - for c in self.chars().flat_map(|c| c.escape_default()) { - try!(f.write_char(c)) + try!(f.write_char('"')); + let mut from = 0; + for (i, c) in self.char_indices() { + // If char needs escaping, flush backlog so far and write, else skip + if c.needs_escape_default() { + try!(f.write_str(&self[from..i])); + for e in c.escape_default() { + try!(f.write_char(e)); + } + from = i + c.len_utf8(); + } } - write!(f, "\"") + try!(f.write_str(&self[from..])); + f.write_char('"') } } @@ -1328,12 +1337,11 @@ impl Display for str { #[stable(feature = "rust1", since = "1.0.0")] impl Debug for char { fn fmt(&self, f: &mut Formatter) -> Result { - use char::CharExt; - try!(write!(f, "'")); + try!(f.write_char('\'')); for c in self.escape_default() { try!(f.write_char(c)) } - write!(f, "'") + f.write_char('\'') } } From 025ca11ab909a2f2eda4ae8ccb891acf83b296f3 Mon Sep 17 00:00:00 2001 From: Simon Mazur Date: Tue, 29 Sep 2015 15:53:58 +0300 Subject: [PATCH 2/4] Add `fmt::Debug` string escape tests --- src/test/run-pass/ifmt.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs index c8adb6ccc0ab8..2cc033b8a46e1 100644 --- a/src/test/run-pass/ifmt.rs +++ b/src/test/run-pass/ifmt.rs @@ -74,6 +74,10 @@ pub fn main() { t!(format!("{:?}", 10_usize), "10"); t!(format!("{:?}", "true"), "\"true\""); t!(format!("{:?}", "foo\nbar"), "\"foo\\nbar\""); + t!(format!("{:?}", "foo\n\"bar\"\r\n\'baz\'\t\\qux\\"), + r#""foo\n\"bar\"\r\n\'baz\'\t\\qux\\""#); + t!(format!("{:?}", "foo\0bar\x01baz\u{3b1}q\u{75}x"), + r#""foo\u{0}bar\u{1}baz\u{3b1}qux""#); t!(format!("{:o}", 10_usize), "12"); t!(format!("{:x}", 10_usize), "a"); t!(format!("{:X}", 10_usize), "A"); From d2d08721bebe5c200a1db1173a2e5f109393c725 Mon Sep 17 00:00:00 2001 From: Simon Mazur Date: Tue, 29 Sep 2015 21:25:40 +0300 Subject: [PATCH 3/4] Implement `size_hint` for `EscapeDefault` --- src/libcore/char.rs | 23 ++++++++++------------- src/libcore/fmt/mod.rs | 7 ++++--- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index a697c8c320e30..8d1af46691c70 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -147,7 +147,6 @@ pub trait CharExt { fn to_digit(self, radix: u32) -> Option; fn escape_unicode(self) -> EscapeUnicode; fn escape_default(self) -> EscapeDefault; - fn needs_escape_default(self) -> bool; fn len_utf8(self) -> usize; fn len_utf16(self) -> usize; fn encode_utf8(self, dst: &mut [u8]) -> Option; @@ -186,24 +185,13 @@ impl CharExt for char { '\t' => EscapeDefaultState::Backslash('t'), '\r' => EscapeDefaultState::Backslash('r'), '\n' => EscapeDefaultState::Backslash('n'), - '\\' => EscapeDefaultState::Backslash('\\'), - '\'' => EscapeDefaultState::Backslash('\''), - '"' => EscapeDefaultState::Backslash('"'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), '\x20' ... '\x7e' => EscapeDefaultState::Char(self), _ => EscapeDefaultState::Unicode(self.escape_unicode()) }; EscapeDefault { state: init_state } } - #[inline] - fn needs_escape_default(self) -> bool { - match self { - '\\' | '\'' | '"' => true, - '\x20' ... '\x7e' => false, - _ => true - } - } - #[inline] fn len_utf8(self) -> usize { let code = self as u32; @@ -390,4 +378,13 @@ impl Iterator for EscapeDefault { EscapeDefaultState::Unicode(ref mut iter) => iter.next() } } + + fn size_hint(&self) -> (usize, Option) { + match self.state { + EscapeDefaultState::Char(_) => (1, Some(1)), + EscapeDefaultState::Backslash(_) => (2, Some(2)), + EscapeDefaultState::Unicode(_) => (0, Some(10)), + _ => (0, Some(0)) + } + } } diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index cbcb3026a8e4c..88b3963df8d9b 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -1313,11 +1313,12 @@ impl Debug for str { try!(f.write_char('"')); let mut from = 0; for (i, c) in self.char_indices() { + let esc = c.escape_default(); // If char needs escaping, flush backlog so far and write, else skip - if c.needs_escape_default() { + if esc.size_hint().0 != 1 { try!(f.write_str(&self[from..i])); - for e in c.escape_default() { - try!(f.write_char(e)); + for c in esc { + try!(f.write_char(c)); } from = i + c.len_utf8(); } From 0294098d8f3e2926cf0e6adba9a96e47099c3c0f Mon Sep 17 00:00:00 2001 From: Simon Mazur Date: Wed, 30 Sep 2015 20:31:41 +0300 Subject: [PATCH 4/4] Implement `size_hint` for `EscapeUnicode` --- src/libcore/char.rs | 22 +++++++++++++++++++--- src/libcore/fmt/mod.rs | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 8d1af46691c70..ccce2ad22ddc2 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -342,6 +342,22 @@ impl Iterator for EscapeUnicode { EscapeUnicodeState::Done => None, } } + + fn size_hint(&self) -> (usize, Option) { + let mut n = 0; + while (self.c as usize) >> (4 * (n + 1)) != 0 { + n += 1; + } + let n = match self.state { + EscapeUnicodeState::Backslash => n + 5, + EscapeUnicodeState::Type => n + 4, + EscapeUnicodeState::LeftBrace => n + 3, + EscapeUnicodeState::Value(offset) => offset + 2, + EscapeUnicodeState::RightBrace => 1, + EscapeUnicodeState::Done => 0, + }; + (n, Some(n)) + } } /// An iterator over the characters that represent a `char`, escaped @@ -375,7 +391,7 @@ impl Iterator for EscapeDefault { Some(c) } EscapeDefaultState::Done => None, - EscapeDefaultState::Unicode(ref mut iter) => iter.next() + EscapeDefaultState::Unicode(ref mut iter) => iter.next(), } } @@ -383,8 +399,8 @@ impl Iterator for EscapeDefault { match self.state { EscapeDefaultState::Char(_) => (1, Some(1)), EscapeDefaultState::Backslash(_) => (2, Some(2)), - EscapeDefaultState::Unicode(_) => (0, Some(10)), - _ => (0, Some(0)) + EscapeDefaultState::Unicode(ref iter) => iter.size_hint(), + EscapeDefaultState::Done => (0, Some(0)), } } } diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index 88b3963df8d9b..e226b5f2ca8fc 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -1315,7 +1315,7 @@ impl Debug for str { for (i, c) in self.char_indices() { let esc = c.escape_default(); // If char needs escaping, flush backlog so far and write, else skip - if esc.size_hint().0 != 1 { + if esc.size_hint() != (1, Some(1)) { try!(f.write_str(&self[from..i])); for c in esc { try!(f.write_char(c));