diff --git a/src/etc/char_private.py b/src/etc/char_private.py new file mode 100644 index 0000000000000..3566d143529be --- /dev/null +++ b/src/etc/char_private.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# +# Copyright 2011-2016 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +# This script uses the following Unicode tables: +# - Categories.txt + +import os +import subprocess + +def to_ranges(iter): + current = None + for i in iter: + if current is None or i != current[1] or i in (0x10000, 0x20000): + if current is not None: + yield tuple(current) + current = [i, i + 1] + else: + current[1] += 1 + if current is not None: + yield tuple(current) + +def get_escaped(dictionary): + for i in range(0x110000): + if dictionary.get(i, "Cn") in "Cc Cf Cs Co Cn Zl Zp Zs".split() and i != ord(' '): + yield i + +def get_file(f): + try: + return open(os.path.basename(f)) + except FileNotFoundError: + subprocess.run(["curl", "-O", f], check=True) + return open(os.path.basename(f)) + +def main(): + file = get_file("http://www.unicode.org/notes/tn36/Categories.txt") + + dictionary = {int(line.split()[0], 16): line.split()[1] for line in file} + + CUTOFF=0x10000 + singletons0 = [] + singletons1 = [] + normal0 = [] + normal1 = [] + extra = [] + + for a, b in to_ranges(get_escaped(dictionary)): + if a > 2 * CUTOFF: + extra.append((a, b - a)) + elif a == b - 1: + if a & CUTOFF: + singletons1.append(a & ~CUTOFF) + else: + singletons0.append(a) + elif a == b - 2: + if a & CUTOFF: + singletons1.append(a & ~CUTOFF) + singletons1.append((a + 1) & ~CUTOFF) + else: + singletons0.append(a) + singletons0.append(a + 1) + else: + if a >= 2 * CUTOFF: + extra.append((a, b - a)) + elif a & CUTOFF: + normal1.append((a & ~CUTOFF, b - a)) + else: + normal0.append((a, b - a)) + + print("""\ +// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "src/etc/char_private.py", +// do not edit directly! + +use slice::SliceExt; + +fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool { + for &s in singletons { + if x == s { + return false; + } else if x < s { + break; + } + } + for w in normal.chunks(2) { + let start = w[0]; + let len = w[1]; + let difference = (x as i32) - (start as i32); + if 0 <= difference { + if difference < len as i32 { + return false; + } + } else { + break; + } + } + true +} + +pub fn is_printable(x: char) -> bool { + let x = x as u32; + let lower = x as u16; + if x < 0x10000 { + check(lower, SINGLETONS0, NORMAL0) + } else if x < 0x20000 { + check(lower, SINGLETONS1, NORMAL1) + } else {\ +""") + for a, b in extra: + print(" if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b)) + print(" return false;") + print(" }") + print("""\ + true + } +}\ +""") + print() + print("const SINGLETONS0: &'static [u16] = &[") + for s in singletons0: + print(" 0x{:x},".format(s)) + print("];") + print("const SINGLETONS1: &'static [u16] = &[") + for s in singletons1: + print(" 0x{:x},".format(s)) + print("];") + print("const NORMAL0: &'static [u16] = &[") + for a, b in normal0: + print(" 0x{:x}, 0x{:x},".format(a, b)) + print("];") + print("const NORMAL1: &'static [u16] = &[") + for a, b in normal1: + print(" 0x{:x}, 0x{:x},".format(a, b)) + print("];") + +if __name__ == '__main__': + main() diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index f027d074cb6f0..7fc6e54d69f73 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -33,6 +33,7 @@ #![feature(allow_internal_unstable)] #![feature(box_patterns)] #![feature(box_syntax)] +#![cfg_attr(not(test), feature(char_escape_debug))] #![feature(core_intrinsics)] #![feature(dropck_parametricity)] #![feature(fmt_internals)] diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 55308a46f0ac5..4c64019de097e 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -1697,6 +1697,14 @@ impl str { return s; } + /// Escapes each char in `s` with `char::escape_debug`. + #[unstable(feature = "str_escape", + reason = "return type may change to be an iterator", + issue = "27791")] + pub fn escape_debug(&self) -> String { + self.chars().flat_map(|c| c.escape_debug()).collect() + } + /// Escapes each char in `s` with `char::escape_default`. #[unstable(feature = "str_escape", reason = "return type may change to be an iterator", diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 07428f3f8b2d8..a61925cd3be5a 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -703,16 +703,32 @@ fn test_escape_unicode() { assert_eq!("\u{1d4ea}\r".escape_unicode(), "\\u{1d4ea}\\u{d}"); } +#[test] +fn test_escape_debug() { + assert_eq!("abc".escape_debug(), "abc"); + assert_eq!("a c".escape_debug(), "a c"); + assert_eq!("éèê".escape_debug(), "éèê"); + assert_eq!("\r\n\t".escape_debug(), "\\r\\n\\t"); + assert_eq!("'\"\\".escape_debug(), "\\'\\\"\\\\"); + assert_eq!("\u{7f}\u{ff}".escape_debug(), "\\u{7f}\u{ff}"); + assert_eq!("\u{100}\u{ffff}".escape_debug(), "\u{100}\\u{ffff}"); + assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}"); + assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}"); + assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r"); +} + #[test] fn test_escape_default() { assert_eq!("abc".escape_default(), "abc"); assert_eq!("a c".escape_default(), "a c"); + assert_eq!("éèê".escape_default(), "\\u{e9}\\u{e8}\\u{ea}"); assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t"); assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\"); + assert_eq!("\u{7f}\u{ff}".escape_default(), "\\u{7f}\\u{ff}"); assert_eq!("\u{100}\u{ffff}".escape_default(), "\\u{100}\\u{ffff}"); assert_eq!("\u{10000}\u{10ffff}".escape_default(), "\\u{10000}\\u{10ffff}"); - assert_eq!("ab\u{fb00}".escape_default(), "ab\\u{fb00}"); - assert_eq!("\u{1d4ea}\r".escape_default(), "\\u{1d4ea}\\r"); + assert_eq!("ab\u{200b}".escape_default(), "ab\\u{200b}"); + assert_eq!("\u{10d4ea}\r".escape_default(), "\\u{10d4ea}\\r"); } #[test] diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 0e7f04c775825..a3440fe8aa644 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -17,6 +17,7 @@ use prelude::v1::*; +use char_private::is_printable; use mem::transmute; // UTF-8 ranges and tags for encoding characters @@ -263,6 +264,8 @@ pub trait CharExt { fn escape_unicode(self) -> EscapeUnicode; #[stable(feature = "core", since = "1.6.0")] fn escape_default(self) -> EscapeDefault; + #[unstable(feature = "char_escape_debug", issue = "35068")] + fn escape_debug(self) -> EscapeDebug; #[stable(feature = "core", since = "1.6.0")] fn len_utf8(self) -> usize; #[stable(feature = "core", since = "1.6.0")] @@ -326,6 +329,19 @@ impl CharExt for char { EscapeDefault { state: init_state } } + #[inline] + fn escape_debug(self) -> EscapeDebug { + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + c if is_printable(c) => EscapeDefaultState::Char(c), + c => EscapeDefaultState::Unicode(c.escape_unicode()), + }; + EscapeDebug(EscapeDefault { state: init_state }) + } + #[inline] fn len_utf8(self) -> usize { let code = self as u32; @@ -600,6 +616,27 @@ impl ExactSizeIterator for EscapeDefault { } } +/// An iterator that yields the literal escape code of a `char`. +/// +/// This `struct` is created by the [`escape_debug()`] method on [`char`]. See its +/// documentation for more. +/// +/// [`escape_debug()`]: ../../std/primitive.char.html#method.escape_debug +/// [`char`]: ../../std/primitive.char.html +#[unstable(feature = "char_escape_debug", issue = "35068")] +#[derive(Clone, Debug)] +pub struct EscapeDebug(EscapeDefault); + +#[unstable(feature = "char_escape_debug", issue = "35068")] +impl Iterator for EscapeDebug { + type Item = char; + fn next(&mut self) -> Option { self.0.next() } + fn size_hint(&self) -> (usize, Option) { self.0.size_hint() } +} + +#[unstable(feature = "char_escape_debug", issue = "35068")] +impl ExactSizeIterator for EscapeDebug { } + /// An iterator over `u8` entries represending the UTF-8 encoding of a `char` /// value. /// diff --git a/src/libcore/char_private.rs b/src/libcore/char_private.rs new file mode 100644 index 0000000000000..1d8f95cd4b81c --- /dev/null +++ b/src/libcore/char_private.rs @@ -0,0 +1,695 @@ +// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "src/etc/char_private.py", +// do not edit directly! + +use slice::SliceExt; + +fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool { + for &s in singletons { + if x == s { + return false; + } else if x < s { + break; + } + } + for w in normal.chunks(2) { + let start = w[0]; + let len = w[1]; + let difference = (x as i32) - (start as i32); + if 0 <= difference { + if difference < len as i32 { + return false; + } + } else { + break; + } + } + true +} + +pub fn is_printable(x: char) -> bool { + let x = x as u32; + let lower = x as u16; + if x < 0x10000 { + check(lower, SINGLETONS0, NORMAL0) + } else if x < 0x20000 { + check(lower, SINGLETONS1, NORMAL1) + } else { + if 0x20000 <= x && x < 0x2f800 { + return false; + } + if 0x2fa1e <= x && x < 0xe0100 { + return false; + } + if 0xe01f0 <= x && x < 0x110000 { + return false; + } + true + } +} + +const SINGLETONS0: &'static [u16] = &[ + 0xad, + 0x378, + 0x379, + 0x38b, + 0x38d, + 0x3a2, + 0x557, + 0x558, + 0x560, + 0x588, + 0x590, + 0x61c, + 0x61d, + 0x6dd, + 0x70e, + 0x70f, + 0x74b, + 0x74c, + 0x82e, + 0x82f, + 0x83f, + 0x85c, + 0x85d, + 0x8a1, + 0x8ff, + 0x978, + 0x980, + 0x984, + 0x98d, + 0x98e, + 0x991, + 0x992, + 0x9a9, + 0x9b1, + 0x9ba, + 0x9bb, + 0x9c5, + 0x9c6, + 0x9c9, + 0x9ca, + 0x9de, + 0x9e4, + 0x9e5, + 0xa04, + 0xa11, + 0xa12, + 0xa29, + 0xa31, + 0xa34, + 0xa37, + 0xa3a, + 0xa3b, + 0xa3d, + 0xa49, + 0xa4a, + 0xa5d, + 0xa84, + 0xa8e, + 0xa92, + 0xaa9, + 0xab1, + 0xab4, + 0xaba, + 0xabb, + 0xac6, + 0xaca, + 0xace, + 0xacf, + 0xae4, + 0xae5, + 0xb04, + 0xb0d, + 0xb0e, + 0xb11, + 0xb12, + 0xb29, + 0xb31, + 0xb34, + 0xb3a, + 0xb3b, + 0xb45, + 0xb46, + 0xb49, + 0xb4a, + 0xb5e, + 0xb64, + 0xb65, + 0xb84, + 0xb91, + 0xb9b, + 0xb9d, + 0xbc9, + 0xbce, + 0xbcf, + 0xc04, + 0xc0d, + 0xc11, + 0xc29, + 0xc34, + 0xc45, + 0xc49, + 0xc57, + 0xc64, + 0xc65, + 0xc80, + 0xc81, + 0xc84, + 0xc8d, + 0xc91, + 0xca9, + 0xcb4, + 0xcba, + 0xcbb, + 0xcc5, + 0xcc9, + 0xcdf, + 0xce4, + 0xce5, + 0xcf0, + 0xd04, + 0xd0d, + 0xd11, + 0xd3b, + 0xd3c, + 0xd45, + 0xd49, + 0xd64, + 0xd65, + 0xd80, + 0xd81, + 0xd84, + 0xdb2, + 0xdbc, + 0xdbe, + 0xdbf, + 0xdd5, + 0xdd7, + 0xe83, + 0xe85, + 0xe86, + 0xe89, + 0xe8b, + 0xe8c, + 0xe98, + 0xea0, + 0xea4, + 0xea6, + 0xea8, + 0xea9, + 0xeac, + 0xeba, + 0xebe, + 0xebf, + 0xec5, + 0xec7, + 0xece, + 0xecf, + 0xeda, + 0xedb, + 0xf48, + 0xf98, + 0xfbd, + 0xfcd, + 0x10c6, + 0x10ce, + 0x10cf, + 0x1249, + 0x124e, + 0x124f, + 0x1257, + 0x1259, + 0x125e, + 0x125f, + 0x1289, + 0x128e, + 0x128f, + 0x12b1, + 0x12b6, + 0x12b7, + 0x12bf, + 0x12c1, + 0x12c6, + 0x12c7, + 0x12d7, + 0x1311, + 0x1316, + 0x1317, + 0x135b, + 0x135c, + 0x1680, + 0x170d, + 0x176d, + 0x1771, + 0x17de, + 0x17df, + 0x180e, + 0x180f, + 0x196e, + 0x196f, + 0x1a1c, + 0x1a1d, + 0x1a5f, + 0x1a7d, + 0x1a7e, + 0x1f16, + 0x1f17, + 0x1f1e, + 0x1f1f, + 0x1f46, + 0x1f47, + 0x1f4e, + 0x1f4f, + 0x1f58, + 0x1f5a, + 0x1f5c, + 0x1f5e, + 0x1f7e, + 0x1f7f, + 0x1fb5, + 0x1fc5, + 0x1fd4, + 0x1fd5, + 0x1fdc, + 0x1ff0, + 0x1ff1, + 0x1ff5, + 0x2072, + 0x2073, + 0x208f, + 0x2700, + 0x2c2f, + 0x2c5f, + 0x2d26, + 0x2d2e, + 0x2d2f, + 0x2da7, + 0x2daf, + 0x2db7, + 0x2dbf, + 0x2dc7, + 0x2dcf, + 0x2dd7, + 0x2ddf, + 0x2e9a, + 0x3040, + 0x3097, + 0x3098, + 0x318f, + 0x321f, + 0x32ff, + 0xa78f, + 0xa9ce, + 0xaa4e, + 0xaa4f, + 0xaa5a, + 0xaa5b, + 0xab07, + 0xab08, + 0xab0f, + 0xab10, + 0xab27, + 0xabee, + 0xabef, + 0xfa6e, + 0xfa6f, + 0xfb37, + 0xfb3d, + 0xfb3f, + 0xfb42, + 0xfb45, + 0xfd90, + 0xfd91, + 0xfdfe, + 0xfdff, + 0xfe53, + 0xfe67, + 0xfe75, + 0xffc8, + 0xffc9, + 0xffd0, + 0xffd1, + 0xffd8, + 0xffd9, + 0xffe7, + 0xfffe, + 0xffff, +]; +const SINGLETONS1: &'static [u16] = &[ + 0xc, + 0x27, + 0x3b, + 0x3e, + 0x4e, + 0x4f, + 0x31f, + 0x39e, + 0x49e, + 0x49f, + 0x806, + 0x807, + 0x809, + 0x836, + 0x83d, + 0x83e, + 0x856, + 0xa04, + 0xa14, + 0xa18, + 0xb56, + 0xb57, + 0x10bd, + 0x1135, + 0xd127, + 0xd128, + 0xd455, + 0xd49d, + 0xd4a0, + 0xd4a1, + 0xd4a3, + 0xd4a4, + 0xd4a7, + 0xd4a8, + 0xd4ad, + 0xd4ba, + 0xd4bc, + 0xd4c4, + 0xd506, + 0xd50b, + 0xd50c, + 0xd515, + 0xd51d, + 0xd53a, + 0xd53f, + 0xd545, + 0xd551, + 0xd6a6, + 0xd6a7, + 0xd7cc, + 0xd7cd, + 0xee04, + 0xee20, + 0xee23, + 0xee25, + 0xee26, + 0xee28, + 0xee33, + 0xee38, + 0xee3a, + 0xee48, + 0xee4a, + 0xee4c, + 0xee50, + 0xee53, + 0xee55, + 0xee56, + 0xee58, + 0xee5a, + 0xee5c, + 0xee5e, + 0xee60, + 0xee63, + 0xee65, + 0xee66, + 0xee6b, + 0xee73, + 0xee78, + 0xee7d, + 0xee7f, + 0xee8a, + 0xeea4, + 0xeeaa, + 0xf0af, + 0xf0b0, + 0xf0bf, + 0xf0c0, + 0xf0d0, + 0xf12f, + 0xf336, + 0xf3c5, + 0xf43f, + 0xf441, + 0xf4f8, + 0xf53e, + 0xf53f, +]; +const NORMAL0: &'static [u16] = &[ + 0x0, 0x20, + 0x7f, 0x22, + 0x37f, 0x5, + 0x528, 0x9, + 0x58b, 0x4, + 0x5c8, 0x8, + 0x5eb, 0x5, + 0x5f5, 0x11, + 0x7b2, 0xe, + 0x7fb, 0x5, + 0x85f, 0x41, + 0x8ad, 0x37, + 0x9b3, 0x3, + 0x9cf, 0x8, + 0x9d8, 0x4, + 0x9fc, 0x5, + 0xa0b, 0x4, + 0xa43, 0x4, + 0xa4e, 0x3, + 0xa52, 0x7, + 0xa5f, 0x7, + 0xa76, 0xb, + 0xad1, 0xf, + 0xaf2, 0xf, + 0xb4e, 0x8, + 0xb58, 0x4, + 0xb78, 0xa, + 0xb8b, 0x3, + 0xb96, 0x3, + 0xba0, 0x3, + 0xba5, 0x3, + 0xbab, 0x3, + 0xbba, 0x4, + 0xbc3, 0x3, + 0xbd1, 0x6, + 0xbd8, 0xe, + 0xbfb, 0x6, + 0xc3a, 0x3, + 0xc4e, 0x7, + 0xc5a, 0x6, + 0xc70, 0x8, + 0xcce, 0x7, + 0xcd7, 0x7, + 0xcf3, 0xf, + 0xd4f, 0x8, + 0xd58, 0x8, + 0xd76, 0x3, + 0xd97, 0x3, + 0xdc7, 0x3, + 0xdcb, 0x4, + 0xde0, 0x12, + 0xdf5, 0xc, + 0xe3b, 0x4, + 0xe5c, 0x25, + 0xe8e, 0x6, + 0xee0, 0x20, + 0xf6d, 0x4, + 0xfdb, 0x25, + 0x10c8, 0x5, + 0x137d, 0x3, + 0x139a, 0x6, + 0x13f5, 0xb, + 0x169d, 0x3, + 0x16f1, 0xf, + 0x1715, 0xb, + 0x1737, 0x9, + 0x1754, 0xc, + 0x1774, 0xc, + 0x17ea, 0x6, + 0x17fa, 0x6, + 0x181a, 0x6, + 0x1878, 0x8, + 0x18ab, 0x5, + 0x18f6, 0xa, + 0x191d, 0x3, + 0x192c, 0x4, + 0x193c, 0x4, + 0x1941, 0x3, + 0x1975, 0xb, + 0x19ac, 0x4, + 0x19ca, 0x6, + 0x19db, 0x3, + 0x1a8a, 0x6, + 0x1a9a, 0x6, + 0x1aae, 0x52, + 0x1b4c, 0x4, + 0x1b7d, 0x3, + 0x1bf4, 0x8, + 0x1c38, 0x3, + 0x1c4a, 0x3, + 0x1c80, 0x40, + 0x1cc8, 0x8, + 0x1cf7, 0x9, + 0x1de7, 0x15, + 0x1fff, 0x11, + 0x2028, 0x8, + 0x205f, 0x11, + 0x209d, 0x3, + 0x20ba, 0x16, + 0x20f1, 0xf, + 0x218a, 0x6, + 0x23f4, 0xc, + 0x2427, 0x19, + 0x244b, 0x15, + 0x2b4d, 0x3, + 0x2b5a, 0xa6, + 0x2cf4, 0x5, + 0x2d28, 0x5, + 0x2d68, 0x7, + 0x2d71, 0xe, + 0x2d97, 0x9, + 0x2e3c, 0x44, + 0x2ef4, 0xc, + 0x2fd6, 0x1a, + 0x2ffc, 0x5, + 0x3100, 0x5, + 0x312e, 0x3, + 0x31bb, 0x5, + 0x31e4, 0xc, + 0x3400, 0x19c0, + 0x4e00, 0x5200, + 0xa48d, 0x3, + 0xa4c7, 0x9, + 0xa62c, 0x14, + 0xa698, 0x7, + 0xa6f8, 0x8, + 0xa794, 0xc, + 0xa7ab, 0x4d, + 0xa82c, 0x4, + 0xa83a, 0x6, + 0xa878, 0x8, + 0xa8c5, 0x9, + 0xa8da, 0x6, + 0xa8fc, 0x4, + 0xa954, 0xb, + 0xa97d, 0x3, + 0xa9da, 0x4, + 0xa9e0, 0x20, + 0xaa37, 0x9, + 0xaa7c, 0x4, + 0xaac3, 0x18, + 0xaaf7, 0xa, + 0xab17, 0x9, + 0xab2f, 0x91, + 0xabfa, 0x2bb6, + 0xd7c7, 0x4, + 0xd7fc, 0x2104, + 0xfada, 0x26, + 0xfb07, 0xc, + 0xfb18, 0x5, + 0xfbc2, 0x11, + 0xfd40, 0x10, + 0xfdc8, 0x28, + 0xfe1a, 0x6, + 0xfe27, 0x9, + 0xfe6c, 0x4, + 0xfefd, 0x4, + 0xffbf, 0x3, + 0xffdd, 0x3, + 0xffef, 0xd, +]; +const NORMAL1: &'static [u16] = &[ + 0x5e, 0x22, + 0xfb, 0x5, + 0x103, 0x4, + 0x134, 0x3, + 0x18b, 0x5, + 0x19c, 0x34, + 0x1fe, 0x82, + 0x29d, 0x3, + 0x2d1, 0x2f, + 0x324, 0xc, + 0x34b, 0x35, + 0x3c4, 0x4, + 0x3d6, 0x2a, + 0x4aa, 0x356, + 0x839, 0x3, + 0x860, 0xa0, + 0x91c, 0x3, + 0x93a, 0x5, + 0x940, 0x40, + 0x9b8, 0x6, + 0x9c0, 0x40, + 0xa07, 0x5, + 0xa34, 0x4, + 0xa3b, 0x4, + 0xa48, 0x8, + 0xa59, 0x7, + 0xa80, 0x80, + 0xb36, 0x3, + 0xb73, 0x5, + 0xb80, 0x80, + 0xc49, 0x217, + 0xe7f, 0x181, + 0x104e, 0x4, + 0x1070, 0x10, + 0x10c2, 0xe, + 0x10e9, 0x7, + 0x10fa, 0x6, + 0x1144, 0x3c, + 0x11c9, 0x7, + 0x11da, 0x4a6, + 0x16b8, 0x8, + 0x16ca, 0x936, + 0x236f, 0x91, + 0x2463, 0xd, + 0x2474, 0xb8c, + 0x342f, 0x33d1, + 0x6a39, 0x4c7, + 0x6f45, 0xb, + 0x6f7f, 0x10, + 0x6fa0, 0x4060, + 0xb002, 0x1ffe, + 0xd0f6, 0xa, + 0xd173, 0x8, + 0xd1de, 0x22, + 0xd246, 0xba, + 0xd357, 0x9, + 0xd372, 0x8e, + 0xd547, 0x3, + 0xd800, 0x1600, + 0xee3c, 0x6, + 0xee43, 0x4, + 0xee9c, 0x5, + 0xeebc, 0x34, + 0xeef2, 0x10e, + 0xf02c, 0x4, + 0xf094, 0xc, + 0xf0e0, 0x20, + 0xf10b, 0x5, + 0xf16c, 0x4, + 0xf19b, 0x4b, + 0xf203, 0xd, + 0xf23b, 0x5, + 0xf249, 0x7, + 0xf252, 0xae, + 0xf321, 0xf, + 0xf37d, 0x3, + 0xf394, 0xc, + 0xf3cb, 0x15, + 0xf3f1, 0xf, + 0xf4fd, 0x3, + 0xf544, 0xc, + 0xf568, 0x93, + 0xf641, 0x4, + 0xf650, 0x30, + 0xf6c6, 0x3a, + 0xf774, 0x88c, +]; diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index e5eb8f21382be..173c55e35d51e 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -1383,7 +1383,7 @@ impl Debug for str { f.write_char('"')?; let mut from = 0; for (i, c) in self.char_indices() { - let esc = c.escape_default(); + let esc = c.escape_debug(); // If char needs escaping, flush backlog so far and write, else skip if esc.len() != 1 { f.write_str(&self[from..i])?; @@ -1409,7 +1409,7 @@ impl Display for str { impl Debug for char { fn fmt(&self, f: &mut Formatter) -> Result { f.write_char('\'')?; - for c in self.escape_default() { + for c in self.escape_debug() { f.write_char(c)? } f.write_char('\'') diff --git a/src/libcore/lib.rs b/src/libcore/lib.rs index 0ad1f671f155b..fabb3900ec648 100644 --- a/src/libcore/lib.rs +++ b/src/libcore/lib.rs @@ -103,17 +103,17 @@ mod int_macros; #[macro_use] mod uint_macros; -#[path = "num/isize.rs"] pub mod isize; -#[path = "num/i8.rs"] pub mod i8; -#[path = "num/i16.rs"] pub mod i16; -#[path = "num/i32.rs"] pub mod i32; -#[path = "num/i64.rs"] pub mod i64; +#[path = "num/isize.rs"] pub mod isize; +#[path = "num/i8.rs"] pub mod i8; +#[path = "num/i16.rs"] pub mod i16; +#[path = "num/i32.rs"] pub mod i32; +#[path = "num/i64.rs"] pub mod i64; #[path = "num/usize.rs"] pub mod usize; -#[path = "num/u8.rs"] pub mod u8; -#[path = "num/u16.rs"] pub mod u16; -#[path = "num/u32.rs"] pub mod u32; -#[path = "num/u64.rs"] pub mod u64; +#[path = "num/u8.rs"] pub mod u8; +#[path = "num/u16.rs"] pub mod u16; +#[path = "num/u32.rs"] pub mod u32; +#[path = "num/u64.rs"] pub mod u64; #[path = "num/f32.rs"] pub mod f32; #[path = "num/f64.rs"] pub mod f64; @@ -161,5 +161,6 @@ pub mod hash; pub mod fmt; // note: does not need to be public +mod char_private; mod iter_private; mod tuple; diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index c8906fed3d2fa..4632419336d7f 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -123,6 +123,49 @@ fn test_is_digit() { assert!(!'Q'.is_numeric()); } +#[test] +fn test_escape_debug() { + fn string(c: char) -> String { + c.escape_debug().collect() + } + let s = string('\n'); + assert_eq!(s, "\\n"); + let s = string('\r'); + assert_eq!(s, "\\r"); + let s = string('\''); + assert_eq!(s, "\\'"); + let s = string('"'); + assert_eq!(s, "\\\""); + let s = string(' '); + assert_eq!(s, " "); + let s = string('a'); + assert_eq!(s, "a"); + let s = string('~'); + assert_eq!(s, "~"); + let s = string('é'); + assert_eq!(s, "é"); + let s = string('\x00'); + assert_eq!(s, "\\u{0}"); + let s = string('\x1f'); + assert_eq!(s, "\\u{1f}"); + let s = string('\x7f'); + assert_eq!(s, "\\u{7f}"); + let s = string('\u{80}'); + assert_eq!(s, "\\u{80}"); + let s = string('\u{ff}'); + assert_eq!(s, "\u{ff}"); + let s = string('\u{11b}'); + assert_eq!(s, "\u{11b}"); + let s = string('\u{1d4b6}'); + assert_eq!(s, "\u{1d4b6}"); + let s = string('\u{200b}'); // zero width space + assert_eq!(s, "\\u{200b}"); + let s = string('\u{e000}'); // private use 1 + assert_eq!(s, "\\u{e000}"); + let s = string('\u{100000}'); // private use 2 + assert_eq!(s, "\\u{100000}"); +} + #[test] fn test_escape_default() { fn string(c: char) -> String { @@ -142,18 +185,28 @@ fn test_escape_default() { assert_eq!(s, "a"); let s = string('~'); assert_eq!(s, "~"); + let s = string('é'); + assert_eq!(s, "\\u{e9}"); let s = string('\x00'); assert_eq!(s, "\\u{0}"); let s = string('\x1f'); assert_eq!(s, "\\u{1f}"); let s = string('\x7f'); assert_eq!(s, "\\u{7f}"); + let s = string('\u{80}'); + assert_eq!(s, "\\u{80}"); let s = string('\u{ff}'); assert_eq!(s, "\\u{ff}"); let s = string('\u{11b}'); assert_eq!(s, "\\u{11b}"); let s = string('\u{1d4b6}'); assert_eq!(s, "\\u{1d4b6}"); + let s = string('\u{200b}'); // zero width space + assert_eq!(s, "\\u{200b}"); + let s = string('\u{e000}'); // private use 1 + assert_eq!(s, "\\u{e000}"); + let s = string('\u{100000}'); // private use 2 + assert_eq!(s, "\\u{100000}"); } #[test] diff --git a/src/libcoretest/lib.rs b/src/libcoretest/lib.rs index 1ef2b58351fe5..ef0042808f98b 100644 --- a/src/libcoretest/lib.rs +++ b/src/libcoretest/lib.rs @@ -14,6 +14,7 @@ #![feature(borrow_state)] #![feature(box_syntax)] #![feature(cell_extras)] +#![feature(char_escape_debug)] #![feature(const_fn)] #![feature(core_private_bignum)] #![feature(core_private_diy_float)] @@ -29,10 +30,10 @@ #![feature(slice_patterns)] #![feature(step_by)] #![feature(test)] +#![feature(try_from)] #![feature(unboxed_closures)] #![feature(unicode)] #![feature(unique)] -#![feature(try_from)] extern crate core; extern crate test; diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index 7445ff94eb502..7e308684a2567 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -36,7 +36,7 @@ use tables::{conversions, derived_property, general_category, property}; #[stable(feature = "rust1", since = "1.0.0")] pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked}; #[stable(feature = "rust1", since = "1.0.0")] -pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDefault, EscapeUnicode}; +pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDebug, EscapeDefault, EscapeUnicode}; // unstable reexports #[unstable(feature = "decode_utf8", issue = "33906")] @@ -267,6 +267,41 @@ impl char { C::escape_unicode(self) } + /// Returns an iterator that yields the literal escape code of a `char`. + /// + /// This will escape the characters similar to the `Debug` implementations + /// of `str` or `char`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// for i in '\n'.escape_default() { + /// println!("{}", i); + /// } + /// ``` + /// + /// This prints: + /// + /// ```text + /// \ + /// n + /// ``` + /// + /// Collecting into a `String`: + /// + /// ``` + /// let quote: String = '\n'.escape_default().collect(); + /// + /// assert_eq!(quote, "\\n"); + /// ``` + #[unstable(feature = "char_escape_debug", issue = "35068")] + #[inline] + pub fn escape_debug(self) -> EscapeDebug { + C::escape_debug(self) + } + /// Returns an iterator that yields the literal escape code of a `char`. /// /// The default is chosen with a bias toward producing literals that are diff --git a/src/librustc_unicode/lib.rs b/src/librustc_unicode/lib.rs index f91a754ab57db..3ae905eba279b 100644 --- a/src/librustc_unicode/lib.rs +++ b/src/librustc_unicode/lib.rs @@ -32,6 +32,7 @@ #![cfg_attr(not(stage0), deny(warnings))] #![no_std] +#![feature(char_escape_debug)] #![feature(core_char_ext)] #![feature(decode_utf8)] #![feature(lang_items)] diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index d05a5a0961483..865d067cdb6dc 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -218,8 +218,9 @@ #![feature(associated_consts)] #![feature(borrow_state)] #![feature(box_syntax)] -#![feature(cfg_target_vendor)] #![feature(cfg_target_thread_local)] +#![feature(cfg_target_vendor)] +#![feature(char_escape_debug)] #![feature(char_internals)] #![feature(collections)] #![feature(collections_bound)] @@ -229,10 +230,10 @@ #![feature(dropck_parametricity)] #![feature(float_extras)] #![feature(float_from_str_radix)] -#![feature(fnbox)] #![feature(fn_traits)] -#![feature(heap_api)] +#![feature(fnbox)] #![feature(hashmap_hasher)] +#![feature(heap_api)] #![feature(inclusive_range)] #![feature(int_error_internals)] #![feature(into_cow)] @@ -242,6 +243,7 @@ #![feature(linkage)] #![feature(macro_reexport)] #![cfg_attr(test, feature(map_values_mut))] +#![feature(needs_panic_runtime)] #![feature(num_bits_bytes)] #![feature(old_wrapping)] #![feature(on_unimplemented)] @@ -249,10 +251,11 @@ #![feature(optin_builtin_traits)] #![feature(panic_unwind)] #![feature(placement_in_syntax)] +#![feature(question_mark)] #![feature(rand)] #![feature(raw)] -#![feature(repr_simd)] #![feature(reflect_marker)] +#![feature(repr_simd)] #![feature(rustc_attrs)] #![feature(shared)] #![feature(sip_hash_13)] @@ -266,6 +269,7 @@ #![feature(str_utf16)] #![feature(test, rustc_private)] #![feature(thread_local)] +#![feature(try_from)] #![feature(unboxed_closures)] #![feature(unicode)] #![feature(unique)] @@ -273,9 +277,6 @@ #![feature(unwind_attributes)] #![feature(vec_push_all)] #![feature(zero_one)] -#![feature(question_mark)] -#![feature(try_from)] -#![feature(needs_panic_runtime)] // Issue# 30592: Systematically use alloc_system during stage0 since jemalloc // might be unavailable or disabled diff --git a/src/libstd/sys/common/wtf8.rs b/src/libstd/sys/common/wtf8.rs index 2c1a656290f94..c0e6ec46b55b9 100644 --- a/src/libstd/sys/common/wtf8.rs +++ b/src/libstd/sys/common/wtf8.rs @@ -390,7 +390,7 @@ impl fmt::Debug for Wtf8 { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { fn write_str_escaped(f: &mut fmt::Formatter, s: &str) -> fmt::Result { use fmt::Write; - for c in s.chars().flat_map(|c| c.escape_default()) { + for c in s.chars().flat_map(|c| c.escape_debug()) { f.write_char(c)? } Ok(()) @@ -1064,9 +1064,9 @@ mod tests { #[test] fn wtf8buf_show() { - let mut string = Wtf8Buf::from_str("a\té 💩\r"); + let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r"); string.push(CodePoint::from_u32(0xD800).unwrap()); - assert_eq!(format!("{:?}", string), r#""a\t\u{e9} \u{1f4a9}\r\u{D800}""#); + assert_eq!(format!("{:?}", string), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{D800}\""); } #[test] diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs index ed56519d23628..8b5536de12e49 100644 --- a/src/test/run-pass/ifmt.rs +++ b/src/test/run-pass/ifmt.rs @@ -70,15 +70,15 @@ pub fn main() { t!(format!("{}", '☃'), "☃"); t!(format!("{}", 10), "10"); t!(format!("{}", 10_usize), "10"); - t!(format!("{:?}", '☃'), "'\\u{2603}'"); + t!(format!("{:?}", '☃'), "'☃'"); t!(format!("{:?}", 10), "10"); t!(format!("{:?}", 10_usize), "10"); t!(format!("{:?}", "true"), "\"true\""); t!(format!("{:?}", "foo\nbar"), "\"foo\\nbar\""); t!(format!("{:?}", "foo\n\"bar\"\r\n\'baz\'\t\\qux\\"), r#""foo\n\"bar\"\r\n\'baz\'\t\\qux\\""#); - t!(format!("{:?}", "foo\0bar\x01baz\u{3b1}q\u{75}x"), - r#""foo\u{0}bar\u{1}baz\u{3b1}qux""#); + t!(format!("{:?}", "foo\0bar\x01baz\u{7f}q\u{75}x"), + r#""foo\u{0}bar\u{1}baz\u{7f}qux""#); t!(format!("{:o}", 10_usize), "12"); t!(format!("{:x}", 10_usize), "a"); t!(format!("{:X}", 10_usize), "A"); diff --git a/src/test/run-pass/sync-send-iterators-in-libcore.rs b/src/test/run-pass/sync-send-iterators-in-libcore.rs index 931789948159d..d12bdf182fa4d 100644 --- a/src/test/run-pass/sync-send-iterators-in-libcore.rs +++ b/src/test/run-pass/sync-send-iterators-in-libcore.rs @@ -67,7 +67,7 @@ macro_rules! is_sync_send { fn main() { // for char.rs - all_sync_send!("Я", escape_default, escape_unicode); + all_sync_send!("Я", escape_debug, escape_default, escape_unicode); // for iter.rs all_sync_send_mutable_ref!([1], iter);