From ae6adf335c88cad4e4a1805a805ac49dd1350174 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 15:45:33 +0200 Subject: [PATCH 01/15] Move char::REPLACEMENT_CHARACTER to libcore --- src/libcore/char.rs | 8 ++++++++ src/libstd_unicode/char.rs | 10 ++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 718c6b893edf2..b5554a59db56a 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -77,6 +77,14 @@ const MAX_THREE_B: u32 = 0x10000; #[stable(feature = "rust1", since = "1.0.0")] pub const MAX: char = '\u{10ffff}'; +/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a +/// decoding error. +/// +/// It can occur, for example, when giving ill-formed UTF-8 bytes to +/// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy). +#[stable(feature = "decode_utf16", since = "1.9.0")] +pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}'; + /// Converts a `u32` to a `char`. /// /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with diff --git a/src/libstd_unicode/char.rs b/src/libstd_unicode/char.rs index 33e47ade8cb9c..460f83d875a7b 100644 --- a/src/libstd_unicode/char.rs +++ b/src/libstd_unicode/char.rs @@ -38,6 +38,8 @@ use tables::{conversions, derived_property, general_category, property}; pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked}; #[stable(feature = "rust1", since = "1.0.0")] pub use core::char::{EscapeDebug, EscapeDefault, EscapeUnicode}; +#[stable(feature = "decode_utf16", since = "1.9.0")] +pub use core::char::REPLACEMENT_CHARACTER; #[stable(feature = "char_from_str", since = "1.20.0")] pub use core::char::ParseCharError; @@ -1581,11 +1583,3 @@ impl fmt::Display for DecodeUtf16Error { write!(f, "unpaired surrogate found: {:x}", self.code) } } - -/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a -/// decoding error. 
-/// -/// It can occur, for example, when giving ill-formed UTF-8 bytes to -/// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy). -#[stable(feature = "decode_utf16", since = "1.9.0")] -pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}'; From f87d4a15a82a76e7510629173c366d084f2c02ca Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 15:55:28 +0200 Subject: [PATCH 02/15] Move Utf8Lossy decoder to libcore --- src/liballoc/string.rs | 2 +- src/{libstd_unicode => libcore/str}/lossy.rs | 9 ++++----- src/libcore/str/mod.rs | 4 ++++ src/libcore/tests/lib.rs | 2 ++ .../tests/lossy.rs => libcore/tests/str_lossy.rs} | 2 +- src/libstd/sys/redox/os_str.rs | 2 +- src/libstd/sys/unix/os_str.rs | 2 +- src/libstd/sys/wasm/os_str.rs | 2 +- src/libstd/sys_common/bytestring.rs | 2 +- src/libstd_unicode/Cargo.toml | 4 ---- src/libstd_unicode/lib.rs | 1 - src/libstd_unicode/tests/lib.rs | 15 --------------- 12 files changed, 16 insertions(+), 31 deletions(-) rename src/{libstd_unicode => libcore/str}/lossy.rs (98%) rename src/{libstd_unicode/tests/lossy.rs => libcore/tests/str_lossy.rs} (99%) delete mode 100644 src/libstd_unicode/tests/lib.rs diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs index b95aae02894ed..5f90e28cb3c3f 100644 --- a/src/liballoc/string.rs +++ b/src/liballoc/string.rs @@ -63,7 +63,7 @@ use core::ops::Bound::{Excluded, Included, Unbounded}; use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds}; use core::ptr; use core::str::pattern::Pattern; -use std_unicode::lossy; +use core::str::lossy; use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER}; use borrow::{Cow, ToOwned}; diff --git a/src/libstd_unicode/lossy.rs b/src/libcore/str/lossy.rs similarity index 98% rename from src/libstd_unicode/lossy.rs rename to src/libcore/str/lossy.rs index cc8e93308a527..30b7267da7c5e 100644 --- a/src/libstd_unicode/lossy.rs +++ b/src/libcore/str/lossy.rs @@ -8,12 +8,11 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. -use core::str as core_str; -use core::fmt; -use core::fmt::Write; use char; -use core::mem; - +use str as core_str; +use fmt; +use fmt::Write; +use mem; /// Lossy UTF-8 string. #[unstable(feature = "str_internals", issue = "0")] diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 1185b7acaae1f..7a97d89dcf967 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -26,6 +26,10 @@ use mem; pub mod pattern; +#[unstable(feature = "str_internals", issue = "0")] +#[allow(missing_docs)] +pub mod lossy; + /// A trait to abstract the idea of creating a new instance of a type from a /// string. /// diff --git a/src/libcore/tests/lib.rs b/src/libcore/tests/lib.rs index c3162899bbd01..149269263dc8a 100644 --- a/src/libcore/tests/lib.rs +++ b/src/libcore/tests/lib.rs @@ -33,6 +33,7 @@ #![feature(sort_internals)] #![feature(specialization)] #![feature(step_trait)] +#![feature(str_internals)] #![feature(test)] #![feature(trusted_len)] #![feature(try_trait)] @@ -68,4 +69,5 @@ mod ptr; mod result; mod slice; mod str; +mod str_lossy; mod tuple; diff --git a/src/libstd_unicode/tests/lossy.rs b/src/libcore/tests/str_lossy.rs similarity index 99% rename from src/libstd_unicode/tests/lossy.rs rename to src/libcore/tests/str_lossy.rs index e05d066855635..69e28256da9c3 100644 --- a/src/libstd_unicode/tests/lossy.rs +++ b/src/libcore/tests/str_lossy.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use std_unicode::lossy::*; +use core::str::lossy::*; #[test] fn chunks() { diff --git a/src/libstd/sys/redox/os_str.rs b/src/libstd/sys/redox/os_str.rs index da27787babb97..eb3a1ead58c94 100644 --- a/src/libstd/sys/redox/os_str.rs +++ b/src/libstd/sys/redox/os_str.rs @@ -19,7 +19,7 @@ use rc::Rc; use sync::Arc; use sys_common::{AsInner, IntoInner}; use sys_common::bytestring::debug_fmt_bytestring; -use std_unicode::lossy::Utf8Lossy; +use core::str::lossy::Utf8Lossy; #[derive(Clone, Hash)] pub struct Buf { diff --git a/src/libstd/sys/unix/os_str.rs b/src/libstd/sys/unix/os_str.rs index e43bc6da5f1f8..01c0fb830aadd 100644 --- a/src/libstd/sys/unix/os_str.rs +++ b/src/libstd/sys/unix/os_str.rs @@ -19,7 +19,7 @@ use rc::Rc; use sync::Arc; use sys_common::{AsInner, IntoInner}; use sys_common::bytestring::debug_fmt_bytestring; -use std_unicode::lossy::Utf8Lossy; +use core::str::lossy::Utf8Lossy; #[derive(Clone, Hash)] pub struct Buf { diff --git a/src/libstd/sys/wasm/os_str.rs b/src/libstd/sys/wasm/os_str.rs index 84f560af69bec..e0da5bdf36c14 100644 --- a/src/libstd/sys/wasm/os_str.rs +++ b/src/libstd/sys/wasm/os_str.rs @@ -19,7 +19,7 @@ use rc::Rc; use sync::Arc; use sys_common::{AsInner, IntoInner}; use sys_common::bytestring::debug_fmt_bytestring; -use std_unicode::lossy::Utf8Lossy; +use core::str::lossy::Utf8Lossy; #[derive(Clone, Hash)] pub struct Buf { diff --git a/src/libstd/sys_common/bytestring.rs b/src/libstd/sys_common/bytestring.rs index eb9cad0991505..971b83938c167 100644 --- a/src/libstd/sys_common/bytestring.rs +++ b/src/libstd/sys_common/bytestring.rs @@ -11,7 +11,7 @@ #![allow(dead_code)] use fmt::{Formatter, Result, Write}; -use std_unicode::lossy::{Utf8Lossy, Utf8LossyChunk}; +use core::str::lossy::{Utf8Lossy, Utf8LossyChunk}; pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter) -> Result { // Writes out a valid unicode string with the correct escape sequences diff --git a/src/libstd_unicode/Cargo.toml b/src/libstd_unicode/Cargo.toml index 
283070a0e2cf7..b1c55c2e4b6ce 100644 --- a/src/libstd_unicode/Cargo.toml +++ b/src/libstd_unicode/Cargo.toml @@ -9,10 +9,6 @@ path = "lib.rs" test = false bench = false -[[test]] -name = "std_unicode_tests" -path = "tests/lib.rs" - [dependencies] core = { path = "../libcore" } compiler_builtins = { path = "../rustc/compiler_builtins_shim" } diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs index cf8c101a2f91f..106a2c0f0c514 100644 --- a/src/libstd_unicode/lib.rs +++ b/src/libstd_unicode/lib.rs @@ -45,7 +45,6 @@ mod tables; mod u_str; mod version; pub mod char; -pub mod lossy; #[allow(deprecated)] pub mod str { diff --git a/src/libstd_unicode/tests/lib.rs b/src/libstd_unicode/tests/lib.rs deleted file mode 100644 index 9535ec18763e6..0000000000000 --- a/src/libstd_unicode/tests/lib.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -#![feature(str_internals, unicode)] - -extern crate std_unicode; - -mod lossy; From 5807be7ccb2c14df9db87a54038221bbf5ae00fa Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 17:09:28 +0200 Subject: [PATCH 03/15] Move contents of libstd_unicode into libcore --- src/libcore/lib.rs | 2 ++ .../unicode}/bool_trie.rs | 0 .../unicode}/char.rs | 24 +++++++------- src/libcore/unicode/mod.rs | 29 +++++++++++++++++ .../u_str.rs => libcore/unicode/str.rs} | 6 ++-- .../unicode}/tables.rs | 7 ++-- .../unicode}/unicode.py | 7 ++-- .../unicode}/version.rs | 0 src/libstd_unicode/lib.rs | 32 ++----------------- .../single-primitive-inherent-impl.rs | 8 ++--- 10 files changed, 56 insertions(+), 59 deletions(-) rename src/{libstd_unicode => libcore/unicode}/bool_trie.rs (100%) rename src/{libstd_unicode => libcore/unicode}/char.rs (98%) create mode 100644 src/libcore/unicode/mod.rs rename src/{libstd_unicode/u_str.rs => libcore/unicode/str.rs} (98%) rename src/{libstd_unicode => libcore/unicode}/tables.rs (99%) rename src/{libstd_unicode => libcore/unicode}/unicode.py (99%) rename src/{libstd_unicode => libcore/unicode}/version.rs (100%) diff --git a/src/libcore/lib.rs b/src/libcore/lib.rs index e194b173aa718..7cb635a299aef 100644 --- a/src/libcore/lib.rs +++ b/src/libcore/lib.rs @@ -180,6 +180,8 @@ pub mod hash; pub mod fmt; pub mod time; +pub mod unicode; + /* Heap memory allocator trait */ #[allow(missing_docs)] pub mod heap; diff --git a/src/libstd_unicode/bool_trie.rs b/src/libcore/unicode/bool_trie.rs similarity index 100% rename from src/libstd_unicode/bool_trie.rs rename to src/libcore/unicode/bool_trie.rs diff --git a/src/libstd_unicode/char.rs b/src/libcore/unicode/char.rs similarity index 98% rename from src/libstd_unicode/char.rs rename to src/libcore/unicode/char.rs index 460f83d875a7b..0e8b09f621a07 100644 --- a/src/libstd_unicode/char.rs +++ b/src/libcore/unicode/char.rs @@ -28,30 +28,30 @@ #![stable(feature = "rust1", since = "1.0.0")] -use 
core::char::CharExt as C; -use core::iter::FusedIterator; -use core::fmt::{self, Write}; -use tables::{conversions, derived_property, general_category, property}; +use char::CharExt as C; +use iter::FusedIterator; +use fmt::{self, Write}; +use unicode::tables::{conversions, derived_property, general_category, property}; // stable re-exports #[stable(feature = "rust1", since = "1.0.0")] -pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked}; +pub use char::{MAX, from_digit, from_u32, from_u32_unchecked}; #[stable(feature = "rust1", since = "1.0.0")] -pub use core::char::{EscapeDebug, EscapeDefault, EscapeUnicode}; +pub use char::{EscapeDebug, EscapeDefault, EscapeUnicode}; #[stable(feature = "decode_utf16", since = "1.9.0")] -pub use core::char::REPLACEMENT_CHARACTER; +pub use char::REPLACEMENT_CHARACTER; #[stable(feature = "char_from_str", since = "1.20.0")] -pub use core::char::ParseCharError; +pub use char::ParseCharError; // unstable re-exports #[stable(feature = "try_from", since = "1.26.0")] -pub use core::char::CharTryFromError; +pub use char::CharTryFromError; #[unstable(feature = "decode_utf8", issue = "33906")] -pub use core::char::{DecodeUtf8, decode_utf8}; +pub use char::{DecodeUtf8, decode_utf8}; #[unstable(feature = "unicode", issue = "27783")] -pub use tables::{UNICODE_VERSION}; +pub use unicode::tables::{UNICODE_VERSION}; #[unstable(feature = "unicode", issue = "27783")] -pub use version::UnicodeVersion; +pub use unicode::version::UnicodeVersion; /// Returns an iterator that yields the lowercase equivalent of a `char`. /// diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs new file mode 100644 index 0000000000000..aaf8081799f07 --- /dev/null +++ b/src/libcore/unicode/mod.rs @@ -0,0 +1,29 @@ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![unstable(feature = "unicode", issue = "27783")] +#![allow(missing_docs)] + +mod bool_trie; +mod tables; +mod version; + +pub mod str; +pub mod char; + +// For use in liballoc, not re-exported in libstd. +pub mod derived_property { + pub use unicode::tables::derived_property::{Case_Ignorable, Cased}; +} + +// For use in libsyntax +pub mod property { + pub use unicode::tables::property::Pattern_White_Space; +} diff --git a/src/libstd_unicode/u_str.rs b/src/libcore/unicode/str.rs similarity index 98% rename from src/libstd_unicode/u_str.rs rename to src/libcore/unicode/str.rs index a72e1210d93f6..18581bf4d580b 100644 --- a/src/libstd_unicode/u_str.rs +++ b/src/libcore/unicode/str.rs @@ -13,9 +13,9 @@ //! This module provides functionality to `str` that requires the Unicode //! methods provided by the unicode parts of the CharExt trait. -use core::char; -use core::iter::{Filter, FusedIterator}; -use core::str::Split; +use char; +use iter::{Filter, FusedIterator}; +use str::Split; /// An iterator over the non-whitespace substrings of a string, /// separated by any amount of whitespace. diff --git a/src/libstd_unicode/tables.rs b/src/libcore/unicode/tables.rs similarity index 99% rename from src/libstd_unicode/tables.rs rename to src/libcore/unicode/tables.rs index b53953b62a7af..7e8e925bda32e 100644 --- a/src/libstd_unicode/tables.rs +++ b/src/libcore/unicode/tables.rs @@ -12,8 +12,8 @@ #![allow(missing_docs, non_upper_case_globals, non_snake_case)] -use version::UnicodeVersion; -use bool_trie::{BoolTrie, SmallBoolTrie}; +use unicode::version::UnicodeVersion; +use unicode::bool_trie::{BoolTrie, SmallBoolTrie}; /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of /// `CharExt` and `UnicodeStrPrelude` traits are based on. 
@@ -1138,9 +1138,6 @@ pub mod property { } pub mod conversions { - use core::option::Option; - use core::option::Option::{Some, None}; - pub fn to_lower(c: char) -> [char; 3] { match bsearch_case_table(c, to_lowercase_table) { None => [c, '\0', '\0'], diff --git a/src/libstd_unicode/unicode.py b/src/libcore/unicode/unicode.py similarity index 99% rename from src/libstd_unicode/unicode.py rename to src/libcore/unicode/unicode.py index a86294930861b..39b68dc7d9b67 100755 --- a/src/libstd_unicode/unicode.py +++ b/src/libcore/unicode/unicode.py @@ -39,8 +39,8 @@ #![allow(missing_docs, non_upper_case_globals, non_snake_case)] -use version::UnicodeVersion; -use bool_trie::{BoolTrie, SmallBoolTrie}; +use unicode::version::UnicodeVersion; +use unicode::bool_trie::{BoolTrie, SmallBoolTrie}; ''' # Mapping taken from Table 12 from: @@ -408,9 +408,6 @@ def emit_property_module(f, mod, tbl, emit): def emit_conversions_module(f, to_upper, to_lower, to_title): f.write("pub mod conversions {") f.write(""" - use core::option::Option; - use core::option::Option::{Some, None}; - pub fn to_lower(c: char) -> [char; 3] { match bsearch_case_table(c, to_lowercase_table) { None => [c, '\\0', '\\0'], diff --git a/src/libstd_unicode/version.rs b/src/libcore/unicode/version.rs similarity index 100% rename from src/libstd_unicode/version.rs rename to src/libcore/unicode/version.rs diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs index 106a2c0f0c514..8cdeb6c8ad184 100644 --- a/src/libstd_unicode/lib.rs +++ b/src/libstd_unicode/lib.rs @@ -27,37 +27,9 @@ html_playground_url = "https://play.rust-lang.org/", issue_tracker_base_url = "https://github.com/rust-lang/rust/issues/", test(no_crate_inject, attr(allow(unused_variables), deny(warnings))))] -#![deny(missing_debug_implementations)] #![no_std] -#![feature(ascii_ctype)] -#![feature(core_char_ext)] -#![feature(str_internals)] -#![feature(decode_utf8)] -#![feature(fn_traits)] -#![feature(lang_items)] -#![feature(non_exhaustive)] 
+#![feature(unicode)] #![feature(staged_api)] -#![feature(unboxed_closures)] -mod bool_trie; -mod tables; -mod u_str; -mod version; -pub mod char; - -#[allow(deprecated)] -pub mod str { - pub use u_str::{SplitWhitespace, UnicodeStr}; - pub use u_str::Utf16Encoder; -} - -// For use in liballoc, not re-exported in libstd. -pub mod derived_property { - pub use tables::derived_property::{Case_Ignorable, Cased}; -} - -// For use in libsyntax -pub mod property { - pub use tables::property::Pattern_White_Space; -} +pub use core::unicode::*; diff --git a/src/test/compile-fail/single-primitive-inherent-impl.rs b/src/test/compile-fail/single-primitive-inherent-impl.rs index 5ceb870528a65..365387c3e5e27 100644 --- a/src/test/compile-fail/single-primitive-inherent-impl.rs +++ b/src/test/compile-fail/single-primitive-inherent-impl.rs @@ -15,9 +15,9 @@ #![no_std] // OK -#[lang = "char"] -impl char {} +#[lang = "str"] +impl str {} -impl char { -//~^ error: only a single inherent implementation marked with `#[lang = "char"]` is allowed for the `char` primitive +impl str { +//~^ error: only a single inherent implementation marked with `#[lang = "str"]` is allowed for the `str` primitive } From b2027ef17c03e47a4d716d8ea8148ed785934b04 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 17:20:08 +0200 Subject: [PATCH 04/15] Deprecate the std_unicode crate --- src/Cargo.lock | 1 - src/ci/docker/wasm32-unknown/Dockerfile | 1 - src/doc/unstable-book/src/language-features/lang-items.md | 2 +- src/liballoc/Cargo.toml | 1 - src/liballoc/lib.rs | 2 -- src/liballoc/str.rs | 7 +++---- src/liballoc/string.rs | 2 +- src/liballoc/tests/lib.rs | 2 +- src/liballoc/tests/str.rs | 2 +- src/liballoc/tests/string.rs | 2 +- src/libcore/char.rs | 2 +- src/librustdoc/lib.rs | 1 - src/libstd/lib.rs | 3 +-- src/libstd_unicode/lib.rs | 1 + src/libsyntax/lib.rs | 2 +- src/libsyntax/parse/lexer/mod.rs | 2 +- 16 files changed, 13 insertions(+), 20 deletions(-) diff --git a/src/Cargo.lock 
b/src/Cargo.lock index a60679e417ada..6e7c4b67acf24 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -13,7 +13,6 @@ dependencies = [ "compiler_builtins 0.0.0", "core 0.0.0", "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "std_unicode 0.0.0", ] [[package]] diff --git a/src/ci/docker/wasm32-unknown/Dockerfile b/src/ci/docker/wasm32-unknown/Dockerfile index 6c0ec1ad9d4e1..853923ad947cd 100644 --- a/src/ci/docker/wasm32-unknown/Dockerfile +++ b/src/ci/docker/wasm32-unknown/Dockerfile @@ -34,4 +34,3 @@ ENV SCRIPT python2.7 /checkout/x.py test --target $TARGETS \ src/test/mir-opt \ src/test/codegen-units \ src/libcore \ - src/libstd_unicode/ \ diff --git a/src/doc/unstable-book/src/language-features/lang-items.md b/src/doc/unstable-book/src/language-features/lang-items.md index c51674186146b..6a7aea7f1c27e 100644 --- a/src/doc/unstable-book/src/language-features/lang-items.md +++ b/src/doc/unstable-book/src/language-features/lang-items.md @@ -243,7 +243,7 @@ the source code. - `usize`: `libcore/num/mod.rs` - `f32`: `libstd/f32.rs` - `f64`: `libstd/f64.rs` - - `char`: `libstd_unicode/char.rs` + - `char`: `libcore/char.rs` - `slice`: `liballoc/slice.rs` - `str`: `liballoc/str.rs` - `const_ptr`: `libcore/ptr.rs` diff --git a/src/liballoc/Cargo.toml b/src/liballoc/Cargo.toml index 2eb8ea1260446..6383bd1e941ed 100644 --- a/src/liballoc/Cargo.toml +++ b/src/liballoc/Cargo.toml @@ -9,7 +9,6 @@ path = "lib.rs" [dependencies] core = { path = "../libcore" } -std_unicode = { path = "../libstd_unicode" } compiler_builtins = { path = "../rustc/compiler_builtins_shim" } [dev-dependencies] diff --git a/src/liballoc/lib.rs b/src/liballoc/lib.rs index b08bd66b47c59..d1a91ab4a9ce1 100644 --- a/src/liballoc/lib.rs +++ b/src/liballoc/lib.rs @@ -135,8 +135,6 @@ extern crate test; #[cfg(test)] extern crate rand; -extern crate std_unicode; - // Module with internal macros used by other modules (needs to be included before other modules). 
#[macro_use] mod macros; diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index d5ef41df0d850..eaca9eb49f9f0 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -45,12 +45,11 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use core::mem; use core::ptr; use core::iter::FusedIterator; -use std_unicode::str::{UnicodeStr, Utf16Encoder}; +use core::unicode::str::{UnicodeStr, Utf16Encoder}; use vec_deque::VecDeque; use borrow::{Borrow, ToOwned}; use string::String; -use std_unicode; use vec::Vec; use slice::{SliceConcatExt, SliceIndex}; use boxed::Box; @@ -75,7 +74,7 @@ pub use core::str::{from_utf8, from_utf8_mut, Chars, CharIndices, Bytes}; #[stable(feature = "rust1", since = "1.0.0")] pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError}; #[stable(feature = "rust1", since = "1.0.0")] -pub use std_unicode::str::SplitWhitespace; +pub use core::unicode::str::SplitWhitespace; #[stable(feature = "rust1", since = "1.0.0")] pub use core::str::pattern; @@ -1960,7 +1959,7 @@ impl str { } fn case_ignoreable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool { - use std_unicode::derived_property::{Cased, Case_Ignorable}; + use core::unicode::derived_property::{Cased, Case_Ignorable}; match iter.skip_while(|&c| Case_Ignorable(c)).next() { Some(c) => Cased(c), None => false, diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs index 5f90e28cb3c3f..a902f0bb06b62 100644 --- a/src/liballoc/string.rs +++ b/src/liballoc/string.rs @@ -64,7 +64,7 @@ use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds}; use core::ptr; use core::str::pattern::Pattern; use core::str::lossy; -use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER}; +use core::unicode::char::{decode_utf16, REPLACEMENT_CHARACTER}; use borrow::{Cow, ToOwned}; use str::{self, from_boxed_utf8_unchecked, FromStr, Utf8Error, Chars}; diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs index 17f1d0464a5c2..fddf341d0d18e 100644 --- 
a/src/liballoc/tests/lib.rs +++ b/src/liballoc/tests/lib.rs @@ -29,7 +29,7 @@ #![feature(inclusive_range_fields)] extern crate alloc_system; -extern crate std_unicode; +extern crate core; extern crate rand; use std::hash::{Hash, Hasher}; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index a14a5d32738b3..763dbe675b91d 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1204,7 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() { #[test] fn test_utf16_code_units() { - use std_unicode::str::Utf16Encoder; + use core::unicode::str::Utf16Encoder; assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(), [0xE9, 0xD83D, 0xDCA9]) } diff --git a/src/liballoc/tests/string.rs b/src/liballoc/tests/string.rs index cb4a17a22d8a4..33f20be100db6 100644 --- a/src/liballoc/tests/string.rs +++ b/src/liballoc/tests/string.rs @@ -132,7 +132,7 @@ fn test_from_utf16() { let s_as_utf16 = s.encode_utf16().collect::<Vec<u16>>(); let u_as_string = String::from_utf16(&u).unwrap(); - assert!(::std_unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok())); + assert!(::core::unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok())); assert_eq!(s_as_utf16, u); assert_eq!(u_as_string, s); diff --git a/src/libcore/char.rs b/src/libcore/char.rs index b5554a59db56a..6e2626cc362ba 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -10,7 +10,7 @@ //! Character manipulation. //! -//! For more details, see ::std_unicode::char (a.k.a. std::char) +//! For more details, see ::core::unicode::char (a.k.a. 
std::char) #![allow(non_snake_case)] #![stable(feature = "core_char", since = "1.2.0")] diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 730f61e0aa66d..9ac034869acb8 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -41,7 +41,6 @@ extern crate serialize; #[macro_use] extern crate syntax; extern crate syntax_pos; extern crate test as testing; -extern crate std_unicode; #[macro_use] extern crate log; extern crate rustc_errors as errors; extern crate pulldown_cmark; diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 672723341eb57..16bca9ddcd328 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -354,7 +354,6 @@ extern crate core as __core; #[macro_reexport(vec, format)] extern crate alloc; extern crate alloc_system; -extern crate std_unicode; #[doc(masked)] extern crate libc; @@ -455,7 +454,7 @@ pub use alloc::string; #[stable(feature = "rust1", since = "1.0.0")] pub use alloc::vec; #[stable(feature = "rust1", since = "1.0.0")] -pub use std_unicode::char; +pub use core::unicode::char; #[stable(feature = "i128", since = "1.26.0")] pub use core::u128; diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs index 8cdeb6c8ad184..29de017c64d88 100644 --- a/src/libstd_unicode/lib.rs +++ b/src/libstd_unicode/lib.rs @@ -31,5 +31,6 @@ #![feature(unicode)] #![feature(staged_api)] +#![rustc_deprecated(since = "1.27.0", reason = "moved into libcore")] pub use core::unicode::*; diff --git a/src/libsyntax/lib.rs b/src/libsyntax/lib.rs index d80430f609b3a..9de905c01d6c7 100644 --- a/src/libsyntax/lib.rs +++ b/src/libsyntax/lib.rs @@ -32,9 +32,9 @@ extern crate rustc_cratesio_shim; #[macro_use] extern crate bitflags; +extern crate core; extern crate serialize; #[macro_use] extern crate log; -extern crate std_unicode; pub extern crate rustc_errors as errors; extern crate syntax_pos; extern crate rustc_data_structures; diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 
39b2f77f2305e..cb3323c7eca4e 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -15,7 +15,7 @@ use errors::{FatalError, DiagnosticBuilder}; use parse::{token, ParseSess}; use str::char_at; use symbol::{Symbol, keywords}; -use std_unicode::property::Pattern_White_Space; +use core::unicode::property::Pattern_White_Space; use std::borrow::Cow; use std::char; From 3613b0b52fec8cb7844149804efa76e6e904896c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 17:35:13 +0200 Subject: [PATCH 05/15] Move the core::char module to its own directory --- src/libcore/{char.rs => char/mod.rs} | 4 +++- src/{etc/char_private.py => libcore/char/printable.py} | 2 +- src/libcore/{char_private.rs => char/printable.rs} | 2 +- src/libcore/lib.rs | 1 - 4 files changed, 5 insertions(+), 4 deletions(-) rename src/libcore/{char.rs => char/mod.rs} (99%) rename src/{etc/char_private.py => libcore/char/printable.py} (98%) rename src/libcore/{char_private.rs => char/printable.rs} (99%) diff --git a/src/libcore/char.rs b/src/libcore/char/mod.rs similarity index 99% rename from src/libcore/char.rs rename to src/libcore/char/mod.rs index 6e2626cc362ba..c6b620e5238c3 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char/mod.rs @@ -15,7 +15,9 @@ #![allow(non_snake_case)] #![stable(feature = "core_char", since = "1.2.0")] -use char_private::is_printable; +mod printable; + +use self::printable::is_printable; use convert::TryFrom; use fmt::{self, Write}; use slice; diff --git a/src/etc/char_private.py b/src/libcore/char/printable.py similarity index 98% rename from src/etc/char_private.py rename to src/libcore/char/printable.py index cfe5b01e934e7..484822e10aa7b 100644 --- a/src/etc/char_private.py +++ b/src/libcore/char/printable.py @@ -187,7 +187,7 @@ def main(): // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// NOTE: The following code was generated by "src/etc/char_private.py", +// NOTE: The following code was generated by "src/libcore/char/printable.py", // do not edit directly! fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], diff --git a/src/libcore/char_private.rs b/src/libcore/char/printable.rs similarity index 99% rename from src/libcore/char_private.rs rename to src/libcore/char/printable.rs index e6803745ab543..ce011fab1878b 100644 --- a/src/libcore/char_private.rs +++ b/src/libcore/char/printable.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// NOTE: The following code was generated by "src/etc/char_private.py", +// NOTE: The following code was generated by "src/libcore/char/printable.py", // do not edit directly! fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], diff --git a/src/libcore/lib.rs b/src/libcore/lib.rs index 7cb635a299aef..9ff8465bc0f2d 100644 --- a/src/libcore/lib.rs +++ b/src/libcore/lib.rs @@ -187,7 +187,6 @@ pub mod unicode; pub mod heap; // note: does not need to be public -mod char_private; mod iter_private; mod tuple; mod unit; From 939692409da499ff3d498eae782620435f16a981 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 17:56:46 +0200 Subject: [PATCH 06/15] Reexport from core::unicode::char in core::char rather than vice versa --- src/liballoc/string.rs | 2 +- src/liballoc/tests/string.rs | 2 +- src/libcore/char/mod.rs | 12 ++++++++++++ src/libcore/unicode/char.rs | 21 +-------------------- src/libcore/unicode/mod.rs | 6 +++--- src/libstd/lib.rs | 2 +- 6 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs index a902f0bb06b62..29d759b1f0007 100644 --- a/src/liballoc/string.rs +++ b/src/liballoc/string.rs @@ -56,6 +56,7 @@ #![stable(feature = "rust1", since = "1.0.0")] +use core::char::{decode_utf16, REPLACEMENT_CHARACTER}; use core::fmt; use 
core::hash; use core::iter::{FromIterator, FusedIterator}; @@ -64,7 +65,6 @@ use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds}; use core::ptr; use core::str::pattern::Pattern; use core::str::lossy; -use core::unicode::char::{decode_utf16, REPLACEMENT_CHARACTER}; use borrow::{Cow, ToOwned}; use str::{self, from_boxed_utf8_unchecked, FromStr, Utf8Error, Chars}; diff --git a/src/liballoc/tests/string.rs b/src/liballoc/tests/string.rs index 33f20be100db6..17d53e4cf3e09 100644 --- a/src/liballoc/tests/string.rs +++ b/src/liballoc/tests/string.rs @@ -132,7 +132,7 @@ fn test_from_utf16() { let s_as_utf16 = s.encode_utf16().collect::<Vec<u16>>(); let u_as_string = String::from_utf16(&u).unwrap(); - assert!(::core::unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok())); + assert!(::core::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok())); assert_eq!(s_as_utf16, u); assert_eq!(u_as_string, s); diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index c6b620e5238c3..3efa8396331ef 100644 --- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -15,6 +15,18 @@ #![allow(non_snake_case)] #![stable(feature = "core_char", since = "1.2.0")] +// stable re-exports +#[stable(feature = "rust1", since = "1.0.0")] +pub use unicode::char::{ToLowercase, ToUppercase}; +#[stable(feature = "decode_utf16", since = "1.9.0")] +pub use unicode::char::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; + +// unstable re-exports +#[unstable(feature = "unicode", issue = "27783")] +pub use unicode::tables::{UNICODE_VERSION}; +#[unstable(feature = "unicode", issue = "27783")] +pub use unicode::version::UnicodeVersion; + mod printable; use self::printable::is_printable; diff --git a/src/libcore/unicode/char.rs b/src/libcore/unicode/char.rs index 0e8b09f621a07..e75338aedf145 100644 --- a/src/libcore/unicode/char.rs +++ b/src/libcore/unicode/char.rs @@ -28,31 +28,12 @@ #![stable(feature = "rust1", since = "1.0.0")] +use char::*; use char::CharExt as C; use 
iter::FusedIterator; use fmt::{self, Write}; use unicode::tables::{conversions, derived_property, general_category, property}; -// stable re-exports -#[stable(feature = "rust1", since = "1.0.0")] -pub use char::{MAX, from_digit, from_u32, from_u32_unchecked}; -#[stable(feature = "rust1", since = "1.0.0")] -pub use char::{EscapeDebug, EscapeDefault, EscapeUnicode}; -#[stable(feature = "decode_utf16", since = "1.9.0")] -pub use char::REPLACEMENT_CHARACTER; -#[stable(feature = "char_from_str", since = "1.20.0")] -pub use char::ParseCharError; - -// unstable re-exports -#[stable(feature = "try_from", since = "1.26.0")] -pub use char::CharTryFromError; -#[unstable(feature = "decode_utf8", issue = "33906")] -pub use char::{DecodeUtf8, decode_utf8}; -#[unstable(feature = "unicode", issue = "27783")] -pub use unicode::tables::{UNICODE_VERSION}; -#[unstable(feature = "unicode", issue = "27783")] -pub use unicode::version::UnicodeVersion; - /// Returns an iterator that yields the lowercase equivalent of a `char`. /// /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index aaf8081799f07..0ea1aa12146f1 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -12,11 +12,11 @@ #![allow(missing_docs)] mod bool_trie; -mod tables; -mod version; +pub(crate) mod tables; +pub(crate) mod version; pub mod str; -pub mod char; +pub(crate) mod char; // For use in liballoc, not re-exported in libstd. 
pub mod derived_property { diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 16bca9ddcd328..94e48732c26e8 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -454,7 +454,7 @@ pub use alloc::string; #[stable(feature = "rust1", since = "1.0.0")] pub use alloc::vec; #[stable(feature = "rust1", since = "1.0.0")] -pub use core::unicode::char; +pub use core::char; #[stable(feature = "i128", since = "1.26.0")] pub use core::u128; From 955450212aac9c2babd6cb511974092224fcf93d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 18:02:52 +0200 Subject: [PATCH 07/15] Move char decoding iterators into a separate private module. --- src/libcore/char/decode.rs | 259 ++++++++++++++++++++++++++++++++++++ src/libcore/char/mod.rs | 126 +----------------- src/libcore/unicode/char.rs | 129 ------------------ 3 files changed, 265 insertions(+), 249 deletions(-) create mode 100644 src/libcore/char/decode.rs diff --git a/src/libcore/char/decode.rs b/src/libcore/char/decode.rs new file mode 100644 index 0000000000000..48b531104f882 --- /dev/null +++ b/src/libcore/char/decode.rs @@ -0,0 +1,259 @@ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! UTF-8 and UTF-16 decoding iterators + +use fmt; +use iter::FusedIterator; +use super::from_u32_unchecked; + +/// An iterator over an iterator of bytes of the characters the bytes represent +/// as UTF-8 +#[unstable(feature = "decode_utf8", issue = "33906")] +#[derive(Clone, Debug)] +pub struct DecodeUtf8>(::iter::Peekable); + +/// Decodes an `Iterator` of bytes as UTF-8. 
+#[unstable(feature = "decode_utf8", issue = "33906")] +#[inline] +pub fn decode_utf8>(i: I) -> DecodeUtf8 { + DecodeUtf8(i.into_iter().peekable()) +} + +/// `::next` returns this for an invalid input sequence. +#[unstable(feature = "decode_utf8", issue = "33906")] +#[derive(PartialEq, Eq, Debug)] +pub struct InvalidSequence(()); + +#[unstable(feature = "decode_utf8", issue = "33906")] +impl> Iterator for DecodeUtf8 { + type Item = Result; + #[inline] + + fn next(&mut self) -> Option> { + self.0.next().map(|first_byte| { + // Emit InvalidSequence according to + // Unicode §5.22 Best Practice for U+FFFD Substitution + // http://www.unicode.org/versions/Unicode9.0.0/ch05.pdf#G40630 + + // Roughly: consume at least one byte, + // then validate one byte at a time and stop before the first unexpected byte + // (which might be the valid start of the next byte sequence). + + let mut code_point; + macro_rules! first_byte { + ($mask: expr) => { + code_point = u32::from(first_byte & $mask) + } + } + macro_rules! 
continuation_byte { + () => { continuation_byte!(0x80...0xBF) }; + ($range: pat) => { + match self.0.peek() { + Some(&byte @ $range) => { + code_point = (code_point << 6) | u32::from(byte & 0b0011_1111); + self.0.next(); + } + _ => return Err(InvalidSequence(())) + } + } + } + + match first_byte { + 0x00...0x7F => { + first_byte!(0b1111_1111); + } + 0xC2...0xDF => { + first_byte!(0b0001_1111); + continuation_byte!(); + } + 0xE0 => { + first_byte!(0b0000_1111); + continuation_byte!(0xA0...0xBF); // 0x80...0x9F here are overlong + continuation_byte!(); + } + 0xE1...0xEC | 0xEE...0xEF => { + first_byte!(0b0000_1111); + continuation_byte!(); + continuation_byte!(); + } + 0xED => { + first_byte!(0b0000_1111); + continuation_byte!(0x80...0x9F); // 0xA0..0xBF here are surrogates + continuation_byte!(); + } + 0xF0 => { + first_byte!(0b0000_0111); + continuation_byte!(0x90...0xBF); // 0x80..0x8F here are overlong + continuation_byte!(); + continuation_byte!(); + } + 0xF1...0xF3 => { + first_byte!(0b0000_0111); + continuation_byte!(); + continuation_byte!(); + continuation_byte!(); + } + 0xF4 => { + first_byte!(0b0000_0111); + continuation_byte!(0x80...0x8F); // 0x90..0xBF here are beyond char::MAX + continuation_byte!(); + continuation_byte!(); + } + _ => return Err(InvalidSequence(())) // Illegal first byte, overlong, or beyond MAX + } + unsafe { + Ok(from_u32_unchecked(code_point)) + } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (lower, upper) = self.0.size_hint(); + + // A code point is at most 4 bytes long. + let min_code_points = lower / 4; + + (min_code_points, upper) + } +} + +#[unstable(feature = "decode_utf8", issue = "33906")] +impl> FusedIterator for DecodeUtf8 {} + +/// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. 
+#[stable(feature = "decode_utf16", since = "1.9.0")] +#[derive(Clone, Debug)] +pub struct DecodeUtf16 + where I: Iterator +{ + iter: I, + buf: Option, +} + +/// An error that can be returned when decoding UTF-16 code points. +#[stable(feature = "decode_utf16", since = "1.9.0")] +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct DecodeUtf16Error { + code: u16, +} + +/// Create an iterator over the UTF-16 encoded code points in `iter`, +/// returning unpaired surrogates as `Err`s. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::char::decode_utf16; +/// +/// fn main() { +/// // 𝄞music +/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, +/// 0x0073, 0xDD1E, 0x0069, 0x0063, +/// 0xD834]; +/// +/// assert_eq!(decode_utf16(v.iter().cloned()) +/// .map(|r| r.map_err(|e| e.unpaired_surrogate())) +/// .collect::>(), +/// vec![Ok('𝄞'), +/// Ok('m'), Ok('u'), Ok('s'), +/// Err(0xDD1E), +/// Ok('i'), Ok('c'), +/// Err(0xD834)]); +/// } +/// ``` +/// +/// A lossy decoder can be obtained by replacing `Err` results with the replacement character: +/// +/// ``` +/// use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; +/// +/// fn main() { +/// // 𝄞music +/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, +/// 0x0073, 0xDD1E, 0x0069, 0x0063, +/// 0xD834]; +/// +/// assert_eq!(decode_utf16(v.iter().cloned()) +/// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)) +/// .collect::(), +/// "𝄞mus�ic�"); +/// } +/// ``` +#[stable(feature = "decode_utf16", since = "1.9.0")] +#[inline] +pub fn decode_utf16>(iter: I) -> DecodeUtf16 { + DecodeUtf16 { + iter: iter.into_iter(), + buf: None, + } +} + +#[stable(feature = "decode_utf16", since = "1.9.0")] +impl> Iterator for DecodeUtf16 { + type Item = Result; + + fn next(&mut self) -> Option> { + let u = match self.buf.take() { + Some(buf) => buf, + None => self.iter.next()? 
+ }; + + if u < 0xD800 || 0xDFFF < u { + // not a surrogate + Some(Ok(unsafe { from_u32_unchecked(u as u32) })) + } else if u >= 0xDC00 { + // a trailing surrogate + Some(Err(DecodeUtf16Error { code: u })) + } else { + let u2 = match self.iter.next() { + Some(u2) => u2, + // eof + None => return Some(Err(DecodeUtf16Error { code: u })), + }; + if u2 < 0xDC00 || u2 > 0xDFFF { + // not a trailing surrogate so we're not a valid + // surrogate pair, so rewind to redecode u2 next time. + self.buf = Some(u2); + return Some(Err(DecodeUtf16Error { code: u })); + } + + // all ok, so lets decode it. + let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000; + Some(Ok(unsafe { from_u32_unchecked(c) })) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (low, high) = self.iter.size_hint(); + // we could be entirely valid surrogates (2 elements per + // char), or entirely non-surrogates (1 element per char) + (low / 2, high) + } +} + +impl DecodeUtf16Error { + /// Returns the unpaired surrogate which caused this error. 
+ #[stable(feature = "decode_utf16", since = "1.9.0")] + pub fn unpaired_surrogate(&self) -> u16 { + self.code + } +} + +#[stable(feature = "decode_utf16", since = "1.9.0")] +impl fmt::Display for DecodeUtf16Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "unpaired surrogate found: {:x}", self.code) + } +} diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index 3efa8396331ef..388bc47750d66 100644 --- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -15,19 +15,22 @@ #![allow(non_snake_case)] #![stable(feature = "core_char", since = "1.2.0")] +mod printable; +mod decode; + // stable re-exports #[stable(feature = "rust1", since = "1.0.0")] pub use unicode::char::{ToLowercase, ToUppercase}; #[stable(feature = "decode_utf16", since = "1.9.0")] -pub use unicode::char::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; +pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; // unstable re-exports #[unstable(feature = "unicode", issue = "27783")] pub use unicode::tables::{UNICODE_VERSION}; #[unstable(feature = "unicode", issue = "27783")] pub use unicode::version::UnicodeVersion; - -mod printable; +#[unstable(feature = "decode_utf8", issue = "33906")] +pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence}; use self::printable::is_printable; use convert::TryFrom; @@ -821,120 +824,3 @@ impl fmt::Display for EscapeDebug { fmt::Display::fmt(&self.0, f) } } - - - -/// An iterator over an iterator of bytes of the characters the bytes represent -/// as UTF-8 -#[unstable(feature = "decode_utf8", issue = "33906")] -#[derive(Clone, Debug)] -pub struct DecodeUtf8>(::iter::Peekable); - -/// Decodes an `Iterator` of bytes as UTF-8. -#[unstable(feature = "decode_utf8", issue = "33906")] -#[inline] -pub fn decode_utf8>(i: I) -> DecodeUtf8 { - DecodeUtf8(i.into_iter().peekable()) -} - -/// `::next` returns this for an invalid input sequence. 
-#[unstable(feature = "decode_utf8", issue = "33906")] -#[derive(PartialEq, Eq, Debug)] -pub struct InvalidSequence(()); - -#[unstable(feature = "decode_utf8", issue = "33906")] -impl> Iterator for DecodeUtf8 { - type Item = Result; - #[inline] - - fn next(&mut self) -> Option> { - self.0.next().map(|first_byte| { - // Emit InvalidSequence according to - // Unicode §5.22 Best Practice for U+FFFD Substitution - // http://www.unicode.org/versions/Unicode9.0.0/ch05.pdf#G40630 - - // Roughly: consume at least one byte, - // then validate one byte at a time and stop before the first unexpected byte - // (which might be the valid start of the next byte sequence). - - let mut code_point; - macro_rules! first_byte { - ($mask: expr) => { - code_point = u32::from(first_byte & $mask) - } - } - macro_rules! continuation_byte { - () => { continuation_byte!(0x80...0xBF) }; - ($range: pat) => { - match self.0.peek() { - Some(&byte @ $range) => { - code_point = (code_point << 6) | u32::from(byte & 0b0011_1111); - self.0.next(); - } - _ => return Err(InvalidSequence(())) - } - } - } - - match first_byte { - 0x00...0x7F => { - first_byte!(0b1111_1111); - } - 0xC2...0xDF => { - first_byte!(0b0001_1111); - continuation_byte!(); - } - 0xE0 => { - first_byte!(0b0000_1111); - continuation_byte!(0xA0...0xBF); // 0x80...0x9F here are overlong - continuation_byte!(); - } - 0xE1...0xEC | 0xEE...0xEF => { - first_byte!(0b0000_1111); - continuation_byte!(); - continuation_byte!(); - } - 0xED => { - first_byte!(0b0000_1111); - continuation_byte!(0x80...0x9F); // 0xA0..0xBF here are surrogates - continuation_byte!(); - } - 0xF0 => { - first_byte!(0b0000_0111); - continuation_byte!(0x90...0xBF); // 0x80..0x8F here are overlong - continuation_byte!(); - continuation_byte!(); - } - 0xF1...0xF3 => { - first_byte!(0b0000_0111); - continuation_byte!(); - continuation_byte!(); - continuation_byte!(); - } - 0xF4 => { - first_byte!(0b0000_0111); - continuation_byte!(0x80...0x8F); // 0x90..0xBF here are 
beyond char::MAX - continuation_byte!(); - continuation_byte!(); - } - _ => return Err(InvalidSequence(())) // Illegal first byte, overlong, or beyond MAX - } - unsafe { - Ok(from_u32_unchecked(code_point)) - } - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (lower, upper) = self.0.size_hint(); - - // A code point is at most 4 bytes long. - let min_code_points = lower / 4; - - (min_code_points, upper) - } -} - -#[unstable(feature = "decode_utf8", issue = "33906")] -impl> FusedIterator for DecodeUtf8 {} diff --git a/src/libcore/unicode/char.rs b/src/libcore/unicode/char.rs index e75338aedf145..fda1914a50f1b 100644 --- a/src/libcore/unicode/char.rs +++ b/src/libcore/unicode/char.rs @@ -1435,132 +1435,3 @@ impl char { self.is_ascii() && (*self as u8).is_ascii_control() } } - -/// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. -#[stable(feature = "decode_utf16", since = "1.9.0")] -#[derive(Clone, Debug)] -pub struct DecodeUtf16 - where I: Iterator -{ - iter: I, - buf: Option, -} - -/// An error that can be returned when decoding UTF-16 code points. -#[stable(feature = "decode_utf16", since = "1.9.0")] -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct DecodeUtf16Error { - code: u16, -} - -/// Create an iterator over the UTF-16 encoded code points in `iter`, -/// returning unpaired surrogates as `Err`s. 
-/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use std::char::decode_utf16; -/// -/// fn main() { -/// // 𝄞music -/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, -/// 0x0073, 0xDD1E, 0x0069, 0x0063, -/// 0xD834]; -/// -/// assert_eq!(decode_utf16(v.iter().cloned()) -/// .map(|r| r.map_err(|e| e.unpaired_surrogate())) -/// .collect::>(), -/// vec![Ok('𝄞'), -/// Ok('m'), Ok('u'), Ok('s'), -/// Err(0xDD1E), -/// Ok('i'), Ok('c'), -/// Err(0xD834)]); -/// } -/// ``` -/// -/// A lossy decoder can be obtained by replacing `Err` results with the replacement character: -/// -/// ``` -/// use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; -/// -/// fn main() { -/// // 𝄞music -/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, -/// 0x0073, 0xDD1E, 0x0069, 0x0063, -/// 0xD834]; -/// -/// assert_eq!(decode_utf16(v.iter().cloned()) -/// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)) -/// .collect::(), -/// "𝄞mus�ic�"); -/// } -/// ``` -#[stable(feature = "decode_utf16", since = "1.9.0")] -#[inline] -pub fn decode_utf16>(iter: I) -> DecodeUtf16 { - DecodeUtf16 { - iter: iter.into_iter(), - buf: None, - } -} - -#[stable(feature = "decode_utf16", since = "1.9.0")] -impl> Iterator for DecodeUtf16 { - type Item = Result; - - fn next(&mut self) -> Option> { - let u = match self.buf.take() { - Some(buf) => buf, - None => self.iter.next()? - }; - - if u < 0xD800 || 0xDFFF < u { - // not a surrogate - Some(Ok(unsafe { from_u32_unchecked(u as u32) })) - } else if u >= 0xDC00 { - // a trailing surrogate - Some(Err(DecodeUtf16Error { code: u })) - } else { - let u2 = match self.iter.next() { - Some(u2) => u2, - // eof - None => return Some(Err(DecodeUtf16Error { code: u })), - }; - if u2 < 0xDC00 || u2 > 0xDFFF { - // not a trailing surrogate so we're not a valid - // surrogate pair, so rewind to redecode u2 next time. - self.buf = Some(u2); - return Some(Err(DecodeUtf16Error { code: u })); - } - - // all ok, so lets decode it. 
- let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000; - Some(Ok(unsafe { from_u32_unchecked(c) })) - } - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (low, high) = self.iter.size_hint(); - // we could be entirely valid surrogates (2 elements per - // char), or entirely non-surrogates (1 element per char) - (low / 2, high) - } -} - -impl DecodeUtf16Error { - /// Returns the unpaired surrogate which caused this error. - #[stable(feature = "decode_utf16", since = "1.9.0")] - pub fn unpaired_surrogate(&self) -> u16 { - self.code - } -} - -#[stable(feature = "decode_utf16", since = "1.9.0")] -impl fmt::Display for DecodeUtf16Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "unpaired surrogate found: {:x}", self.code) - } -} From 1800d695b9bd2c256f2d081da07a94e7a6cba832 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 18:26:58 +0200 Subject: [PATCH 08/15] Move char conversions into a separate private module. --- src/libcore/char/convert.rs | 304 ++++++++++++++++++++++++++++++++++++ src/libcore/char/mod.rs | 301 ++--------------------------------- 2 files changed, 315 insertions(+), 290 deletions(-) create mode 100644 src/libcore/char/convert.rs diff --git a/src/libcore/char/convert.rs b/src/libcore/char/convert.rs new file mode 100644 index 0000000000000..150562a4a9b29 --- /dev/null +++ b/src/libcore/char/convert.rs @@ -0,0 +1,304 @@ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Character conversions. + +use convert::TryFrom; +use fmt; +use mem::transmute; +use str::FromStr; +use super::MAX; + +/// Converts a `u32` to a `char`. 
+/// +/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with +/// [`as`]: +/// +/// ``` +/// let c = '💯'; +/// let i = c as u32; +/// +/// assert_eq!(128175, i); +/// ``` +/// +/// However, the reverse is not true: not all valid [`u32`]s are valid +/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value +/// for a [`char`]. +/// +/// [`char`]: ../../std/primitive.char.html +/// [`u32`]: ../../std/primitive.u32.html +/// [`as`]: ../../book/first-edition/casting-between-types.html#as +/// +/// For an unsafe version of this function which ignores these checks, see +/// [`from_u32_unchecked`]. +/// +/// [`from_u32_unchecked`]: fn.from_u32_unchecked.html +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::char; +/// +/// let c = char::from_u32(0x2764); +/// +/// assert_eq!(Some('❤'), c); +/// ``` +/// +/// Returning `None` when the input is not a valid [`char`]: +/// +/// ``` +/// use std::char; +/// +/// let c = char::from_u32(0x110000); +/// +/// assert_eq!(None, c); +/// ``` +#[inline] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn from_u32(i: u32) -> Option { + char::try_from(i).ok() +} + +/// Converts a `u32` to a `char`, ignoring validity. +/// +/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with +/// [`as`]: +/// +/// ``` +/// let c = '💯'; +/// let i = c as u32; +/// +/// assert_eq!(128175, i); +/// ``` +/// +/// However, the reverse is not true: not all valid [`u32`]s are valid +/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to +/// [`char`], possibly creating an invalid one. +/// +/// [`char`]: ../../std/primitive.char.html +/// [`u32`]: ../../std/primitive.u32.html +/// [`as`]: ../../book/first-edition/casting-between-types.html#as +/// +/// # Safety +/// +/// This function is unsafe, as it may construct invalid `char` values. +/// +/// For a safe version of this function, see the [`from_u32`] function. 
+/// +/// [`from_u32`]: fn.from_u32.html +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::char; +/// +/// let c = unsafe { char::from_u32_unchecked(0x2764) }; +/// +/// assert_eq!('❤', c); +/// ``` +#[inline] +#[stable(feature = "char_from_unchecked", since = "1.5.0")] +pub unsafe fn from_u32_unchecked(i: u32) -> char { + transmute(i) +} + +#[stable(feature = "char_convert", since = "1.13.0")] +impl From for u32 { + #[inline] + fn from(c: char) -> Self { + c as u32 + } +} + +/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF. +/// +/// Unicode is designed such that this effectively decodes bytes +/// with the character encoding that IANA calls ISO-8859-1. +/// This encoding is compatible with ASCII. +/// +/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), +/// which leaves some "blanks", byte values that are not assigned to any character. +/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. +/// +/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, +/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks +/// to punctuation and various Latin characters. +/// +/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) +/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases +/// for a superset of Windows-1252 that fills the remaining blanks with corresponding +/// C0 and C1 control codes. +#[stable(feature = "char_convert", since = "1.13.0")] +impl From for char { + #[inline] + fn from(i: u8) -> Self { + i as char + } +} + + +/// An error which can be returned when parsing a char. 
+#[stable(feature = "char_from_str", since = "1.20.0")] +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ParseCharError { + kind: CharErrorKind, +} + +impl ParseCharError { + #[unstable(feature = "char_error_internals", + reason = "this method should not be available publicly", + issue = "0")] + #[doc(hidden)] + pub fn __description(&self) -> &str { + match self.kind { + CharErrorKind::EmptyString => { + "cannot parse char from empty string" + }, + CharErrorKind::TooManyChars => "too many characters in string" + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum CharErrorKind { + EmptyString, + TooManyChars, +} + +#[stable(feature = "char_from_str", since = "1.20.0")] +impl fmt::Display for ParseCharError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.__description().fmt(f) + } +} + + +#[stable(feature = "char_from_str", since = "1.20.0")] +impl FromStr for char { + type Err = ParseCharError; + + #[inline] + fn from_str(s: &str) -> Result { + let mut chars = s.chars(); + match (chars.next(), chars.next()) { + (None, _) => { + Err(ParseCharError { kind: CharErrorKind::EmptyString }) + }, + (Some(c), None) => Ok(c), + _ => { + Err(ParseCharError { kind: CharErrorKind::TooManyChars }) + } + } + } +} + + +#[stable(feature = "try_from", since = "1.26.0")] +impl TryFrom for char { + type Error = CharTryFromError; + + #[inline] + fn try_from(i: u32) -> Result { + if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { + Err(CharTryFromError(())) + } else { + Ok(unsafe { from_u32_unchecked(i) }) + } + } +} + +/// The error type returned when a conversion from u32 to char fails. 
+#[stable(feature = "try_from", since = "1.26.0")] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct CharTryFromError(()); + +#[stable(feature = "try_from", since = "1.26.0")] +impl fmt::Display for CharTryFromError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + "converted integer out of range for `char`".fmt(f) + } +} + +/// Converts a digit in the given radix to a `char`. +/// +/// A 'radix' here is sometimes also called a 'base'. A radix of two +/// indicates a binary number, a radix of ten, decimal, and a radix of +/// sixteen, hexadecimal, to give some common values. Arbitrary +/// radices are supported. +/// +/// `from_digit()` will return `None` if the input is not a digit in +/// the given radix. +/// +/// # Panics +/// +/// Panics if given a radix larger than 36. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::char; +/// +/// let c = char::from_digit(4, 10); +/// +/// assert_eq!(Some('4'), c); +/// +/// // Decimal 11 is a single digit in base 16 +/// let c = char::from_digit(11, 16); +/// +/// assert_eq!(Some('b'), c); +/// ``` +/// +/// Returning `None` when the input is not a digit: +/// +/// ``` +/// use std::char; +/// +/// let c = char::from_digit(20, 10); +/// +/// assert_eq!(None, c); +/// ``` +/// +/// Passing a large radix, causing a panic: +/// +/// ``` +/// use std::thread; +/// use std::char; +/// +/// let result = thread::spawn(|| { +/// // this panics +/// let c = char::from_digit(1, 37); +/// }).join(); +/// +/// assert!(result.is_err()); +/// ``` +#[inline] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn from_digit(num: u32, radix: u32) -> Option { + if radix > 36 { + panic!("from_digit: radix is too high (maximum 36)"); + } + if num < radix { + let num = num as u8; + if num < 10 { + Some((b'0' + num) as char) + } else { + Some((b'a' + num - 10) as char) + } + } else { + None + } +} + diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index 388bc47750d66..01a7b49ac7446 100644 
--- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -15,11 +15,20 @@ #![allow(non_snake_case)] #![stable(feature = "core_char", since = "1.2.0")] -mod printable; +mod convert; mod decode; +mod printable; // stable re-exports #[stable(feature = "rust1", since = "1.0.0")] +pub use self::convert::{from_u32, from_digit}; +#[stable(feature = "char_from_unchecked", since = "1.5.0")] +pub use self::convert::from_u32_unchecked; +#[stable(feature = "char_from_str", since = "1.20.0")] +pub use self::convert::ParseCharError; +#[stable(feature = "try_from", since = "1.26.0")] +pub use self::convert::CharTryFromError; +#[stable(feature = "rust1", since = "1.0.0")] pub use unicode::char::{ToLowercase, ToUppercase}; #[stable(feature = "decode_utf16", since = "1.9.0")] pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; @@ -33,12 +42,10 @@ pub use unicode::version::UnicodeVersion; pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence}; use self::printable::is_printable; -use convert::TryFrom; use fmt::{self, Write}; use slice; -use str::{from_utf8_unchecked_mut, FromStr}; +use str::from_utf8_unchecked_mut; use iter::FusedIterator; -use mem::transmute; // UTF-8 ranges and tags for encoding characters const TAG_CONT: u8 = 0b1000_0000; @@ -102,292 +109,6 @@ pub const MAX: char = '\u{10ffff}'; #[stable(feature = "decode_utf16", since = "1.9.0")] pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}'; -/// Converts a `u32` to a `char`. -/// -/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with -/// [`as`]: -/// -/// ``` -/// let c = '💯'; -/// let i = c as u32; -/// -/// assert_eq!(128175, i); -/// ``` -/// -/// However, the reverse is not true: not all valid [`u32`]s are valid -/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value -/// for a [`char`]. 
-/// -/// [`char`]: ../../std/primitive.char.html -/// [`u32`]: ../../std/primitive.u32.html -/// [`as`]: ../../book/first-edition/casting-between-types.html#as -/// -/// For an unsafe version of this function which ignores these checks, see -/// [`from_u32_unchecked`]. -/// -/// [`from_u32_unchecked`]: fn.from_u32_unchecked.html -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use std::char; -/// -/// let c = char::from_u32(0x2764); -/// -/// assert_eq!(Some('❤'), c); -/// ``` -/// -/// Returning `None` when the input is not a valid [`char`]: -/// -/// ``` -/// use std::char; -/// -/// let c = char::from_u32(0x110000); -/// -/// assert_eq!(None, c); -/// ``` -#[inline] -#[stable(feature = "rust1", since = "1.0.0")] -pub fn from_u32(i: u32) -> Option { - char::try_from(i).ok() -} - -/// Converts a `u32` to a `char`, ignoring validity. -/// -/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with -/// [`as`]: -/// -/// ``` -/// let c = '💯'; -/// let i = c as u32; -/// -/// assert_eq!(128175, i); -/// ``` -/// -/// However, the reverse is not true: not all valid [`u32`]s are valid -/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to -/// [`char`], possibly creating an invalid one. -/// -/// [`char`]: ../../std/primitive.char.html -/// [`u32`]: ../../std/primitive.u32.html -/// [`as`]: ../../book/first-edition/casting-between-types.html#as -/// -/// # Safety -/// -/// This function is unsafe, as it may construct invalid `char` values. -/// -/// For a safe version of this function, see the [`from_u32`] function. 
-/// -/// [`from_u32`]: fn.from_u32.html -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use std::char; -/// -/// let c = unsafe { char::from_u32_unchecked(0x2764) }; -/// -/// assert_eq!('❤', c); -/// ``` -#[inline] -#[stable(feature = "char_from_unchecked", since = "1.5.0")] -pub unsafe fn from_u32_unchecked(i: u32) -> char { - transmute(i) -} - -#[stable(feature = "char_convert", since = "1.13.0")] -impl From for u32 { - #[inline] - fn from(c: char) -> Self { - c as u32 - } -} - -/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF. -/// -/// Unicode is designed such that this effectively decodes bytes -/// with the character encoding that IANA calls ISO-8859-1. -/// This encoding is compatible with ASCII. -/// -/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), -/// which leaves some "blanks", byte values that are not assigned to any character. -/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. -/// -/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, -/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks -/// to punctuation and various Latin characters. -/// -/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) -/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases -/// for a superset of Windows-1252 that fills the remaining blanks with corresponding -/// C0 and C1 control codes. -#[stable(feature = "char_convert", since = "1.13.0")] -impl From for char { - #[inline] - fn from(i: u8) -> Self { - i as char - } -} - - -/// An error which can be returned when parsing a char. 
-#[stable(feature = "char_from_str", since = "1.20.0")] -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ParseCharError { - kind: CharErrorKind, -} - -impl ParseCharError { - #[unstable(feature = "char_error_internals", - reason = "this method should not be available publicly", - issue = "0")] - #[doc(hidden)] - pub fn __description(&self) -> &str { - match self.kind { - CharErrorKind::EmptyString => { - "cannot parse char from empty string" - }, - CharErrorKind::TooManyChars => "too many characters in string" - } - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -enum CharErrorKind { - EmptyString, - TooManyChars, -} - -#[stable(feature = "char_from_str", since = "1.20.0")] -impl fmt::Display for ParseCharError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.__description().fmt(f) - } -} - - -#[stable(feature = "char_from_str", since = "1.20.0")] -impl FromStr for char { - type Err = ParseCharError; - - #[inline] - fn from_str(s: &str) -> Result { - let mut chars = s.chars(); - match (chars.next(), chars.next()) { - (None, _) => { - Err(ParseCharError { kind: CharErrorKind::EmptyString }) - }, - (Some(c), None) => Ok(c), - _ => { - Err(ParseCharError { kind: CharErrorKind::TooManyChars }) - } - } - } -} - - -#[stable(feature = "try_from", since = "1.26.0")] -impl TryFrom for char { - type Error = CharTryFromError; - - #[inline] - fn try_from(i: u32) -> Result { - if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { - Err(CharTryFromError(())) - } else { - Ok(unsafe { from_u32_unchecked(i) }) - } - } -} - -/// The error type returned when a conversion from u32 to char fails. 
-#[stable(feature = "try_from", since = "1.26.0")] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct CharTryFromError(()); - -#[stable(feature = "try_from", since = "1.26.0")] -impl fmt::Display for CharTryFromError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - "converted integer out of range for `char`".fmt(f) - } -} - -/// Converts a digit in the given radix to a `char`. -/// -/// A 'radix' here is sometimes also called a 'base'. A radix of two -/// indicates a binary number, a radix of ten, decimal, and a radix of -/// sixteen, hexadecimal, to give some common values. Arbitrary -/// radices are supported. -/// -/// `from_digit()` will return `None` if the input is not a digit in -/// the given radix. -/// -/// # Panics -/// -/// Panics if given a radix larger than 36. -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use std::char; -/// -/// let c = char::from_digit(4, 10); -/// -/// assert_eq!(Some('4'), c); -/// -/// // Decimal 11 is a single digit in base 16 -/// let c = char::from_digit(11, 16); -/// -/// assert_eq!(Some('b'), c); -/// ``` -/// -/// Returning `None` when the input is not a digit: -/// -/// ``` -/// use std::char; -/// -/// let c = char::from_digit(20, 10); -/// -/// assert_eq!(None, c); -/// ``` -/// -/// Passing a large radix, causing a panic: -/// -/// ``` -/// use std::thread; -/// use std::char; -/// -/// let result = thread::spawn(|| { -/// // this panics -/// let c = char::from_digit(1, 37); -/// }).join(); -/// -/// assert!(result.is_err()); -/// ``` -#[inline] -#[stable(feature = "rust1", since = "1.0.0")] -pub fn from_digit(num: u32, radix: u32) -> Option { - if radix > 36 { - panic!("from_digit: radix is too high (maximum 36)"); - } - if num < radix { - let num = num as u8; - if num < 10 { - Some((b'0' + num) as char) - } else { - Some((b'a' + num - 10) as char) - } - } else { - None - } -} - // NB: the stabilization and documentation for this trait is in // unicode/char.rs, not here 
#[allow(missing_docs)] // docs in libunicode/u_char.rs From 34c52534f72f035b898efe3b86028741576f1499 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 18:36:02 +0200 Subject: [PATCH 09/15] Move the rest of core::unicode::char to core::unicode --- .../{unicode/char.rs => char/methods.rs} | 251 +++++++-------- src/libcore/char/mod.rs | 288 +++++++++--------- src/libcore/unicode/mod.rs | 1 - 3 files changed, 266 insertions(+), 274 deletions(-) rename src/libcore/{unicode/char.rs => char/methods.rs} (88%) diff --git a/src/libcore/unicode/char.rs b/src/libcore/char/methods.rs similarity index 88% rename from src/libcore/unicode/char.rs rename to src/libcore/char/methods.rs index fda1914a50f1b..0958c67ea05d9 100644 --- a/src/libcore/unicode/char.rs +++ b/src/libcore/char/methods.rs @@ -8,150 +8,155 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! A character type. -//! -//! The `char` type represents a single character. More specifically, since -//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode -//! scalar value]', which is similar to, but not the same as, a '[Unicode code -//! point]'. -//! -//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value -//! [Unicode code point]: http://www.unicode.org/glossary/#code_point -//! -//! This module exists for technical reasons, the primary documentation for -//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html) -//! itself. -//! -//! This module is the home of the iterator implementations for the iterators -//! implemented on `char`, as well as some useful constants and conversion -//! functions that convert various types to `char`. +//! 
impl char {} -#![stable(feature = "rust1", since = "1.0.0")] - -use char::*; -use char::CharExt as C; -use iter::FusedIterator; -use fmt::{self, Write}; +use slice; +use str::from_utf8_unchecked_mut; +use super::*; +use super::CharExt as C; +use super::printable::is_printable; use unicode::tables::{conversions, derived_property, general_category, property}; -/// Returns an iterator that yields the lowercase equivalent of a `char`. -/// -/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See -/// its documentation for more. -/// -/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase -/// [`char`]: ../../std/primitive.char.html -#[stable(feature = "rust1", since = "1.0.0")] -#[derive(Debug, Clone)] -pub struct ToLowercase(CaseMappingIter); +#[stable(feature = "core", since = "1.6.0")] +impl CharExt for char { + #[inline] + fn is_digit(self, radix: u32) -> bool { + self.to_digit(radix).is_some() + } -#[stable(feature = "rust1", since = "1.0.0")] -impl Iterator for ToLowercase { - type Item = char; - fn next(&mut self) -> Option { - self.0.next() + #[inline] + fn to_digit(self, radix: u32) -> Option { + if radix > 36 { + panic!("to_digit: radix is too high (maximum 36)"); + } + let val = match self { + '0' ... '9' => self as u32 - '0' as u32, + 'a' ... 'z' => self as u32 - 'a' as u32 + 10, + 'A' ... 'Z' => self as u32 - 'A' as u32 + 10, + _ => return None, + }; + if val < radix { Some(val) } + else { None } } -} -#[stable(feature = "fused", since = "1.26.0")] -impl FusedIterator for ToLowercase {} + #[inline] + fn escape_unicode(self) -> EscapeUnicode { + let c = self as u32; -/// Returns an iterator that yields the uppercase equivalent of a `char`. -/// -/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See -/// its documentation for more. 
-/// -/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase -/// [`char`]: ../../std/primitive.char.html -#[stable(feature = "rust1", since = "1.0.0")] -#[derive(Debug, Clone)] -pub struct ToUppercase(CaseMappingIter); + // or-ing 1 ensures that for c==0 the code computes that one + // digit should be printed and (which is the same) avoids the + // (31 - 32) underflow + let msb = 31 - (c | 1).leading_zeros(); -#[stable(feature = "rust1", since = "1.0.0")] -impl Iterator for ToUppercase { - type Item = char; - fn next(&mut self) -> Option { - self.0.next() + // the index of the most significant hex digit + let ms_hex_digit = msb / 4; + EscapeUnicode { + c: self, + state: EscapeUnicodeState::Backslash, + hex_digit_idx: ms_hex_digit as usize, + } } -} -#[stable(feature = "fused", since = "1.26.0")] -impl FusedIterator for ToUppercase {} + #[inline] + fn escape_default(self) -> EscapeDefault { + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + '\x20' ... 
'\x7e' => EscapeDefaultState::Char(self), + _ => EscapeDefaultState::Unicode(self.escape_unicode()) + }; + EscapeDefault { state: init_state } + } -#[derive(Debug, Clone)] -enum CaseMappingIter { - Three(char, char, char), - Two(char, char), - One(char), - Zero, -} + #[inline] + fn escape_debug(self) -> EscapeDebug { + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + c if is_printable(c) => EscapeDefaultState::Char(c), + c => EscapeDefaultState::Unicode(c.escape_unicode()), + }; + EscapeDebug(EscapeDefault { state: init_state }) + } -impl CaseMappingIter { - fn new(chars: [char; 3]) -> CaseMappingIter { - if chars[2] == '\0' { - if chars[1] == '\0' { - CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0' - } else { - CaseMappingIter::Two(chars[0], chars[1]) - } + #[inline] + fn len_utf8(self) -> usize { + let code = self as u32; + if code < MAX_ONE_B { + 1 + } else if code < MAX_TWO_B { + 2 + } else if code < MAX_THREE_B { + 3 } else { - CaseMappingIter::Three(chars[0], chars[1], chars[2]) + 4 } } -} -impl Iterator for CaseMappingIter { - type Item = char; - fn next(&mut self) -> Option { - match *self { - CaseMappingIter::Three(a, b, c) => { - *self = CaseMappingIter::Two(b, c); - Some(a) - } - CaseMappingIter::Two(b, c) => { - *self = CaseMappingIter::One(c); - Some(b) - } - CaseMappingIter::One(c) => { - *self = CaseMappingIter::Zero; - Some(c) - } - CaseMappingIter::Zero => None, - } + #[inline] + fn len_utf16(self) -> usize { + let ch = self as u32; + if (ch & 0xFFFF) == ch { 1 } else { 2 } } -} -impl fmt::Display for CaseMappingIter { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - CaseMappingIter::Three(a, b, c) => { - f.write_char(a)?; - f.write_char(b)?; - f.write_char(c) - } - CaseMappingIter::Two(b, c) => { - f.write_char(b)?; - 
f.write_char(c) - } - CaseMappingIter::One(c) => { - f.write_char(c) - } - CaseMappingIter::Zero => Ok(()), + #[inline] + fn encode_utf8(self, dst: &mut [u8]) -> &mut str { + let code = self as u32; + unsafe { + let len = + if code < MAX_ONE_B && !dst.is_empty() { + *dst.get_unchecked_mut(0) = code as u8; + 1 + } else if code < MAX_TWO_B && dst.len() >= 2 { + *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; + 2 + } else if code < MAX_THREE_B && dst.len() >= 3 { + *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; + 3 + } else if dst.len() >= 4 { + *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; + *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; + 4 + } else { + panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", + from_u32_unchecked(code).len_utf8(), + code, + dst.len()) + }; + from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) } } -} - -#[stable(feature = "char_struct_display", since = "1.16.0")] -impl fmt::Display for ToLowercase { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.0, f) - } -} -#[stable(feature = "char_struct_display", since = "1.16.0")] -impl fmt::Display for ToUppercase { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.0, f) + #[inline] + fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { + let mut code = self as u32; + unsafe { + if (code & 0xFFFF) == code && !dst.is_empty() { + // The BMP falls through (assuming non-surrogate, as it should) + *dst.get_unchecked_mut(0) = code as u16; + slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) + } else if dst.len() >= 2 { 
+ // Supplementary planes break into surrogates. + code -= 0x1_0000; + *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); + *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); + slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) + } else { + panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", + from_u32_unchecked(code).len_utf16(), + code, + dst.len()) + } + } } } diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index 01a7b49ac7446..7b4f0dc454882 100644 --- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -8,15 +8,30 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Character manipulation. +//! A character type. //! -//! For more details, see ::core::unicode::char (a.k.a. std::char) +//! The `char` type represents a single character. More specifically, since +//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode +//! scalar value]', which is similar to, but not the same as, a '[Unicode code +//! point]'. +//! +//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value +//! [Unicode code point]: http://www.unicode.org/glossary/#code_point +//! +//! This module exists for technical reasons, the primary documentation for +//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html) +//! itself. +//! +//! This module is the home of the iterator implementations for the iterators +//! implemented on `char`, as well as some useful constants and conversion +//! functions that convert various types to `char`. 
#![allow(non_snake_case)] #![stable(feature = "core_char", since = "1.2.0")] mod convert; mod decode; +mod methods; mod printable; // stable re-exports @@ -28,8 +43,6 @@ pub use self::convert::from_u32_unchecked; pub use self::convert::ParseCharError; #[stable(feature = "try_from", since = "1.26.0")] pub use self::convert::CharTryFromError; -#[stable(feature = "rust1", since = "1.0.0")] -pub use unicode::char::{ToLowercase, ToUppercase}; #[stable(feature = "decode_utf16", since = "1.9.0")] pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; @@ -41,10 +54,7 @@ pub use unicode::version::UnicodeVersion; #[unstable(feature = "decode_utf8", issue = "33906")] pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence}; -use self::printable::is_printable; use fmt::{self, Write}; -use slice; -use str::from_utf8_unchecked_mut; use iter::FusedIterator; // UTF-8 ranges and tags for encoding characters @@ -137,149 +147,6 @@ pub trait CharExt { fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16]; } -#[stable(feature = "core", since = "1.6.0")] -impl CharExt for char { - #[inline] - fn is_digit(self, radix: u32) -> bool { - self.to_digit(radix).is_some() - } - - #[inline] - fn to_digit(self, radix: u32) -> Option { - if radix > 36 { - panic!("to_digit: radix is too high (maximum 36)"); - } - let val = match self { - '0' ... '9' => self as u32 - '0' as u32, - 'a' ... 'z' => self as u32 - 'a' as u32 + 10, - 'A' ... 
'Z' => self as u32 - 'A' as u32 + 10, - _ => return None, - }; - if val < radix { Some(val) } - else { None } - } - - #[inline] - fn escape_unicode(self) -> EscapeUnicode { - let c = self as u32; - - // or-ing 1 ensures that for c==0 the code computes that one - // digit should be printed and (which is the same) avoids the - // (31 - 32) underflow - let msb = 31 - (c | 1).leading_zeros(); - - // the index of the most significant hex digit - let ms_hex_digit = msb / 4; - EscapeUnicode { - c: self, - state: EscapeUnicodeState::Backslash, - hex_digit_idx: ms_hex_digit as usize, - } - } - - #[inline] - fn escape_default(self) -> EscapeDefault { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - '\x20' ... '\x7e' => EscapeDefaultState::Char(self), - _ => EscapeDefaultState::Unicode(self.escape_unicode()) - }; - EscapeDefault { state: init_state } - } - - #[inline] - fn escape_debug(self) -> EscapeDebug { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - c if is_printable(c) => EscapeDefaultState::Char(c), - c => EscapeDefaultState::Unicode(c.escape_unicode()), - }; - EscapeDebug(EscapeDefault { state: init_state }) - } - - #[inline] - fn len_utf8(self) -> usize { - let code = self as u32; - if code < MAX_ONE_B { - 1 - } else if code < MAX_TWO_B { - 2 - } else if code < MAX_THREE_B { - 3 - } else { - 4 - } - } - - #[inline] - fn len_utf16(self) -> usize { - let ch = self as u32; - if (ch & 0xFFFF) == ch { 1 } else { 2 } - } - - #[inline] - fn encode_utf8(self, dst: &mut [u8]) -> &mut str { - let code = self as u32; - unsafe { - let len = - if code < MAX_ONE_B && !dst.is_empty() { - *dst.get_unchecked_mut(0) = code 
as u8; - 1 - } else if code < MAX_TWO_B && dst.len() >= 2 { - *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; - 2 - } else if code < MAX_THREE_B && dst.len() >= 3 { - *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; - 3 - } else if dst.len() >= 4 { - *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; - 4 - } else { - panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", - from_u32_unchecked(code).len_utf8(), - code, - dst.len()) - }; - from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) - } - } - - #[inline] - fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { - let mut code = self as u32; - unsafe { - if (code & 0xFFFF) == code && !dst.is_empty() { - // The BMP falls through (assuming non-surrogate, as it should) - *dst.get_unchecked_mut(0) = code as u16; - slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) - } else if dst.len() >= 2 { - // Supplementary planes break into surrogates. - code -= 0x1_0000; - *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); - *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); - slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) - } else { - panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", - from_u32_unchecked(code).len_utf16(), - code, - dst.len()) - } - } - } -} - /// Returns an iterator that yields the hexadecimal Unicode escape of a /// character, as `char`s. 
/// @@ -545,3 +412,124 @@ impl fmt::Display for EscapeDebug { fmt::Display::fmt(&self.0, f) } } + +/// Returns an iterator that yields the lowercase equivalent of a `char`. +/// +/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See +/// its documentation for more. +/// +/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase +/// [`char`]: ../../std/primitive.char.html +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug, Clone)] +pub struct ToLowercase(CaseMappingIter); + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for ToLowercase { + type Item = char; + fn next(&mut self) -> Option { + self.0.next() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for ToLowercase {} + +/// Returns an iterator that yields the uppercase equivalent of a `char`. +/// +/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See +/// its documentation for more. +/// +/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase +/// [`char`]: ../../std/primitive.char.html +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug, Clone)] +pub struct ToUppercase(CaseMappingIter); + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for ToUppercase { + type Item = char; + fn next(&mut self) -> Option { + self.0.next() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for ToUppercase {} + +#[derive(Debug, Clone)] +enum CaseMappingIter { + Three(char, char, char), + Two(char, char), + One(char), + Zero, +} + +impl CaseMappingIter { + fn new(chars: [char; 3]) -> CaseMappingIter { + if chars[2] == '\0' { + if chars[1] == '\0' { + CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0' + } else { + CaseMappingIter::Two(chars[0], chars[1]) + } + } else { + CaseMappingIter::Three(chars[0], chars[1], chars[2]) + } + } +} + +impl Iterator for CaseMappingIter { + type Item = char; + fn next(&mut self) -> Option { + match *self 
{ + CaseMappingIter::Three(a, b, c) => { + *self = CaseMappingIter::Two(b, c); + Some(a) + } + CaseMappingIter::Two(b, c) => { + *self = CaseMappingIter::One(c); + Some(b) + } + CaseMappingIter::One(c) => { + *self = CaseMappingIter::Zero; + Some(c) + } + CaseMappingIter::Zero => None, + } + } +} + +impl fmt::Display for CaseMappingIter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + CaseMappingIter::Three(a, b, c) => { + f.write_char(a)?; + f.write_char(b)?; + f.write_char(c) + } + CaseMappingIter::Two(b, c) => { + f.write_char(b)?; + f.write_char(c) + } + CaseMappingIter::One(c) => { + f.write_char(c) + } + CaseMappingIter::Zero => Ok(()), + } + } +} + +#[stable(feature = "char_struct_display", since = "1.16.0")] +impl fmt::Display for ToLowercase { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +#[stable(feature = "char_struct_display", since = "1.16.0")] +impl fmt::Display for ToUppercase { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 0ea1aa12146f1..060c55286fecf 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -16,7 +16,6 @@ pub(crate) mod tables; pub(crate) mod version; pub mod str; -pub(crate) mod char; // For use in liballoc, not re-exported in libstd. 
pub mod derived_property { From 33358dc3c5c1f5d627544075de6ff37b9e328efa Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 18:46:16 +0200 Subject: [PATCH 10/15] Remove the CharExt trait, now that libcore has inherent methods for char --- src/libcore/char/methods.rs | 257 +++++++++++++-------------------- src/libcore/char/mod.rs | 28 ---- src/libcore/prelude/v1.rs | 3 - src/libcore/unicode/str.rs | 5 +- src/libcore/unicode/tables.rs | 2 +- src/libcore/unicode/unicode.py | 2 +- 6 files changed, 107 insertions(+), 190 deletions(-) diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs index 0958c67ea05d9..2c433a7ac9ed5 100644 --- a/src/libcore/char/methods.rs +++ b/src/libcore/char/methods.rs @@ -13,153 +13,9 @@ use slice; use str::from_utf8_unchecked_mut; use super::*; -use super::CharExt as C; use super::printable::is_printable; use unicode::tables::{conversions, derived_property, general_category, property}; -#[stable(feature = "core", since = "1.6.0")] -impl CharExt for char { - #[inline] - fn is_digit(self, radix: u32) -> bool { - self.to_digit(radix).is_some() - } - - #[inline] - fn to_digit(self, radix: u32) -> Option { - if radix > 36 { - panic!("to_digit: radix is too high (maximum 36)"); - } - let val = match self { - '0' ... '9' => self as u32 - '0' as u32, - 'a' ... 'z' => self as u32 - 'a' as u32 + 10, - 'A' ... 
'Z' => self as u32 - 'A' as u32 + 10, - _ => return None, - }; - if val < radix { Some(val) } - else { None } - } - - #[inline] - fn escape_unicode(self) -> EscapeUnicode { - let c = self as u32; - - // or-ing 1 ensures that for c==0 the code computes that one - // digit should be printed and (which is the same) avoids the - // (31 - 32) underflow - let msb = 31 - (c | 1).leading_zeros(); - - // the index of the most significant hex digit - let ms_hex_digit = msb / 4; - EscapeUnicode { - c: self, - state: EscapeUnicodeState::Backslash, - hex_digit_idx: ms_hex_digit as usize, - } - } - - #[inline] - fn escape_default(self) -> EscapeDefault { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - '\x20' ... '\x7e' => EscapeDefaultState::Char(self), - _ => EscapeDefaultState::Unicode(self.escape_unicode()) - }; - EscapeDefault { state: init_state } - } - - #[inline] - fn escape_debug(self) -> EscapeDebug { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - c if is_printable(c) => EscapeDefaultState::Char(c), - c => EscapeDefaultState::Unicode(c.escape_unicode()), - }; - EscapeDebug(EscapeDefault { state: init_state }) - } - - #[inline] - fn len_utf8(self) -> usize { - let code = self as u32; - if code < MAX_ONE_B { - 1 - } else if code < MAX_TWO_B { - 2 - } else if code < MAX_THREE_B { - 3 - } else { - 4 - } - } - - #[inline] - fn len_utf16(self) -> usize { - let ch = self as u32; - if (ch & 0xFFFF) == ch { 1 } else { 2 } - } - - #[inline] - fn encode_utf8(self, dst: &mut [u8]) -> &mut str { - let code = self as u32; - unsafe { - let len = - if code < MAX_ONE_B && !dst.is_empty() { - *dst.get_unchecked_mut(0) = code 
as u8; - 1 - } else if code < MAX_TWO_B && dst.len() >= 2 { - *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; - 2 - } else if code < MAX_THREE_B && dst.len() >= 3 { - *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; - 3 - } else if dst.len() >= 4 { - *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; - 4 - } else { - panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", - from_u32_unchecked(code).len_utf8(), - code, - dst.len()) - }; - from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) - } - } - - #[inline] - fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { - let mut code = self as u32; - unsafe { - if (code & 0xFFFF) == code && !dst.is_empty() { - // The BMP falls through (assuming non-surrogate, as it should) - *dst.get_unchecked_mut(0) = code as u16; - slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) - } else if dst.len() >= 2 { - // Supplementary planes break into surrogates. - code -= 0x1_0000; - *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); - *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); - slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) - } else { - panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", - from_u32_unchecked(code).len_utf16(), - code, - dst.len()) - } - } - } -} - #[lang = "char"] impl char { /// Checks if a `char` is a digit in the given radix. 
@@ -211,7 +67,7 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn is_digit(self, radix: u32) -> bool { - C::is_digit(self, radix) + self.to_digit(radix).is_some() } /// Converts a `char` to a digit in the given radix. @@ -265,7 +121,17 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn to_digit(self, radix: u32) -> Option { - C::to_digit(self, radix) + if radix > 36 { + panic!("to_digit: radix is too high (maximum 36)"); + } + let val = match self { + '0' ... '9' => self as u32 - '0' as u32, + 'a' ... 'z' => self as u32 - 'a' as u32 + 10, + 'A' ... 'Z' => self as u32 - 'A' as u32 + 10, + _ => return None, + }; + if val < radix { Some(val) } + else { None } } /// Returns an iterator that yields the hexadecimal Unicode escape of a @@ -305,7 +171,20 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn escape_unicode(self) -> EscapeUnicode { - C::escape_unicode(self) + let c = self as u32; + + // or-ing 1 ensures that for c==0 the code computes that one + // digit should be printed and (which is the same) avoids the + // (31 - 32) underflow + let msb = 31 - (c | 1).leading_zeros(); + + // the index of the most significant hex digit + let ms_hex_digit = msb / 4; + EscapeUnicode { + c: self, + state: EscapeUnicodeState::Backslash, + hex_digit_idx: ms_hex_digit as usize, + } } /// Returns an iterator that yields the literal escape code of a character @@ -345,7 +224,15 @@ impl char { #[stable(feature = "char_escape_debug", since = "1.20.0")] #[inline] pub fn escape_debug(self) -> EscapeDebug { - C::escape_debug(self) + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + c if is_printable(c) => EscapeDefaultState::Char(c), + c => EscapeDefaultState::Unicode(c.escape_unicode()), + }; + EscapeDebug(EscapeDefault { state: 
init_state }) } /// Returns an iterator that yields the literal escape code of a character @@ -400,7 +287,15 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn escape_default(self) -> EscapeDefault { - C::escape_default(self) + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + '\x20' ... '\x7e' => EscapeDefaultState::Char(self), + _ => EscapeDefaultState::Unicode(self.escape_unicode()) + }; + EscapeDefault { state: init_state } } /// Returns the number of bytes this `char` would need if encoded in UTF-8. @@ -451,7 +346,16 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn len_utf8(self) -> usize { - C::len_utf8(self) + let code = self as u32; + if code < MAX_ONE_B { + 1 + } else if code < MAX_TWO_B { + 2 + } else if code < MAX_THREE_B { + 3 + } else { + 4 + } } /// Returns the number of 16-bit code units this `char` would need if @@ -476,7 +380,8 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn len_utf16(self) -> usize { - C::len_utf16(self) + let ch = self as u32; + if (ch & 0xFFFF) == ch { 1 } else { 2 } } /// Encodes this character as UTF-8 into the provided byte buffer, @@ -518,7 +423,35 @@ impl char { #[stable(feature = "unicode_encode_char", since = "1.15.0")] #[inline] pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str { - C::encode_utf8(self, dst) + let code = self as u32; + unsafe { + let len = + if code < MAX_ONE_B && !dst.is_empty() { + *dst.get_unchecked_mut(0) = code as u8; + 1 + } else if code < MAX_TWO_B && dst.len() >= 2 { + *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; + 2 + } else if code < MAX_THREE_B && dst.len() >= 3 { + *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + 
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; + 3 + } else if dst.len() >= 4 { + *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; + *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; + 4 + } else { + panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", + from_u32_unchecked(code).len_utf8(), + code, + dst.len()) + }; + from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) + } } /// Encodes this character as UTF-16 into the provided `u16` buffer, @@ -558,7 +491,25 @@ impl char { #[stable(feature = "unicode_encode_char", since = "1.15.0")] #[inline] pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { - C::encode_utf16(self, dst) + let mut code = self as u32; + unsafe { + if (code & 0xFFFF) == code && !dst.is_empty() { + // The BMP falls through (assuming non-surrogate, as it should) + *dst.get_unchecked_mut(0) = code as u16; + slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) + } else if dst.len() >= 2 { + // Supplementary planes break into surrogates. + code -= 0x1_0000; + *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); + *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); + slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) + } else { + panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", + from_u32_unchecked(code).len_utf16(), + code, + dst.len()) + } + } } /// Returns true if this `char` is an alphabetic code point, and false if not. 
diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index 7b4f0dc454882..c051a1ff8c886 100644 --- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -119,34 +119,6 @@ pub const MAX: char = '\u{10ffff}'; #[stable(feature = "decode_utf16", since = "1.9.0")] pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}'; -// NB: the stabilization and documentation for this trait is in -// unicode/char.rs, not here -#[allow(missing_docs)] // docs in libunicode/u_char.rs -#[doc(hidden)] -#[unstable(feature = "core_char_ext", - reason = "the stable interface is `impl char` in later crate", - issue = "32110")] -pub trait CharExt { - #[stable(feature = "core", since = "1.6.0")] - fn is_digit(self, radix: u32) -> bool; - #[stable(feature = "core", since = "1.6.0")] - fn to_digit(self, radix: u32) -> Option; - #[stable(feature = "core", since = "1.6.0")] - fn escape_unicode(self) -> EscapeUnicode; - #[stable(feature = "core", since = "1.6.0")] - fn escape_default(self) -> EscapeDefault; - #[stable(feature = "char_escape_debug", since = "1.20.0")] - fn escape_debug(self) -> EscapeDebug; - #[stable(feature = "core", since = "1.6.0")] - fn len_utf8(self) -> usize; - #[stable(feature = "core", since = "1.6.0")] - fn len_utf16(self) -> usize; - #[stable(feature = "unicode_encode_char", since = "1.15.0")] - fn encode_utf8(self, dst: &mut [u8]) -> &mut str; - #[stable(feature = "unicode_encode_char", since = "1.15.0")] - fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16]; -} - /// Returns an iterator that yields the hexadecimal Unicode escape of a /// character, as `char`s. 
/// diff --git a/src/libcore/prelude/v1.rs b/src/libcore/prelude/v1.rs index d43496c387cb8..cc3ad71117a5d 100644 --- a/src/libcore/prelude/v1.rs +++ b/src/libcore/prelude/v1.rs @@ -62,6 +62,3 @@ pub use slice::SliceExt; #[stable(feature = "core_prelude", since = "1.4.0")] #[doc(no_inline)] pub use str::StrExt; -#[stable(feature = "core_prelude", since = "1.4.0")] -#[doc(no_inline)] -pub use char::CharExt; diff --git a/src/libcore/unicode/str.rs b/src/libcore/unicode/str.rs index 18581bf4d580b..0882984e0774c 100644 --- a/src/libcore/unicode/str.rs +++ b/src/libcore/unicode/str.rs @@ -9,9 +9,6 @@ // except according to those terms. //! Unicode-intensive string manipulations. -//! -//! This module provides functionality to `str` that requires the Unicode -//! methods provided by the unicode parts of the CharExt trait. use char; use iter::{Filter, FusedIterator}; @@ -109,7 +106,7 @@ impl<I> Iterator for Utf16Encoder<I> let mut buf = [0; 2]; self.chars.next().map(|ch| { - let n = CharExt::encode_utf16(ch, &mut buf).len(); + let n = ch.encode_utf16(&mut buf).len(); if n == 2 { self.extra = buf[1]; } diff --git a/src/libcore/unicode/tables.rs b/src/libcore/unicode/tables.rs index 7e8e925bda32e..1f7b1bde9141a 100644 --- a/src/libcore/unicode/tables.rs +++ b/src/libcore/unicode/tables.rs @@ -16,7 +16,7 @@ use unicode::version::UnicodeVersion; use unicode::bool_trie::{BoolTrie, SmallBoolTrie}; /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of -/// `CharExt` and `UnicodeStrPrelude` traits are based on. +/// `char` and `str` methods are based on. 
pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion { major: 10, minor: 0, diff --git a/src/libcore/unicode/unicode.py b/src/libcore/unicode/unicode.py index 39b68dc7d9b67..82262cc76627f 100755 --- a/src/libcore/unicode/unicode.py +++ b/src/libcore/unicode/unicode.py @@ -470,7 +470,7 @@ def emit_norm_module(f, canon, compat, combine, norm_props): unicode_version = re.search(pattern, readme.read()).groups() rf.write(""" /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of -/// `CharExt` and `UnicodeStrPrelude` traits are based on. +/// `char` and `str` methods are based on. pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion { major: %s, minor: %s, From 0d9afcd9b9f881545c8b722855f7e39361495d27 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 19:00:48 +0200 Subject: [PATCH 11/15] Merge core::unicode::str into core::str And the UnicodeStr trait into StrExt --- src/liballoc/str.rs | 16 ++-- src/liballoc/tests/str.rs | 2 +- src/libcore/str/mod.rs | 116 ++++++++++++++++++++++- src/libcore/unicode/mod.rs | 60 +++++++++++- src/libcore/unicode/str.rs | 186 ------------------------------------- 5 files changed, 182 insertions(+), 198 deletions(-) delete mode 100644 src/libcore/unicode/str.rs diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index eaca9eb49f9f0..0b961c2c18612 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -45,7 +45,7 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use core::mem; use core::ptr; use core::iter::FusedIterator; -use core::unicode::str::{UnicodeStr, Utf16Encoder}; +use core::unicode::Utf16Encoder; use vec_deque::VecDeque; use borrow::{Borrow, ToOwned}; @@ -74,7 +74,7 @@ pub use core::str::{from_utf8, from_utf8_mut, Chars, CharIndices, Bytes}; #[stable(feature = "rust1", since = "1.0.0")] pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError}; #[stable(feature = "rust1", since = "1.0.0")] -pub use 
core::unicode::str::SplitWhitespace; +pub use core::str::SplitWhitespace; #[stable(feature = "rust1", since = "1.0.0")] pub use core::str::pattern; @@ -800,7 +800,7 @@ impl str { #[stable(feature = "split_whitespace", since = "1.1.0")] #[inline] pub fn split_whitespace(&self) -> SplitWhitespace { - UnicodeStr::split_whitespace(self) + StrExt::split_whitespace(self) } /// An iterator over the lines of a string, as string slices. @@ -1570,7 +1570,7 @@ impl str { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn trim(&self) -> &str { - UnicodeStr::trim(self) + StrExt::trim(self) } /// Returns a string slice with leading whitespace removed. @@ -1606,7 +1606,7 @@ impl str { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn trim_left(&self) -> &str { - UnicodeStr::trim_left(self) + StrExt::trim_left(self) } /// Returns a string slice with trailing whitespace removed. @@ -1642,7 +1642,7 @@ impl str { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn trim_right(&self) -> &str { - UnicodeStr::trim_right(self) + StrExt::trim_right(self) } /// Returns a string slice with all prefixes and suffixes that match a @@ -2141,7 +2141,7 @@ impl str { #[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")] #[inline] pub fn is_whitespace(&self) -> bool { - UnicodeStr::is_whitespace(self) + StrExt::is_whitespace(self) } /// Returns true if this `str` is entirely alphanumeric, and false otherwise. @@ -2160,7 +2160,7 @@ impl str { #[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")] #[inline] pub fn is_alphanumeric(&self) -> bool { - UnicodeStr::is_alphanumeric(self) + StrExt::is_alphanumeric(self) } /// Checks if all characters in this string are within the ASCII range. 
diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index 763dbe675b91d..2df8ca63a3eeb 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1204,7 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() { #[test] fn test_utf16_code_units() { - use core::unicode::str::Utf16Encoder; + use core::unicode::Utf16Encoder; assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(), [0xE9, 0xD83D, 0xDCA9]) } diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 7a97d89dcf967..f1fe23092de93 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -19,7 +19,7 @@ use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use char; use fmt; -use iter::{Map, Cloned, FusedIterator, TrustedLen}; +use iter::{Map, Cloned, FusedIterator, TrustedLen, Filter}; use iter_private::TrustedRandomAccess; use slice::{self, SliceIndex}; use mem; @@ -2216,6 +2216,18 @@ pub trait StrExt { fn is_empty(&self) -> bool; #[stable(feature = "core", since = "1.6.0")] fn parse<T: FromStr>(&self) -> Result<T, T::Err>; + #[stable(feature = "split_whitespace", since = "1.1.0")] + fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>; + #[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")] + fn is_whitespace(&self) -> bool; + #[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")] + fn is_alphanumeric(&self) -> bool; + #[stable(feature = "rust1", since = "1.0.0")] + fn trim(&self) -> &str; + #[stable(feature = "rust1", since = "1.0.0")] + fn trim_left(&self) -> &str; + #[stable(feature = "rust1", since = "1.0.0")] + fn trim_right(&self) -> &str; } // truncate `&str` to length at most equal to `max` @@ -2536,6 +2548,36 @@ impl StrExt for str { #[inline] fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) } + + #[inline] + fn split_whitespace(&self) -> SplitWhitespace { + SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) } + } + + #[inline] + fn is_whitespace(&self) -> bool { + 
self.chars().all(|c| c.is_whitespace()) + } + + #[inline] + fn is_alphanumeric(&self) -> bool { + self.chars().all(|c| c.is_alphanumeric()) + } + + #[inline] + fn trim(&self) -> &str { + self.trim_matches(|c: char| c.is_whitespace()) + } + + #[inline] + fn trim_left(&self) -> &str { + self.trim_left_matches(|c: char| c.is_whitespace()) + } + + #[inline] + fn trim_right(&self) -> &str { + self.trim_right_matches(|c: char| c.is_whitespace()) + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -2551,3 +2593,75 @@ impl<'a> Default for &'a str { /// Creates an empty str fn default() -> &'a str { "" } } + +/// An iterator over the non-whitespace substrings of a string, +/// separated by any amount of whitespace. +/// +/// This struct is created by the [`split_whitespace`] method on [`str`]. +/// See its documentation for more. +/// +/// [`split_whitespace`]: ../../std/primitive.str.html#method.split_whitespace +/// [`str`]: ../../std/primitive.str.html +#[stable(feature = "split_whitespace", since = "1.1.0")] +#[derive(Clone, Debug)] +pub struct SplitWhitespace<'a> { + inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>, +} + +#[derive(Clone)] +struct IsWhitespace; + +impl FnOnce<(char, )> for IsWhitespace { + type Output = bool; + + #[inline] + extern "rust-call" fn call_once(mut self, arg: (char, )) -> bool { + self.call_mut(arg) + } +} + +impl FnMut<(char, )> for IsWhitespace { + #[inline] + extern "rust-call" fn call_mut(&mut self, arg: (char, )) -> bool { + arg.0.is_whitespace() + } +} + +#[derive(Clone)] +struct IsNotEmpty; + +impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty { + type Output = bool; + + #[inline] + extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool { + self.call_mut(arg) + } +} + +impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty { + #[inline] + extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool { + !arg.0.is_empty() + } +} + + +#[stable(feature = "split_whitespace", since = "1.1.0")] +impl<'a> Iterator for SplitWhitespace<'a> { + type 
Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + self.inner.next() + } +} + +#[stable(feature = "split_whitespace", since = "1.1.0")] +impl<'a> DoubleEndedIterator for SplitWhitespace<'a> { + fn next_back(&mut self) -> Option<&'a str> { + self.inner.next_back() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl<'a> FusedIterator for SplitWhitespace<'a> {} diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 060c55286fecf..3413476fd2288 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -15,8 +15,6 @@ mod bool_trie; pub(crate) mod tables; pub(crate) mod version; -pub mod str; - // For use in liballoc, not re-exported in libstd. pub mod derived_property { pub use unicode::tables::derived_property::{Case_Ignorable, Cased}; @@ -26,3 +24,61 @@ pub mod derived_property { pub mod property { pub use unicode::tables::property::Pattern_White_Space; } + +use iter::FusedIterator; + +/// Iterator adaptor for encoding `char`s to UTF-16. +#[derive(Clone)] +#[allow(missing_debug_implementations)] +pub struct Utf16Encoder<I> { + chars: I, + extra: u16, +} + +impl<I> Utf16Encoder<I> { + /// Create a UTF-16 encoder from any `char` iterator. + pub fn new(chars: I) -> Utf16Encoder<I> + where I: Iterator<Item = char> + { + Utf16Encoder { + chars, + extra: 0, + } + } +} + +impl<I> Iterator for Utf16Encoder<I> + where I: Iterator<Item = char> +{ + type Item = u16; + + #[inline] + fn next(&mut self) -> Option<u16> { + if self.extra != 0 { + let tmp = self.extra; + self.extra = 0; + return Some(tmp); + } + + let mut buf = [0; 2]; + self.chars.next().map(|ch| { + let n = ch.encode_utf16(&mut buf).len(); + if n == 2 { + self.extra = buf[1]; + } + buf[0] + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let (low, high) = self.chars.size_hint(); + // every char gets either one u16 or two u16, + // so this iterator is between 1 or 2 times as + // long as the underlying iterator. 
+ (low, high.and_then(|n| n.checked_mul(2))) + } +} + +impl FusedIterator for Utf16Encoder + where I: FusedIterator {} diff --git a/src/libcore/unicode/str.rs b/src/libcore/unicode/str.rs deleted file mode 100644 index 0882984e0774c..0000000000000 --- a/src/libcore/unicode/str.rs +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Unicode-intensive string manipulations. - -use char; -use iter::{Filter, FusedIterator}; -use str::Split; - -/// An iterator over the non-whitespace substrings of a string, -/// separated by any amount of whitespace. -/// -/// This struct is created by the [`split_whitespace`] method on [`str`]. -/// See its documentation for more. 
-/// -/// [`split_whitespace`]: ../../std/primitive.str.html#method.split_whitespace -/// [`str`]: ../../std/primitive.str.html -#[stable(feature = "split_whitespace", since = "1.1.0")] -#[derive(Clone, Debug)] -pub struct SplitWhitespace<'a> { - inner: Filter, IsNotEmpty>, -} - -/// Methods for Unicode string slices -#[allow(missing_docs)] // docs in liballoc -pub trait UnicodeStr { - fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>; - fn is_whitespace(&self) -> bool; - fn is_alphanumeric(&self) -> bool; - fn trim(&self) -> &str; - fn trim_left(&self) -> &str; - fn trim_right(&self) -> &str; -} - -impl UnicodeStr for str { - #[inline] - fn split_whitespace(&self) -> SplitWhitespace { - SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) } - } - - #[inline] - fn is_whitespace(&self) -> bool { - self.chars().all(|c| c.is_whitespace()) - } - - #[inline] - fn is_alphanumeric(&self) -> bool { - self.chars().all(|c| c.is_alphanumeric()) - } - - #[inline] - fn trim(&self) -> &str { - self.trim_matches(|c: char| c.is_whitespace()) - } - - #[inline] - fn trim_left(&self) -> &str { - self.trim_left_matches(|c: char| c.is_whitespace()) - } - - #[inline] - fn trim_right(&self) -> &str { - self.trim_right_matches(|c: char| c.is_whitespace()) - } -} - -/// Iterator adaptor for encoding `char`s to UTF-16. -#[derive(Clone)] -#[allow(missing_debug_implementations)] -pub struct Utf16Encoder { - chars: I, - extra: u16, -} - -impl Utf16Encoder { - /// Create a UTF-16 encoder from any `char` iterator. 
- pub fn new(chars: I) -> Utf16Encoder - where I: Iterator - { - Utf16Encoder { - chars, - extra: 0, - } - } -} - -impl Iterator for Utf16Encoder - where I: Iterator -{ - type Item = u16; - - #[inline] - fn next(&mut self) -> Option { - if self.extra != 0 { - let tmp = self.extra; - self.extra = 0; - return Some(tmp); - } - - let mut buf = [0; 2]; - self.chars.next().map(|ch| { - let n = ch.encode_utf16(&mut buf).len(); - if n == 2 { - self.extra = buf[1]; - } - buf[0] - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (low, high) = self.chars.size_hint(); - // every char gets either one u16 or two u16, - // so this iterator is between 1 or 2 times as - // long as the underlying iterator. - (low, high.and_then(|n| n.checked_mul(2))) - } -} - -impl FusedIterator for Utf16Encoder - where I: FusedIterator {} - -#[derive(Clone)] -struct IsWhitespace; - -impl FnOnce<(char, )> for IsWhitespace { - type Output = bool; - - #[inline] - extern "rust-call" fn call_once(mut self, arg: (char, )) -> bool { - self.call_mut(arg) - } -} - -impl FnMut<(char, )> for IsWhitespace { - #[inline] - extern "rust-call" fn call_mut(&mut self, arg: (char, )) -> bool { - arg.0.is_whitespace() - } -} - -#[derive(Clone)] -struct IsNotEmpty; - -impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty { - type Output = bool; - - #[inline] - extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool { - self.call_mut(arg) - } -} - -impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty { - #[inline] - extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool { - !arg.0.is_empty() - } -} - - -#[stable(feature = "split_whitespace", since = "1.1.0")] -impl<'a> Iterator for SplitWhitespace<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option<&'a str> { - self.inner.next() - } -} - -#[stable(feature = "split_whitespace", since = "1.1.0")] -impl<'a> DoubleEndedIterator for SplitWhitespace<'a> { - fn next_back(&mut self) -> Option<&'a str> { - self.inner.next_back() - 
} -} - -#[stable(feature = "fused", since = "1.26.0")] -impl<'a> FusedIterator for SplitWhitespace<'a> {} From d4ed1e6fa4978141408ef01d0d35c7bd142dd164 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 6 Apr 2018 10:24:01 +0200 Subject: [PATCH 12/15] Merge unstable Utf16Encoder into EncodeUtf16 --- src/liballoc/str.rs | 27 ++++++++++++++---- src/liballoc/tests/str.rs | 3 +- src/libcore/unicode/mod.rs | 58 -------------------------------------- 3 files changed, 23 insertions(+), 65 deletions(-) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 0b961c2c18612..65df93bd3bb54 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -45,7 +45,6 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use core::mem; use core::ptr; use core::iter::FusedIterator; -use core::unicode::Utf16Encoder; use vec_deque::VecDeque; use borrow::{Borrow, ToOwned}; @@ -146,7 +145,8 @@ impl> SliceConcatExt for [S] { #[derive(Clone)] #[stable(feature = "encode_utf16", since = "1.8.0")] pub struct EncodeUtf16<'a> { - encoder: Utf16Encoder>, + chars: Chars<'a>, + extra: u16, } #[stable(feature = "collection_debug", since = "1.17.0")] @@ -162,12 +162,29 @@ impl<'a> Iterator for EncodeUtf16<'a> { #[inline] fn next(&mut self) -> Option { - self.encoder.next() + if self.extra != 0 { + let tmp = self.extra; + self.extra = 0; + return Some(tmp); + } + + let mut buf = [0; 2]; + self.chars.next().map(|ch| { + let n = ch.encode_utf16(&mut buf).len(); + if n == 2 { + self.extra = buf[1]; + } + buf[0] + }) } #[inline] fn size_hint(&self) -> (usize, Option) { - self.encoder.size_hint() + let (low, high) = self.chars.size_hint(); + // every char gets either one u16 or two u16, + // so this iterator is between 1 or 2 times as + // long as the underlying iterator. 
+ (low, high.and_then(|n| n.checked_mul(2))) } } @@ -870,7 +887,7 @@ impl str { /// ``` #[stable(feature = "encode_utf16", since = "1.8.0")] pub fn encode_utf16(&self) -> EncodeUtf16 { - EncodeUtf16 { encoder: Utf16Encoder::new(self[..].chars()) } + EncodeUtf16 { chars: self[..].chars(), extra: 0 } } /// Returns `true` if the given pattern matches a sub-slice of diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index 2df8ca63a3eeb..a3f4c385fe23b 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1204,8 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() { #[test] fn test_utf16_code_units() { - use core::unicode::Utf16Encoder; - assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::>(), + assert_eq!("é\u{1F4A9}".encode_utf16().collect::>(), [0xE9, 0xD83D, 0xDCA9]) } diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 3413476fd2288..9ab8cb748b10d 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -24,61 +24,3 @@ pub mod derived_property { pub mod property { pub use unicode::tables::property::Pattern_White_Space; } - -use iter::FusedIterator; - -/// Iterator adaptor for encoding `char`s to UTF-16. -#[derive(Clone)] -#[allow(missing_debug_implementations)] -pub struct Utf16Encoder { - chars: I, - extra: u16, -} - -impl Utf16Encoder { - /// Create a UTF-16 encoder from any `char` iterator. 
- pub fn new(chars: I) -> Utf16Encoder - where I: Iterator - { - Utf16Encoder { - chars, - extra: 0, - } - } -} - -impl Iterator for Utf16Encoder - where I: Iterator -{ - type Item = u16; - - #[inline] - fn next(&mut self) -> Option { - if self.extra != 0 { - let tmp = self.extra; - self.extra = 0; - return Some(tmp); - } - - let mut buf = [0; 2]; - self.chars.next().map(|ch| { - let n = ch.encode_utf16(&mut buf).len(); - if n == 2 { - self.extra = buf[1]; - } - buf[0] - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (low, high) = self.chars.size_hint(); - // every char gets either one u16 or two u16, - // so this iterator is between 1 or 2 times as - // long as the underlying iterator. - (low, high.and_then(|n| n.checked_mul(2))) - } -} - -impl FusedIterator for Utf16Encoder - where I: FusedIterator {} From 670e85339a5014ad6fde02beb0603d2e12027a89 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 6 Apr 2018 10:26:57 +0200 Subject: [PATCH 13/15] Move core::char::printable to core::unicode::printable --- src/libcore/char/methods.rs | 2 +- src/libcore/char/mod.rs | 1 - src/libcore/unicode/mod.rs | 1 + src/libcore/{char => unicode}/printable.py | 2 +- src/libcore/{char => unicode}/printable.rs | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename src/libcore/{char => unicode}/printable.py (98%) rename src/libcore/{char => unicode}/printable.rs (99%) diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs index 2c433a7ac9ed5..374adafef647d 100644 --- a/src/libcore/char/methods.rs +++ b/src/libcore/char/methods.rs @@ -13,7 +13,7 @@ use slice; use str::from_utf8_unchecked_mut; use super::*; -use super::printable::is_printable; +use unicode::printable::is_printable; use unicode::tables::{conversions, derived_property, general_category, property}; #[lang = "char"] diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index c051a1ff8c886..b2af95eb5bab3 100644 --- a/src/libcore/char/mod.rs +++ 
b/src/libcore/char/mod.rs @@ -32,7 +32,6 @@ mod convert; mod decode; mod methods; -mod printable; // stable re-exports #[stable(feature = "rust1", since = "1.0.0")] diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 9ab8cb748b10d..0fbc4dcd175d1 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -12,6 +12,7 @@ #![allow(missing_docs)] mod bool_trie; +pub(crate) mod printable; pub(crate) mod tables; pub(crate) mod version; diff --git a/src/libcore/char/printable.py b/src/libcore/unicode/printable.py similarity index 98% rename from src/libcore/char/printable.py rename to src/libcore/unicode/printable.py index 484822e10aa7b..9410dafbbc364 100644 --- a/src/libcore/char/printable.py +++ b/src/libcore/unicode/printable.py @@ -187,7 +187,7 @@ def main(): // option. This file may not be copied, modified, or distributed // except according to those terms. -// NOTE: The following code was generated by "src/libcore/char/printable.py", +// NOTE: The following code was generated by "src/libcore/unicode/printable.py", // do not edit directly! fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], diff --git a/src/libcore/char/printable.rs b/src/libcore/unicode/printable.rs similarity index 99% rename from src/libcore/char/printable.rs rename to src/libcore/unicode/printable.rs index ce011fab1878b..4426c32eebcee 100644 --- a/src/libcore/char/printable.rs +++ b/src/libcore/unicode/printable.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// NOTE: The following code was generated by "src/libcore/char/printable.py", +// NOTE: The following code was generated by "src/libcore/unicode/printable.py", // do not edit directly! 
fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], From 1ca2905cda99dd395347b46c3ce225f58b4d3844 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 6 Apr 2018 14:18:28 +0200 Subject: [PATCH 14/15] Dedicated tracking issue for UnicodeVersion and UNICODE_VERSION. --- src/libcore/char/mod.rs | 6 +++--- src/libcore/unicode/tables.rs | 1 + src/libcore/unicode/unicode.py | 1 + src/libcore/unicode/version.rs | 1 + src/test/run-pass/char_unicode.rs | 4 +--- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index b2af95eb5bab3..9edc0c88756b3 100644 --- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -46,9 +46,9 @@ pub use self::convert::CharTryFromError; pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; // unstable re-exports -#[unstable(feature = "unicode", issue = "27783")] -pub use unicode::tables::{UNICODE_VERSION}; -#[unstable(feature = "unicode", issue = "27783")] +#[unstable(feature = "unicode_version", issue = "49726")] +pub use unicode::tables::UNICODE_VERSION; +#[unstable(feature = "unicode_version", issue = "49726")] pub use unicode::version::UnicodeVersion; #[unstable(feature = "decode_utf8", issue = "33906")] pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence}; diff --git a/src/libcore/unicode/tables.rs b/src/libcore/unicode/tables.rs index 1f7b1bde9141a..3fbbc011bc41d 100644 --- a/src/libcore/unicode/tables.rs +++ b/src/libcore/unicode/tables.rs @@ -17,6 +17,7 @@ use unicode::bool_trie::{BoolTrie, SmallBoolTrie}; /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of /// `char` and `str` methods are based on. 
+#[unstable(feature = "unicode_version", issue = "49726")] pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion { major: 10, minor: 0, diff --git a/src/libcore/unicode/unicode.py b/src/libcore/unicode/unicode.py index 82262cc76627f..75ec01944bfb9 100755 --- a/src/libcore/unicode/unicode.py +++ b/src/libcore/unicode/unicode.py @@ -471,6 +471,7 @@ def emit_norm_module(f, canon, compat, combine, norm_props): rf.write(""" /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of /// `char` and `str` methods are based on. +#[unstable(feature = "unicode_version", issue = "49726")] pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion { major: %s, minor: %s, diff --git a/src/libcore/unicode/version.rs b/src/libcore/unicode/version.rs index d82a749d91786..59ebf5f501269 100644 --- a/src/libcore/unicode/version.rs +++ b/src/libcore/unicode/version.rs @@ -12,6 +12,7 @@ /// /// See also: #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] +#[unstable(feature = "unicode_version", issue = "49726")] pub struct UnicodeVersion { /// Major version. pub major: u32, diff --git a/src/test/run-pass/char_unicode.rs b/src/test/run-pass/char_unicode.rs index b4884acdd078f..bfc7faac06ebe 100644 --- a/src/test/run-pass/char_unicode.rs +++ b/src/test/run-pass/char_unicode.rs @@ -8,9 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. - -#![feature(unicode)] - +#![feature(unicode_version)] /// Tests access to the internal Unicode Version type and value. pub fn main() { From ef41788cf37074e44f70257508c97efd539a7f29 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 6 Apr 2018 14:23:00 +0200 Subject: [PATCH 15/15] Mark the rest of the `unicode` feature flag as perma-unstable. 
--- src/liballoc/lib.rs | 2 +- src/liballoc/tests/lib.rs | 1 - src/libcore/unicode/mod.rs | 2 +- src/librustdoc/lib.rs | 1 - src/libstd/lib.rs | 1 - src/libstd_unicode/lib.rs | 2 +- src/libsyntax/lib.rs | 2 +- 7 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/liballoc/lib.rs b/src/liballoc/lib.rs index d1a91ab4a9ce1..69fc007ab7c16 100644 --- a/src/liballoc/lib.rs +++ b/src/liballoc/lib.rs @@ -113,7 +113,7 @@ #![feature(trusted_len)] #![feature(try_reserve)] #![feature(unboxed_closures)] -#![feature(unicode)] +#![feature(unicode_internals)] #![feature(unsize)] #![feature(allocator_internals)] #![feature(on_unimplemented)] diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs index fddf341d0d18e..3227216930700 100644 --- a/src/liballoc/tests/lib.rs +++ b/src/liballoc/tests/lib.rs @@ -24,7 +24,6 @@ #![feature(string_retain)] #![feature(try_reserve)] #![feature(unboxed_closures)] -#![feature(unicode)] #![feature(exact_chunks)] #![feature(inclusive_range_fields)] diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 0fbc4dcd175d1..b6b033adc046e 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#![unstable(feature = "unicode", issue = "27783")] +#![unstable(feature = "unicode_internals", issue = "0")] #![allow(missing_docs)] mod bool_trie; diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 9ac034869acb8..4a062e9a55b30 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -20,7 +20,6 @@ #![feature(fs_read_write)] #![feature(set_stdio)] #![feature(test)] -#![feature(unicode)] #![feature(vec_remove_item)] #![feature(entry_and_modify)] diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 94e48732c26e8..c82d600e4a184 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -307,7 +307,6 @@ #![feature(toowned_clone_into)] #![feature(try_reserve)] #![feature(unboxed_closures)] -#![feature(unicode)] #![feature(untagged_unions)] #![feature(unwind_attributes)] #![feature(vec_push_all)] diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs index 29de017c64d88..c0d47f1fcb42b 100644 --- a/src/libstd_unicode/lib.rs +++ b/src/libstd_unicode/lib.rs @@ -29,7 +29,7 @@ test(no_crate_inject, attr(allow(unused_variables), deny(warnings))))] #![no_std] -#![feature(unicode)] +#![feature(unicode_internals)] #![feature(staged_api)] #![rustc_deprecated(since = "1.27.0", reason = "moved into libcore")] diff --git a/src/libsyntax/lib.rs b/src/libsyntax/lib.rs index 9de905c01d6c7..4e1e9fa2b46ea 100644 --- a/src/libsyntax/lib.rs +++ b/src/libsyntax/lib.rs @@ -19,7 +19,7 @@ html_root_url = "https://doc.rust-lang.org/nightly/", test(attr(deny(warnings))))] -#![feature(unicode)] +#![feature(unicode_internals)] #![feature(rustc_diagnostic_macros)] #![feature(non_exhaustive)] #![feature(const_atomic_usize_new)]