From c57f249adf8c313dade6bd9bad68d2fd4d7091f9 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 2 Jan 2025 22:37:03 +0200 Subject: [PATCH] Implement `ByteStr` and `ByteString` types Approved ACP: https://github.com/rust-lang/libs-team/issues/502 Tracking issue: https://github.com/rust-lang/rust/issues/134915 These types represent human-readable strings that are conventionally, but not always, UTF-8. The `Debug` impl prints non-UTF-8 bytes using escape sequences, and the `Display` impl uses the Unicode replacement character. This is a minimal implementation of these types and associated trait impls. It does not add any helper methods to other types such as `[u8]` or `Vec`. I've omitted a few implementations of `AsRef`, `AsMut`, `Borrow`, `From`, and `PartialOrd`, when those would be the second implementation for a type (counting the `T` impl) or otherwise may cause inference failures. These impls are important, but we can attempt to add them later in standalone commits, and run them through crater. In addition to the `bstr` feature, I've added a `bstr_internals` feature for APIs provided by `core` for use by `alloc` but not currently intended for stabilization. This API and its implementation are based *heavily* on the `bstr` crate by Andrew Gallant (@BurntSushi). --- library/alloc/src/bstr.rs | 696 +++++++++++++++++++++++++++++++++++++ library/alloc/src/lib.rs | 4 + library/core/src/bstr.rs | 594 +++++++++++++++++++++++++++++++ library/core/src/lib.rs | 4 + library/core/tests/bstr.rs | 54 +++ library/std/src/bstr.rs | 4 + library/std/src/lib.rs | 4 + 7 files changed, 1360 insertions(+) create mode 100644 library/alloc/src/bstr.rs create mode 100644 library/core/src/bstr.rs create mode 100644 library/core/tests/bstr.rs create mode 100644 library/std/src/bstr.rs diff --git a/library/alloc/src/bstr.rs b/library/alloc/src/bstr.rs new file mode 100644 index 0000000000000..6b11b9a00d2b7 --- /dev/null +++ b/library/alloc/src/bstr.rs @@ -0,0 +1,696 @@ +//! The `ByteStr` and `ByteString` types and trait implementations. + +use core::borrow::{Borrow, BorrowMut}; +#[unstable(feature = "bstr", issue = "134915")] +pub use core::bstr::ByteStr; +use core::bstr::{impl_partial_eq, impl_partial_eq_ord, impl_partial_eq_ord_n}; +use core::cmp::Ordering; +use core::ops::{ + Deref, DerefMut, DerefPure, Index, IndexMut, Range, RangeFrom, RangeFull, RangeInclusive, + RangeTo, RangeToInclusive, +}; +use core::str::FromStr; +use core::{fmt, hash}; + +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::rc::Rc; +use crate::string::String; +use crate::sync::Arc; +use crate::vec::Vec; + +/// A wrapper for `Vec` representing a human-readable string that's conventionally, but not +/// always, UTF-8. +/// +/// Unlike `String`, this type permits non-UTF-8 contents, making it suitable for user input, +/// non-native filenames (as `Path` only supports native filenames), and other applications that +/// need to round-trip whatever data the user provides. +/// +/// A `ByteString` owns its contents and can grow and shrink, like a `Vec` or `String`. For a +/// borrowed byte string, see [`ByteStr`](../../std/bstr/struct.ByteStr.html). +/// +/// `ByteString` implements `Deref` to `&Vec`, so all methods available on `&Vec` are +/// available on `ByteString`. Similarly, `ByteString` implements `DerefMut` to `&mut Vec`, +/// so you can modify a `ByteString` using any method available on `&mut Vec`. +/// +/// The `Debug` and `Display` implementations for `ByteString` are the same as those for `ByteStr`, +/// showing invalid UTF-8 as hex escapes or the Unicode replacement character, respectively. +/// +/// # Examples +/// +/// You can create a new `ByteString` from a `Vec` directly, or via a `From` impl from various +/// string types: +/// +/// ``` +/// # #![feature(bstr)] +/// # use std::bstr::ByteString; +/// let s1 = ByteString(vec![b'H', b'e', b'l', b'l', b'o']); +/// let s2 = ByteString::from("Hello"); +/// let s3 = ByteString::from(b"Hello"); +/// assert_eq!(s1, s2); +/// assert_eq!(s2, s3); +/// ``` +#[unstable(feature = "bstr", issue = "134915")] +#[repr(transparent)] +#[derive(Clone)] +pub struct ByteString(pub Vec); + +impl ByteString { + #[inline] + pub(crate) fn as_bytes(&self) -> &[u8] { + &self.0 + } + + #[inline] + pub(crate) fn as_bytestr(&self) -> &ByteStr { + ByteStr::new(&self.0) + } + + #[inline] + pub(crate) fn as_mut_bytestr(&mut self) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Deref for ByteString { + type Target = Vec; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl DerefMut for ByteString { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +#[unstable(feature = "deref_pure_trait", issue = "87121")] +unsafe impl DerefPure for ByteString {} + +#[unstable(feature = "bstr", issue = "134915")] +impl fmt::Debug for ByteString { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_bytestr(), f) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl fmt::Display for ByteString { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_bytestr(), f) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl AsRef<[u8]> for ByteString { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl AsRef for ByteString { + #[inline] + fn as_ref(&self) -> &ByteStr { + self.as_bytestr() + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl AsMut<[u8]> for ByteString { + #[inline] + fn as_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl AsMut for ByteString { + #[inline] + fn as_mut(&mut self) -> &mut ByteStr { + self.as_mut_bytestr() + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Borrow<[u8]> for ByteString { + #[inline] + fn borrow(&self) -> &[u8] { + &self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Borrow for ByteString { + #[inline] + fn borrow(&self) -> &ByteStr { + self.as_bytestr() + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Borrow for Vec { + #[inline] + fn borrow(&self) -> &ByteStr { + ByteStr::from_bytes(self.as_slice()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Borrow for String { + #[inline] + fn borrow(&self) -> &ByteStr { + ByteStr::from_bytes(self.as_bytes()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl BorrowMut<[u8]> for ByteString { + #[inline] + fn borrow_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl BorrowMut for ByteString { + #[inline] + fn borrow_mut(&mut self) -> &mut ByteStr { + self.as_mut_bytestr() + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl BorrowMut for Vec { + #[inline] + fn borrow_mut(&mut self) -> &mut ByteStr { + ByteStr::from_bytes_mut(self.as_mut_slice()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Default for ByteString { + fn default() -> Self { + ByteString(Vec::new()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a, const N: usize> From<&'a [u8; N]> for ByteString { + #[inline] + fn from(s: &'a [u8; N]) -> Self { + ByteString(s.as_slice().to_vec()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From<[u8; N]> for ByteString { + #[inline] + fn from(s: [u8; N]) -> Self { + ByteString(s.as_slice().to_vec()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From<&'a [u8]> for ByteString { + #[inline] + fn from(s: &'a [u8]) -> Self { + ByteString(s.to_vec()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From> for ByteString { + #[inline] + fn from(s: Vec) -> Self { + ByteString(s) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From for Vec { + #[inline] + fn from(s: ByteString) -> Self { + s.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From<&'a str> for ByteString { + #[inline] + fn from(s: &'a str) -> Self { + ByteString(s.as_bytes().to_vec()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From for ByteString { + #[inline] + fn from(s: String) -> Self { + ByteString(s.into_bytes()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From<&'a ByteStr> for ByteString { + #[inline] + fn from(s: &'a ByteStr) -> Self { + ByteString(s.0.to_vec()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From for Cow<'a, ByteStr> { + #[inline] + fn from(s: ByteString) -> Self { + Cow::Owned(s) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From<&'a ByteString> for Cow<'a, ByteStr> { + #[inline] + fn from(s: &'a ByteString) -> Self { + Cow::Borrowed(s.as_bytestr()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl FromIterator for ByteString { + #[inline] + fn from_iter>(iter: T) -> Self { + ByteString(iter.into_iter().collect::().into_bytes()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl FromIterator for ByteString { + #[inline] + fn from_iter>(iter: T) -> Self { + ByteString(iter.into_iter().collect()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> FromIterator<&'a str> for ByteString { + #[inline] + fn from_iter>(iter: T) -> Self { + ByteString(iter.into_iter().collect::().into_bytes()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> FromIterator<&'a [u8]> for ByteString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = Vec::new(); + for b in iter { + buf.extend_from_slice(b); + } + ByteString(buf) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> FromIterator<&'a ByteStr> for ByteString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = Vec::new(); + for b in iter { + buf.extend_from_slice(&b.0); + } + ByteString(buf) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl FromIterator for ByteString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = Vec::new(); + for mut b in iter { + buf.append(&mut b.0); + } + ByteString(buf) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl FromStr for ByteString { + type Err = core::convert::Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + Ok(ByteString(s.as_bytes().to_vec())) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index for ByteString { + type Output = u8; + + #[inline] + fn index(&self, idx: usize) -> &u8 { + &self.0[idx] + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index for ByteString { + type Output = ByteStr; + + #[inline] + fn index(&self, _: RangeFull) -> &ByteStr { + self.as_bytestr() + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteString { + type Output = ByteStr; + + #[inline] + fn index(&self, r: Range) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteString { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeInclusive) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteString { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeFrom) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteString { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeTo) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteString { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeToInclusive) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut for ByteString { + #[inline] + fn index_mut(&mut self, idx: usize) -> &mut u8 { + &mut self.0[idx] + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut for ByteString { + #[inline] + fn index_mut(&mut self, _: RangeFull) -> &mut ByteStr { + self.as_mut_bytestr() + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteString { + #[inline] + fn index_mut(&mut self, r: Range) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteString { + #[inline] + fn index_mut(&mut self, r: RangeInclusive) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteString { + #[inline] + fn index_mut(&mut self, r: RangeFrom) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteString { + #[inline] + fn index_mut(&mut self, r: RangeTo) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteString { + #[inline] + fn index_mut(&mut self, r: RangeToInclusive) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl hash::Hash for ByteString { + #[inline] + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Eq for ByteString {} + +#[unstable(feature = "bstr", issue = "134915")] +impl PartialEq for ByteString { + #[inline] + fn eq(&self, other: &ByteString) -> bool { + self.0 == other.0 + } +} + +macro_rules! impl_partial_eq_ord_cow { + ($lhs:ty, $rhs:ty) => { + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl<'a> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + let other: &[u8] = (&**other).as_ref(); + PartialEq::eq(self.as_bytes(), other) + } + } + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl<'a> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + let this: &[u8] = (&**self).as_ref(); + PartialEq::eq(this, other.as_bytes()) + } + } + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl<'a> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option { + let other: &[u8] = (&**other).as_ref(); + PartialOrd::partial_cmp(self.as_bytes(), other) + } + } + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl<'a> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option { + let this: &[u8] = (&**self).as_ref(); + PartialOrd::partial_cmp(this, other.as_bytes()) + } + } + }; +} + +impl_partial_eq_ord!(ByteString, Vec); +// PartialOrd with `[u8]` omitted to avoid inference failures +impl_partial_eq!(ByteString, [u8]); +// PartialOrd with `&[u8]` omitted to avoid inference failures +impl_partial_eq!(ByteString, &[u8]); +impl_partial_eq_ord!(ByteString, String); +// PartialOrd with `str` omitted to avoid inference failures +impl_partial_eq!(ByteString, str); +// PartialOrd with `&str` omitted to avoid inference failures +impl_partial_eq!(ByteString, &str); +impl_partial_eq_ord!(ByteString, ByteStr); +impl_partial_eq_ord!(ByteString, &ByteStr); +impl_partial_eq_ord_n!(ByteString, [u8; N]); +impl_partial_eq_ord_n!(ByteString, &[u8; N]); +impl_partial_eq_ord_cow!(ByteString, Cow<'_, ByteStr>); +impl_partial_eq_ord_cow!(ByteString, Cow<'_, str>); +impl_partial_eq_ord_cow!(ByteString, Cow<'_, [u8]>); + +#[unstable(feature = "bstr", issue = "134915")] +impl Ord for ByteString { + #[inline] + fn cmp(&self, other: &ByteString) -> Ordering { + Ord::cmp(&self.0, &other.0) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl PartialOrd for ByteString { + #[inline] + fn partial_cmp(&self, other: &ByteString) -> Option { + PartialOrd::partial_cmp(&self.0, &other.0) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl ToOwned for ByteStr { + type Owned = ByteString; + + #[inline] + fn to_owned(&self) -> ByteString { + ByteString(self.0.to_vec()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl TryFrom for String { + type Error = crate::string::FromUtf8Error; + + #[inline] + fn try_from(s: ByteString) -> Result { + String::from_utf8(s.0) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> TryFrom<&'a ByteString> for &'a str { + type Error = crate::str::Utf8Error; + + #[inline] + fn try_from(s: &'a ByteString) -> Result { + crate::str::from_utf8(s.0.as_slice()) + } +} + +// Additional impls for `ByteStr` that require types from `alloc`: + +#[unstable(feature = "bstr", issue = "134915")] +impl Clone for Box { + #[inline] + fn clone(&self) -> Self { + Self::from(Box::<[u8]>::from(&self.0)) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From<&'a ByteStr> for Cow<'a, ByteStr> { + #[inline] + fn from(s: &'a ByteStr) -> Self { + Cow::Borrowed(s) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From> for Box { + #[inline] + fn from(s: Box<[u8]>) -> Box { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`. + unsafe { Box::from_raw(Box::into_raw(s) as _) } + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From> for Box<[u8]> { + #[inline] + fn from(s: Box) -> Box<[u8]> { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`. + unsafe { Box::from_raw(Box::into_raw(s) as _) } + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From> for Rc { + #[inline] + fn from(s: Rc<[u8]>) -> Rc { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`. + unsafe { Rc::from_raw(Rc::into_raw(s) as _) } + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From> for Rc<[u8]> { + #[inline] + fn from(s: Rc) -> Rc<[u8]> { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`. + unsafe { Rc::from_raw(Rc::into_raw(s) as _) } + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From> for Arc { + #[inline] + fn from(s: Arc<[u8]>) -> Arc { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`. + unsafe { Arc::from_raw(Arc::into_raw(s) as _) } + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl From> for Arc<[u8]> { + #[inline] + fn from(s: Arc) -> Arc<[u8]> { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`. + unsafe { Arc::from_raw(Arc::into_raw(s) as _) } + } +} + +impl_partial_eq_ord!(ByteStr, Vec); +impl_partial_eq_ord!(ByteStr, String); +impl_partial_eq_ord_cow!(&'a ByteStr, Cow<'a, ByteStr>); +impl_partial_eq_ord_cow!(&'a ByteStr, Cow<'a, str>); +impl_partial_eq_ord_cow!(&'a ByteStr, Cow<'a, [u8]>); + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> TryFrom<&'a ByteStr> for String { + type Error = core::str::Utf8Error; + + #[inline] + fn try_from(s: &'a ByteStr) -> Result { + Ok(core::str::from_utf8(&s.0)?.into()) + } +} diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 40759cb0ba83c..996fe96362d9e 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -103,6 +103,8 @@ #![feature(async_fn_traits)] #![feature(async_iterator)] #![feature(box_uninit_write)] +#![feature(bstr)] +#![feature(bstr_internals)] #![feature(clone_to_uninit)] #![feature(coerce_unsized)] #![feature(const_eval_select)] @@ -226,6 +228,8 @@ mod boxed { pub use std::boxed::Box; } pub mod borrow; +#[unstable(feature = "bstr", issue = "134915")] +pub mod bstr; pub mod collections; #[cfg(all(not(no_rc), not(no_sync), not(no_global_oom_handling)))] pub mod ffi; diff --git a/library/core/src/bstr.rs b/library/core/src/bstr.rs new file mode 100644 index 0000000000000..7dbc444eca5cf --- /dev/null +++ b/library/core/src/bstr.rs @@ -0,0 +1,594 @@ +//! The `ByteStr` type and trait implementations. + +use crate::borrow::{Borrow, BorrowMut}; +use crate::cmp::Ordering; +use crate::ops::{ + Deref, DerefMut, DerefPure, Index, IndexMut, Range, RangeFrom, RangeFull, RangeInclusive, + RangeTo, RangeToInclusive, +}; +use crate::{fmt, hash}; + +/// A wrapper for `&[u8]` representing a human-readable string that's conventionally, but not +/// always, UTF-8. +/// +/// Unlike `&str`, this type permits non-UTF-8 contents, making it suitable for user input, +/// non-native filenames (as `Path` only supports native filenames), and other applications that +/// need to round-trip whatever data the user provides. +/// +/// For an owned, growable byte string buffer, use +/// [`ByteString`](../../std/bstr/struct.ByteString.html). +/// +/// `ByteStr` implements `Deref` to `[u8]`, so all methods available on `[u8]` are available on +/// `ByteStr`. +/// +/// # Representation +/// +/// A `&ByteStr` has the same representation as a `&str`. That is, a `&ByteStr` is a wide pointer +/// which includes a pointer to some bytes and a length. +/// +/// # Trait implementations +/// +/// The `ByteStr` type has a number of trait implementations, and in particular, defines equality +/// and comparisons between `&ByteStr`, `&str`, and `&[u8]`, for convenience. +/// +/// The `Debug` implementation for `ByteStr` shows its bytes as a normal string, with invalid UTF-8 +/// presented as hex escape sequences. +/// +/// The `Display` implementation behaves as if the `ByteStr` were first lossily converted to a +/// `str`, with invalid UTF-8 presented as the Unicode replacement character: � +/// +#[unstable(feature = "bstr", issue = "134915")] +#[repr(transparent)] +pub struct ByteStr(pub [u8]); + +impl ByteStr { + /// Creates a `ByteStr` slice from anything that can be converted to a byte slice. + /// + /// This is a zero-cost conversion. + /// + /// # Example + /// + /// You can create a `ByteStr` from a byte array, a byte slice or a string slice: + /// + /// ``` + /// # #![feature(bstr)] + /// # use std::bstr::ByteStr; + /// let a = ByteStr::new(b"abc"); + /// let b = ByteStr::new(&b"abc"[..]); + /// let c = ByteStr::new("abc"); + /// + /// assert_eq!(a, b); + /// assert_eq!(a, c); + /// ``` + #[inline] + #[unstable(feature = "bstr", issue = "134915")] + pub fn new>(bytes: &B) -> &Self { + ByteStr::from_bytes(bytes.as_ref()) + } + + #[doc(hidden)] + #[unstable(feature = "bstr_internals", issue = "none")] + #[inline] + pub fn from_bytes(slice: &[u8]) -> &Self { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`, so we can turn a reference to + // the wrapped type into a reference to the wrapper type. + unsafe { &*(slice as *const [u8] as *const Self) } + } + + #[doc(hidden)] + #[unstable(feature = "bstr_internals", issue = "none")] + #[inline] + pub fn from_bytes_mut(slice: &mut [u8]) -> &mut Self { + // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`, so we can turn a reference to + // the wrapped type into a reference to the wrapper type. + unsafe { &mut *(slice as *mut [u8] as *mut Self) } + } + + #[doc(hidden)] + #[unstable(feature = "bstr_internals", issue = "none")] + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Deref for ByteStr { + type Target = [u8]; + + #[inline] + fn deref(&self) -> &[u8] { + &self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl DerefMut for ByteStr { + #[inline] + fn deref_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +#[unstable(feature = "deref_pure_trait", issue = "87121")] +unsafe impl DerefPure for ByteStr {} + +#[unstable(feature = "bstr", issue = "134915")] +impl fmt::Debug for ByteStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "\"")?; + for chunk in self.utf8_chunks() { + for c in chunk.valid().chars() { + match c { + '\0' => write!(f, "\\0")?, + '\x01'..='\x7f' => write!(f, "{}", (c as u8).escape_ascii())?, + _ => write!(f, "{}", c.escape_debug())?, + } + } + write!(f, "{}", chunk.invalid().escape_ascii())?; + } + write!(f, "\"")?; + Ok(()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl fmt::Display for ByteStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn fmt_nopad(this: &ByteStr, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for chunk in this.utf8_chunks() { + f.write_str(chunk.valid())?; + if !chunk.invalid().is_empty() { + f.write_str("\u{FFFD}")?; + } + } + Ok(()) + } + + let Some(align) = f.align() else { + return fmt_nopad(self, f); + }; + let nchars: usize = self + .utf8_chunks() + .map(|chunk| chunk.valid().len() + if chunk.invalid().is_empty() { 0 } else { 1 }) + .sum(); + let padding = f.width().unwrap_or(0).saturating_sub(nchars); + let fill = f.fill(); + let (lpad, rpad) = match align { + fmt::Alignment::Left => (0, padding), + fmt::Alignment::Right => (padding, 0), + fmt::Alignment::Center => { + let half = padding / 2; + (half, half + padding % 2) + } + }; + for _ in 0..lpad { + write!(f, "{fill}")?; + } + fmt_nopad(self, f)?; + for _ in 0..rpad { + write!(f, "{fill}")?; + } + + Ok(()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl AsRef<[u8]> for ByteStr { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl AsRef for ByteStr { + #[inline] + fn as_ref(&self) -> &ByteStr { + self + } +} + +// `impl AsRef for [u8]` omitted to avoid widespread inference failures + +#[unstable(feature = "bstr", issue = "134915")] +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &ByteStr { + ByteStr::new(self) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl AsMut<[u8]> for ByteStr { + #[inline] + fn as_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +// `impl AsMut for [u8]` omitted to avoid widespread inference failures + +// `impl Borrow for [u8]` omitted to avoid widespread inference failures + +// `impl Borrow for str` omitted to avoid widespread inference failures + +#[unstable(feature = "bstr", issue = "134915")] +impl Borrow<[u8]> for ByteStr { + #[inline] + fn borrow(&self) -> &[u8] { + &self.0 + } +} + +// `impl BorrowMut for [u8]` omitted to avoid widespread inference failures + +#[unstable(feature = "bstr", issue = "134915")] +impl BorrowMut<[u8]> for ByteStr { + #[inline] + fn borrow_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> Default for &'a ByteStr { + fn default() -> Self { + ByteStr::from_bytes(b"") + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> Default for &'a mut ByteStr { + fn default() -> Self { + ByteStr::from_bytes_mut(&mut []) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a, const N: usize> From<&'a [u8; N]> for &'a ByteStr { + #[inline] + fn from(s: &'a [u8; N]) -> Self { + ByteStr::from_bytes(s) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From<&'a [u8]> for &'a ByteStr { + #[inline] + fn from(s: &'a [u8]) -> Self { + ByteStr::from_bytes(s) + } +} + +// Omitted due to slice-from-array-issue-113238: +// +// #[unstable(feature = "bstr", issue = "134915")] +// impl<'a> From<&'a ByteStr> for &'a [u8] { +// #[inline] +// fn from(s: &'a ByteStr) -> Self { +// &s.0 +// } +// } +// +// #[unstable(feature = "bstr", issue = "134915")] +// impl<'a> From<&'a mut ByteStr> for &'a mut [u8] { +// #[inline] +// fn from(s: &'a mut ByteStr) -> Self { +// &mut s.0 +// } +// } + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> From<&'a str> for &'a ByteStr { + #[inline] + fn from(s: &'a str) -> Self { + ByteStr::from_bytes(s.as_bytes()) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl hash::Hash for ByteStr { + #[inline] + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index for ByteStr { + type Output = u8; + + #[inline] + fn index(&self, idx: usize) -> &u8 { + &self.0[idx] + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index for ByteStr { + type Output = ByteStr; + + #[inline] + fn index(&self, _: RangeFull) -> &ByteStr { + self + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteStr { + type Output = ByteStr; + + #[inline] + fn index(&self, r: Range) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteStr { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeInclusive) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteStr { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeFrom) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteStr { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeTo) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Index> for ByteStr { + type Output = ByteStr; + + #[inline] + fn index(&self, r: RangeToInclusive) -> &ByteStr { + ByteStr::from_bytes(&self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut for ByteStr { + #[inline] + fn index_mut(&mut self, idx: usize) -> &mut u8 { + &mut self.0[idx] + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut for ByteStr { + #[inline] + fn index_mut(&mut self, _: RangeFull) -> &mut ByteStr { + self + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteStr { + #[inline] + fn index_mut(&mut self, r: Range) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteStr { + #[inline] + fn index_mut(&mut self, r: RangeInclusive) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteStr { + #[inline] + fn index_mut(&mut self, r: RangeFrom) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteStr { + #[inline] + fn index_mut(&mut self, r: RangeTo) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl IndexMut> for ByteStr { + #[inline] + fn index_mut(&mut self, r: RangeToInclusive) -> &mut ByteStr { + ByteStr::from_bytes_mut(&mut self.0[r]) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl Eq for ByteStr {} + +#[unstable(feature = "bstr", issue = "134915")] +impl PartialEq for ByteStr { + #[inline] + fn eq(&self, other: &ByteStr) -> bool { + &self.0 == &other.0 + } +} + +#[doc(hidden)] +#[macro_export] +#[unstable(feature = "bstr_internals", issue = "none")] +macro_rules! impl_partial_eq { + ($lhs:ty, $rhs:ty) => { + #[allow(unused_lifetimes)] + impl<'a> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + let other: &[u8] = other.as_ref(); + PartialEq::eq(self.as_bytes(), other) + } + } + + #[allow(unused_lifetimes)] + impl<'a> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + let this: &[u8] = self.as_ref(); + PartialEq::eq(this, other.as_bytes()) + } + } + }; +} + +#[doc(hidden)] +#[unstable(feature = "bstr_internals", issue = "none")] +pub use impl_partial_eq; + +#[doc(hidden)] +#[macro_export] +#[unstable(feature = "bstr_internals", issue = "none")] +macro_rules! impl_partial_eq_ord { + ($lhs:ty, $rhs:ty) => { + $crate::bstr::impl_partial_eq!($lhs, $rhs); + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl<'a> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option { + let other: &[u8] = other.as_ref(); + PartialOrd::partial_cmp(self.as_bytes(), other) + } + } + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl<'a> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option { + let this: &[u8] = self.as_ref(); + PartialOrd::partial_cmp(this, other.as_bytes()) + } + } + }; +} + +#[doc(hidden)] +#[unstable(feature = "bstr_internals", issue = "none")] +pub use impl_partial_eq_ord; + +#[doc(hidden)] +#[macro_export] +#[unstable(feature = "bstr_internals", issue = "none")] +macro_rules! impl_partial_eq_ord_n { + ($lhs:ty, $rhs:ty) => { + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + let other: &[u8] = other.as_ref(); + PartialEq::eq(self.as_bytes(), other) + } + } + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + let this: &[u8] = self.as_ref(); + PartialEq::eq(this, other.as_bytes()) + } + } + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option { + let other: &[u8] = other.as_ref(); + PartialOrd::partial_cmp(self.as_bytes(), other) + } + } + + #[allow(unused_lifetimes)] + #[unstable(feature = "bstr", issue = "134915")] + impl PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option { + let this: &[u8] = self.as_ref(); + PartialOrd::partial_cmp(this, other.as_bytes()) + } + } + }; +} + +#[doc(hidden)] +#[unstable(feature = "bstr_internals", issue = "none")] +pub use impl_partial_eq_ord_n; + +// PartialOrd with `[u8]` omitted to avoid inference failures +impl_partial_eq!(ByteStr, [u8]); +// PartialOrd with `&[u8]` omitted to avoid inference failures +impl_partial_eq!(ByteStr, &[u8]); +// PartialOrd with `str` omitted to avoid inference failures +impl_partial_eq!(ByteStr, str); +// PartialOrd with `&str` omitted to avoid inference failures +impl_partial_eq!(ByteStr, &str); +impl_partial_eq_ord_n!(ByteStr, [u8; N]); +impl_partial_eq_ord_n!(ByteStr, &[u8; N]); + +#[unstable(feature = "bstr", issue = "134915")] +impl Ord for ByteStr { + #[inline] + fn cmp(&self, other: &ByteStr) -> Ordering { + Ord::cmp(&self.0, &other.0) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl PartialOrd for ByteStr { + #[inline] + fn partial_cmp(&self, other: &ByteStr) -> Option { + PartialOrd::partial_cmp(&self.0, &other.0) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> TryFrom<&'a ByteStr> for &'a str { + type Error = crate::str::Utf8Error; + + #[inline] + fn try_from(s: &'a ByteStr) -> Result { + crate::str::from_utf8(&s.0) + } +} + +#[unstable(feature = "bstr", issue = "134915")] +impl<'a> TryFrom<&'a mut ByteStr> for &'a mut str { + type Error = crate::str::Utf8Error; + + #[inline] + fn try_from(s: &'a mut ByteStr) -> Result { + crate::str::from_utf8_mut(&mut s.0) + } +} diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index e31957a1fa703..d886b738a5831 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -111,6 +111,8 @@ #![feature(array_ptr_get)] #![feature(asm_experimental_arch)] #![feature(bigint_helper_methods)] +#![feature(bstr)] +#![feature(bstr_internals)] #![feature(const_carrying_mul_add)] #![feature(const_eval_select)] #![feature(core_intrinsics)] @@ -336,6 +338,8 @@ pub mod ascii; pub mod asserting; #[unstable(feature = "async_iterator", issue = "79024")] pub mod async_iter; +#[unstable(feature = "bstr", issue = "134915")] +pub mod bstr; pub mod cell; pub mod char; pub mod ffi; diff --git a/library/core/tests/bstr.rs b/library/core/tests/bstr.rs new file mode 100644 index 0000000000000..5fecd0a4084c7 --- /dev/null +++ b/library/core/tests/bstr.rs @@ -0,0 +1,54 @@ +#![feature(bstr)] + +use core::ByteStr; + +#[test] +fn test_debug() { + assert_eq!( + r#""\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x11\x12\r\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \x7f\x80\x81\xfe\xff""#, + format!("{:?}", ByteStr::new(b"\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x11\x12\r\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \x7f\x80\x81\xfe\xff")), + ); +} + +#[test] +fn test_display() { + let b1 = ByteStr::new("abc"); + let b2 = ByteStr::new(b"\xf0\x28\x8c\xbc"); + + assert_eq!(&format!("{b1}"), "abc"); + assert_eq!(&format!("{b2}"), "�(��"); + + assert_eq!(&format!("{b1:<7}!"), "abc !"); + assert_eq!(&format!("{b1:>7}!"), " abc!"); + assert_eq!(&format!("{b1:^7}!"), " abc !"); + assert_eq!(&format!("{b1:^6}!"), " abc !"); + assert_eq!(&format!("{b1:-<7}!"), "abc----!"); + assert_eq!(&format!("{b1:->7}!"), "----abc!"); + assert_eq!(&format!("{b1:-^7}!"), "--abc--!"); + assert_eq!(&format!("{b1:-^6}!"), "-abc--!"); + + assert_eq!(&format!("{b2:<7}!"), "�(�� !"); + assert_eq!(&format!("{b2:>7}!"), " �(��!"); + assert_eq!(&format!("{b2:^7}!"), " �(�� !"); + assert_eq!(&format!("{b2:^6}!"), " �(�� !"); + assert_eq!(&format!("{b2:-<7}!"), "�(��---!"); + assert_eq!(&format!("{b2:->7}!"), "---�(��!"); + assert_eq!(&format!("{b2:-^7}!"), "-�(��--!"); + assert_eq!(&format!("{b2:-^6}!"), "-�(��-!"); + + assert_eq!(&format!("{b1:<2}!"), "abc!"); + assert_eq!(&format!("{b1:>2}!"), "abc!"); + assert_eq!(&format!("{b1:^2}!"), "abc!"); + assert_eq!(&format!("{b1:-<2}!"), "abc!"); + assert_eq!(&format!("{b1:->2}!"), "abc!"); + assert_eq!(&format!("{b1:-^2}!"), "abc!"); + + assert_eq!(&format!("{b2:<3}!"), "�(��!"); + assert_eq!(&format!("{b2:>3}!"), "�(��!"); + assert_eq!(&format!("{b2:^3}!"), "�(��!"); + assert_eq!(&format!("{b2:^2}!"), "�(��!"); + assert_eq!(&format!("{b2:-<3}!"), "�(��!"); + assert_eq!(&format!("{b2:->3}!"), "�(��!"); + assert_eq!(&format!("{b2:-^3}!"), "�(��!"); + assert_eq!(&format!("{b2:-^2}!"), "�(��!"); +} diff --git a/library/std/src/bstr.rs b/library/std/src/bstr.rs new file mode 100644 index 0000000000000..dd49177162833 --- /dev/null +++ b/library/std/src/bstr.rs @@ -0,0 +1,4 @@ +//! The `ByteStr` and `ByteString` types and trait implementations. + +#[unstable(feature = "bstr", issue = "134915")] +pub use alloc::bstr::{ByteStr, ByteString}; diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 2f8f5c5c58180..7560bab6d5dbc 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -320,6 +320,8 @@ // Library features (core): // tidy-alphabetical-start #![feature(array_chunks)] +#![feature(bstr)] +#![feature(bstr_internals)] #![feature(c_str_module)] #![feature(char_internals)] #![feature(clone_to_uninit)] @@ -580,6 +582,8 @@ pub mod f64; pub mod thread; pub mod ascii; pub mod backtrace; +#[unstable(feature = "bstr", issue = "134915")] +pub mod bstr; pub mod collections; pub mod env; pub mod error;