Skip to content

Commit 9a6dab6

Browse files
committed
Auto merge of #60659 - nnethercote:tweak-Symbol-and-InternedString, r=<try>
Tweak `Symbol` and `InternedString` Some minor improvements to speed and code cleanliness. r? @Zoxc
2 parents 9f83961 + 3e5e8d3 commit 9a6dab6

File tree

2 files changed

+57
-29
lines changed

2 files changed

+57
-29
lines changed

src/librustc_codegen_llvm/debuginfo/metadata.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ impl TypeMap<'ll, 'tcx> {
170170
// the id is unknown.
171171
fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> &str {
172172
let UniqueTypeId(interner_key) = unique_type_id;
173-
self.unique_id_interner.get(interner_key)
173+
self.unique_id_interner.symbol_str(interner_key)
174174
}
175175

176176
// Get the UniqueTypeId for the given type. If the UniqueTypeId for the given
@@ -226,7 +226,7 @@ impl TypeMap<'ll, 'tcx> {
226226
let variant_part_type_id = format!("{}_variant_part",
227227
self.get_unique_type_id_as_string(enum_type_id));
228228
let interner_key = self.unique_id_interner.intern(&variant_part_type_id);
229-
self.unique_id_interner.get(interner_key)
229+
self.unique_id_interner.symbol_str(interner_key)
230230
}
231231
}
232232

src/libsyntax_pos/symbol.rs

+55-27
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,18 @@ impl Decodable for Ident {
344344
}
345345
}
346346

347-
/// A symbol is an interned or gensymed string. The use of `newtype_index!` means
348-
/// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index!` reserves
349-
/// the last 256 values for tagging purposes.
347+
/// A symbol is an interned or gensymed string. (A gensym is a special kind of
348+
/// symbol that is never equal to any other symbol. E.g.:
349+
/// - `Symbol::intern("x") == Symbol::intern("x")`
350+
/// - `Symbol::gensym("x") != Symbol::intern("x")`
351+
/// - `Symbol::gensym("x") != Symbol::gensym("x")`
352+
///
353+
/// Gensyms are useful when creating new identifiers that must not match any
354+
/// existing identifiers during macro expansion and syntax desugaring.)
355+
///
356+
/// The use of `newtype_index!` means that `Option<Symbol>` only takes up 4
357+
/// bytes, because `newtype_index!` reserves the last 256 values for tagging
358+
/// purposes.
350359
///
351360
/// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
352361
/// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
@@ -367,10 +376,6 @@ impl Symbol {
367376
with_interner(|interner| interner.intern(string))
368377
}
369378

370-
pub fn interned(self) -> Self {
371-
with_interner(|interner| interner.interned(self))
372-
}
373-
374379
/// Gensyms a new `usize`, using the current interner.
375380
pub fn gensym(string: &str) -> Self {
376381
with_interner(|interner| interner.gensym(string))
@@ -387,7 +392,7 @@ impl Symbol {
387392
pub fn as_str(self) -> LocalInternedString {
388393
with_interner(|interner| unsafe {
389394
LocalInternedString {
390-
string: std::mem::transmute::<&str, &str>(interner.get(self))
395+
string: std::mem::transmute::<&str, &str>(interner.symbol_or_gensym_str(self))
391396
}
392397
})
393398
}
@@ -488,11 +493,11 @@ impl Interner {
488493
name
489494
}
490495

491-
pub fn interned(&self, symbol: Symbol) -> Symbol {
496+
fn interned(&self, symbol: Symbol) -> Symbol {
492497
if (symbol.0.as_usize()) < self.strings.len() {
493498
symbol
494499
} else {
495-
self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
500+
self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]
496501
}
497502
}
498503

@@ -510,10 +515,19 @@ impl Interner {
510515
symbol.0.as_usize() >= self.strings.len()
511516
}
512517

513-
pub fn get(&self, symbol: Symbol) -> &str {
518+
/// Get the chars of a normal (non-gensym) symbol.
519+
pub fn symbol_str(&self, symbol: Symbol) -> &str {
520+
self.strings[symbol.0.as_usize()]
521+
}
522+
523+
/// Get the chars of a normal or gensym symbol.
524+
fn symbol_or_gensym_str(&self, symbol: Symbol) -> &str {
514525
match self.strings.get(symbol.0.as_usize()) {
515526
Some(string) => string,
516-
None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
527+
None => {
528+
let symbol = self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize];
529+
self.strings[symbol.0.as_usize()]
530+
}
517531
}
518532
}
519533
}
@@ -611,11 +625,17 @@ fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
611625
GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
612626
}
613627

614-
/// Represents a string stored in the interner. Because the interner outlives any thread
615-
/// which uses this type, we can safely treat `string` which points to interner data,
616-
/// as an immortal string, as long as this type never crosses between threads.
617-
// FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
618-
// by creating a new thread right after constructing the interner.
628+
/// An alternative to `Symbol` and `LocalInternedString`, useful when the chars
629+
/// within the symbol need to be accessed. It is best used for temporary
630+
/// values.
631+
///
632+
/// Because the interner outlives any thread which uses this type, we can
633+
/// safely treat `string` which points to interner data, as an immortal string,
634+
/// as long as this type never crosses between threads.
635+
//
636+
// FIXME: ensure that the interner outlives any thread which uses
637+
// `LocalInternedString`, by creating a new thread right after constructing the
638+
// interner.
619639
#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
620640
pub struct LocalInternedString {
621641
string: &'static str,
@@ -708,7 +728,12 @@ impl Encodable for LocalInternedString {
708728
}
709729
}
710730

711-
/// Represents a string stored in the string interner.
731+
/// A thin wrapper around `Symbol`. It has two main differences to `Symbol`.
732+
/// - Its implementations of `Hash`, `PartialOrd` and `Ord` work with the
733+
/// string chars rather than the symbol integer. This is useful when
734+
/// hash stability is required across compile sessions, or a guaranteed sort
735+
/// ordering is required.
736+
/// - It is guaranteed to not be a gensym symbol.
712737
#[derive(Clone, Copy, Eq)]
713738
pub struct InternedString {
714739
symbol: Symbol,
@@ -717,14 +742,23 @@ pub struct InternedString {
717742
impl InternedString {
718743
pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
719744
let str = with_interner(|interner| {
720-
interner.get(self.symbol) as *const str
745+
interner.symbol_str(self.symbol) as *const str
721746
});
722747
// This is safe because the interner keeps string alive until it is dropped.
723748
// We can access it because we know the interner is still alive since we use a
724749
// scoped thread local to access it, and it was alive at the beginning of this scope
725750
unsafe { f(&*str) }
726751
}
727752

753+
pub fn with2<F: FnOnce(&str, &str) -> R, R>(self, other: &InternedString, f: F) -> R {
754+
let (self_str, other_str) = with_interner(|interner| {
755+
(interner.symbol_str(self.symbol) as *const str,
756+
interner.symbol_str(other.symbol) as *const str)
757+
});
758+
// This is safe for the same reason that `with` is safe.
759+
unsafe { f(&*self_str, &*other_str) }
760+
}
761+
728762
pub fn as_symbol(self) -> Symbol {
729763
self.symbol
730764
}
@@ -745,7 +779,7 @@ impl PartialOrd<InternedString> for InternedString {
745779
if self.symbol == other.symbol {
746780
return Some(Ordering::Equal);
747781
}
748-
self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
782+
self.with2(other, |self_str, other_str| self_str.partial_cmp(other_str))
749783
}
750784
}
751785

@@ -754,7 +788,7 @@ impl Ord for InternedString {
754788
if self.symbol == other.symbol {
755789
return Ordering::Equal;
756790
}
757-
self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
791+
self.with2(other, |self_str, other_str| self_str.cmp(other_str))
758792
}
759793
}
760794

@@ -794,12 +828,6 @@ impl<'a> PartialEq<InternedString> for &'a String {
794828
}
795829
}
796830

797-
impl std::convert::From<InternedString> for String {
798-
fn from(val: InternedString) -> String {
799-
val.as_symbol().to_string()
800-
}
801-
}
802-
803831
impl fmt::Debug for InternedString {
804832
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
805833
self.with(|str| fmt::Debug::fmt(&str, f))

0 commit comments

Comments
 (0)