@@ -344,9 +344,18 @@ impl Decodable for Ident {
344
344
}
345
345
}
346
346
347
- /// A symbol is an interned or gensymed string. The use of `newtype_index!` means
348
- /// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index!` reserves
349
- /// the last 256 values for tagging purposes.
347
+ /// A symbol is an interned or gensymed string. (A gensym is a special kind of
348
+ /// symbol that is never equal to any other symbol. E.g.:
349
+ /// - `Symbol::intern("x") == Symbol::intern("x")`
350
+ /// - `Symbol::gensym("x") != Symbol::intern("x")`
351
+ /// - `Symbol::gensym("x") != Symbol::gensym("x")`
352
+ ///
353
+ /// Gensyms are useful when creating new identifiers that must not match any
354
+ /// existing identifiers during macro expansion and syntax desugaring.)
355
+ ///
356
+ /// The use of `newtype_index!` means that `Option<Symbol>` only takes up 4
357
+ /// bytes, because `newtype_index!` reserves the last 256 values for tagging
358
+ /// purposes.
350
359
///
351
360
/// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
352
361
/// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
@@ -367,10 +376,6 @@ impl Symbol {
367
376
with_interner ( |interner| interner. intern ( string) )
368
377
}
369
378
370
- pub fn interned ( self ) -> Self {
371
- with_interner ( |interner| interner. interned ( self ) )
372
- }
373
-
374
379
/// Gensyms a new `usize`, using the current interner.
375
380
pub fn gensym ( string : & str ) -> Self {
376
381
with_interner ( |interner| interner. gensym ( string) )
@@ -387,7 +392,7 @@ impl Symbol {
387
392
pub fn as_str ( self ) -> LocalInternedString {
388
393
with_interner ( |interner| unsafe {
389
394
LocalInternedString {
390
- string : std:: mem:: transmute :: < & str , & str > ( interner. get ( self ) )
395
+ string : std:: mem:: transmute :: < & str , & str > ( interner. symbol_or_gensym_str ( self ) )
391
396
}
392
397
} )
393
398
}
@@ -488,11 +493,11 @@ impl Interner {
488
493
name
489
494
}
490
495
491
- pub fn interned ( & self , symbol : Symbol ) -> Symbol {
496
+ fn interned ( & self , symbol : Symbol ) -> Symbol {
492
497
if ( symbol. 0 . as_usize ( ) ) < self . strings . len ( ) {
493
498
symbol
494
499
} else {
495
- self . interned ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] )
500
+ self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ]
496
501
}
497
502
}
498
503
@@ -510,10 +515,19 @@ impl Interner {
510
515
symbol. 0 . as_usize ( ) >= self . strings . len ( )
511
516
}
512
517
513
- pub fn get ( & self , symbol : Symbol ) -> & str {
518
+ /// Get the chars of a normal (non-gensym) symbol.
519
+ pub fn symbol_str ( & self , symbol : Symbol ) -> & str {
520
+ self . strings [ symbol. 0 . as_usize ( ) ]
521
+ }
522
+
523
+ /// Get the chars of a normal or gensym symbol.
524
+ fn symbol_or_gensym_str ( & self , symbol : Symbol ) -> & str {
514
525
match self . strings . get ( symbol. 0 . as_usize ( ) ) {
515
526
Some ( string) => string,
516
- None => self . get ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] ) ,
527
+ None => {
528
+ let symbol = self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] ;
529
+ self . strings [ symbol. 0 . as_usize ( ) ]
530
+ }
517
531
}
518
532
}
519
533
}
@@ -611,11 +625,17 @@ fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
611
625
GLOBALS . with ( |globals| f ( & mut * globals. symbol_interner . lock ( ) ) )
612
626
}
613
627
614
- /// Represents a string stored in the interner. Because the interner outlives any thread
615
- /// which uses this type, we can safely treat `string` which points to interner data,
616
- /// as an immortal string, as long as this type never crosses between threads.
617
- // FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
618
- // by creating a new thread right after constructing the interner.
628
+ /// An alternative to `Symbol` and `LocalInternedString`, useful when the chars
629
+ /// within the symbol need to be accessed. It is best used for temporary
630
+ /// values.
631
+ ///
632
+ /// Because the interner outlives any thread which uses this type, we can
633
+ /// safely treat `string` which points to interner data, as an immortal string,
634
+ /// as long as this type never crosses between threads.
635
+ //
636
+ // FIXME: ensure that the interner outlives any thread which uses
637
+ // `LocalInternedString`, by creating a new thread right after constructing the
638
+ // interner.
619
639
#[ derive( Clone , Copy , Hash , PartialOrd , Eq , Ord ) ]
620
640
pub struct LocalInternedString {
621
641
string : & ' static str ,
@@ -708,7 +728,12 @@ impl Encodable for LocalInternedString {
708
728
}
709
729
}
710
730
711
- /// Represents a string stored in the string interner.
731
+ /// A thin wrapper around `Symbol`. It has two main differences to `Symbol`.
732
+ /// - Its implementations of `Hash`, `PartialOrd` and `Ord` work with the
733
+ /// string chars rather than the symbol integer. This is useful when
734
+ /// hash stability is required across compile sessions, or a guaranteed sort
735
+ /// ordering is required.
736
+ /// - It is guaranteed to not be a gensym symbol.
712
737
#[ derive( Clone , Copy , Eq ) ]
713
738
pub struct InternedString {
714
739
symbol : Symbol ,
@@ -717,14 +742,23 @@ pub struct InternedString {
717
742
impl InternedString {
718
743
pub fn with < F : FnOnce ( & str ) -> R , R > ( self , f : F ) -> R {
719
744
let str = with_interner ( |interner| {
720
- interner. get ( self . symbol ) as * const str
745
+ interner. symbol_str ( self . symbol ) as * const str
721
746
} ) ;
722
747
// This is safe because the interner keeps string alive until it is dropped.
723
748
// We can access it because we know the interner is still alive since we use a
724
749
// scoped thread local to access it, and it was alive at the beginning of this scope
725
750
unsafe { f ( & * str) }
726
751
}
727
752
753
+ pub fn with2 < F : FnOnce ( & str , & str ) -> R , R > ( self , other : & InternedString , f : F ) -> R {
754
+ let ( self_str, other_str) = with_interner ( |interner| {
755
+ ( interner. symbol_str ( self . symbol ) as * const str ,
756
+ interner. symbol_str ( other. symbol ) as * const str )
757
+ } ) ;
758
+ // This is safe for the same reason that `with` is safe.
759
+ unsafe { f ( & * self_str, & * other_str) }
760
+ }
761
+
728
762
pub fn as_symbol ( self ) -> Symbol {
729
763
self . symbol
730
764
}
@@ -745,7 +779,7 @@ impl PartialOrd<InternedString> for InternedString {
745
779
if self . symbol == other. symbol {
746
780
return Some ( Ordering :: Equal ) ;
747
781
}
748
- self . with ( |self_str| other . with ( | other_str| self_str. partial_cmp ( other_str) ) )
782
+ self . with2 ( other , |self_str, other_str| self_str. partial_cmp ( other_str) )
749
783
}
750
784
}
751
785
@@ -754,7 +788,7 @@ impl Ord for InternedString {
754
788
if self . symbol == other. symbol {
755
789
return Ordering :: Equal ;
756
790
}
757
- self . with ( |self_str| other . with ( | other_str| self_str. cmp ( & other_str) ) )
791
+ self . with2 ( other , |self_str, other_str| self_str. cmp ( other_str) )
758
792
}
759
793
}
760
794
@@ -794,12 +828,6 @@ impl<'a> PartialEq<InternedString> for &'a String {
794
828
}
795
829
}
796
830
797
- impl std:: convert:: From < InternedString > for String {
798
- fn from ( val : InternedString ) -> String {
799
- val. as_symbol ( ) . to_string ( )
800
- }
801
- }
802
-
803
831
impl fmt:: Debug for InternedString {
804
832
fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
805
833
self . with ( |str| fmt:: Debug :: fmt ( & str, f) )
0 commit comments