@@ -706,7 +706,9 @@ impl<'a> Tokenizer<'a> {
706
706
// BigQuery uses b or B for byte string literal
707
707
b @ 'B' | b @ 'b' if dialect_of ! ( self is BigQueryDialect | GenericDialect ) => {
708
708
chars. next ( ) ; // consume
709
- match chars. peek ( ) {
709
+ match peeking_skip_whitespace_take_if ( chars, |ch| {
710
+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
711
+ } ) {
710
712
Some ( '\'' ) => {
711
713
if self . dialect . supports_triple_quoted_string ( ) {
712
714
return self
@@ -745,7 +747,9 @@ impl<'a> Tokenizer<'a> {
745
747
// BigQuery uses r or R for raw string literal
746
748
b @ 'R' | b @ 'r' if dialect_of ! ( self is BigQueryDialect | GenericDialect ) => {
747
749
chars. next ( ) ; // consume
748
- match chars. peek ( ) {
750
+ match peeking_skip_whitespace_take_if ( chars, |ch| {
751
+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
752
+ } ) {
749
753
Some ( '\'' ) => self
750
754
. tokenize_single_or_triple_quoted_string :: < fn ( String ) -> Token > (
751
755
chars,
@@ -772,12 +776,19 @@ impl<'a> Tokenizer<'a> {
772
776
// Redshift uses lower case n for national string literal
773
777
n @ 'N' | n @ 'n' => {
774
778
chars. next ( ) ; // consume, to check the next char
775
- match chars. peek ( ) {
779
+ match peeking_skip_whitespace_take_if ( chars, |ch| {
780
+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
781
+ } ) {
776
782
Some ( '\'' ) => {
777
783
// N'...' - a <national character string literal>
778
784
let s = self . tokenize_single_quoted_string ( chars, '\'' , true ) ?;
779
785
Ok ( Some ( Token :: NationalStringLiteral ( s) ) )
780
786
}
787
+ Some ( '\"' ) => {
788
+ // N"..." - a <national character string literal>
789
+ let s = self . tokenize_single_quoted_string ( chars, '\"' , true ) ?;
790
+ Ok ( Some ( Token :: NationalStringLiteral ( s) ) )
791
+ }
781
792
_ => {
782
793
// regular identifier starting with an "N"
783
794
let s = self . tokenize_word ( n, chars) ;
@@ -789,7 +800,7 @@ impl<'a> Tokenizer<'a> {
789
800
x @ 'e' | x @ 'E' => {
790
801
let starting_loc = chars. location ( ) ;
791
802
chars. next ( ) ; // consume, to check the next char
792
- match chars. peek ( ) {
803
+ match peeking_skip_whitespace_take_if ( chars, |ch| matches ! ( ch , '\'' ) ) {
793
804
Some ( '\'' ) => {
794
805
let s =
795
806
self . tokenize_escaped_single_quoted_string ( starting_loc, chars) ?;
@@ -823,12 +834,19 @@ impl<'a> Tokenizer<'a> {
823
834
// string, but PostgreSQL, at least, allows a lowercase 'x' too.
824
835
x @ 'x' | x @ 'X' => {
825
836
chars. next ( ) ; // consume, to check the next char
826
- match chars. peek ( ) {
837
+ match peeking_skip_whitespace_take_if ( chars, |ch| {
838
+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
839
+ } ) {
827
840
Some ( '\'' ) => {
828
841
// X'...' - a <binary string literal>
829
842
let s = self . tokenize_single_quoted_string ( chars, '\'' , true ) ?;
830
843
Ok ( Some ( Token :: HexStringLiteral ( s) ) )
831
844
}
845
+ Some ( '\"' ) => {
846
+ // X"..." - a <binary string literal>
847
+ let s = self . tokenize_single_quoted_string ( chars, '\"' , true ) ?;
848
+ Ok ( Some ( Token :: HexStringLiteral ( s) ) )
849
+ }
832
850
_ => {
833
851
// regular identifier starting with an "X"
834
852
let s = self . tokenize_word ( x, chars) ;
@@ -1674,6 +1692,47 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool
1674
1692
s
1675
1693
}
1676
1694
1695
+ /// Peek ahead in a clone of `self.peekable`, skipping whitespace,
1696
+ /// until `predicate` returns `true` or a non-whitespace character is encountered.
1697
+ /// If a character matching the predicate is found:
1698
+ /// - Advance the original iterator by the number of whitespace characters skipped
1699
+ /// - Return the peeked character matching the predicate
1700
+ ///
1701
+ /// If a non-whitespace character not matching the predicate is encountered, or EOF is reached,
1702
+ /// return `self.peek()` without advancing the iterator.
1703
+ ///
1704
+ /// Note: This function may advance the original iterator if a match is found after skipping whitespace.
1705
+ fn peeking_skip_whitespace_take_if (
1706
+ chars : & mut State ,
1707
+ mut predicate : impl FnMut ( char ) -> bool ,
1708
+ ) -> Option < char > {
1709
+ // Check if the next character is a match to avoid unnecessary cloning.
1710
+ if let Some ( & ch) = chars. peek ( ) {
1711
+ if predicate ( ch) {
1712
+ return Some ( ch) ;
1713
+ }
1714
+ }
1715
+
1716
+ let mut chars_clone = chars. peekable . clone ( ) ;
1717
+ let mut next_count = 0 ;
1718
+ loop {
1719
+ match chars_clone. peek ( ) {
1720
+ Some ( & ch) if predicate ( ch) => {
1721
+ // Advance the original iterator
1722
+ for _ in 0 ..next_count {
1723
+ chars. next ( ) ;
1724
+ }
1725
+ return chars. peek ( ) . copied ( ) ;
1726
+ }
1727
+ Some ( ch) if ch. is_whitespace ( ) || matches ! ( ch, ' ' | '\t' | '\n' | '\r' ) => {
1728
+ next_count += 1 ;
1729
+ chars_clone. next ( ) ;
1730
+ }
1731
+ _ => return chars. peek ( ) . copied ( ) ,
1732
+ }
1733
+ }
1734
+ }
1735
+
1677
1736
fn unescape_single_quoted_string ( chars : & mut State < ' _ > ) -> Option < String > {
1678
1737
Unescape :: new ( chars) . unescape ( )
1679
1738
}
0 commit comments