@@ -6,7 +6,7 @@ use std::str::FromStr;
6
6
7
7
use nom:: branch:: alt;
8
8
use nom:: bytes:: streaming:: { is_a, is_not, tag, take_until, take_while1, take_while_m_n} ;
9
- use nom:: character:: streaming:: { char, digit1, one_of, satisfy} ;
9
+ use nom:: character:: streaming:: { alphanumeric1 , char, digit1, one_of, satisfy} ;
10
10
use nom:: combinator:: { consumed, fail, map, not, opt, peek, recognize, success, value} ;
11
11
use nom:: error:: { ErrorKind , ParseError } ;
12
12
use nom:: multi:: { many0_count, many1_count} ;
@@ -17,8 +17,8 @@ use crate::lazy::encoding::TextEncoding;
17
17
use crate :: lazy:: raw_stream_item:: RawStreamItem ;
18
18
use crate :: lazy:: text:: encoded_value:: EncodedTextValue ;
19
19
use crate :: lazy:: text:: matched:: {
20
- MatchedDecimal , MatchedFloat , MatchedHoursAndMinutes , MatchedInt , MatchedString , MatchedSymbol ,
21
- MatchedTimestamp , MatchedTimestampOffset , MatchedValue ,
20
+ MatchedBlob , MatchedDecimal , MatchedFloat , MatchedHoursAndMinutes , MatchedInt , MatchedString ,
21
+ MatchedSymbol , MatchedTimestamp , MatchedTimestampOffset , MatchedValue ,
22
22
} ;
23
23
use crate :: lazy:: text:: parse_result:: { InvalidInputError , IonParseError } ;
24
24
use crate :: lazy:: text:: parse_result:: { IonMatchResult , IonParseResult } ;
@@ -497,6 +497,12 @@ impl<'data> TextBufferView<'data> {
497
497
)
498
498
} ,
499
499
) ,
500
+ map (
501
+ match_and_length ( Self :: match_blob) ,
502
+ |( matched_blob, length) | {
503
+ EncodedTextValue :: new ( MatchedValue :: Blob ( matched_blob) , self . offset ( ) , length)
504
+ } ,
505
+ ) ,
500
506
map (
501
507
match_and_length ( Self :: match_list) ,
502
508
|( matched_list, length) | {
@@ -1337,6 +1343,28 @@ impl<'data> TextBufferView<'data> {
1337
1343
recognize ( pair ( Self :: match_any_digit, Self :: match_any_digit) ) ,
1338
1344
) ( self )
1339
1345
}
1346
+
1347
+ /// Matches a complete blob, including the opening `{{` and closing `}}`.
1348
+ pub fn match_blob ( self ) -> IonParseResult < ' data , MatchedBlob > {
1349
+ delimited (
1350
+ tag ( "{{" ) ,
1351
+ // Only whitespace (not comments) can appear within the blob
1352
+ preceded ( Self :: match_optional_whitespace, Self :: match_base64_content) ,
1353
+ preceded ( Self :: match_optional_whitespace, tag ( "}}" ) ) ,
1354
+ )
1355
+ . map ( |base64_data| {
1356
+ MatchedBlob :: new ( base64_data. offset ( ) - self . offset ( ) , base64_data. len ( ) )
1357
+ } )
1358
+ . parse ( self )
1359
+ }
1360
+
1361
+ /// Matches the base64 content within a blob.
1362
+ fn match_base64_content ( self ) -> IonMatchResult < ' data > {
1363
+ recognize ( terminated (
1364
+ many1_count ( alt ( ( alphanumeric1, is_a ( "+/" ) ) ) ) ,
1365
+ opt ( alt ( ( tag ( "==" ) , tag ( "=" ) ) ) ) ,
1366
+ ) ) ( self )
1367
+ }
1340
1368
}
1341
1369
1342
1370
// === nom trait implementations ===
@@ -2002,4 +2030,50 @@ mod tests {
2002
2030
mismatch_sexp ( input) ;
2003
2031
}
2004
2032
}
2033
+
2034
#[test]
fn test_match_blob() {
    // Runs the blob matcher over `text` and requires that it matches.
    fn expect_blob_match(text: &str) {
        MatchTest::new(text).expect_match(match_length(TextBufferView::match_blob));
    }
    // Runs the blob matcher over `text` and requires that it does not match.
    fn expect_blob_mismatch(text: &str) {
        MatchTest::new(text).expect_mismatch(match_length(TextBufferView::match_blob));
    }

    // Blobs whose payloads are the base64 encodings of UTF-8 strings.
    let accepted: &[&str] = &[
        // "hello"
        "{{aGVsbG8=}}",
        "{{ aGVsbG8=}}",
        "{{aGVsbG8= }}",
        "{{\t aGVsbG8=\n \n }}",
        // "hello!"
        "{{aGVsbG8h}}",
        "{{ aGVsbG8h}}",
        "{{aGVsbG8h }}",
        "{{ aGVsbG8h }}",
        // "razzle dazzle root beer"
        "{{cmF6emxlIGRhenpsZSByb290IGJlZXI=}}",
        "{{\n cmF6emxlIGRhenpsZSByb290IGJlZXI=\r }}",
    ];
    accepted.iter().copied().for_each(expect_blob_match);

    let rejected: &[&str] = &[
        // `$` is not a legal base64 character
        "{{$aGVsbG8=}}",
        // a comment appears between the braces
        r#"{{
// Here's the data:
aGVsbG8=
}}"#,
        // padding appears before the data
        "{{=aGVsbG8}}",
        // more padding than is allowed
        "{{aGVsbG8===}}",
    ];
    rejected.iter().copied().for_each(expect_blob_mismatch);
}
2005
2079
}
0 commit comments