@@ -6,7 +6,7 @@ use std::str::FromStr;
6
6
7
7
use nom:: branch:: alt;
8
8
use nom:: bytes:: streaming:: { is_a, is_not, tag, take_until, take_while1, take_while_m_n} ;
9
- use nom:: character:: streaming:: { char, digit1, one_of, satisfy} ;
9
+ use nom:: character:: streaming:: { alphanumeric1 , char, digit1, one_of, satisfy} ;
10
10
use nom:: combinator:: { consumed, fail, map, not, opt, peek, recognize, success, value} ;
11
11
use nom:: error:: { ErrorKind , ParseError } ;
12
12
use nom:: multi:: { many0_count, many1_count} ;
@@ -17,8 +17,8 @@ use crate::lazy::encoding::TextEncoding;
17
17
use crate :: lazy:: raw_stream_item:: RawStreamItem ;
18
18
use crate :: lazy:: text:: encoded_value:: EncodedTextValue ;
19
19
use crate :: lazy:: text:: matched:: {
20
- MatchedDecimal , MatchedFloat , MatchedHoursAndMinutes , MatchedInt , MatchedString , MatchedSymbol ,
21
- MatchedTimestamp , MatchedTimestampOffset , MatchedValue ,
20
+ MatchedBlob , MatchedDecimal , MatchedFloat , MatchedHoursAndMinutes , MatchedInt , MatchedString ,
21
+ MatchedSymbol , MatchedTimestamp , MatchedTimestampOffset , MatchedValue ,
22
22
} ;
23
23
use crate :: lazy:: text:: parse_result:: { InvalidInputError , IonParseError } ;
24
24
use crate :: lazy:: text:: parse_result:: { IonMatchResult , IonParseResult } ;
@@ -497,6 +497,12 @@ impl<'data> TextBufferView<'data> {
497
497
)
498
498
} ,
499
499
) ,
500
+ map (
501
+ match_and_length ( Self :: match_blob) ,
502
+ |( matched_blob, length) | {
503
+ EncodedTextValue :: new ( MatchedValue :: Blob ( matched_blob) , self . offset ( ) , length)
504
+ } ,
505
+ ) ,
500
506
map (
501
507
match_and_length ( Self :: match_list) ,
502
508
|( matched_list, length) | {
@@ -1341,6 +1347,36 @@ impl<'data> TextBufferView<'data> {
1341
1347
recognize ( pair ( one_of ( "012345" ) , Self :: match_any_digit) ) ,
1342
1348
) ( self )
1343
1349
}
1350
+
1351
+ /// Matches a complete blob, including the opening `{{` and closing `}}`.
1352
+ pub fn match_blob ( self ) -> IonParseResult < ' data , MatchedBlob > {
1353
+ delimited (
1354
+ tag ( "{{" ) ,
1355
+ // Only whitespace (not comments) can appear within the blob
1356
+ recognize ( Self :: match_base64_content) ,
1357
+ preceded ( Self :: match_optional_whitespace, tag ( "}}" ) ) ,
1358
+ )
1359
+ . map ( |base64_data| {
1360
+ MatchedBlob :: new ( base64_data. offset ( ) - self . offset ( ) , base64_data. len ( ) )
1361
+ } )
1362
+ . parse ( self )
1363
+ }
1364
+
1365
+ /// Matches the base64 content within a blob. Ion allows the base64 content to be broken up with
1366
+ /// whitespace, so the matched input region may need to be stripped of whitespace before
1367
+ /// the data can be decoded.
1368
+ fn match_base64_content ( self ) -> IonMatchResult < ' data > {
1369
+ recognize ( terminated (
1370
+ many0_count ( preceded (
1371
+ Self :: match_optional_whitespace,
1372
+ alt ( ( alphanumeric1, is_a ( "+/" ) ) ) ,
1373
+ ) ) ,
1374
+ opt ( preceded (
1375
+ Self :: match_optional_whitespace,
1376
+ alt ( ( tag ( "==" ) , tag ( "=" ) ) ) ,
1377
+ ) ) ,
1378
+ ) ) ( self )
1379
+ }
1344
1380
}
1345
1381
1346
1382
// === nom trait implementations ===
@@ -2008,4 +2044,59 @@ mod tests {
2008
2044
mismatch_sexp ( input) ;
2009
2045
}
2010
2046
}
2047
+
2048
// Exercises `TextBufferView::match_blob` against inputs that are expected to match and
// inputs that are expected to be rejected.
+ #[ test]
2049
+ fn test_match_blob ( ) {
2050
// Helper: expects `match_blob` to match the given input.
+ fn match_blob ( input : & str ) {
2051
+ MatchTest :: new ( input) . expect_match ( match_length ( TextBufferView :: match_blob) ) ;
2052
+ }
2053
// Helper: expects `match_blob` to reject the given input.
+ fn mismatch_blob ( input : & str ) {
2054
+ MatchTest :: new ( input) . expect_mismatch ( match_length ( TextBufferView :: match_blob) ) ;
2055
+ }
2056
+ // Blobs holding the base64 encodings of UTF-8 strings; each group below is labeled with the decoded text.
2057
+ let good_inputs = & [
2058
+ // <empty blobs>, with and without whitespace between the braces
2059
+ "{{}}" ,
2060
+ "{{ }}" ,
2061
+ "{{\n \t }}" ,
2062
+ // hello (one `=` of padding), with whitespace placed before, after, and inside the data
2063
+ "{{aGVsbG8=}}" ,
2064
+ "{{ aGVsbG8=}}" ,
2065
+ "{{aGVsbG8= }}" ,
2066
+ "{{\t aGVsbG8=\n \n }}" ,
2067
+ "{{aG Vs bG 8 =}}" ,
2068
+ r#"{{
2069
+ aG Vs
2070
+ bG 8=
2071
+ }}"# ,
2072
+ // hello! (no padding)
2073
+ "{{aGVsbG8h}}" ,
2074
+ "{{ aGVsbG8h}}" ,
2075
+ "{{aGVsbG8h }}" ,
2076
+ "{{ aGVsbG8h }}" ,
2077
+ // razzle dazzle root beer (a longer payload)
2078
+ "{{cmF6emxlIGRhenpsZSByb290IGJlZXI=}}" ,
2079
+ "{{\n cmF6emxlIGRhenpsZSByb290IGJlZXI=\r }}" ,
2080
+ ] ;
2081
+ for input in good_inputs {
2082
+ match_blob ( input) ;
2083
+ }
2084
+
2085
// Inputs that must NOT be accepted as blobs.
+ let bad_inputs = & [
2086
+ // illegal character $ (not part of the base64 alphabet)
2087
+ "{{$aGVsbG8=}}" ,
2088
+ // comment within braces (only whitespace may accompany the base64 data)
2089
+ r#"{{
2090
+ // Here's the data:
2091
+ aGVsbG8=
2092
+ }}"# ,
2093
+ // padding at the beginning (padding is only matched after the data)
2094
+ "{{=aGVsbG8}}" ,
2095
+ // too much padding (three `=` characters; at most two are matched)
2096
+ "{{aGVsbG8===}}" ,
2097
+ ] ;
2098
+ for input in bad_inputs {
2099
+ mismatch_blob ( input) ;
2100
+ }
2101
+ }
2011
2102
}
0 commit comments