Skip to content

Commit

Permalink
Implement multiline strings and multiline f-strings
Browse files Browse the repository at this point in the history
Fixes #575.
  • Loading branch information
mmarx committed Jan 4, 2025
1 parent 5960045 commit c5bc6c0
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 26 deletions.
5 changes: 5 additions & 0 deletions nemo-physical/src/datavalues/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ pub mod iri {
pub mod string {
/// Language tag indicator after strings
pub const LANG_TAG: &str = "@";

/// Quote to delimit string literals (a single `"` character)
pub const QUOTE: &str = r#"""#;
/// Triple quotes to delimit multi-line strings (three consecutive `"` characters)
pub const TRIPLE_QUOTE: &str = r#"""""#;
}

/// RDF datatype indicator
Expand Down
17 changes: 12 additions & 5 deletions nemo/src/parser/ast/expression/basic/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,18 @@ impl<'a> StringLiteral<'a> {

/// Parse the main part of the string.
pub fn parse_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
delimited(
Token::quote,
alt((Token::string, Token::empty)),
Token::quote,
)(input)
alt((
delimited(
Token::triple_quote,
alt((Token::multiline_string, Token::empty)),
Token::triple_quote,
),
delimited(
Token::quote,
alt((Token::string, Token::empty)),
Token::quote,
),
))(input)
}

/// Parse the language tag of the string.
Expand Down
38 changes: 28 additions & 10 deletions nemo/src/parser/ast/expression/complex/fstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ impl<'a> FormatString<'a> {
map(Self::parse_expression, FormatStringElement::Expression),
))(input)
}

/// Parse one [FormatStringElement] of a multi-line format string.
///
/// Literal text (as recognised by [Token::multiline_fstring]) is tried first and wrapped in
/// [FormatStringElement::String]; if that fails, an embedded expression is parsed and wrapped
/// in [FormatStringElement::Expression].
fn parse_multiline_element(
    input: ParserInput<'a>,
) -> ParserResult<'a, FormatStringElement<'a>> {
    let literal_text = map(Token::multiline_fstring, FormatStringElement::String);
    let embedded_expression = map(Self::parse_expression, FormatStringElement::Expression);

    alt((literal_text, embedded_expression))(input)
}
}

const CONTEXT: ParserContext = ParserContext::FormatString;
Expand Down Expand Up @@ -81,11 +91,18 @@ impl<'a> ProgramAST<'a> for FormatString<'a> {

context(
CONTEXT,
delimited(
Token::fstring_open,
many0(Self::parse_element),
Token::fstring_close,
),
alt((
delimited(
Token::fstring_multiline_open,
many0(Self::parse_multiline_element),
Token::fstring_multiline_close,
),
delimited(
Token::fstring_open,
many0(Self::parse_element),
Token::fstring_close,
),
)),
)(input)
.map(|(rest, elements)| {
let rest_span = rest.span;
Expand Down Expand Up @@ -118,11 +135,12 @@ mod test {
#[test]
fn parse_format_string() {
let test = vec![
("f\"\"", 0),
("f\"string\"", 1),
("f\"{?x + 1}\"", 1),
("f\"result: {?x + 1}\"", 2),
("f\"{?x} + {?y} = {?x + ?y}\"", 5),
(r#"f"""#, 0),
(r#"f"string""#, 1),
(r#"f"""string""""#, 1),
(r#"f"{?x + 1}""#, 1),
(r#"f"result: {?x + 1}""#, 2),
(r#"f"{?x} + {?y} = {?x + ?y}""#, 5),
];

for (input, expected) in test {
Expand Down
97 changes: 89 additions & 8 deletions nemo/src/parser/ast/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@ use enum_assoc::Assoc;

use nom::{
branch::alt,
bytes::complete::{is_a, is_not, tag},
bytes::complete::{is_a, is_not, tag, take_until},
character::complete::{alpha1, alphanumeric1, digit1, multispace1, space0, space1},
combinator::{map, opt, recognize, verify},
multi::many0,
sequence::pair,
FindSubstring, InputTake,
};
use nom_supreme::error::{BaseErrorKind, Expectation};

use crate::{
parser::{
context::{context, ParserContext},
error::ParserErrorTree,
span::Span,
ParserInput, ParserResult,
},
Expand All @@ -33,10 +36,10 @@ use crate::{
#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq)]
#[func(pub fn name(&self) -> &'static str)]
pub enum TokenKind {
/// Opening parenthesis for parenthesised arithmitic terms
/// Opening parenthesis for parenthesised arithmetic terms
#[assoc(name = "(")]
OpenParenthesis,
/// Closing parenthesis for parenthesised arithmitic terms
/// Closing parenthesis for parenthesised arithmetic terms
#[assoc(name = ")")]
ClosedParenthesis,
/// Opening delimiter for maps
Expand Down Expand Up @@ -148,18 +151,27 @@ pub enum TokenKind {
#[assoc(name = datavalues::DOT)]
Dot,
/// Quote
#[assoc(name = "\"")]
#[assoc(name = string::QUOTE)]
Quote,
/// Triple Quotes
#[assoc(name = string::TRIPLE_QUOTE)]
TripleQuote,
/// Format string open
#[assoc(name = format_string::OPEN)]
FormatStringOpen,
/// Format string close
#[assoc(name = format_string::CLOSE)]
FormatStringClose,
/// Format string open
/// Format string multi-line open
#[assoc(name = format_string::MULTILINE_OPEN)]
FormatStringMultilineOpen,
/// Format string multi-line close
#[assoc(name = format_string::MULTILINE_CLOSE)]
FormatStringMultilineClose,
/// Format string expression open
#[assoc(name = format_string::EXPRESSION_START)]
FormatStringExpressionStart,
/// Format string close
/// Format string expression close
#[assoc(name = format_string::EXPRESSION_END)]
FormatStringExpressionEnd,
/// Blank node prefix
Expand Down Expand Up @@ -374,22 +386,85 @@ impl<'a> Token<'a> {
})
}

/// Parse a [TokenKind::String] token covering everything in `input` that precedes `tag`.
///
/// The text in front of the first occurrence of `tag` becomes the token's span; `tag`
/// itself is not consumed and stays at the start of the remaining input. If `tag` occurs
/// right at the start of `input`, the resulting token is empty.
///
/// # Errors
///
/// Fails (via `take_until`) if `tag` does not occur in `input` at all.
fn parse_character_sequence_until(
    input: ParserInput<'a>,
    tag: &str,
) -> ParserResult<'a, Token<'a>> {
    take_until(tag)(input).map(|(rest, matched)| {
        let token = Token {
            span: matched.span,
            kind: TokenKind::String,
        };

        // `rest` is already owned here, so it is returned directly instead of being cloned.
        (rest, token)
    })
}

/// Parse a [TokenKind::String] token covering the input up to the earliest of several
/// delimiters.
///
/// Every entry of `tags` is searched for in `input`, and the smallest match offset wins.
/// The text in front of that offset becomes the token's span; the delimiter itself is not
/// consumed and stays at the start of the remaining input.
///
/// # Errors
///
/// - If none of `tags` occurs in `input`, fails with an "expected tag" error that names the
///   first entry of `tags` (or the empty string if `tags` is empty).
/// - If a delimiter sits at offset zero, fails with [nom::error::ErrorKind::Eof], so this
///   parser never yields an empty token.
fn parse_character_sequence_until_one_of(
    input: ParserInput<'a>,
    tags: &'static [&'static str],
) -> ParserResult<'a, Token<'a>> {
    // Offset of whichever delimiter appears first in the input, if any appears at all.
    let earliest = tags
        .iter()
        .filter_map(|tag| input.find_substring(tag))
        .min();

    match earliest {
        None => Err(nom::Err::Error(ParserErrorTree::Base {
            location: input,
            // `first()` rather than `tags[0]`: this arm is also reached for an empty tag
            // list, which must produce a parse error instead of an out-of-bounds panic.
            kind: BaseErrorKind::Expected(Expectation::Tag(
                tags.first().copied().unwrap_or_default(),
            )),
        })),
        // A delimiter directly at the start means there is no text to return.
        // NOTE(review): presumably this must be an error (not an empty token) so that
        // repetition combinators such as `many0` can stop cleanly; confirm against callers.
        Some(0) => Err(nom::Err::Error(ParserErrorTree::Base {
            location: input,
            kind: BaseErrorKind::Kind(nom::error::ErrorKind::Eof),
        })),
        Some(idx) => {
            let (rest, result) = input.take_split(idx);
            let token = Token {
                span: result.span,
                kind: TokenKind::String,
            };

            // `rest` is already owned here, so it is returned directly instead of being cloned.
            Ok((rest, token))
        }
    }
}

/// Parse [TokenKind::String].
pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
Self::parse_character_sequence(input, "\"")
Self::parse_character_sequence(input, string::QUOTE)
}

/// Parse a multi-line [TokenKind::String].
///
/// Delegates to [Self::parse_character_sequence_until] with [string::TRIPLE_QUOTE] as the
/// terminator, so the token covers all text in front of the next triple quote.
pub fn multiline_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
Self::parse_character_sequence_until(input, string::TRIPLE_QUOTE)
}

/// Parse [TokenKind::FormatString].
pub fn fstring(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
let excluded = format!(
"\"{}{}",
"{}{}{}",
format_string::CLOSE,
format_string::EXPRESSION_START,
format_string::EXPRESSION_END
);

Self::parse_character_sequence(input, &excluded)
}

/// Parse the literal text portion of a multi-line [TokenKind::FormatString].
///
/// The text ends at whichever comes first: the closing delimiter of the multi-line format
/// string, the start of an embedded expression, or the end of an embedded expression.
pub fn multiline_fstring(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
    /// Delimiters that terminate a run of literal text.
    const TEXT_TERMINATORS: &[&str] = &[
        format_string::MULTILINE_CLOSE,
        format_string::EXPRESSION_START,
        format_string::EXPRESSION_END,
    ];

    Self::parse_character_sequence_until_one_of(input, TEXT_TERMINATORS)
}

/// Parse [TokenKind::Digits].
pub fn digits(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
context(ParserContext::token(TokenKind::Digits), digit1)(input).map(
Expand Down Expand Up @@ -633,8 +708,14 @@ impl<'a> Token<'a> {
string_token!(doc_comment, TokenKind::DocComment);
string_token!(toplevel_comment, TokenKind::TopLevelComment);
string_token!(quote, TokenKind::Quote);
string_token!(triple_quote, TokenKind::TripleQuote);
string_token!(fstring_open, TokenKind::FormatStringOpen);
string_token!(fstring_close, TokenKind::FormatStringClose);
string_token!(fstring_multiline_open, TokenKind::FormatStringMultilineOpen);
string_token!(
fstring_multiline_close,
TokenKind::FormatStringMultilineClose
);
string_token!(
fstring_expression_start,
TokenKind::FormatStringExpressionStart
Expand Down
9 changes: 7 additions & 2 deletions nemo/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,14 @@ pub mod expression {
/// Syntax for format strings
pub mod format_string {
/// Opening part of a format string
pub const OPEN: &str = "f\"";
pub const OPEN: &str = r#"f""#;
/// Closing part of a format string
pub const CLOSE: &str = "\"";
pub const CLOSE: &str = r#"""#;

/// Opening part of a multi-line format string
pub const MULTILINE_OPEN: &str = r#"f""""#;
/// Closing part of a multi-line format string
pub const MULTILINE_CLOSE: &str = r#"""""#;

/// Marker of the start of an expression
pub const EXPRESSION_START: &str = "{";
Expand Down
5 changes: 4 additions & 1 deletion resources/testcases/arithmetic/builtins.rls
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ result(uriencode, ?R) :- uri_strings(?A, _), ?R = URIENCODE(?A).
result(uridecode, ?R) :- uri_strings(_, ?A), ?R = URIDECODE(?A).

% F-string literal
result(fstring_basic, ?R) :- strings(?A, ?B), ?R = f"{?A} and {?B}".
result(fstring_basic, ?R) :- strings(?A, ?B), ?R = f"{?A} and {?B}".
result(fstring_multiline, ?R) :- strings(?A, ?B), ?M = f"""{?A}
and
{?B}""", ?R = CONCAT(SUBSTR(?M, 1, 5), " ", SUBSTR(?M, 7, 3), " ", SUBSTR(?M, 11)).
result(fstring_arithmetic, ?R) :- strings(?A, ?B), ?R = f"len*10={STRLEN(?A) * 10}".

% Numeric arithmetic
Expand Down
1 change: 1 addition & 0 deletions resources/testcases/arithmetic/builtins/result.csv
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,5 @@ bitor,3
bitxor,0
fstring_basic,"""Hello and World"""
fstring_arithmetic,"""len*10=50"""
fstring_multiline,"""Hello and World"""
uriencode,"""%3Ffoo%5B%5D%3D%22bar%20quuz%22"""

0 comments on commit c5bc6c0

Please sign in to comment.