From c5af3d2e05141cf3c68ab081008f457b7a4ec892 Mon Sep 17 00:00:00 2001 From: Carl Gay Date: Sat, 3 Aug 2024 14:15:36 -0400 Subject: [PATCH] reader: Fix multi-line string literal escaping bug Fixes: https://github.com/dylan-lang/opendylan/issues/1624 --- sources/dfmc/reader/lexer-transitions.dylan | 2 +- sources/dfmc/reader/lexer.dylan | 294 +++++++-------- .../tests/dfmc-reader-test-suite-app.dylan | 2 +- .../reader/tests/literal-test-suite.dylan | 334 ++++++++---------- 4 files changed, 293 insertions(+), 339 deletions(-) diff --git a/sources/dfmc/reader/lexer-transitions.dylan b/sources/dfmc/reader/lexer-transitions.dylan index 72e743e2bc..b2d4b52a7a 100644 --- a/sources/dfmc/reader/lexer-transitions.dylan +++ b/sources/dfmc/reader/lexer-transitions.dylan @@ -523,7 +523,7 @@ define constant $initial-state :: state(#"3string", #f, // seen """ #('"' . #"close-double-quote"), #('\\' . #"3string-escape"), - #(" !#-[]-~\r\n" . #"3string"), + #(" !#-[]-~\r\n" . #"3string"), // Ranges #-[ and ]-~ exclude backslash pair($ascii-8-bit-extensions, #"3string")), state(#"3string-escape", #f, #("\\'\"abefnrt0" . #"3string"), diff --git a/sources/dfmc/reader/lexer.dylan b/sources/dfmc/reader/lexer.dylan index a91b83f6cf..209452da86 100644 --- a/sources/dfmc/reader/lexer.dylan +++ b/sources/dfmc/reader/lexer.dylan @@ -837,8 +837,9 @@ define method hex-escape-character (source-location :: , start :: ) => (char :: , end-pos :: ) let (code, epos) - = parse-integer(source-location, radix: 16, start: start, - stop-at-non-digit?: #t); + = parse-integer(source-location, + source-location.source-location-record.contents, + radix: 16, start: start, stop-at-non-digit?: #t); if (code > $max-lexer-code) note(, source-location: @@ -853,150 +854,162 @@ define method hex-escape-character end end method hex-escape-character; -// Convert a string literal to its internal representation by processing escape -// codes and line endings. Canonicalize CRLF and CR to a single LF. Works for -// both one-line and multi-line strings because the lexer state transitions -// disallow CR and LF in one-line strings in the first place. If escapes? is -// true, process escape codes. +// Convert a string literal to its internal representation by removing the prefix (if +// any), processing escape codes if escapes? is true, and canonicalizing line endings to +// just \n. Works for both one-line and multi-line strings because the lexer state +// transitions disallow CR and LF in one-line strings in the first place. bpos points to +// just after the start delimiter (" or """) and epos points to the first character of +// the end delimiter. define method decode-string (source-location :: , bpos :: , - epos :: , escapes? :: ) - => (string :: , multi-line? :: ) - let contents = source-location.source-location-record.contents; - let multi-line? = #f; + epos :: , escapes? :: , triple-quoted? :: ) + => (string :: ) local - method skip-hex-escape (pos) - // TODO(cgay): signal better error if '>' not found. - if (contents[pos] == as(, '>')) - pos + 1 + method fail (format-string, #rest format-args) + note(, + source-location: source-location, + token-string: extract-string(source-location), + detail: apply(format-to-string, + concatenate("invalid multi-line string literal: ", + format-string), + format-args)); + end, + method whitespace-code? (c) + c == $space-code | c == $tab-code + end, + method find-line-break (seq, bpos, epos) + if (bpos < epos) + select (seq[bpos]) + $newline-code => + values(bpos, bpos + 1); + $carriage-return-code => + if (bpos + 1 < epos & seq[bpos + 1] == $newline-code) + values(bpos, bpos + 2) + else + values(bpos, bpos + 1) + end; + otherwise => + find-line-break(seq, bpos + 1, epos); + end + end + end, + method remove-prefix (prefix, line) + if (~prefix | empty?(prefix)) + line else - skip-hex-escape(pos + 1) + for (c in line, p in prefix) + if (c ~== p) + fail("each line must begin with the same whitespace prefix, got %=, want %=", + as(, line), as(, prefix)) + end; + end; + copy-sequence(line, start: prefix.size) end - end method, - method loop (pos :: , len :: , prev-was-cr? :: , - string :: false-or()) - => (len :: ) - if (pos >= epos) - len + end, + // Can't use hex-escape-character because we don't know the correct offset from the + // beginning of the literal due to using split/join. + method parse-hex-escape (line, start) => (char, epos) + let (code, epos) + = parse-integer(source-location, line, + radix: 16, start: start, stop-at-non-digit?: #t); + assert(epos <= line.size, "epos out of bounds: %d", epos); + assert(line[epos] == as(, '>'), + "hex escape must end with '>', got %=", line[epos]); + if (code > $max-lexer-code) + note(, + source-location: record-position-as-location + (source-location.source-location-record, + source-location.source-location-source-position), + token-string: extract-string(source-location)); + values(0, epos) // If forced, continue with NUL... else - let code = contents[pos]; - select (code) - as(, '\\') => - if (~escapes?) - string & (string[len] := '\\'); - loop(pos + 1, len + 1, #f, string) - else - let escape-char = as(, contents[pos + 1]); - let new-position - = if (escape-char == '<') - if (string) - let (char, epos) - = hex-escape-character(source-location, pos + 2); - string[len] := char; - epos + 1 - else - skip-hex-escape(pos + 2) - end - else - string & (string[len] := escape-character(escape-char)); - pos + 2 - end; - loop(new-position, len + 1, #f, string); - end if; - as(, '\r') => - multi-line? := #t; - string & (string[len] := '\n'); - loop(pos + 1, len + 1, #t, string); - as(, '\n') => - multi-line? := #t; - let increment = if (prev-was-cr?) - 0 // already stored a LF - else - string & (string[len] := '\n'); - 1 - end; - loop(pos + 1, len + increment, #f, string); - otherwise => - string & (string[len] := as(, code)); - loop(pos + 1, len + 1, #f, string); - end select - end if - end method; - let length = loop(bpos, 0, #f, #f); - let string = make(, size: length); - loop(bpos, 0, #f, string); - values(string, multi-line?) -end method decode-string; - -// https://opendylan.org/proposals/dep-0012-string-literals.html#the-rectangle-rule -// -// When this is called `string` is known to contain at least one literal newline -// character, the EOL sequence has already been canonicalized to just '\n', escape -// sequences have been processed, and the start/end delimiters have been removed. -define function trim-multi-line-prefix - (string :: , source-location) => (maybe-trimmed :: ) - let lines = split(string, '\n'); - let junk = first(lines); - let prefix = last(lines); - if (~empty?(junk) & ~whitespace?(junk)) - note(, - source-location: source-location, - token-string: extract-string(source-location), - detail: - "only whitespace may follow the start delimiter \"\"\" on the same line"); - end; - if (~empty?(prefix) & ~whitespace?(prefix)) - note(, - source-location: source-location, - token-string: extract-string(source-location), - detail: - "only whitespace may precede the end delimiter \"\"\" on the same line"); - end; - local method remove-prefix (line) - if (line = "") - line - elseif (~starts-with?(line, prefix)) - note(, - source-location: source-location, - token-string: extract-string(source-location), - detail: - format-to-string - ("each line must begin with the same whitespace that precedes the end" - " delimiter (got %=, want %=)", - copy-sequence(line, end: prefix.size), prefix)); + values(code, epos + 1) + end + end, + method process-escapes (line) + let len = line.size; + let new = make(); + iterate loop (pos = 0, escaped? = #f) + if (pos >= len) + as(, new) + else + let code = line[pos]; + if (escaped?) + let new-position + = if (code == as(, '<')) + let (code, epos) = parse-hex-escape(line, pos + 1); + add!(new, code); + epos + else + add!(new, as(, escape-character(as(, code)))); + pos + 1 + end; + loop(new-position, #f) + elseif (code == $escape-code) + loop(pos + 1, #t) else - copy-sequence(line, start: prefix.size) + add!(new, code); + loop(pos + 1, #f) end - end method; - select (lines.size) - 1 => error("compiler bug while trimming multi-line string prefix"); - 2 => ""; - otherwise => - let keep = copy-sequence(lines, start: 1, end: lines.size - 1); - let trimmed = map(remove-prefix, keep); - if (every?(empty?, trimmed)) - // If all lines are empty the last line needs to be handled specially because of - // the exceptional case of ``abc\n"""`` (where we don't want the final newline) - // vs ``\n\n"""`` (where we do want the final newline). - join(concatenate(trimmed, #("")), "\n") - else - join(trimmed, "\n") - end - end select -end function; + end + end iterate + end, + method process-line (prefix, line) + if (~empty?(line)) + if (prefix & ~empty?(prefix)) + line := remove-prefix(prefix, line); + end; + if (escapes? & member?($escape-code, line)) + line := process-escapes(line); + end; + end; + line + end; + let contents = source-location.source-location-record.contents; + let parts = split(contents, find-line-break, start: bpos, end: epos); + if (parts.size == 1) + as(, process-line(#f, parts[0])) // e.g., """abc""" + else + let prefix = parts.last; + if (~every?(whitespace-code?, prefix)) + fail("prefix must be all whitespace, got %=", as(, prefix)); + end; + if (~every?(whitespace-code?, parts.first)) + fail("only whitespace may follow the open delimiter \"\"\" on the" + " same line, got %=", parts.first); + end; + let parts = map(curry(process-line, prefix), parts); + // Deal with this oddity in our spec: + // """\n + // abc\n => LF excluded, end is before '\n' + // """ + // """\n + // \n => LF included, end is after '\n' + // """ + as(, + join(copy-sequence(parts, + start: 1, + end: if (empty?(parts[parts.size - 2])) + parts.size + else + parts.size - 1 + end), + make(, size: 1, fill: $newline-code))) + end if +end method decode-string; // Make a when confronted with the #"foo" syntax. // These are referred to as "unique strings" in the DRM Lexical Syntax. // define method %make-quoted-symbol (lexer :: , source-location :: , - start-offset :: , end-offset :: ) + start-offset :: , end-offset :: , multi-line? :: ) => (res :: ) let sym = as(, decode-string(source-location, source-location.start-posn + start-offset, source-location.end-posn - end-offset, - #t)); + #t, multi-line?)); make(, record: source-location.source-location-record, source-position: source-location.source-location-source-position, @@ -1004,10 +1017,10 @@ define method %make-quoted-symbol end method; define constant make-quoted-symbol - = rcurry(%make-quoted-symbol, 2, 1); + = rcurry(%make-quoted-symbol, 2, 1, #f); define constant make-multi-line-quoted-symbol - = rcurry(%make-quoted-symbol, 4, 3); + = rcurry(%make-quoted-symbol, 4, 3, #t); // Make a when confronted with the foo: syntax. // @@ -1035,14 +1048,12 @@ define constant $underscore_code :: = as(, '_'); // Parse and return an integer in the supplied radix. // define method parse-integer - (source-location :: , + (source-location :: , contents :: , #key radix :: = 10, start :: = source-location.start-posn, end: finish :: = source-location.end-posn, stop-at-non-digit? = #f) => (res :: , end-pos :: ) - let contents :: - = source-location.source-location-record.contents; // We do our working in negative integers to avoid representation // overflow until absolutely necessary. local method repeat (posn :: , result :: ) @@ -1126,7 +1137,7 @@ define method parse-integer-literal end if; end if; - let int = parse-integer(source-location, radix: radix, start: posn); + let int = parse-integer(source-location, contents, radix: radix, start: posn); if (~extended & (int < runtime-$minimum-integer @@ -1175,15 +1186,11 @@ end method make-character-literal; define method %make-string-literal (lexer :: , source-location :: , start-offset :: , end-offset :: , - allow-escapes? :: ) + allow-escapes? :: , multi-line? :: ) => (res :: ) let bpos = source-location.start-posn + start-offset; let epos = source-location.end-posn - end-offset; - let (string, multi-line?) - = decode-string(source-location, bpos, epos, allow-escapes?); - if (multi-line?) - string := trim-multi-line-prefix(string, source-location); - end; + let string = decode-string(source-location, bpos, epos, allow-escapes?, multi-line?); make(, record: source-location.source-location-record, source-position: source-location.source-location-source-position, @@ -1192,16 +1199,16 @@ define method %make-string-literal end method; define constant make-string-literal // "..." - = rcurry(%make-string-literal, 1, 1, #t); + = rcurry(%make-string-literal, 1, 1, #t, #f); define constant make-multi-line-string-literal // """...""" - = rcurry(%make-string-literal, 3, 3, #t); + = rcurry(%make-string-literal, 3, 3, #t, #t); define constant make-raw-string-literal // #r"..." - = rcurry(%make-string-literal, 3, 1, #f); + = rcurry(%make-string-literal, 3, 1, #f, #f); define constant make-multi-line-raw-string-literal // #r"""...""" - = rcurry(%make-string-literal, 5, 3, #f); + = rcurry(%make-string-literal, 5, 3, #f, #t); define method parse-ratio-literal (lexer :: , source-location :: ) @@ -1603,6 +1610,7 @@ end method parse-conditional; // TODO: CORRECTNESS: Multiplatform newline sequence handling. define constant $space-code = as(, ' '); +define constant $carriage-return-code = as(, '\r'); define constant $newline-code = as(, '\n'); define constant $tab-code = as(, '\t'); diff --git a/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan b/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan index d5046d2c85..76202a103b 100644 --- a/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan +++ b/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan @@ -2,4 +2,4 @@ Module: dfmc-reader-test-suite-app License: See License.txt in this distribution for details. -run-test-application(dfmc-reader-test-suite); +run-test-application(); diff --git a/sources/dfmc/reader/tests/literal-test-suite.dylan b/sources/dfmc/reader/tests/literal-test-suite.dylan index 82ad55d084..6af6f511e2 100644 --- a/sources/dfmc/reader/tests/literal-test-suite.dylan +++ b/sources/dfmc/reader/tests/literal-test-suite.dylan @@ -2,11 +2,11 @@ Module: dfmc-reader-test-suite License: See License.txt in this distribution for details. define function verify-literal - (fragment, value, required-class) => () + (fragment, want-value, required-class) => () assert-instance?(required-class, fragment, - format-to-string("verify-literal for value %=", value)); + format-to-string("verify-literal for value %=", want-value)); if (instance?(fragment, required-class)) - assert-equal(fragment.fragment-value, value); + assert-equal(want-value, fragment.fragment-value); end; end function; @@ -197,6 +197,7 @@ define test ratio-literal-test () assert-signals(, read-fragment("1/2")); end test ratio-literal-test; +// Define #:string: syntax. define function string-parser (s) s end; define test string-literal-test () @@ -210,6 +211,7 @@ define test string-literal-test () map-as(, char, #('\a', '\b', '\e', '\f', '\n', '\r', '\t', '\0', '\'', '\"', '\\')), ); + // Basic hex escaping. verify-literal(read-fragment(#:string:{"z\<9f>z"}), map-as(, char, #('z', #x9f, 'z')), @@ -224,71 +226,98 @@ define test string-literal-test () assert-signals(, read-fragment(#:string:{"\1"})); end test; -// verify multi-line string -define function verify-mls - (name, source, want) - assert-no-errors(read-fragment(source), "%s - parses without error", name); - let frag = read-fragment(source); - assert-instance?(, frag, "%s - is string fragment", name); - assert-equal(frag.fragment-value, want, "%s - has expected value", name); -end function; - -define test string-literal-one-line-test () - verify-mls("empty string", #:string:{""""""}, ""); - +// Note: one line as in one line of source code not as in having no newline characters. +define test test-triple-quoted-one-line-strings () + let frag0 = read-fragment(#:string:{""""""}); + expect-equal("", frag0.fragment-value); // Make sure the reader didn't stop at the first pair of double quotes... - let empty-string-fragment = read-fragment(#:string:{""""""}); assert-equal(#:string:{""""""}, - source-location-string(fragment-source-location(empty-string-fragment)), + source-location-string(fragment-source-location(frag0)), "entire empty string consumed"); - - verify-mls("simple abc", - #:string:{"""abc"""}, "abc"); - verify-mls("abc with spaces", - #:string:{""" abc """}, " abc "); + let frag1 = read-fragment(#:string:{"""abc"""}); + assert-equal("abc", frag1.fragment-value); + let frag2 = read-fragment(#:string:{""" abc """}); + assert-equal(" abc ", frag2.fragment-value); + let frag3 = read-fragment(#:string:{"""abc\ndef"""}); + assert-equal("abc\ndef", frag3.fragment-value); end test; -define test string-literal-multi-line-test () - verify-mls("multi-line empty string, no prefix", - #:string:{""" -"""}, - ""); - verify-mls("multi-line empty string, with prefix", - #:string:{""" -"""}, - ""); - verify-mls("multi-line one blank line, no prefix", - #:string:{""" - -"""}, - "\n"); - verify-mls("leading whitespace relative to end delim retained", - #:string:{""" +define test test-multi-line-string-basics () + let frag1 = read-fragment(#:string:{ +""" +abc +"""}); + assert-equal("abc", frag1.fragment-value); + + let frag2 = read-fragment(#:string:{ + """ abc + """}); + assert-equal("abc", frag2.fragment-value); + + let frag3 = read-fragment(#:string:{ +""" +abc def -"""}, - " abc\ndef"); - verify-mls("end delim to right of start delim", - #:string:{""" - abc - def - """}, - " abc\ndef"); - verify-mls("whitespace on first line ignored?", // 0x20 = space - #:string:{"""\<20>\<20> +"""}); + assert-equal("abc\ndef", frag3.fragment-value); + + let frag4 = read-fragment(#:string:{ + """ + abc + def + """}); + assert-equal("abc\ndef", frag4.fragment-value); + + let frag5 = read-fragment(#:string:{ +""" + + + +"""}); + assert-equal("\n\n\n", frag5.fragment-value); +end test; + +define test test-multi-line-empty-strings () + let frag1 = read-fragment(#:string:{""" +"""}); + assert-equal("", frag1.fragment-value, "multi-line empty string, no prefix"); + let frag2 = read-fragment(#:string:{""" + """}); + assert-equal("", frag2.fragment-value, "multi-line empty string, with prefix"); +end test; + +define test test-multi-line-string-with-blank-lines () + let frag1 = read-fragment(#:string:{""" + +"""}); + assert-equal("\n", frag1.fragment-value, "blank line, no prefix"); + + // Not using #:string: here to avoid having trailing whitespace, which could be removed + // by editors. + let frag2 = read-fragment("\"\"\"\n def\n \n \"\"\""); + assert-equal("def\n\n", frag2.fragment-value, "blank line with prefix"); +end test; + +define test test-multi-line-string-whitespace-relative-to-delimiters () + let frag1 = read-fragment(#:string:{""" abc def -"""}, - " abc\ndef"); - // The first blank line below is truly empty and the second one has only the prefix - // (written as \<20> to avoid editors removing trailing whitespace). - verify-mls("blank lines retained", - #:string:{""" - - def -\<20>\<20>\<20> - """}, - "\ndef\n"); +"""}); + assert-equal(" abc\ndef", frag1.fragment-value); + let frag2 = read-fragment(#:string:{""" + xxx + yyy + """}); + assert-equal(" xxx\nyyy", frag2.fragment-value); +end test; + +define test test-multi-line-string-delimiter-rules () + // Whitespace following the opening """ and preceding the first \n should be ignored. + // Since editors sometimes remove trailing whitespace we write this using escape + // sequences instead of with #:string:. + let frag1 = read-fragment("\"\"\" \n abc\n \"\"\""); + assert-equal("abc", frag1.fragment-value); assert-signals(, read-fragment(#:string:{"""a (only whitespace allowed after start delim) abc @@ -305,143 +334,56 @@ xxx"""}), xxx (this line not indented enough) """}), "prefix mismatch non-white"); - // Prefix should be " " but one line has a literal tab in prefix. - /* TODO: the literal tab causes a failure due (I presume) to - https://github.com/dylan-lang/opendylan/issues/425 - check-condition("prefix mismatch whitespace", - , - read-fragment("\"\"\"\n aaa\n \t bbb\n \"\"\"")); - */ +end test; +define test test-multi-line-string-eol-handling () // Check that CRLF and CR are converted to LF. - verify-mls("eol canonicalized 1", - "\"\"\"\na\r\nc\n\"\"\"", - "a\nc"); - verify-mls("eol canonicalized 2", - "\"\"\"\na\rc\n\"\"\"", - "a\nc"); - verify-mls("eol canonicalized 3", - "\"\"\"\r\na\n\rc\r\n\"\"\"", - "a\n\nc"); + let frag1 = read-fragment("\"\"\"\na\r\nc\n\"\"\""); + assert-equal("a\nc", frag1.fragment-value); + let frag2 = read-fragment("\"\"\"\na\rc\n\"\"\""); + assert-equal("a\nc", frag2.fragment-value); + let frag3 = read-fragment("\"\"\"\r\na\n\rc\r\n\"\"\""); + assert-equal("a\n\nc", frag3.fragment-value); +end test; +define test test-multi-line-string-escaping () + let frag1 = read-fragment(#:string:{"""\a\b\e\f\n\r\t\0\'\"\\"""}); + // https://github.com/dylan-lang/opendylan/issues/425 + assert-equal(map-as(, identity, + #('\a', '\b', '\e', '\f', '\n', '\r', '\t', '\0', '\'', '\"', '\\')), + frag1.fragment-value, + "one of each standard escape sequence"); let char = curry(as, ); - verify-mls("all escape sequences", - #:string:{"""\a\b\e\f\n\r\t\0\'\"\\"""}, - map-as(, char, - #('\a', '\b', '\e', '\f', '\n', '\r', '\t', '\0', '\'', '\"', '\\'))); - verify-mls("basic hex escaping", - #:string:{"""z\<9f>z"""}, - map-as(, char, #('z', #x9f, 'z'))); + let frag2 = read-fragment(#:string:{"""z\<9f>z"""}); + assert-equal(map-as(, char, #('z', #x9f, 'z')), + frag2.fragment-value, + "basic hex escaping"); + // We can't handle character codes > 255 yet, but the leading zeros shouldn't // confuse the reader. - verify-mls("hex escape with leading zeros", - #:string:{"""z\<009f>z"""}, - map-as(, char, #('z', #x9f, 'z'))); + let frag3 = read-fragment(#:string:{"""z\<009f>z"""}); + assert-equal(map-as(, char, #('z', #x9f, 'z')), + frag3.fragment-value, + "hex escape with leading zeros"); assert-signals(, read-fragment(#:string:{"""\1"""}), "invalid escape sequence"); - - verify-mls("one line", - #:string:{ -""" -abc -"""}, - "abc"); - verify-mls("one line with prefix", - #:string:{ - """ - abc - """}, - "abc"); - verify-mls("two lines", - #:string:{ -""" -abc -def -"""}, - "abc\ndef"); - verify-mls("two lines with prefix", - #:string:{ - """ - abc - def - """}, - "abc\ndef"); - verify-mls("empty line at start", - #:string:{ -""" - -abc -"""}, - "\nabc"); - verify-mls("two empty lines at start", - #:string:{ -""" - - -abc -"""}, - "\n\nabc"); - verify-mls("one empty line", - #:string:{ -""" - -"""}, - "\n"); - verify-mls("one empty line with prefix", - #:string:{ - """ -\<20>\<20> - """}, - "\n"); - verify-mls("empty line at end", - #:string:{ -""" -abc - -"""}, - "abc\n"); - verify-mls("two empty lines at end", - #:string:{ -""" -abc - - -"""}, - "abc\n\n"); - verify-mls("empty lines at start and end", - #:string:{ -""" - -abc - -"""}, - "\nabc\n"); - verify-mls("two empty lines", - #:string:{ -""" - - -"""}, - "\n\n"); - verify-mls("three empty lines", - #:string:{ -""" - - - -"""}, - "\n\n\n"); - verify-mls("three empty lines at end", - #:string:{ -""" -abc - - - -"""}, - "abc\n\n\n"); + let frag4 = read-fragment(#:string:{""" + \nxxx + """}); + assert-equal("\nxxx", frag4.fragment-value, + "newline escape sequence, xxx longer than prefix"); + let frag5 = read-fragment(#:string:{""" + \nxx + """}); + assert-equal("\nxx", frag5.fragment-value, + "newline escape sequence, xx same length as prefix"); + let frag6 = read-fragment(#:string:{""" + \nx + """}); + assert-equal("\nx", frag6.fragment-value, + "Newline escape sequences, x shorter than prefix"); end test; define test string-literal-raw-one-line-test () @@ -474,10 +416,9 @@ define test string-literal-raw-multi-line-test () verify-literal(read-fragment(#:string:{#r"""a""c"""}), "a\"\"c", ); // All escape codes ignored? \ precedes the terminating double quotes to - // ensure that it is ignored. We replace the X after the fact, to avoid - // confusing Emacs. + // ensure that it is ignored. let s = #:string:{#r"""\a\b\e\f\n\r\t\0\'\\\); @@ -498,17 +439,17 @@ define test symbol-literal-test () verify-presentation(sym, #:string:{#"a"}); // Literal Newline accepted and preserved? - let sym = read-fragment("#\"\"\"a\nb\"\"\""); + let sym = read-fragment("#\"\"\"\na\nb\n\"\"\""); verify-literal(sym, #"a\nb", ); verify-presentation(sym, "#\"a\nb\""); // CRLF -> LF? - let sym = read-fragment("#\"\"\"c\r\nd\"\"\""); + let sym = read-fragment("#\"\"\"\nc\r\nd\n\"\"\""); verify-literal(sym, #"c\nd", ); verify-presentation(sym, "#\"c\nd\""); // CR -> LF? - let sym = read-fragment("#\"\"\"e\rf\"\"\""); + let sym = read-fragment("#\"\"\"\ne\rf\n\"\"\""); verify-literal(sym, #"e\nf", ); verify-presentation(sym, "#\"e\nf\""); end test symbol-literal-test; @@ -546,12 +487,17 @@ define suite literal-test-suite () test octal-integer-literal-test; test pair-literal-test; test ratio-literal-test; - test string-literal-multi-line-test; - test string-literal-one-line-test; test string-literal-raw-multi-line-test; test string-literal-raw-one-line-test; test string-literal-test; test symbol-literal-test; test vector-literal-test; test hash-literal-test; + test test-multi-line-string-eol-handling; + test test-multi-line-string-basics; + test test-multi-line-string-escaping; + test test-multi-line-string-delimiter-rules; + test test-multi-line-string-whitespace-relative-to-delimiters; + test test-multi-line-string-with-blank-lines; + test test-multi-line-empty-strings; end suite literal-test-suite;