From c5af3d2e05141cf3c68ab081008f457b7a4ec892 Mon Sep 17 00:00:00 2001
From: Carl Gay <carlgay@gmail.com>
Date: Sat, 3 Aug 2024 14:15:36 -0400
Subject: [PATCH] reader: Fix multi-line string literal escaping bug

Fixes: https://github.com/dylan-lang/opendylan/issues/1624
---
 sources/dfmc/reader/lexer-transitions.dylan   |   2 +-
 sources/dfmc/reader/lexer.dylan               | 294 +++++++--------
 .../tests/dfmc-reader-test-suite-app.dylan    |   2 +-
 .../reader/tests/literal-test-suite.dylan     | 334 ++++++++----------
 4 files changed, 293 insertions(+), 339 deletions(-)
diff --git a/sources/dfmc/reader/lexer-transitions.dylan b/sources/dfmc/reader/lexer-transitions.dylan
index 72e743e2bc..b2d4b52a7a 100644
--- a/sources/dfmc/reader/lexer-transitions.dylan
+++ b/sources/dfmc/reader/lexer-transitions.dylan
@@ -523,7 +523,7 @@ define constant $initial-state :: <state>
        state(#"3string", #f, // seen """
              #('"' . #"close-double-quote"),
              #('\\' . #"3string-escape"),
-             #(" !#-[]-~\r\n" . #"3string"),
+             #(" !#-[]-~\r\n" . #"3string"),  // Ranges #-[ and ]-~ exclude backslash
              pair($ascii-8-bit-extensions, #"3string")),
        state(#"3string-escape", #f,
              #("\\'\"abefnrt0" . #"3string"),
diff --git a/sources/dfmc/reader/lexer.dylan b/sources/dfmc/reader/lexer.dylan
index a91b83f6cf..209452da86 100644
--- a/sources/dfmc/reader/lexer.dylan
+++ b/sources/dfmc/reader/lexer.dylan
@@ -837,8 +837,9 @@ define method hex-escape-character
     (source-location :: <lexer-source-location>, start :: <integer>)
  => (char :: <character>, end-pos :: <integer>)
   let (code, epos)
-    = parse-integer(source-location, radix: 16, start: start,
-                    stop-at-non-digit?: #t);
+    = parse-integer(source-location,
+                    source-location.source-location-record.contents,
+                    radix: 16, start: start, stop-at-non-digit?: #t);
   if (code > $max-lexer-code)
     note(<character-code-too-large>,
          source-location:
@@ -853,150 +854,162 @@ define method hex-escape-character
   end
 end method hex-escape-character;
 
-// Convert a string literal to its internal representation by processing escape
-// codes and line endings. Canonicalize CRLF and CR to a single LF.  Works for
-// both one-line and multi-line strings because the lexer state transitions
-// disallow CR and LF in one-line strings in the first place. If escapes? is
-// true, process escape codes.
+// Convert a string literal to its internal representation by removing the prefix (if
+// any), processing escape codes if escapes? is true, and canonicalizing line endings to
+// just \n.  Works for both one-line and multi-line strings because the lexer state
+// transitions disallow CR and LF in one-line strings in the first place. bpos points to
+// just after the start delimiter (" or """) and epos points to the first character of
+// the end delimiter.
 define method decode-string
     (source-location :: <lexer-source-location>, bpos :: <integer>,
-     epos :: <integer>, escapes? :: <boolean>)
- => (string :: <byte-string>, multi-line? :: <boolean>)
-  let contents = source-location.source-location-record.contents;
-  let multi-line? = #f;
+     epos :: <integer>, escapes? :: <boolean>, triple-quoted? :: <boolean>)
+ => (string :: <byte-string>)
   local
-    method skip-hex-escape (pos)
-      // TODO(cgay): signal better error if '>' not found.
-      if (contents[pos] == as(<integer>, '>'))
-        pos + 1
+    method fail (format-string, #rest format-args)
+      note(<invalid-multi-line-string-literal>,
+           source-location: source-location,
+           token-string: extract-string(source-location),
+           detail: apply(format-to-string,
+                         concatenate("invalid multi-line string literal: ",
+                                     format-string),
+                         format-args));
+    end,
+    method whitespace-code? (c)
+      c == $space-code | c == $tab-code
+    end,
+    method find-line-break (seq, bpos, epos)
+      if (bpos < epos)
+        select (seq[bpos])
+          $newline-code =>
+            values(bpos, bpos + 1);
+          $carriage-return-code =>
+            if (bpos + 1 < epos & seq[bpos + 1] == $newline-code)
+              values(bpos, bpos + 2)
+            else
+              values(bpos, bpos + 1)
+            end;
+          otherwise =>
+            find-line-break(seq, bpos + 1, epos);
+        end
+      end
+    end,
+    method remove-prefix (prefix, line)
+      if (~prefix | empty?(prefix))
+        line
       else
-        skip-hex-escape(pos + 1)
+        for (c in line, p in prefix)
+          if (c ~== p)
+            fail("each line must begin with the same whitespace prefix, got %=, want %=",
+                 as(<string>, line), as(<string>, prefix))
+          end;
+        end;
+        copy-sequence(line, start: prefix.size)
       end
-    end method,
-    method loop (pos :: <integer>, len :: <integer>, prev-was-cr? :: <boolean>,
-                 string :: false-or(<string>))
-             => (len :: <integer>)
-      if (pos >= epos)
-        len
+    end,
+    // Can't use hex-escape-character because we don't know the correct offset from the
+    // beginning of the literal due to using split/join.
+    method parse-hex-escape (line, start) => (char, epos)
+      let (code, epos)
+        = parse-integer(source-location, line,
+                        radix: 16, start: start, stop-at-non-digit?: #t);
+      assert(epos <= line.size, "epos out of bounds: %d", epos);
+      assert(line[epos] == as(<integer>, '>'),
+             "hex escape must end with '>', got %=", line[epos]);
+      if (code > $max-lexer-code)
+        note(<character-code-too-large>,
+             source-location: record-position-as-location
+               (source-location.source-location-record,
+                source-location.source-location-source-position),
+             token-string: extract-string(source-location));
+        values(0, epos)        // If forced, continue with NUL...
       else
-        let code = contents[pos];
-        select (code)
-          as(<integer>, '\\') =>
-            if (~escapes?)
-              string & (string[len] := '\\');
-              loop(pos + 1, len + 1, #f, string)
-            else
-              let escape-char = as(<character>, contents[pos + 1]);
-              let new-position
-                = if (escape-char == '<')
-                    if (string)
-                      let (char, epos)
-                        = hex-escape-character(source-location, pos + 2);
-                      string[len] := char;
-                      epos + 1
-                    else
-                      skip-hex-escape(pos + 2)
-                    end
-                  else
-                    string & (string[len] := escape-character(escape-char));
-                    pos + 2
-                  end;
-              loop(new-position, len + 1, #f, string);
-            end if;
-          as(<integer>, '\r') =>
-            multi-line? := #t;
-            string & (string[len] := '\n');
-            loop(pos + 1, len + 1, #t, string);
-          as(<integer>, '\n') =>
-            multi-line? := #t;
-            let increment = if (prev-was-cr?)
-                              0 // already stored a LF
-                            else
-                              string & (string[len] := '\n');
-                              1
-                            end;
-            loop(pos + 1, len + increment, #f, string);
-          otherwise =>
-            string & (string[len] := as(<character>, code));
-            loop(pos + 1, len + 1, #f, string);
-        end select
-      end if
-    end method;
-  let length = loop(bpos, 0, #f, #f);
-  let string = make(<string>, size: length);
-  loop(bpos, 0, #f, string);
-  values(string, multi-line?)
-end method decode-string;
-
-// https://opendylan.org/proposals/dep-0012-string-literals.html#the-rectangle-rule
-//
-// When this is called `string` is known to contain at least one literal newline
-// character, the EOL sequence has already been canonicalized to just '\n', escape
-// sequences have been processed, and the start/end delimiters have been removed.
-define function trim-multi-line-prefix
-    (string :: <string>, source-location) => (maybe-trimmed :: <string>)
-  let lines = split(string, '\n');
-  let junk = first(lines);
-  let prefix = last(lines);
-  if (~empty?(junk) & ~whitespace?(junk))
-    note(<invalid-multi-line-string-literal>,
-         source-location: source-location,
-         token-string: extract-string(source-location),
-         detail:
-           "only whitespace may follow the start delimiter \"\"\" on the same line");
-  end;
-  if (~empty?(prefix) & ~whitespace?(prefix))
-    note(<invalid-multi-line-string-literal>,
-         source-location: source-location,
-         token-string: extract-string(source-location),
-         detail:
-           "only whitespace may precede the end delimiter \"\"\" on the same line");
-  end;
-  local method remove-prefix (line)
-          if (line = "")
-            line
-          elseif (~starts-with?(line, prefix))
-            note(<invalid-multi-line-string-literal>,
-                 source-location: source-location,
-                 token-string: extract-string(source-location),
-                 detail:
-                   format-to-string
-                   ("each line must begin with the same whitespace that precedes the end"
-                      " delimiter (got %=, want %=)",
-                    copy-sequence(line, end: prefix.size), prefix));
+        values(code, epos + 1)
+      end
+    end,
+    method process-escapes (line)
+      let len = line.size;
+      let new = make(<stretchy-vector>);
+      iterate loop (pos = 0, escaped? = #f)
+        if (pos >= len)
+          as(<byte-vector>, new)
+        else
+          let code = line[pos];
+          if (escaped?)
+            let new-position
+              = if (code == as(<integer>, '<'))
+                  let (code, epos) = parse-hex-escape(line, pos + 1);
+                  add!(new, code);
+                  epos
+                else
+                  add!(new, as(<integer>, escape-character(as(<character>, code))));
+                  pos + 1
+                end;
+            loop(new-position, #f)
+          elseif (code == $escape-code)
+            loop(pos + 1, #t)
           else
-            copy-sequence(line, start: prefix.size)
+            add!(new, code);
+            loop(pos + 1, #f)
           end
-        end method;
-  select (lines.size)
-    1 => error("compiler bug while trimming multi-line string prefix");
-    2 => "";
-    otherwise =>
-      let keep = copy-sequence(lines, start: 1, end: lines.size - 1);
-      let trimmed = map(remove-prefix, keep);
-      if (every?(empty?, trimmed))
-        // If all lines are empty the last line needs to be handled specially because of
-        // the exceptional case of ``abc\n"""`` (where we don't want the final newline)
-        // vs ``\n\n"""`` (where we do want the final newline).
-        join(concatenate(trimmed, #("")), "\n")
-      else
-        join(trimmed, "\n")
-      end
-  end select
-end function;
+        end
+      end iterate
+    end,
+    method process-line (prefix, line)
+      if (~empty?(line))
+        if (prefix & ~empty?(prefix))
+          line := remove-prefix(prefix, line);
+        end;
+        if (escapes? & member?($escape-code, line))
+          line := process-escapes(line);
+        end;
+      end;
+      line
+    end;
+  let contents = source-location.source-location-record.contents;
+  let parts = split(contents, find-line-break, start: bpos, end: epos);
+  if (parts.size == 1)
+    as(<string>, process-line(#f, parts[0]))      // e.g., """abc"""
+  else
+    let prefix = parts.last;
+    if (~every?(whitespace-code?, prefix))
+      fail("prefix must be all whitespace, got %=", as(<string>, prefix));
+    end;
+    if (~every?(whitespace-code?, parts.first))
+      fail("only whitespace may follow the open delimiter \"\"\" on the"
+             " same line, got %=", parts.first);
+    end;
+    let parts = map(curry(process-line, prefix), parts);
+    // Deal with this oddity in our spec:
+    // """\n
+    // abc\n            => LF excluded, end is before '\n'
+    // """
+    // """\n
+    // \n               => LF included, end is after '\n'
+    // """
+    as(<string>,
+       join(copy-sequence(parts,
+                          start: 1,
+                          end: if (empty?(parts[parts.size - 2]))
+                                 parts.size
+                               else
+                                 parts.size - 1
+                               end),
+            make(<byte-vector>, size: 1, fill: $newline-code)))
+  end if
+end method decode-string;
 
 // Make a <literal-token> when confronted with the #"foo" syntax.
 // These are referred to as "unique strings" in the DRM Lexical Syntax.
 //
 define method %make-quoted-symbol
     (lexer :: <lexer>, source-location :: <lexer-source-location>,
-     start-offset :: <integer>, end-offset :: <integer>)
+     start-offset :: <integer>, end-offset :: <integer>, multi-line? :: <boolean>)
  => (res :: <symbol-syntax-symbol-fragment>)
   let sym = as(<symbol>,
                decode-string(source-location,
                              source-location.start-posn + start-offset,
                              source-location.end-posn - end-offset,
-                             #t));
+                             #t, multi-line?));
   make(<symbol-syntax-symbol-fragment>,
        record: source-location.source-location-record,
        source-position: source-location.source-location-source-position,
@@ -1004,10 +1017,10 @@ define method %make-quoted-symbol
 end method;
 
 define constant make-quoted-symbol
-  = rcurry(%make-quoted-symbol, 2, 1);
+  = rcurry(%make-quoted-symbol, 2, 1, #f);
 
 define constant make-multi-line-quoted-symbol
-  = rcurry(%make-quoted-symbol, 4, 3);
+  = rcurry(%make-quoted-symbol, 4, 3, #t);
 
 // Make a <literal-token> when confronted with the foo: syntax.
 //
@@ -1035,14 +1048,12 @@ define constant $underscore_code :: <integer> = as(<integer>, '_');
 // Parse and return an integer in the supplied radix.
 //
 define method parse-integer
-    (source-location :: <lexer-source-location>,
+    (source-location :: <lexer-source-location>, contents :: <byte-vector>,
      #key radix :: <integer> = 10,
           start :: <integer> = source-location.start-posn,
           end: finish :: <integer> = source-location.end-posn,
           stop-at-non-digit? = #f)
  => (res :: <abstract-integer>, end-pos :: <integer>)
-  let contents :: <byte-vector>
-    = source-location.source-location-record.contents;
   // We do our working in negative integers to avoid representation
   // overflow until absolutely necessary.
   local method repeat (posn :: <integer>, result :: <abstract-integer>)
@@ -1126,7 +1137,7 @@ define method parse-integer-literal
     end if;
   end if;
 
-  let int = parse-integer(source-location, radix: radix, start: posn);
+  let int = parse-integer(source-location, contents, radix: radix, start: posn);
 
   if (~extended &
         (int < runtime-$minimum-integer
@@ -1175,15 +1186,11 @@ end method make-character-literal;
 define method %make-string-literal
     (lexer :: <lexer>, source-location :: <lexer-source-location>,
      start-offset :: <integer>, end-offset :: <integer>,
-     allow-escapes? :: <boolean>)
+     allow-escapes? :: <boolean>, multi-line? :: <boolean>)
  => (res :: <string-fragment>)
   let bpos = source-location.start-posn + start-offset;
   let epos = source-location.end-posn - end-offset;
-  let (string, multi-line?)
-    = decode-string(source-location, bpos, epos, allow-escapes?);
-  if (multi-line?)
-    string := trim-multi-line-prefix(string, source-location);
-  end;
+  let string = decode-string(source-location, bpos, epos, allow-escapes?, multi-line?);
   make(<string-fragment>,
        record: source-location.source-location-record,
        source-position: source-location.source-location-source-position,
@@ -1192,16 +1199,16 @@ define method %make-string-literal
 end method;
 
 define constant make-string-literal                // "..."
-  = rcurry(%make-string-literal, 1, 1, #t);
+  = rcurry(%make-string-literal, 1, 1, #t, #f);
 
 define constant make-multi-line-string-literal     // """..."""
-  = rcurry(%make-string-literal, 3, 3, #t);
+  = rcurry(%make-string-literal, 3, 3, #t, #t);
 
 define constant make-raw-string-literal            // #r"..."
-  = rcurry(%make-string-literal, 3, 1, #f);
+  = rcurry(%make-string-literal, 3, 1, #f, #f);
 
 define constant make-multi-line-raw-string-literal // #r"""..."""
-  = rcurry(%make-string-literal, 5, 3, #f);
+  = rcurry(%make-string-literal, 5, 3, #f, #t);
 
 define method parse-ratio-literal
     (lexer :: <lexer>, source-location :: <lexer-source-location>)
@@ -1603,6 +1610,7 @@ end method parse-conditional;
 // TODO: CORRECTNESS: Multiplatform newline sequence handling.
 
 define constant $space-code = as(<integer>, ' ');
+define constant $carriage-return-code = as(<integer>, '\r');
 define constant $newline-code = as(<integer>, '\n');
 define constant $tab-code = as(<integer>, '\t');
 
diff --git a/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan b/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan
index d5046d2c85..76202a103b 100644
--- a/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan
+++ b/sources/dfmc/reader/tests/dfmc-reader-test-suite-app.dylan
@@ -2,4 +2,4 @@ Module: dfmc-reader-test-suite-app
 License: See License.txt in this distribution for details.
 
 
-run-test-application(dfmc-reader-test-suite);
+run-test-application();
diff --git a/sources/dfmc/reader/tests/literal-test-suite.dylan b/sources/dfmc/reader/tests/literal-test-suite.dylan
index 82ad55d084..6af6f511e2 100644
--- a/sources/dfmc/reader/tests/literal-test-suite.dylan
+++ b/sources/dfmc/reader/tests/literal-test-suite.dylan
@@ -2,11 +2,11 @@ Module: dfmc-reader-test-suite
 License: See License.txt in this distribution for details.
 
 define function verify-literal
-    (fragment, value, required-class) => ()
+    (fragment, want-value, required-class) => ()
   assert-instance?(required-class, fragment,
-                   format-to-string("verify-literal for value %=", value));
+                   format-to-string("verify-literal for value %=", want-value));
   if (instance?(fragment, required-class))
-    assert-equal(fragment.fragment-value, value);
+    assert-equal(want-value, fragment.fragment-value);
   end;
 end function;
 
@@ -197,6 +197,7 @@ define test ratio-literal-test ()
   assert-signals(<ratios-not-supported>, read-fragment("1/2"));
 end test ratio-literal-test;
 
+// Define #:string: syntax.
 define function string-parser (s) s end;
 
 define test string-literal-test ()
@@ -210,6 +211,7 @@ define test string-literal-test ()
                  map-as(<string>, char, #('\a', '\b', '\e', '\f', '\n', '\r',
                                           '\t', '\0', '\'', '\"', '\\')),
                  <string-fragment>);
+
   // Basic hex escaping.
   verify-literal(read-fragment(#:string:{"z\<9f>z"}),
                  map-as(<string>, char, #('z', #x9f, 'z')),
@@ -224,71 +226,98 @@ define test string-literal-test ()
   assert-signals(<invalid-token>, read-fragment(#:string:{"\1<b>"}));
 end test;
 
-// verify multi-line string
-define function verify-mls
-    (name, source, want)
-  assert-no-errors(read-fragment(source), "%s - parses without error", name);
-  let frag = read-fragment(source);
-  assert-instance?(<string-fragment>, frag, "%s - is string fragment", name);
-  assert-equal(frag.fragment-value, want, "%s - has expected value", name);
-end function;
-
-define test string-literal-one-line-test ()
-  verify-mls("empty string", #:string:{""""""}, "");
-
+// Note: one line as in one line of source code not as in having no newline characters.
+define test test-triple-quoted-one-line-strings ()
+  let frag0 = read-fragment(#:string:{""""""});
+  expect-equal("", frag0.fragment-value);
   // Make sure the reader didn't stop at the first pair of double quotes...
-  let empty-string-fragment = read-fragment(#:string:{""""""});
   assert-equal(#:string:{""""""},
-               source-location-string(fragment-source-location(empty-string-fragment)),
+               source-location-string(fragment-source-location(frag0)),
                "entire empty string consumed");
-
-  verify-mls("simple abc",
-             #:string:{"""abc"""},  "abc");
-  verify-mls("abc with spaces",
-             #:string:{""" abc """},  " abc ");
+  let frag1 = read-fragment(#:string:{"""abc"""});
+  assert-equal("abc", frag1.fragment-value);
+  let frag2 = read-fragment(#:string:{""" abc """});
+  assert-equal(" abc ", frag2.fragment-value);
+  let frag3 = read-fragment(#:string:{"""abc\ndef"""});
+  assert-equal("abc\ndef", frag3.fragment-value);
 end test;
 
-define test string-literal-multi-line-test ()
-  verify-mls("multi-line empty string, no prefix",
-             #:string:{"""
-"""},
-             "");
-  verify-mls("multi-line empty string, with prefix",
-             #:string:{"""
-"""},
-             "");
-  verify-mls("multi-line one blank line, no prefix",
-             #:string:{"""
-
-"""},
-             "\n");
-  verify-mls("leading whitespace relative to end delim retained",
-             #:string:{"""
+define test test-multi-line-string-basics ()
+  let frag1 = read-fragment(#:string:{
+"""
+abc
+"""});
+  assert-equal("abc", frag1.fragment-value);
+
+  let frag2 = read-fragment(#:string:{
+  """
   abc
+  """});
+  assert-equal("abc", frag2.fragment-value);
+
+  let frag3 = read-fragment(#:string:{
+"""
+abc
 def
-"""},
-             "  abc\ndef");
-  verify-mls("end delim to right of start delim",
-             #:string:{"""
-                           abc
-                         def
-                         """},
-             "  abc\ndef");
-  verify-mls("whitespace on first line ignored?", // 0x20 = space
-             #:string:{"""\<20>\<20>
+"""});
+  assert-equal("abc\ndef", frag3.fragment-value);
+
+  let frag4 = read-fragment(#:string:{
+  """
+  abc
+  def
+  """});
+  assert-equal("abc\ndef", frag4.fragment-value);
+
+  let frag5 = read-fragment(#:string:{
+"""
+
+
+
+"""});
+  assert-equal("\n\n\n", frag5.fragment-value);
+end test;
+
+define test test-multi-line-empty-strings ()
+  let frag1 = read-fragment(#:string:{"""
+"""});
+  assert-equal("", frag1.fragment-value, "multi-line empty string, no prefix");
+  let frag2 = read-fragment(#:string:{"""
+                                      """});
+  assert-equal("", frag2.fragment-value, "multi-line empty string, with prefix");
+end test;
+
+define test test-multi-line-string-with-blank-lines ()
+  let frag1 = read-fragment(#:string:{"""
+
+"""});
+  assert-equal("\n", frag1.fragment-value, "blank line, no prefix");
+
+  // Not using #:string: here to avoid having trailing whitespace, which could be removed
+  // by editors.
+  let frag2 = read-fragment("\"\"\"\n  def\n  \n  \"\"\"");
+  assert-equal("def\n\n", frag2.fragment-value, "blank line with prefix");
+end test;
+
+define test test-multi-line-string-whitespace-relative-to-delimiters ()
+  let frag1 = read-fragment(#:string:{"""
   abc
 def
-"""},
-             "  abc\ndef");
-  // The first blank line below is truly empty and the second one has only the prefix
-  // (written as \<20> to avoid editors removing trailing whitespace).
-  verify-mls("blank lines retained",
-             #:string:{"""
-
-   def
-\<20>\<20>\<20>
-   """},
-             "\ndef\n");
+"""});
+  assert-equal("  abc\ndef", frag1.fragment-value);
+  let frag2 = read-fragment(#:string:{"""
+    xxx
+  yyy
+  """});
+  assert-equal("  xxx\nyyy", frag2.fragment-value);
+end test;
+
+define test test-multi-line-string-delimiter-rules ()
+  // Whitespace following the opening """ and preceding the first \n should be ignored.
+  // Since editors sometimes remove trailing whitespace we write this using escape
+  // sequences instead of with #:string:.
+  let frag1 = read-fragment("\"\"\"   \n  abc\n  \"\"\"");
+  assert-equal("abc", frag1.fragment-value);
   assert-signals(<invalid-multi-line-string-literal>,
                  read-fragment(#:string:{"""a  (only whitespace allowed after start delim)
 abc
@@ -305,143 +334,56 @@ xxx"""}),
   xxx  (this line not indented enough)
    """}),
                  "prefix mismatch non-white");
-  // Prefix should be "   " but one line has a literal tab in prefix.
-  /* TODO: the literal tab causes a failure due (I presume) to
-           https://github.com/dylan-lang/opendylan/issues/425
-  check-condition("prefix mismatch whitespace",
-                  <error>,
-                  read-fragment("\"\"\"\n   aaa\n \t bbb\n   \"\"\""));
-  */
+end test;
 
+define test test-multi-line-string-eol-handling ()
   // Check that CRLF and CR are converted to LF.
-  verify-mls("eol canonicalized 1",
-             "\"\"\"\na\r\nc\n\"\"\"",
-             "a\nc");
-  verify-mls("eol canonicalized 2",
-             "\"\"\"\na\rc\n\"\"\"",
-             "a\nc");
-  verify-mls("eol canonicalized 3",
-             "\"\"\"\r\na\n\rc\r\n\"\"\"",
-             "a\n\nc");
+  let frag1 = read-fragment("\"\"\"\na\r\nc\n\"\"\"");
+  assert-equal("a\nc", frag1.fragment-value);
+  let frag2 = read-fragment("\"\"\"\na\rc\n\"\"\"");
+  assert-equal("a\nc", frag2.fragment-value);
+  let frag3 = read-fragment("\"\"\"\r\na\n\rc\r\n\"\"\"");
+  assert-equal("a\n\nc", frag3.fragment-value);
+end test;
 
+define test test-multi-line-string-escaping ()
+  let frag1 = read-fragment(#:string:{"""\a\b\e\f\n\r\t\0\'\"\\"""});
+  // https://github.com/dylan-lang/opendylan/issues/425
+  assert-equal(map-as(<string>, identity,
+                      #('\a', '\b', '\e', '\f', '\n', '\r', '\t', '\0', '\'', '\"', '\\')),
+               frag1.fragment-value,
+               "one of each standard escape sequence");
   let char = curry(as, <character>);
-  verify-mls("all escape sequences",
-             #:string:{"""\a\b\e\f\n\r\t\0\'\"\\"""},
-             map-as(<string>, char,
-                    #('\a', '\b', '\e', '\f', '\n', '\r', '\t', '\0', '\'', '\"', '\\')));
-  verify-mls("basic hex escaping",
-             #:string:{"""z\<9f>z"""},
-             map-as(<string>, char, #('z', #x9f, 'z')));
+  let frag2 = read-fragment(#:string:{"""z\<9f>z"""});
+  assert-equal(map-as(<string>, char, #('z', #x9f, 'z')),
+               frag2.fragment-value,
+               "basic hex escaping");
+
   // We can't handle character codes > 255 yet, but the leading zeros shouldn't
   // confuse the reader.
-  verify-mls("hex escape with leading zeros",
-             #:string:{"""z\<009f>z"""},
-             map-as(<string>, char, #('z', #x9f, 'z')));
+  let frag3 = read-fragment(#:string:{"""z\<009f>z"""});
+  assert-equal(map-as(<string>, char, #('z', #x9f, 'z')),
+               frag3.fragment-value,
+               "hex escape with leading zeros");
 
   assert-signals(<invalid-token>,
                  read-fragment(#:string:{"""\1<b>"""}),
                  "invalid escape sequence");
-
-  verify-mls("one line",
-             #:string:{
-"""
-abc
-"""},
-                  "abc");
-  verify-mls("one line with prefix",
-             #:string:{
-  """
-  abc
-  """},
-             "abc");
-  verify-mls("two lines",
-             #:string:{
-"""
-abc
-def
-"""},
-             "abc\ndef");
-  verify-mls("two lines with prefix",
-             #:string:{
-  """
-  abc
-  def
-  """},
-             "abc\ndef");
-  verify-mls("empty line at start",
-             #:string:{
-"""
-
-abc
-"""},
-             "\nabc");
-  verify-mls("two empty lines at start",
-             #:string:{
-"""
-
-
-abc
-"""},
-             "\n\nabc");
-  verify-mls("one empty line",
-             #:string:{
-"""
-
-"""},
-             "\n");
-  verify-mls("one empty line with prefix",
-             #:string:{
-  """
-\<20>\<20>
-  """},
-             "\n");
-  verify-mls("empty line at end",
-             #:string:{
-"""
-abc
-
-"""},
-             "abc\n");
-  verify-mls("two empty lines at end",
-             #:string:{
-"""
-abc
-
-
-"""},
-                  "abc\n\n");
-  verify-mls("empty lines at start and end",
-             #:string:{
-"""
-
-abc
-
-"""},
-             "\nabc\n");
-  verify-mls("two empty lines",
-             #:string:{
-"""
-
-
-"""},
-             "\n\n");
-  verify-mls("three empty lines",
-             #:string:{
-"""
-
-
-
-"""},
-             "\n\n\n");
-  verify-mls("three empty lines at end",
-             #:string:{
-"""
-abc
-
-
-
-"""},
-             "abc\n\n\n");
+  let frag4 = read-fragment(#:string:{"""
+  \nxxx
+  """});
+  assert-equal("\nxxx", frag4.fragment-value,
+               "newline escape sequence, xxx longer than prefix");
+  let frag5 = read-fragment(#:string:{"""
+  \nxx
+  """});
+  assert-equal("\nxx", frag5.fragment-value,
+               "newline escape sequence, xx same length as prefix");
+  let frag6 = read-fragment(#:string:{"""
+  \nx
+  """});
+  assert-equal("\nx", frag6.fragment-value,
+               "Newline escape sequences, x shorter than prefix");
 end test;
 
 define test string-literal-raw-one-line-test ()
@@ -474,10 +416,9 @@ define test string-literal-raw-multi-line-test ()
   verify-literal(read-fragment(#:string:{#r"""a""c"""}), "a\"\"c", <string-fragment>);
 
   // All escape codes ignored?  \ precedes the terminating double quotes to
-  // ensure that it is ignored. We replace the X after the fact, to avoid
-  // confusing Emacs.
+  // ensure that it is ignored.
   let s = #:string:{#r"""\a\b\e\f\n\r\t\0\'\\\<X"""};
-  s[s.size - 4] := '\\';
+  s[s.size - 4] := '\\';        // to avoid confusing emacs
   verify-literal(read-fragment(s),
                  concatenate(#:string:{\a\b\e\f\n\r\t\0\'\\\<}, "\\"),
                  <string-fragment>);
@@ -498,17 +439,17 @@ define test symbol-literal-test ()
   verify-presentation(sym, #:string:{#"a"});
 
   // Literal Newline accepted and preserved?
-  let sym = read-fragment("#\"\"\"a\nb\"\"\"");
+  let sym = read-fragment("#\"\"\"\na\nb\n\"\"\"");
   verify-literal(sym, #"a\nb", <symbol-syntax-symbol-fragment>);
   verify-presentation(sym, "#\"a\nb\"");
 
   // CRLF -> LF?
-  let sym = read-fragment("#\"\"\"c\r\nd\"\"\"");
+  let sym = read-fragment("#\"\"\"\nc\r\nd\n\"\"\"");
   verify-literal(sym, #"c\nd", <symbol-syntax-symbol-fragment>);
   verify-presentation(sym, "#\"c\nd\"");
 
   // CR -> LF?
-  let sym = read-fragment("#\"\"\"e\rf\"\"\"");
+  let sym = read-fragment("#\"\"\"\ne\rf\n\"\"\"");
   verify-literal(sym, #"e\nf", <symbol-syntax-symbol-fragment>);
   verify-presentation(sym, "#\"e\nf\"");
 end test symbol-literal-test;
@@ -546,12 +487,17 @@ define suite literal-test-suite ()
   test octal-integer-literal-test;
   test pair-literal-test;
   test ratio-literal-test;
-  test string-literal-multi-line-test;
-  test string-literal-one-line-test;
   test string-literal-raw-multi-line-test;
   test string-literal-raw-one-line-test;
   test string-literal-test;
   test symbol-literal-test;
   test vector-literal-test;
   test hash-literal-test;
+  test test-multi-line-string-eol-handling;
+  test test-multi-line-string-basics;
+  test test-multi-line-string-escaping;
+  test test-multi-line-string-delimiter-rules;
+  test test-multi-line-string-whitespace-relative-to-delimiters;
+  test test-multi-line-string-with-blank-lines;
+  test test-multi-line-empty-strings;
 end suite literal-test-suite;