From ba12eea240e4d9b81746ad83db6f11b4f6303515 Mon Sep 17 00:00:00 2001
From: Silvan Mosberger <silvan.mosberger@tweag.io>
Date: Wed, 26 Jun 2024 21:36:43 +0200
Subject: [PATCH 1/2] Ensure that files in tests/correct are formatted

---
 test/correct/dollars-before-interpolation.nix   |  6 ++++--
 test/correct/paths-with-interpolations.nix      |  3 ++-
 test/correct/quotes-in-inherit-2.nix            | 10 +++++++++-
 .../operator-after-operator/in.nix}             |  0
 test/diff/operator-after-operator/out.nix       |  2 ++
 test/test.sh                                    | 17 +++++++++++++----
 6 files changed, 30 insertions(+), 8 deletions(-)
 rename test/{correct/operator-after-operator.nix => diff/operator-after-operator/in.nix} (100%)
 create mode 100644 test/diff/operator-after-operator/out.nix

diff --git a/test/correct/dollars-before-interpolation.nix b/test/correct/dollars-before-interpolation.nix
index aaf25409..6e10ef54 100644
--- a/test/correct/dollars-before-interpolation.nix
+++ b/test/correct/dollars-before-interpolation.nix
@@ -1,6 +1,8 @@
-let bar = "bar";
+let
+  bar = "bar";
 
-in [
+in
+[
   "$$\${bar}"
   "$\$${bar}"
   ''
diff --git a/test/correct/paths-with-interpolations.nix b/test/correct/paths-with-interpolations.nix
index 76355bfb..e09d344f 100644
--- a/test/correct/paths-with-interpolations.nix
+++ b/test/correct/paths-with-interpolations.nix
@@ -2,7 +2,8 @@ let
   foo = "foo";
   bar = "bar";
 
-in [
+in
+[
   /${foo}
   ./${foo}
   /foo/${bar}
diff --git a/test/correct/quotes-in-inherit-2.nix b/test/correct/quotes-in-inherit-2.nix
index c25efa8c..fb0b9ca8 100644
--- a/test/correct/quotes-in-inherit-2.nix
+++ b/test/correct/quotes-in-inherit-2.nix
@@ -4,4 +4,12 @@ let
   ${"baz"} = 3;
   ${"in"} = 4;
 
-in { inherit ${"foo"} bar "baz" "in"; }
+in
+{
+  inherit
+    ${"foo"}
+    bar
+    "baz"
+    "in"
+    ;
+}
diff --git a/test/correct/operator-after-operator.nix b/test/diff/operator-after-operator/in.nix
similarity index 100%
rename from test/correct/operator-after-operator.nix
rename to test/diff/operator-after-operator/in.nix
diff --git a/test/diff/operator-after-operator/out.nix b/test/diff/operator-after-operator/out.nix
new file mode 100644
index 00000000..34bf728a
--- /dev/null
+++ b/test/diff/operator-after-operator/out.nix
@@ -0,0 +1,2 @@
+# https://github.com/NixOS/nixfmt/issues/122
+(1 + 1) (1 + 0.4)
diff --git a/test/test.sh b/test/test.sh
index 9cde3cbe..4a28ed19 100755
--- a/test/test.sh
+++ b/test/test.sh
@@ -23,13 +23,22 @@ fi
 # Do a test run to make sure it compiles fine
 nixfmt --version
 
-# Verify "correct"
+# Verify "correct", files that don't change when formatted
 for file in test/correct/*.nix; do
-  if ! nixfmt --verify < "$file" > /dev/null; then
-    echo "[ERROR] $file failed nixfmt verification"
+  echo "Checking $file …"
+  if ! out=$(nixfmt --verify < "$file"); then
+    echo "[ERROR] failed nixfmt verification"
     exit 1
+  fi
+
+  if diff --color=always --unified "$file" <(echo "$out"); then
+    echo "[OK]"
+  elif [[ $* == *--update-diff* ]]; then
+    echo "$out" > "$file"
+    echo "[UPDATED] $file"
   else
-    echo "[OK] $file"
+    echo "[ERROR] Formatting not stable (run with --update-diff to update the diff)"
+    exit 1
   fi
 done
 

From ea74366f0170ff30daa5ef7c3fcf9049c425217e Mon Sep 17 00:00:00 2001
From: Silvan Mosberger <silvan.mosberger@tweag.io>
Date: Wed, 26 Jun 2024 20:39:30 +0200
Subject: [PATCH 2/2] Fully preserve indented strings and match Nix parser

This should make sure that:
- No sequences of characters get modified during parsing,
  indented strings are left exactly as is
- We won't have any parsing bugs

Also added a ton of docs to explain how it works and more tests
---
 src/Nixfmt/Parser.hs             | 55 +++++++++++++++++++++++++-------
 test/correct/indented-string.nix | 14 ++++++++
 test/correct/regression-197.nix  |  3 ++
 3 files changed, 61 insertions(+), 11 deletions(-)
 create mode 100644 test/correct/indented-string.nix
 create mode 100644 test/correct/regression-197.nix

diff --git a/src/Nixfmt/Parser.hs b/src/Nixfmt/Parser.hs
index 9dbcae53..f424876f 100644
--- a/src/Nixfmt/Parser.hs
+++ b/src/Nixfmt/Parser.hs
@@ -193,17 +193,50 @@ indentedStringPart :: Parser StringPart
 indentedStringPart =
   TextPart
     <$> someText
-      ( chunk "''\\n"
-          <|> chunk "''\\r"
-          <|> chunk "''\\t"
-          <|> chunk "''\\"
-          *> (Text.singleton <$> anySingle)
-            <|> chunk "''$"
-            <|> chunk "'''"
-            <|> chunk "$$"
-            <|> try (chunk "$" <* notFollowedBy (char '{'))
-            <|> try (chunk "'" <* notFollowedBy (char '\''))
-            <|> someP (\t -> t /= '\'' && t /= '$' && t /= '\n')
+      ( {-
+          This should match https://github.com/NixOS/nix/blob/052f1320ddf72d617e337479ff1bf22cb4ee682a/src/libexpr/lexer.l#L182-L205
+          To understand that file, [this](https://westes.github.io/flex/manual/Matching.html#Matching) is important:
+          > If it finds more than one match, it takes the one matching the most text.
+          > If it finds two or more matches of the same length, the rule listed first in the flex input file is chosen.
+
+          There are some inconsequential differences though:
+          - We only parse one line at a time here, so we make sure to not process any newlines
+          - We don't want to transform the input in any way, e.g. don't turn ''' into ''
+            This is to preserve the input string as-is
+        -}
+
+        -- <IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+
+        -- While this rule doesn't have a fixed length, it's non-conflicting with the rules below,
+        -- so we can do it first without worrying about the length matching
+        someText
+          ( Text.singleton
+              <$> satisfy (\t -> t /= '$' && t /= '\'' && t /= '\n')
+                <|> try (Text.snoc <$> chunk "$" <*> satisfy (\t -> t /= '{' && t /= '\'' && t /= '\n'))
+                <|> try (Text.snoc <$> chunk "'" <*> satisfy (\t -> t /= '\'' && t /= '$' && t /= '\n'))
+          )
+          -- These rules are of length 3, they need to come before shorter ones
+          -- <IND_STRING>\'\'\$
+          <|> chunk "''$"
+          -- <IND_STRING>\'\'\'
+          <|> chunk "'''"
+          -- <IND_STRING>\'\'\\{ANY} -> Note that ANY can match newlines as well, but we need to ignore those
+          <|> do
+            prefix <- chunk "''\\"
+            -- If we do have a newline
+            rest <-
+              -- If there's no newline, take the next character
+              (notFollowedBy (char '\n') *> (Text.singleton <$> anySingle))
+                -- Otherwise there's an unconsumed newline, which we don't need to handle,
+                -- it's consumed elsewhere
+                <|> pure ""
+            pure $ prefix <> rest
+          -- These are rules with length 2 and 1
+          -- <IND_STRING>\$\{ -> don't match, this will be an interpolation
+          -- <IND_STRING>\$ -> do match, just a dollar
+          <|> try (chunk "$" <* notFollowedBy (char '{'))
+          -- <IND_STRING>\'\' -> don't match, indented string ends
+          -- <IND_STRING>\' -> do match, just a quote
+          <|> try (chunk "'" <* notFollowedBy (char '\''))
       )
 
 indentedLine :: Parser [StringPart]
diff --git a/test/correct/indented-string.nix b/test/correct/indented-string.nix
new file mode 100644
index 00000000..0e7eb5a7
--- /dev/null
+++ b/test/correct/indented-string.nix
@@ -0,0 +1,14 @@
+# These come from previous parser bugs in Nix
+[
+  ''
+    ''\a
+    ''\
+
+    '${true}'
+
+    $'\t'
+  ''
+
+  ''ending dollar $''
+  ''$''
+]
diff --git a/test/correct/regression-197.nix b/test/correct/regression-197.nix
new file mode 100644
index 00000000..66666b89
--- /dev/null
+++ b/test/correct/regression-197.nix
@@ -0,0 +1,3 @@
+''
+  ''\'''${A}
+''