From d6ec6f554dca2b0f33951cab457454d180bbdc33 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Wed, 26 Jun 2024 20:39:30 +0200 Subject: [PATCH] Fully preserve indented strings and match Nix parser This should make sure that: - No sequences of characters get modified during parsing, indented strings are left exactly as is - We won't have any parsing bugs Also added a ton of docs to explain how it works and more tests --- src/Nixfmt/Parser.hs | 55 ++++++++++++++++++++++++++------- test/stable/indented-string.nix | 14 +++++++++ test/stable/regression-197.nix | 3 ++ test/test.sh | 19 ++++++++++++ 4 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 test/stable/indented-string.nix create mode 100644 test/stable/regression-197.nix diff --git a/src/Nixfmt/Parser.hs b/src/Nixfmt/Parser.hs index 9dbcae53..eb0c1c0e 100644 --- a/src/Nixfmt/Parser.hs +++ b/src/Nixfmt/Parser.hs @@ -193,17 +193,50 @@ indentedStringPart :: Parser StringPart indentedStringPart = TextPart <$> someText - ( chunk "''\\n" - <|> chunk "''\\r" - <|> chunk "''\\t" - <|> chunk "''\\" - *> (Text.singleton <$> anySingle) - <|> chunk "''$" - <|> chunk "'''" - <|> chunk "$$" - <|> try (chunk "$" <* notFollowedBy (char '{')) - <|> try (chunk "'" <* notFollowedBy (char '\'')) - <|> someP (\t -> t /= '\'' && t /= '$' && t /= '\n') + ( + {- + This should match https://github.com/NixOS/nix/blob/052f1320ddf72d617e337479ff1bf22cb4ee682a/src/libexpr/lexer.l#L182-L205 + To understand that file, [this](https://westes.github.io/flex/manual/Matching.html#Matching) is important: + > If it finds more than one match, it takes the one matching the most text. + > If it finds two or more matches of the same length, the rule listed first in the flex input file is chosen. + + There are some inconsequential differences though: + - We only parse one line at a time here, so we make sure to not process any newlines + - We don't want to transform the input in any way, e.g. don't turn ''' into '' + This is to preserve the input string as-is + -} + + -- ([^\$\']|\$[^\{\']|\'[^\'\$])+ + -- While this rule doesn't have a fixed length, it's non-conflicting with the rules below, + -- so we can do it first without worrying about the length matching + someText ( + Text.singleton <$> satisfy (\t -> t /= '$' && t /= '\'' && t /= '\n') + <|> try (Text.snoc <$> chunk "$" <*> satisfy (\t -> t /= '{' && t /= '\'' && t /= '\n')) + <|> try (Text.snoc <$> chunk "'" <*> satisfy (\t -> t /= '\'' && t /= '$' && t /= '\n')) + ) + -- These rules are of length 3, they need to come before shorter ones + -- \'\'\$ + <|> chunk "''$" + -- \'\'\' + <|> chunk "'''" + -- \'\'\\{ANY} -> Note that ANY can match newlines as well, but we need to ignore those + <|> do + prefix <- chunk "''\\" + -- If we do have a newline + rest <- + -- If there's no newline, take the next character + (notFollowedBy (char '\n') *> (Text.singleton <$> anySingle)) + -- Otherwise there's an unconsumed newline, which we don't need to handle, + -- it's consumed elsewhere + <|> pure "" + pure $ prefix <> rest + -- These are rules with length 2 and 1 + -- \$\{ -> don't match, this will be an interpolation + -- \$ -> do match, just a dollar + <|> try (chunk "$" <* notFollowedBy (char '{')) + -- \'\' -> don't match, indented string ends + -- \' -> do match, just a quote + <|> try (chunk "'" <* notFollowedBy (char '\'')) ) indentedLine :: Parser [StringPart] diff --git a/test/stable/indented-string.nix b/test/stable/indented-string.nix new file mode 100644 index 00000000..0e7eb5a7 --- /dev/null +++ b/test/stable/indented-string.nix @@ -0,0 +1,14 @@ +# These come from previous parser bugs in Nix +[ + '' + ''\a + ''\ + + '${true}' + + $'\t' + '' + + ''ending dollar $'' + ''$'' +] diff --git a/test/stable/regression-197.nix b/test/stable/regression-197.nix new file mode 100644 index 00000000..66666b89 --- /dev/null +++ b/test/stable/regression-197.nix @@ -0,0 +1,3 @@ +'' + ''\'''${A} +'' diff --git a/test/test.sh b/test/test.sh index 9cde3cbe..cae0ed90 100755 --- a/test/test.sh +++ b/test/test.sh @@ -60,3 +60,22 @@ for file in test/diff/**/in.nix; do exit 1 fi done + +# Verify "stable", files that don't change when formatted +for file in test/stable/*.nix; do + echo "Checking $file …" + if ! out=$(nixfmt --verify < "$file"); then + echo "[ERROR] failed nixfmt verification" + exit 1 + fi + + if diff --color=always --unified "$file" <(echo "$out"); then + echo "[OK]" + elif [[ $* == *--update-diff* ]]; then + echo "$out" > "$file" + echo "[UPDATED] $file" + else + echo "[ERROR] Formatting not stable (run with --update-diff to update the diff)" + exit 1 + fi +done