Skip to content

Commit

Permalink
improve robustness in .NET part for parsing of imports from Elm module
Browse files Browse the repository at this point in the history
  • Loading branch information
Viir committed Jan 31, 2025
1 parent cdf9c4b commit 456a765
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 21 deletions.
65 changes: 46 additions & 19 deletions implement/pine/ElmSyntax/ElmModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -163,21 +163,7 @@ public static Result<string, IReadOnlyList<string>> ParseModuleName(ReadOnlyMemo

public static Result<string, IReadOnlyList<string>> ParseModuleName(string moduleText)
{
// This pattern removes all leading:
// - whitespace (\s)
// - single-line comments (--... up to end of line)
// - multi-line comments ({- ... -}), which can span multiple lines
// The * at the end repeats that pattern until it no longer matches.
// Using RegexOptions.Singleline so '.' can match across newlines within {- -}.
var textWithoutLeadingComments = Regex.Replace(
moduleText,
pattern: @"\A(?:
\s+ # skip any whitespace
| --[^\r\n]*(?:\r\n|\r|\n|$) # skip single-line comment + EOL
| \{\-[\s\S]*?\-\} # skip multi-line comment
)*",
replacement: "",
options: RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
var textWithoutLeadingComments = RemoveLeadingTrivia(moduleText);

{
// Match: optional `port `, then `module`, then a dotted identifier, then `exposing`.
Expand Down Expand Up @@ -215,17 +201,58 @@ public static Result<string, IReadOnlyList<string>> ParseModuleName(string modul

/// <summary>
/// Enumerates the module names referenced by <c>import</c> declarations in the given Elm module text.
/// Lines that begin inside a triple-quoted (<c>"""</c>) string literal are not considered.
/// </summary>
/// <param name="moduleText">Text of the Elm module. Leading whitespace and comments are stripped first.</param>
/// <returns>
/// One item per import found, in order of appearance. Each item is the imported module name
/// split at the dots, for example <c>["Json", "Decode"]</c> for <c>import Json.Decode</c>.
/// </returns>
public static IEnumerable<IReadOnlyList<string>> ParseModuleImportedModulesNames(string moduleText)
{
    var textWithoutLeadingComments = RemoveLeadingTrivia(moduleText);

    var inTripleQuotedString = false;

    // NOTE(review): imports inside a {- ... -} block comment that appears after the module header
    // are not filtered out here; presumably ModuleLines or a later stage covers that - confirm.
    foreach (var line in ModuleLines(textWithoutLeadingComments))
    {
        // The import pattern is anchored at the start of the line, so what matters is whether
        // the line *starts* inside a string, not the state after the delimiters on this line.
        // Otherwise string content such as `import Foo """` would be reported as an import.
        var lineStartsInsideString = inTripleQuotedString;

        // Every """ on the line flips between outside and inside of a triple-quoted string.
        var searchStart = 0;

        while (true)
        {
            var index = line.IndexOf("\"\"\"", searchStart, StringComparison.Ordinal);

            if (index < 0)
            {
                break;
            }

            inTripleQuotedString = !inTripleQuotedString;
            searchStart = index + 3;
        }

        if (lineStartsInsideString)
        {
            continue;
        }

        var match = Regex.Match(line, @"^import\s+([\w.]+)(\s|$)");

        if (match.Success)
        {
            yield return match.Groups[1].Value.Split('.');
        }
    }
}

/// <summary>
/// Removes everything in front of the first significant token of an Elm module text:
/// whitespace, single-line comments (<c>-- ...</c>) and multi-line comments (<c>{- ... -}</c>).
/// </summary>
/// <param name="moduleText">Text of the Elm module.</param>
/// <returns>
/// The module text starting at the first character that is neither whitespace nor part of a
/// leading comment. An unterminated multi-line comment is left in place.
/// </returns>
public static string RemoveLeadingTrivia(string moduleText)
{
    // Elm multi-line comments nest, so the closing -} has to be paired with its opener.
    // A .NET balancing group (the 'depth' capture stack) counts the nested {- ... -} pairs.
    // The comment body is wrapped in an atomic group so an unterminated comment fails fast.
    // The * at the end repeats the alternatives until none of them matches any more.
    return Regex.Replace(
        moduleText,
        pattern: @"\A(?:
            \s+                               # skip any whitespace
          | --[^\r\n]*(?:\r\n|\r|\n|$)        # skip single-line comment + EOL
          | \{-                               # opener of a multi-line comment
            (?>
              (?:
                  \{- (?<depth>)              # nested opener: push
                | -\} (?<-depth>)             # nested closer: pop, only possible while nested
                | (?!\{-|-\}) [\s\S]          # any other character, including newlines
              )*
            )
            -\}                               # closer of the outermost comment
        )*",
        replacement: "",
        options: RegexOptions.IgnorePatternWhitespace);
}

public static IEnumerable<string> ModuleLines(this string moduleText)
{
int lastLineStartIndex = 0;
Expand Down
46 changes: 44 additions & 2 deletions implement/test-elm-time/ElmSyntaxTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ public void Parse_Elm_module_text_imports()
moduleText = @"module TestModule exposing (..)",
expectedImports = System.Array.Empty<IReadOnlyList<string>>()
},

new
{
moduleText = """
Expand All @@ -162,6 +163,7 @@ import Dict
import List exposing ((::))
import Maybe exposing (Maybe(..))
""",

expectedImports = new IReadOnlyList<string>[]
{
["Basics"],
Expand All @@ -170,16 +172,56 @@ import Maybe exposing (Maybe(..))
["Maybe"]
}
},

new
{
moduleText = """"
module TestModule exposing (..)
import Dict
d = """
import List exposing ((::))
import Maybe exposing (Maybe(..))
"""
"""",

expectedImports = new IReadOnlyList<string>[]
{
["Dict"],
}
},

new
{
moduleText = """"
module TestModule exposing (..)
import Dict
{-
import List exposing ((::))
import Maybe exposing (Maybe(..))
-}
"""",

expectedImports = new IReadOnlyList<string>[]
{
["Dict"],
}
},
};

foreach (var testCase in testCases)
{
var actualImports =
var parsedImports =
ElmModule.ParseModuleImportedModulesNames(testCase.moduleText)
.ToImmutableHashSet(EnumerableExtension.EqualityComparer<IReadOnlyList<string>>());

Assert.IsTrue(
actualImports.SequenceEqual(
parsedImports.SequenceEqual(
testCase.expectedImports,
EnumerableExtension.EqualityComparer<IReadOnlyList<string>>()));
}
Expand Down

0 comments on commit 456a765

Please sign in to comment.