Skip to content

Commit

Permalink
Handle regex {} quantifiers correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
sblom committed Dec 21, 2023
1 parent fb646d9 commit 18f8b62
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 10 deletions.
82 changes: 80 additions & 2 deletions RegExtract.Test/Usage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Text.RegularExpressions;
using System.Collections.Generic;
using Xunit.Abstractions;
using RegExtract.RegexTools;

namespace RegExtract.Test
{
Expand Down Expand Up @@ -142,6 +143,83 @@ public void a011()
Assert.Equivalent((('&', "kx"), new List<string>() { "zs", "br", "jd", "bj", "vg"}), result);
}

/// <summary>
/// Common base record for the <see cref="Conditional"/> and <see cref="Absolute"/> rule kinds.
/// </summary>
record Rule()
{
    /// <summary>
    /// Builds a rule from its textual form.
    /// </summary>
    /// <param name="str">Rule text; the presence of a ':' selects the conditional form.</param>
    /// <returns>
    /// The result of extracting <paramref name="str"/> as a <see cref="Conditional"/> when it
    /// contains ':', otherwise the result of extracting it as an <see cref="Absolute"/>.
    /// </returns>
    // NOTE(review): presumably RegExtract discovers this static Parse by convention when it
    // needs to populate a Rule-typed member — confirm against the library.
    public static Rule? Parse(string str)
    {
        bool hasCondition = str.Contains(":");

        if (!hasCondition)
        {
            return str.Extract<Absolute>();
        }

        return str.Extract<Conditional>();
    }
}

/// <summary>
/// A rule that carries only an action (<c>step</c>) and no condition.
/// <see cref="Rule.Parse"/> selects this type for text that contains no ':'.
/// </summary>
record Absolute(Action step) : Rule
{
// One capture group spanning the whole input.
// NOTE(review): presumably RegExtract looks this constant up by name and feeds the
// captured text to `step` — confirm against the library before renaming it.
public const string REGEXTRACT_REGEX_PATTERN = @"(.*)";
}
/// <summary>
/// A rule made of a condition (<c>cond</c>) and the action (<c>step</c>) paired with it.
/// <see cref="Rule.Parse"/> selects this type for text that contains a ':'.
/// </summary>
record Conditional(Condition cond, Action step) : Rule
{
// Shape: an outer group wrapping three inner groups (any single char, '<' or '>', digits),
// then a literal ':' and a word group.
// NOTE(review): presumably the outer group and its three children populate `cond`
// (prop, test, val) and the trailing word group populates `step` — confirm against RegExtract.
public const string REGEXTRACT_REGEX_PATTERN = @"((.)([<>])(\d+)):(\w+)";
}
/// <summary>
/// The comparison part of a <see cref="Conditional"/>: a one-character property name,
/// a one-character test, and an integer value.
/// </summary>
// NOTE(review): with Conditional's pattern the test character can only be '<' or '>' — confirm
// no other producer constructs this record with a different operator.
record Condition(char prop, char test, int val);

/// <summary>
/// Base record for the outcome attached to a rule: <see cref="Accept"/>,
/// <see cref="Reject"/>, or a named <see cref="Workflow"/>.
/// </summary>
record Action
{
    /// <summary>
    /// Maps action text to its record.
    /// </summary>
    /// <param name="str">Action text.</param>
    /// <returns>
    /// <see cref="Accept"/> for exactly "A", <see cref="Reject"/> for exactly "R",
    /// and a <see cref="Workflow"/> wrapping <paramref name="str"/> for anything else.
    /// </returns>
    public static Action Parse(string str)
    {
        if (str == "A")
        {
            return new Accept();
        }

        if (str == "R")
        {
            return new Reject();
        }

        // Every other value is treated as the name of a workflow.
        return new Workflow(str);
    }
}
/// <summary>Action that <see cref="Action.Parse"/> returns for the text "A".</summary>
record Accept : Action;
/// <summary>Action that <see cref="Action.Parse"/> returns for the text "R".</summary>
record Reject : Action;
/// <summary>
/// Action that <see cref="Action.Parse"/> returns for any text other than "A" or "R";
/// <c>workflow</c> holds that text unchanged.
/// </summary>
record Workflow(string workflow) : Action;

/// <summary>
/// Extracts a workflow name and its list of rules from a single line whose pattern
/// contains '{' and '}' that do not form an {n,m} quantifier.
/// </summary>
[Fact]
public void a012()
{
    var plan = CreateAndLogPlan<(string workflow, List<Rule> rules)>(/* language=regex */@"(\w+){(([^,]+),?)+}");

    var result = plan.Extract("sxc{x>2414:jtp,s>954:R,m>2406:A,xfz}");

    // Three conditional rules followed by the unconditional fallback, in input order.
    var expectedRules = new List<Rule>
    {
        new Conditional(new Condition('x', '>', 2414), new Workflow("jtp")),
        new Conditional(new Condition('s', '>', 954), new Reject()),
        new Conditional(new Condition('m', '>', 2406), new Accept()),
        new Absolute(new Workflow("xfz")),
    };

    Assert.Equivalent(("sxc", expectedRules), result);
}

/// <summary>
/// Builds a capture-group tree for a series of patterns that place '{' after a group
/// (well-formed quantifiers, malformed ones, and escaped variants) and writes each
/// tree's visualization to the test output. Makes no assertions.
/// </summary>
[Fact]
public void a013()
{
    // Same patterns, in the same order, as the original hand-unrolled sequence.
    string[] patterns =
    {
        "(asdf){}",
        @"(asdf){01",
        @"(asdf){01}",
        @"(asdf){01,}",
        @"(asdf){1,2}",
        @"(asdf){1,2}?",
        @"(asdf){,12}",
        @"(asdf){1\,2}",
        @"(asdf){\1,2}",
        @"(asdf)\{1}",
        @"(asdf){1\}",
        @"(asdf){,}",
    };

    foreach (var pattern in patterns)
    {
        var tree = new RegexCaptureGroupTree(new Regex(pattern));
        output.WriteLine(tree.TreeViz());
    }
}

[Fact]
public void slow()
{
Expand Down Expand Up @@ -513,8 +591,8 @@ public void debug()
//var result = plan.Execute(Regex.Match(data, @"^(.+) bags contain(?: (no other bags)\.| ((\d+) (.*?)) bags?[,.])+$"));

Regex rx;

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used

Check warning on line 593 in RegExtract.Test/Usage.cs

View workflow job for this annotation

GitHub Actions / build

The variable 'rx' is declared but never used
var plan = ExtractionPlan<bagdescription>.CreatePlan(rx = new Regex(@"^(?<name>.+) bags contain(?: (?<none>no other bags)\.| (?<contents>(\d+) (.*?)) bags?[,.])+$"));
var result = plan.Extract(rx.Match("faded yellow bags contain 4 mirrored fuchsia bags, 4 dotted indigo bags, 3 faded orange bags, 5 plaid crimson bags."));
var plan = CreateAndLogPlan<bagdescription>(/* language=regex */@"^(?<name>.+) bags contain(?: (?<none>no other bags)\.| (?<contents>(\d+) (.*?)) bags?[,.])+$");
var result = plan.Extract("faded yellow bags contain 4 mirrored fuchsia bags, 4 dotted indigo bags, 3 faded orange bags, 5 plaid crimson bags.");

Assert.Equivalent(new bagdescription { name = "faded yellow", contents = new List<includedbags> { new includedbags(4, "mirrored fuchsia"), new includedbags(4, "dotted indigo"), new includedbags(3, "faded orange"), new includedbags(5, "plaid crimson") } }, result);
}
Expand Down
41 changes: 33 additions & 8 deletions RegExtract/RegexCaptureGroupTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,17 +157,42 @@ private RegexCaptureGroupNode BuildCaptureGroupTree(ref int loc, ref int num, in
else if (_regexString[loc + 1] is '{')
{
var startloc = loc;
// TODO: The actual regex grammar will bail out of this if the quantifier doesn't actually parse.
while (loc < _regexString.Length && _regexString[loc] is not '}')
var quantifierState = 0;
loc ++;
while (loc < _regexString.Length - 1 && quantifierState >= 0)
{
loc++;
switch (quantifierState, _regexString[loc])
{
case (0, >= '0' and <= '9'):
quantifierState = 1;
break;
case (1 or 2, >= '0' and <= '9'):
break;
case (1, ','):
quantifierState = 2;
break;
case (1 or 2, '}'):
quantifierState = 3;
break;
default:
quantifierState = -1;
break;
}

if (quantifierState == 3)
{
if (loc < _regexString.Length - 1 && _regexString[loc + 1] is '?')
loc++;

break;
}
else if (loc >= _regexString.Length - 1 || quantifierState == -1)
{
loc = startloc;
break;
}
}
if (loc >= _regexString.Length || _regexString[loc] is not '}')
{
loc = startloc;
}
else if (loc + 1 < _regexString.Length && _regexString[loc + 1] is '?')
loc++;
}
}
return new RegexCaptureGroupNode(myname, children.ToArray(), ((start, loc - start + 1),_regexString.Substring(start, loc - start + 1)));
Expand Down

0 comments on commit 18f8b62

Please sign in to comment.