Skip to content

Commit

Permalink
Add capture group cache for Range calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
sblom committed Dec 17, 2023
1 parent bdbc208 commit 2c9db75
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 28 deletions.
14 changes: 12 additions & 2 deletions RegExtract.Test/Usage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,10 @@ public void can_extract_capture_collections_to_lists()
public void can_extract_single_item()
{
var output = "asdf".Extract<string>("(.*)");
Assert.Equal(output, "asdf");
Assert.Equal("asdf", output);

var n = "2023".Extract<int>(@"(\d+)");
Assert.Equal(n, 2023);
Assert.Equal(2023, n);
}

[Fact]
Expand Down Expand Up @@ -395,11 +395,21 @@ record bounds(int lo, int hi);
[Fact]
public void nested_extraction()
{
    // Expects the nested "(\d+)-(\d+)" group to come back as a bounds record,
    // followed by the single character and the trailing text.
    // TODO: Why is this so much slower than nested_extraction_control?
    const string input = "2-12 c: abcdefg";
    const string pattern = @"((\d+)-(\d+)) (.): (.*)";
    var expected = (new bounds(2, 12), 'c', "abcdefg");

    (bounds, char, string) actual = input.Extract<(bounds, char, string)>(pattern);

    Assert.Equal(expected, actual);
}

[Fact]
public void nested_extraction_control()
{
    // Same input and pattern as nested_extraction, but the nested group is
    // extracted into a plain value tuple instead of the bounds record.
    const string input = "2-12 c: abcdefg";
    const string pattern = @"((\d+)-(\d+)) (.): (.*)";
    var expected = ((2, 12), 'c', "abcdefg");

    ((int lo, int hi), char ch, string str) actual =
        input.Extract<((int lo, int hi), char ch, string str)>(pattern);

    Assert.Equal(expected, actual);
}


[Fact]
public void nested_extraction_of_list()
{
Expand Down
27 changes: 18 additions & 9 deletions RegExtract/ExtractionPlanNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -145,20 +145,27 @@ internal virtual void Validate()
return;
}

internal virtual object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
internal virtual object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
throw new InvalidOperationException("Can't construct a node based on base ExtractionPlanNode type.");
}

protected IEnumerable<(string Value, int Index, int Length)> Ranges(Match match, string groupName, int captureStart, int captureLength) => AsEnumerable(match.Groups[groupName].Captures)
.Where(cap => cap.Index >= captureStart && cap.Index + cap.Length <= captureStart + captureLength)
.Select(cap => (cap.Value, cap.Index, cap.Length));
/// <summary>
/// Returns the captures of <paramref name="groupName"/> that lie entirely inside the
/// window starting at <paramref name="captureStart"/> and spanning
/// <paramref name="captureLength"/> characters.
/// </summary>
/// <param name="match">The match whose group captures are inspected.</param>
/// <param name="groupName">Name of the regex group; also the key used in <paramref name="cache"/>.</param>
/// <param name="captureStart">Inclusive start index of the enclosing window.</param>
/// <param name="captureLength">Length of the enclosing window.</param>
/// <param name="cache">
/// Per-group store of every capture as a (Value, Index, Length) tuple. An entry is added
/// the first time a group is requested, so later calls for the same group reuse the array
/// instead of walking the capture collection again.
/// </param>
/// <returns>A lazily filtered sequence over the cached captures of the group.</returns>
protected IEnumerable<(string Value, int Index, int Length)> Ranges(Match match, string groupName, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
    // Single lookup: TryGetValue replaces the ContainsKey check plus repeated indexer reads.
    if (!cache.TryGetValue(groupName, out var captures))
    {
        captures = AsEnumerable(match.Groups[groupName].Captures)
            .Select(cap => (cap.Value, cap.Index, cap.Length))
            .ToArray();
        cache[groupName] = captures;
    }

    // The window end does not change per capture, so compute it once outside the predicate.
    int captureEnd = captureStart + captureLength;

    return captures.Where(cap => cap.Index >= captureStart && cap.Index + cap.Length <= captureEnd);
}

internal virtual object? Execute(Match match, int captureStart, int captureLength)
internal virtual object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
object? result = null;

var ranges = Ranges(match, groupName, captureStart, captureLength);
var ranges = Ranges(match, groupName, captureStart, captureLength, cache).ToArray();

Type innerType = IsNullable(type) ? type.GetGenericArguments().Single() : type;

Expand All @@ -171,13 +178,13 @@ internal virtual void Validate()
{
var lastRange = ranges.Last();

result = Construct(match, innerType, lastRange);
result = Construct(match, innerType, lastRange, cache);

if (result is not null)
{
foreach (var prop in propertyNodes)
{
result.GetType().GetProperty(prop.groupName).GetSetMethod().Invoke(result, new[] { prop.Execute(match, lastRange.Index, lastRange.Length) });
result.GetType().GetProperty(prop.groupName).GetSetMethod().Invoke(result, new[] { prop.Execute(match, lastRange.Index, lastRange.Length, cache) });
}
}
}
Expand All @@ -192,7 +199,9 @@ internal virtual void Validate()
throw new ArgumentException("Regex didn't match.");
}

return Execute(match, match.Groups[groupName].Index, match.Groups[groupName].Length);
Dictionary<string, (string Value, int Index, int Length)[]> cache = new();

return Execute(match, match.Groups[groupName].Index, match.Groups[groupName].Length, cache);
}

protected const string VALUETUPLE_TYPENAME = "System.ValueTuple`";
Expand Down
47 changes: 30 additions & 17 deletions RegExtract/ExtractionPlanNodeTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace RegExtract.ExtractionPlanNodeTypes
internal record UninitializedNode() :
ExtractionPlanNode("", typeof(void), new ExtractionPlanNode[0], new ExtractionPlanNode[0])
{
internal override object? Execute(Match match, int captureStart, int captureLength)
internal override object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
throw new InvalidOperationException("Extraction plan was not initialized before execution.");
}
Expand All @@ -19,9 +19,9 @@ internal record UninitializedNode() :
internal record VirtualUnaryTupleNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) :
ExtractionPlanNode(groupName, type, constructorParams, propertySetters)
{
internal override object? Execute(Match match, int captureStart, int captureLength)
internal override object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
return constructorParams.Single().Execute(match, captureStart, captureLength);
return constructorParams.Single().Execute(match, captureStart, captureLength, cache);
}

internal override void Validate()
Expand All @@ -33,7 +33,7 @@ internal override void Validate()
internal record CollectionInitializerNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) :
ExtractionPlanNode(groupName, type, constructorParams, propertySetters)
{
internal override object? Execute(Match match, int captureStart, int captureLength)
internal override object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
var genericArgs = type.GetGenericArguments();

Expand All @@ -43,15 +43,15 @@ internal record CollectionInitializerNode(string groupName, Type type, Extractio

object?[] itemVals = new object[genericArgs.Length];

var rangeArray = constructorParams.Select(c => Ranges(match, groupName, captureStart, captureLength).GetEnumerator()).ToArray();
var rangeArray = constructorParams.Select(c => Ranges(match, groupName, captureStart, captureLength, cache).GetEnumerator()).ToArray();

do
{
for (int i = 0; i < genericArgs.Length; i++)
{
if (rangeArray[i].MoveNext())
{
itemVals[i] = constructorParams[i].Execute(match, rangeArray[i].Current.Index, rangeArray[i].Current.Length);
itemVals[i] = constructorParams[i].Execute(match, rangeArray[i].Current.Index, rangeArray[i].Current.Length, cache);
}
else
{
Expand All @@ -75,12 +75,27 @@ internal record CollectionInitializerNode(string groupName, Type type, Extractio
internal record ConstructTupleNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) :
ExtractionPlanNode(groupName, type, constructorParams, propertySetters)
{
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
// Backing store for the lazily resolved tuple constructor.
// NOTE(review): this lives in a record, so the field takes part in the synthesized
// equality members and is copied by `with` expressions — confirm nodes are never
// compared or cloned with a different `type` after the first lookup.
ConstructorInfo? _constructor = null;

/// <summary>
/// Constructor of the tuple type whose parameter list matches the type's generic
/// arguments; a Nullable wrapper around the tuple is unwrapped first. The lookup
/// runs on first access and the result is reused afterwards.
/// </summary>
ConstructorInfo constructor
{
    get
    {
        return _constructor ??= ResolveTupleType().GetConstructor(ResolveTupleType().GetGenericArguments());

        // Strips Nullable<T> so the constructor is looked up on the tuple itself.
        Type ResolveTupleType() => IsNullable(type) ? type.GetGenericArguments().Single() : type;
    }
}

internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
type = IsNullable(type) ? type.GetGenericArguments().Single() : type;
var constructor = type.GetConstructor(type.GetGenericArguments());

return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length)).ToArray());
return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length, cache)).ToArray());
}

internal override void Validate()
Expand All @@ -101,15 +116,13 @@ ConstructorInfo constructor
{
get
{
return _constructor ?? (_constructor = type.GetConstructors().Where(cons => cons.GetParameters().Length == constructorParams.Length).Single());
return _constructor ?? (_constructor = (IsNullable(type) ? type.GetGenericArguments().Single() : type).GetConstructors().Where(cons => cons.GetParameters().Length == constructorParams.Length).Single());
}
}

internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
type = IsNullable(type) ? type.GetGenericArguments().Single() : type;

return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length)).ToArray());
return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length, cache)).ToArray());
}

internal override void Validate()
Expand All @@ -130,7 +143,7 @@ internal override void Validate()
internal record EnumParseNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) :
ExtractionPlanNode(groupName, type, constructorParams, propertySetters)
{
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
return Enum.Parse(type, range.Value);
}
Expand All @@ -150,7 +163,7 @@ ConstructorInfo constructor
}


internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
Debug.Assert(type == this.type);
return constructor.Invoke(new[] { range.Value });
Expand Down Expand Up @@ -187,7 +200,7 @@ MethodInfo parse
}
}

internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
type = IsNullable(type) ? type.GetGenericArguments().Single() : type;

Expand Down Expand Up @@ -215,7 +228,7 @@ internal override void Validate()
internal record StringCastNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) :
ExtractionPlanNode(groupName, type, constructorParams, propertySetters)
{
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache)
{
return range.Value;
}
Expand Down

0 comments on commit 2c9db75

Please sign in to comment.