From 2c9db75e8a3da37f54dae3bf77a9ce4c88c83d24 Mon Sep 17 00:00:00 2001 From: Scott Blomquist Date: Sat, 16 Dec 2023 17:17:49 -0800 Subject: [PATCH] Add capture group cache for Range calculation --- RegExtract.Test/Usage.cs | 14 ++++++-- RegExtract/ExtractionPlanNode.cs | 27 ++++++++++----- RegExtract/ExtractionPlanNodeTypes.cs | 47 +++++++++++++++++---------- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/RegExtract.Test/Usage.cs b/RegExtract.Test/Usage.cs index a8f4c4e..8c0c191 100644 --- a/RegExtract.Test/Usage.cs +++ b/RegExtract.Test/Usage.cs @@ -299,10 +299,10 @@ public void can_extract_capture_collections_to_lists() public void can_extract_single_item() { var output = "asdf".Extract<string>("(.*)"); - Assert.Equal(output, "asdf"); + Assert.Equal("asdf", output); var n = "2023".Extract<int>(@"(\d+)"); - Assert.Equal(n, 2023); + Assert.Equal(2023, n); } [Fact] @@ -395,11 +395,21 @@ record bounds(int lo, int hi); [Fact] public void nested_extraction() { + // TODO: Why is this so much slower than nested_extraction_control? var result = "2-12 c: abcdefg".Extract<(bounds, char, string)>(@"((\d+)-(\d+)) (.): (.*)"); Assert.Equal((new bounds(2, 12), 'c', "abcdefg"), result); } + [Fact] + public void nested_extraction_control() + { + var result = "2-12 c: abcdefg".Extract<((int lo, int hi), char ch, string str)>(@"((\d+)-(\d+)) (.): (.*)"); + + Assert.Equal(((2, 12), 'c', "abcdefg"), result); + } + + [Fact] public void nested_extraction_of_list() { diff --git a/RegExtract/ExtractionPlanNode.cs b/RegExtract/ExtractionPlanNode.cs index 720c185..45fd5bd 100644 --- a/RegExtract/ExtractionPlanNode.cs +++ b/RegExtract/ExtractionPlanNode.cs @@ -145,20 +145,27 @@ internal virtual void Validate() return; } - internal virtual object? Construct(Match match, Type type, (string Value, int Index, int Length) range) + internal virtual object? 
Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache) { throw new InvalidOperationException("Can't construct a node based on base ExtractionPlanNode type."); } - protected IEnumerable<(string Value, int Index, int Length)> Ranges(Match match, string groupName, int captureStart, int captureLength) => AsEnumerable(match.Groups[groupName].Captures) - .Where(cap => cap.Index >= captureStart && cap.Index + cap.Length <= captureStart + captureLength) - .Select(cap => (cap.Value, cap.Index, cap.Length)); + protected IEnumerable<(string Value, int Index, int Length)> Ranges(Match match, string groupName, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache) + { + if (!cache.ContainsKey(groupName)) + { + cache[groupName] = AsEnumerable(match.Groups[groupName].Captures) + .Select(cap => (cap.Value, cap.Index, cap.Length)) + .ToArray(); + } + return cache[groupName].Where(cap => cap.Index >= captureStart && cap.Index + cap.Length <= captureStart + captureLength); + } - internal virtual object? Execute(Match match, int captureStart, int captureLength) + internal virtual object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache) { object? result = null; - var ranges = Ranges(match, groupName, captureStart, captureLength); + var ranges = Ranges(match, groupName, captureStart, captureLength, cache).ToArray(); Type innerType = IsNullable(type) ? 
type.GetGenericArguments().Single() : type; @@ -171,13 +178,13 @@ internal virtual void Validate() { var lastRange = ranges.Last(); - result = Construct(match, innerType, lastRange); + result = Construct(match, innerType, lastRange, cache); if (result is not null) { foreach (var prop in propertyNodes) { - result.GetType().GetProperty(prop.groupName).GetSetMethod().Invoke(result, new[] { prop.Execute(match, lastRange.Index, lastRange.Length) }); + result.GetType().GetProperty(prop.groupName).GetSetMethod().Invoke(result, new[] { prop.Execute(match, lastRange.Index, lastRange.Length, cache) }); } } } @@ -192,7 +199,9 @@ internal virtual void Validate() throw new ArgumentException("Regex didn't match."); } - return Execute(match, match.Groups[groupName].Index, match.Groups[groupName].Length); + Dictionary<string, (string Value, int Index, int Length)[]> cache = new(); + + return Execute(match, match.Groups[groupName].Index, match.Groups[groupName].Length, cache); } protected const string VALUETUPLE_TYPENAME = "System.ValueTuple`"; diff --git a/RegExtract/ExtractionPlanNodeTypes.cs b/RegExtract/ExtractionPlanNodeTypes.cs index 810a6ab..7f6ad83 100644 --- a/RegExtract/ExtractionPlanNodeTypes.cs +++ b/RegExtract/ExtractionPlanNodeTypes.cs @@ -10,7 +10,7 @@ namespace RegExtract.ExtractionPlanNodeTypes internal record UninitializedNode() : ExtractionPlanNode("", typeof(void), new ExtractionPlanNode[0], new ExtractionPlanNode[0]) { - internal override object? Execute(Match match, int captureStart, int captureLength) + internal override object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache) { throw new InvalidOperationException("Extraction plan was not initialized before execution."); } @@ -19,9 +19,9 @@ internal record UninitializedNode() : internal record VirtualUnaryTupleNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) : ExtractionPlanNode(groupName, type, constructorParams, propertySetters) { - internal override object? 
Execute(Match match, int captureStart, int captureLength) + internal override object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache) { - return constructorParams.Single().Execute(match, captureStart, captureLength); + return constructorParams.Single().Execute(match, captureStart, captureLength, cache); } internal override void Validate() @@ -33,7 +33,7 @@ internal override void Validate() internal record CollectionInitializerNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) : ExtractionPlanNode(groupName, type, constructorParams, propertySetters) { - internal override object? Execute(Match match, int captureStart, int captureLength) + internal override object? Execute(Match match, int captureStart, int captureLength, Dictionary<string, (string Value, int Index, int Length)[]> cache) { var genericArgs = type.GetGenericArguments(); @@ -43,7 +43,7 @@ internal record CollectionInitializerNode(string groupName, Type type, Extractio object?[] itemVals = new object[genericArgs.Length]; - var rangeArray = constructorParams.Select(c => Ranges(match, groupName, captureStart, captureLength).GetEnumerator()).ToArray(); + var rangeArray = constructorParams.Select(c => Ranges(match, groupName, captureStart, captureLength, cache).GetEnumerator()).ToArray(); do { @@ -51,7 +51,7 @@ internal record CollectionInitializerNode(string groupName, Type type, Extractio { if (rangeArray[i].MoveNext()) { - itemVals[i] = constructorParams[i].Execute(match, rangeArray[i].Current.Index, rangeArray[i].Current.Length); + itemVals[i] = constructorParams[i].Execute(match, rangeArray[i].Current.Index, rangeArray[i].Current.Length, cache); } else { @@ -75,12 +75,27 @@ internal record CollectionInitializerNode(string groupName, Type type, Extractio internal record ConstructTupleNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) : ExtractionPlanNode(groupName, type, constructorParams, propertySetters) 
{ - internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range) + ConstructorInfo? _constructor = null; + + ConstructorInfo constructor + { + get + { + if (_constructor != null) return _constructor; + else + { + var wrappedType = IsNullable(type) ? type.GetGenericArguments().Single() : type; + return (_constructor = wrappedType.GetConstructor(wrappedType.GetGenericArguments())); + } + } + } + + internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache) { type = IsNullable(type) ? type.GetGenericArguments().Single() : type; var constructor = type.GetConstructor(type.GetGenericArguments()); - return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length)).ToArray()); + return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length, cache)).ToArray()); } internal override void Validate() @@ -101,15 +116,13 @@ ConstructorInfo constructor { get { - return _constructor ?? (_constructor = type.GetConstructors().Where(cons => cons.GetParameters().Length == constructorParams.Length).Single()); + return _constructor ?? (_constructor = (IsNullable(type) ? type.GetGenericArguments().Single() : type).GetConstructors().Where(cons => cons.GetParameters().Length == constructorParams.Length).Single()); } } - internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range) + internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache) { - type = IsNullable(type) ? 
type.GetGenericArguments().Single() : type; - - return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length)).ToArray()); + return constructor.Invoke(constructorParams.Select(i => i.Execute(match, range.Index, range.Length, cache)).ToArray()); } internal override void Validate() @@ -130,7 +143,7 @@ internal override void Validate() internal record EnumParseNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) : ExtractionPlanNode(groupName, type, constructorParams, propertySetters) { - internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range) + internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache) { return Enum.Parse(type, range.Value); } @@ -150,7 +163,7 @@ ConstructorInfo constructor } - internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range) + internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache) { Debug.Assert(type == this.type); return constructor.Invoke(new[] { range.Value }); @@ -187,7 +200,7 @@ MethodInfo parse } } - internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range) + internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache) { type = IsNullable(type) ? type.GetGenericArguments().Single() : type; @@ -215,7 +228,7 @@ internal override void Validate() internal record StringCastNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) : ExtractionPlanNode(groupName, type, constructorParams, propertySetters) { - internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range) + internal override object? 
Construct(Match match, Type type, (string Value, int Index, int Length) range, Dictionary<string, (string Value, int Index, int Length)[]> cache) { return range.Value; }