From 30861ca7a5eb746ce01704ddce164074342ae6b3 Mon Sep 17 00:00:00 2001
From: "Flavio S. Glock"
Date: Fri, 18 Oct 2024 16:40:32 +0200
Subject: [PATCH] regex variable %+ wip

---
 misc/test/regex_named_capture.pl              |  32 +++++
 .../runtime/HashSpecialVariable.java          | 117 +++++++++++++++++++
 2 files changed, 149 insertions(+)
 create mode 100644 misc/test/regex_named_capture.pl
 create mode 100644 src/main/java/org/perlonjava/runtime/HashSpecialVariable.java

diff --git a/misc/test/regex_named_capture.pl b/misc/test/regex_named_capture.pl
new file mode 100644
index 00000000..e103d45a
--- /dev/null
+++ b/misc/test/regex_named_capture.pl
@@ -0,0 +1,32 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Test::More tests => 4;
+
+# Test case 1: Simple named capture
+my $string1 = 'foo';
+if ($string1 =~ /(?<foo>foo)/) {
+    is($+{foo}, 'foo', 'Test case 1: Named capture for "foo"');
+} else {
+    fail('Test case 1: Pattern did not match');
+}
+
+# Test case 2: Multiple named captures
+my $string2 = 'barbaz';
+if ($string2 =~ /(?<bar>bar)(?<baz>baz)/) {
+    is($+{bar}, 'bar', 'Test case 2: Named capture for "bar"');
+    is($+{baz}, 'baz', 'Test case 2: Named capture for "baz"');
+} else {
+    fail('Test case 2: Pattern did not match');
+}
+
+# Test case 3: Overlapping named captures
+my $string3 = 'foobar';
+if ($string3 =~ /(?<foo>foo)(?<bar>bar)|(?<foobar>foobar)/) {
+    is($+{foo}, 'foo', 'Test case 3: Overlapping named capture for "foo"');
+} else {
+    fail('Test case 3: Pattern did not match');
+}
+
+done_testing();
+
diff --git a/src/main/java/org/perlonjava/runtime/HashSpecialVariable.java b/src/main/java/org/perlonjava/runtime/HashSpecialVariable.java
new file mode 100644
index 00000000..65d53846
--- /dev/null
+++ b/src/main/java/org/perlonjava/runtime/HashSpecialVariable.java
@@ -0,0 +1,117 @@
+package org.perlonjava.runtime;
+
+import java.util.AbstractMap;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.perlonjava.runtime.RuntimeScalarCache.scalarUndef;
+
+/**
+ * HashSpecialVariable provides a dynamic view over named capturing groups
+ * in a Matcher object, reflecting the current state of the Matcher.
+ * This implements the Perl special variable %+.
+ *
+ * <p>Only named groups that captured text in the Matcher's current match
+ * appear as keys. While the Matcher holds no successful match the map is
+ * empty and every lookup returns {@code scalarUndef}.
+ */
+public class HashSpecialVariable extends AbstractMap<String, RuntimeScalar> {
+
+    /** Recognizes {@code (?<name>} group openers in the text of a regex. */
+    private static final Pattern NAMED_GROUP_OPENER =
+            Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");
+
+    private final Matcher matcher;
+    private final Set<String> groupNames;
+
+    /**
+     * Constructs a HashSpecialVariable for the given Matcher.
+     *
+     * @param matcher the Matcher object to query for named capturing groups
+     */
+    public HashSpecialVariable(Matcher matcher) {
+        this.matcher = matcher;
+        this.groupNames = extractGroupNames(matcher.pattern());
+    }
+
+    /**
+     * Collects the candidate group names that appear in the pattern text.
+     *
+     * <p>This is a textual scan, so it may report a name for text that only
+     * looks like a group opener (for example after an escaped parenthesis).
+     * Such names are harmless because {@link #capturedText(String)} treats a
+     * name the Matcher does not know as not captured. Group indices are
+     * deliberately not derived here: numbering only the named openers gives
+     * the wrong index as soon as the pattern also contains unnamed groups.
+     *
+     * @param pattern the regex pattern
+     * @return the candidate group names found in the pattern text
+     */
+    private static Set<String> extractGroupNames(Pattern pattern) {
+        Set<String> names = new HashSet<>();
+        Matcher opener = NAMED_GROUP_OPENER.matcher(pattern.pattern());
+        while (opener.find()) {
+            names.add(opener.group(1));
+        }
+        return names;
+    }
+
+    /**
+     * Returns the text captured by the named group in the current match.
+     *
+     * @param name the group name to look up
+     * @return the captured text, or {@code null} when the group did not
+     *         participate in the match, the Matcher has no group with that
+     *         name, or the Matcher holds no successful match
+     */
+    private String capturedText(String name) {
+        try {
+            return matcher.group(name);
+        } catch (IllegalStateException | IllegalArgumentException ignored) {
+            // IllegalStateException: no match attempted yet, or the last one failed.
+            // IllegalArgumentException: the pattern has no group with this name.
+            // Both mean "nothing captured", so report that instead of throwing.
+            return null;
+        }
+    }
+
+    /**
+     * Builds a snapshot of the named groups that captured text.
+     *
+     * @return one entry per named group with a non-null capture
+     */
+    @Override
+    public Set<Map.Entry<String, RuntimeScalar>> entrySet() {
+        Set<Map.Entry<String, RuntimeScalar>> entries = new HashSet<>();
+        for (String name : groupNames) {
+            String text = capturedText(name);
+            if (text != null) {
+                entries.add(new SimpleEntry<>(name, new RuntimeScalar(text)));
+            }
+        }
+        return entries;
+    }
+
+    /**
+     * Looks up a single named capture.
+     *
+     * @param key the group name; non-String keys are never present
+     * @return the captured text as a RuntimeScalar, or {@code scalarUndef}
+     *         when there is no capture for that name
+     */
+    @Override
+    public RuntimeScalar get(Object key) {
+        if (key instanceof String && groupNames.contains(key)) {
+            String text = capturedText((String) key);
+            if (text != null) {
+                return new RuntimeScalar(text);
+            }
+        }
+        return scalarUndef;
+    }
+}