diff --git a/Changes b/Changes index 83a4f8c..900f5b7 100644 --- a/Changes +++ b/Changes @@ -2,6 +2,9 @@ Revision history for String-Utils {{$NEXT}} +0.0.26 2024-08-16T13:53:24+02:00 + - Add support for "regexify" + 0.0.25 2024-08-05T16:32:56+02:00 - Add support for "paragraphs" - Separate documentation into a separate file diff --git a/META6.json b/META6.json index 6c01abd..f7d87a5 100644 --- a/META6.json +++ b/META6.json @@ -30,5 +30,5 @@ ], "test-depends": [ ], - "version": "0.0.25" + "version": "0.0.26" } diff --git a/README.md b/README.md index 6dba7ca..633059e 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,10 @@ say all-same(""); # Nil .say for paragraphs("a\n\nb"); # 0 => a␤2 => b␤ .say for paragraphs($path.IO.lines); # … +my $string = "foo"; +my $regex = regexify($string, :ignorecase); +say "FOOBAR" ~~ $regex; # 「FOO」 + use String::Utils <before after>; # only import "before" and "after" ``` @@ -356,6 +360,61 @@ Lazily produces a `Seq` of `Pairs` with paragraphs from a `Seq` or string in whi The optional second argument can be used to indicate the ordinal number of the first line in the string. +regexify +-------- + +```raku +my $string = "foo"; +my $regex = regexify($string, :ignorecase); +say "FOOBAR" ~~ $regex; # 「FOO」 +``` + +Produce a `Regex` object from a given string and modifiers. Note that this is similar to the `/ <$string> /` syntax. But opposed to that syntax, which interpolates the contents of the string **each time** the regex is executed, the `Regex` object returned by `regexify` is immutable. + +The following modifiers are supported: + +### i / ignorecase + +```raku +# accept haystack if "bar" is found, regardless of case +my $regex = regexify("bar", :i); # or :ignorecase +``` + +Allow characters to match even if they are of mixed case. 
+ +### smartcase + +```raku +# accept haystack if "bar" is found, regardless of case +my &anycase = regexify("bar", :smartcase); + +# accept haystack if "Bar" is found +my &exactcase = regexify("Bar", :smartcase); +``` + +If the needle is a string and does **not** contain any uppercase characters, then `ignorecase` semantics will be assumed. + +### m / ignoremark + +```raku +# accept haystack if "bar" is found, regardless of any accents +my &anymark = regexify("bar", :m); # or :ignoremark +``` + +Allow characters to match even if they have accents (or not). + +### smartmark + +```raku +# accept haystack if "bar" is found, regardless of any accents +my &anymark = regexify("bar", :smartmark); + +# accept haystack if "bår" is found +my &exactmark = regexify("bår", :smartmark); +``` + +If the needle is a string and does **not** contain any characters with accents, then `ignoremark` semantics will be assumed. + AUTHOR ====== diff --git a/doc/String-Utils.rakudoc b/doc/String-Utils.rakudoc index 06fbe1f..960c608 100644 --- a/doc/String-Utils.rakudoc +++ b/doc/String-Utils.rakudoc @@ -68,6 +68,10 @@ say all-same(""); # Nil .say for paragraphs("a\n\nb"); # 0 => a␤2 => b␤ .say for paragraphs($path.IO.lines); # … +my $string = "foo"; +my $regex = regexify($string, :ignorecase); +say "FOOBAR" ~~ $regex; # 「FOO」 + use String::Utils <before after>; # only import "before" and "after" =end code @@ -419,6 +423,75 @@ and the value is the paragraph (without trailing newline). The optional second argument can be used to indicate the ordinal number of the first line in the string. +=head2 regexify + +=begin code :lang<raku> + +my $string = "foo"; +my $regex = regexify($string, :ignorecase); +say "FOOBAR" ~~ $regex; # 「FOO」 + +=end code + +Produce a C<Regex> object from a given string and modifiers. Note that this +is similar to the C</ <$string> /> syntax. But opposed to that syntax, +which interpolates the contents of the string B<each time> the regex is +executed, the C<Regex> object returned by C<regexify> is immutable. 
+ +The following modifiers are supported: + +=head3 i / ignorecase + +=begin code :lang<raku> + +# accept haystack if "bar" is found, regardless of case +my $regex = regexify("bar", :i); # or :ignorecase + +=end code + +Allow characters to match even if they are of mixed case. + +=head3 smartcase + +=begin code :lang<raku> + +# accept haystack if "bar" is found, regardless of case +my &anycase = regexify("bar", :smartcase); + +# accept haystack if "Bar" is found +my &exactcase = regexify("Bar", :smartcase); + +=end code + +If the needle is a string and does B<not> contain any uppercase characters, +then C<ignorecase> semantics will be assumed. + +=head3 m / ignoremark + +=begin code :lang<raku> + +# accept haystack if "bar" is found, regardless of any accents +my &anymark = regexify("bar", :m); # or :ignoremark + +=end code + +Allow characters to match even if they have accents (or not). + +=head3 smartmark + +=begin code :lang<raku> + +# accept haystack if "bar" is found, regardless of any accents +my &anymark = regexify("bar", :smartmark); + +# accept haystack if "bår" is found +my &exactmark = regexify("bår", :smartmark); + +=end code + +If the needle is a string and does B<not> contain any characters with accents, +then C<ignoremark> semantics will be assumed. + =head1 AUTHOR Elizabeth Mattijsen diff --git a/lib/String/Utils.rakumod b/lib/String/Utils.rakumod index 8976b31..7d4afc6 100644 --- a/lib/String/Utils.rakumod +++ b/lib/String/Utils.rakumod @@ -462,6 +462,21 @@ my multi sub paragraphs(Cool:D $string, Int:D $initial = 0) { paragraphs $string.Str.lines, $initial } +my sub regexify(str $spec, *%_) { + my str $i = %_ + || %_ + || ((%_ || %_) && is-lowercase($spec)) + ?? ':i ' + !! ''; + my str $m = %_ + || %_ + || ((%_ || %_) && !has-marks($spec)) + ?? ':m ' + !! ''; + + "/$i$m$spec/".EVAL # until there's a better solution +} + my sub EXPORT(*@names) { Map.new: @names ?? 
@names.map: { diff --git a/t/02-selective-importing.rakutest b/t/02-selective-importing.rakutest index 550cb34..9888ca0 100644 --- a/t/02-selective-importing.rakutest +++ b/t/02-selective-importing.rakutest @@ -3,7 +3,7 @@ use Test; my constant @subs = < after all-same around before between between-included chomp-needle consists-of has-marks is-sha1 is-lowercase is-uppercase is-whitespace - leading-whitespace leaf letters ngram non-word root stem + leading-whitespace leaf letters ngram non-word regexify root stem trailing-whitespace >; diff --git a/t/03-regexify.rakutest b/t/03-regexify.rakutest new file mode 100644 index 0000000..e836460 --- /dev/null +++ b/t/03-regexify.rakutest @@ -0,0 +1,32 @@ +BEGIN %*ENV = 1; + +use Test; +use String::Utils; + +my @tests = + \('\w+'), "foobar", "foobar", + \('foo', :i), "FOOBAR", "FOO", + \('foo', :ignorecase), "FOOBAR", "FOO", + \('foo', :smartcase), "FOOBAR", "FOO", + \('FOO', :m), "FÖOBAR", "FÖO", + \('FOO', :ignoremark), "FÖOBAR", "FÖO", + \('FOO', :smartmark), "FÖOBAR", "FÖO", +; + +plan @tests / 3; + +for @tests -> $capture, $haystack, $result { + subtest "Checking '$capture.raku.substr(2,*-1)'" => { + plan 3; + + my $regex := regexify(|$capture); + isa-ok $regex, Regex; + + $haystack ~~ $regex; + isa-ok $/, Match; + + is $/.Str, $result, "did '$haystack' produce '$result'"; + } +} + +# vim: expandtab shiftwidth=4