diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cdfe614..77fc671 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -62,7 +62,9 @@ jobs: if: ${{ matrix.os == 'windows-latest' }} run: | choco install zip - zip target/${{ matrix.target }}/release/grex-${{ steps.get_version.outputs.version }}-${{ matrix.target }}.zip target/${{ matrix.target }}/release/grex.exe + cd target/${{ matrix.target }}/release + zip grex-${{ steps.get_version.outputs.version }}-${{ matrix.target }}.zip grex.exe + cd ../../.. - name: Create tar.gz file on macOS and Linux if: ${{ matrix.os != 'windows-latest' }} @@ -79,9 +81,8 @@ jobs: file_glob: true file: target/${{ matrix.target }}/release/grex-${{ steps.get_version.outputs.version }}-${{ matrix.target }}.{zip,tar.gz} - # ENABLE AGAIN FOR NEXT RELEASE 1.3 - #- name: Upload release to crates.io - # uses: katyo/publish-crates@v1 - # if: ${{ matrix.os == 'ubuntu-latest' }} - # with: - # registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} + - name: Upload release to crates.io + uses: katyo/publish-crates@v1 + if: ${{ matrix.os == 'ubuntu-latest' }} + with: + registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index 801ef67..fa59c49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,10 +1,12 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + [[package]] name = "aho-corasick" -version = "0.7.15" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" dependencies = [ "memchr", ] @@ -20,9 +22,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "1.0.3" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2475b58cd94eb4f70159f4fd8844ba3b807532fe3131b3373fae060bbe30396" +checksum = "b800c4403e8105d959595e1f88119e78bc12bc874c4336973658b648a746ba93" dependencies = [ "bstr", "doc-comment", @@ -109,10 +111,10 @@ dependencies = [ ] [[package]] -name = "difference" -version = "2.0.0" +name = "difflib" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "doc-comment" @@ -128,15 +130,15 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "fixedbitset" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e" [[package]] name = "float-cmp" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" dependencies = [ "num-traits", ] @@ -160,9 +162,10 @@ dependencies = [ [[package]] name = "grex" -version = "1.2.0" +version = "1.3.0" dependencies = [ "assert_cmd", + "atty", "indoc", "itertools", "lazy_static", @@ 
-181,9 +184,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.9.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" [[package]] name = "heck" @@ -205,9 +208,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1fa934250de4de8aef298d81c729a7d33d8c239daa3a7575e6b92bfc7313b" +checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", "hashbrown", @@ -224,9 +227,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" dependencies = [ "either", ] @@ -260,15 +263,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.3.4" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" [[package]] name = "ndarray" -version = "0.15.0" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "073c7c76f7b90654996f08db92290e9f300d11de0634493d6f1c4fd11d8a1583" +checksum = "08e854964160a323e65baa19a0b1a027f76d590faba01f05c0cbc3187221a8c9" dependencies = [ "matrixmultiply", "num-complex", @@ -311,20 +314,11 @@ dependencies = [ "autocfg", ] -[[package]] -name = "pest" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" 
-dependencies = [ - "ucd-trie", -] - [[package]] name = "petgraph" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" +checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" dependencies = [ "fixedbitset", "indexmap", @@ -338,12 +332,13 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "predicates" -version = "1.0.7" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeb433456c1a57cc93554dea3ce40b4c19c4057e41c55d4a0f3d84ea71c325aa" +checksum = "c143348f141cc87aab5b950021bac6145d0e5ae754b0591de23244cee42c9308" dependencies = [ - "difference", + "difflib", "float-cmp", + "itertools", "normalize-line-endings", "predicates-core", "regex", @@ -391,9 +386,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.24" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" dependencies = [ "unicode-xid", ] @@ -505,9 +500,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.4.5" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "aho-corasick", "memchr", @@ -525,9 +520,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.22" +version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] name = "remove_dir_all" 
@@ -540,9 +535,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.7.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5056bc1e7cfd438570e8292ef9512774b1d0afc8a50d683fda0ebe74f6233cc6" +checksum = "2288c66aeafe3b2ed227c981f364f9968fa952ef0b30e84ada4486e7ee24d00a" dependencies = [ "cfg-if", "proc-macro2", @@ -553,9 +548,9 @@ dependencies = [ [[package]] name = "rustc_version" -version = "0.3.3" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ "semver", ] @@ -574,21 +569,9 @@ dependencies = [ [[package]] name = "semver" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.10.2" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" -dependencies = [ - "pest", -] +checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" [[package]] name = "strsim" @@ -598,9 +581,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "structopt" -version = "0.3.21" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5277acd7ee46e63e5168a80734c9f6ee81b1367a7d8772a2d765df2a3705d28c" +checksum = "bf9d950ef167e25e0bdb073cf1d68e9ad2795ac826f2f3f59647817cf23c0bfa" dependencies = [ "clap", "lazy_static", @@ -609,9 +592,9 @@ dependencies = [ [[package]] name = "structopt-derive" -version = "0.4.14" +version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5ba9cdfda491b814720b6b06e0cac513d922fc407582032e8706e9f137976f90" +checksum = "134d838a2c9943ac3125cf6df165eda53493451b719f3255b2a26b85f772d0ba" dependencies = [ "heck", "proc-macro-error", @@ -622,9 +605,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.65" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a1d708c221c5a612956ef9f75b37e454e88d1f7b899fbd3a18d4252012d663" +checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" dependencies = [ "proc-macro2", "quote", @@ -660,12 +643,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" -[[package]] -name = "ucd-trie" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" - [[package]] name = "unic-char-property" version = "0.9.0" @@ -710,9 +687,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" [[package]] name = "unicode-width" diff --git a/Cargo.toml b/Cargo.toml index 1ea0649..3e15d17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ [package] name = "grex" -version = "1.2.0" +version = "1.3.0" authors = ["Peter M. Stahl "] description = """ grex generates regular expressions from user-provided test cases. 
@@ -29,20 +29,21 @@ categories = ["command-line-utilities"] keywords = ["pattern", "regex", "regexp"] [dependencies] -itertools = "0.10.0" +atty = "0.2.14" +itertools = "0.10.1" lazy_static = "1.4.0" -ndarray = "0.15.0" -petgraph = {version = "0.5.1", default-features = false, features = ["stable_graph"]} -regex = "1.4.5" -structopt = "0.3.21" +ndarray = "0.15.3" +petgraph = {version = "0.6.0", default-features = false, features = ["stable_graph"]} +regex = "1.5.4" +structopt = "0.3.23" unic-char-range = "0.9.0" unic-ucd-category = "0.9.0" -unicode-segmentation = "1.7.1" +unicode-segmentation = "1.8.0" [dev-dependencies] -assert_cmd = "1.0.3" +assert_cmd = "2.0.1" indoc = "1.0.3" -predicates = "1.0.7" +predicates = "2.0.2" proptest = "1.0.0" -rstest = "0.7.0" +rstest = "0.11.0" tempfile = "3.2.0" diff --git a/README.md b/README.md index e482960..1b52485 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,19 @@
[![build](https://github.com/pemistahl/grex/actions/workflows/build.yml/badge.svg)](https://github.com/pemistahl/grex/actions/workflows/build.yml) -[![dependency status](https://deps.rs/crate/grex/1.2.0/status.svg)](https://deps.rs/crate/grex/1.2.0) +[![dependency status](https://deps.rs/crate/grex/1.3.0/status.svg)](https://deps.rs/crate/grex/1.3.0) [![codecov](https://codecov.io/gh/pemistahl/grex/branch/main/graph/badge.svg)](https://codecov.io/gh/pemistahl/grex) [![lines of code](https://tokei.rs/b1/github/pemistahl/grex?category=code)](https://github.com/XAMPPRocky/tokei) [![Downloads](https://img.shields.io/crates/d/grex.svg)](https://crates.io/crates/grex) [![Docs.rs](https://docs.rs/grex/badge.svg)](https://docs.rs/grex) [![Crates.io](https://img.shields.io/crates/v/grex.svg)](https://crates.io/crates/grex) -[![Lib.rs](https://img.shields.io/badge/lib.rs-v1.2.0-blue)](https://lib.rs/crates/grex) +[![Lib.rs](https://img.shields.io/badge/lib.rs-v1.3.0-blue)](https://lib.rs/crates/grex) [![license](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0) -[![Linux Download](https://img.shields.io/badge/Linux%20Download-v1.2.0-blue?logo=Linux)](https://github.com/pemistahl/grex/releases/download/v1.2.0/grex-v1.2.0-x86_64-unknown-linux-musl.tar.gz) -[![MacOS Download](https://img.shields.io/badge/macOS%20Download-v1.2.0-blue?logo=Apple)](https://github.com/pemistahl/grex/releases/download/v1.2.0/grex-v1.2.0-x86_64-apple-darwin.tar.gz) -[![Windows Download](https://img.shields.io/badge/Windows%20Download-v1.2.0-blue?logo=Windows)](https://github.com/pemistahl/grex/releases/download/v1.2.0/grex-v1.2.0-x86_64-pc-windows-msvc.zip) +[![Linux Download](https://img.shields.io/badge/Linux%20Download-v1.3.0-blue?logo=Linux)](https://github.com/pemistahl/grex/releases/download/v1.3.0/grex-v1.3.0-x86_64-unknown-linux-musl.tar.gz) +[![MacOS 
Download](https://img.shields.io/badge/macOS%20Download-v1.3.0-blue?logo=Apple)](https://github.com/pemistahl/grex/releases/download/v1.3.0/grex-v1.3.0-x86_64-apple-darwin.tar.gz) +[![Windows Download](https://img.shields.io/badge/Windows%20Download-v1.3.0-blue?logo=Windows)](https://github.com/pemistahl/grex/releases/download/v1.3.0/grex-v1.3.0-x86_64-pc-windows-msvc.zip)
@@ -114,7 +114,7 @@ toolchain installed, you can install by compiling from source using So the summary of your installation options is: ``` -( choco | scoop | brew | cargo | huber | port ) install grex +( brew | cargo | choco | huber | port | scoop ) install grex ``` ### 4.2 The library [Top ▲](#table-of-contents) @@ -123,7 +123,7 @@ In order to use *grex* as a library, simply add it as a dependency to your `Carg ```toml [dependencies] -grex = "1.2.0" +grex = "1.3.0" ``` ## 5. How to use? [Top ▲](#table-of-contents) @@ -133,10 +133,15 @@ All settings can be freely combined with each other. ### 5.1 The command-line tool [Top ▲](#table-of-contents) +Test cases are passed either directly (`grex a b c`) or from a file (`grex -f test_cases.txt`). +*grex* is able to receive its input from Unix pipelines as well, e.g. `cat test_cases.txt | grex -`. + +The following table shows all available flags and options: + ``` $ grex -h -grex 1.2.0 +grex 1.3.0 © 2019-today Peter M. Stahl Licensed under the Apache License, Version 2.0 Downloadable from https://crates.io/crates/grex @@ -161,6 +166,9 @@ FLAGS: -i, --ignore-case Performs case-insensitive matching, letters match both upper and lower case -g, --capture-groups Replaces non-capturing groups by capturing ones -x, --verbose Produces a nicer looking regular expression in verbose mode + --no-start-anchor Removes the caret anchor '^' from the resulting regular expression + --no-end-anchor Removes the dollar sign anchor '$' from the resulting regular expression + --no-anchors Removes the caret and dollar sign anchors from the resulting regular expression -c, --colorize Provides syntax highlighting for the resulting regular expression -h, --help Prints help information -v, --version Prints version information @@ -180,8 +188,8 @@ ARGS: #### 5.2.1 Default settings -Test cases are passed either from a collection via [`RegExpBuilder::from()`](https://docs.rs/grex/1.2.0/grex/struct.RegExpBuilder.html#method.from) -or from a file via 
[`RegExpBuilder::from_file()`](https://docs.rs/grex/1.2.0/grex/struct.RegExpBuilder.html#method.from_file). +Test cases are passed either from a collection via [`RegExpBuilder::from()`](https://docs.rs/grex/1.3.0/grex/struct.RegExpBuilder.html#method.from) +or from a file via [`RegExpBuilder::from_file()`](https://docs.rs/grex/1.3.0/grex/struct.RegExpBuilder.html#method.from_file). If read from a file, each test case must be on a separate line. Lines may be ended with either a newline `\n` or a carriage return with a line feed `\r\n`. @@ -195,10 +203,11 @@ assert_eq!(regexp, "^a(?:aa?)?$"); #### 5.2.2 Convert to character classes ```rust -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["a", "aa", "123"]) - .with_conversion_of(&[Feature::Digit, Feature::Word]) + .with_conversion_of_digits() + .with_conversion_of_words() .build(); assert_eq!(regexp, "^(\\d\\d\\d|\\w(?:\\w)?)$"); ``` @@ -206,10 +215,10 @@ assert_eq!(regexp, "^(\\d\\d\\d|\\w(?:\\w)?)$"); #### 5.2.3 Convert repeated substrings ```rust -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .build(); assert_eq!(regexp, "^(?:a{2}|(?:bc){2}|(?:def){3})$"); ``` @@ -221,10 +230,10 @@ In the following example, the test case `aa` is not converted to `a{2}` because `a` has a length of 1, but the minimum substring length has been set to 2. ```rust -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_minimum_substring_length(2) .build(); assert_eq!(regexp, "^(?:aa|(?:bc){2}|(?:def){3})$"); @@ -234,10 +243,10 @@ Setting a minimum number of 2 repetitions in the next example, only the test cas converted because it is the only one that is repeated twice. 
```rust -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_minimum_repetitions(2) .build(); assert_eq!(regexp, "^(?:bcbc|aa|(?:def){3})$"); @@ -274,10 +283,10 @@ The regular expressions that *grex* generates are case-sensitive by default. Case-insensitive matching can be enabled like so: ```rust -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["big", "BIGGER"]) - .with_conversion_of(&[Feature::CaseInsensitivity]) + .with_case_insensitive_matching() .build(); assert_eq!(regexp, "(?i)^big(?:ger)?$"); ``` @@ -288,10 +297,11 @@ Non-capturing groups are used by default. Extending the previous example, you can switch to capturing groups instead. ```rust -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["big", "BIGGER"]) - .with_conversion_of(&[Feature::CaseInsensitivity, Feature::CapturingGroup]) + .with_case_insensitive_matching() + .with_capturing_groups() .build(); assert_eq!(regexp, "(?i)^big(ger)?$"); ``` @@ -325,7 +335,23 @@ assert_eq!(regexp, indoc!( )); ``` -#### 5.2.8 Syntax highlighting +#### 5.2.8 Disable anchors + +By default, the anchors `^` and `$` are put around every generated regular expression in order +to ensure that it matches only the test cases given as input. Often enough, however, it is +desired to use the generated pattern as part of a larger one. For this purpose, the anchors +can be disabled, either separately or both of them. + +```rust +use grex::RegExpBuilder; + +let regexp = RegExpBuilder::from(&["a", "aa", "aaa"]) + .without_anchors() + .build(); +assert_eq!(regexp, "a(?:aa?)?"); +``` + +#### 5.2.9 Syntax highlighting ⚠ The method `with_syntax_highlighting()` may only be used if the resulting regular expression is meant to be printed to the console. 
It is mainly meant to be used for the command-line tool output. @@ -463,22 +489,12 @@ cargo build ``` The source code is accompanied by an extensive test suite consisting of unit tests, integration -tests and property tests. For running the unit and integration tests, simply say: +tests and property tests. For running them, simply say: ``` cargo test ``` -Property tests are disabled by default with the `#[ignore]` annotation because they are -very long-running. They are used for automatically generating test cases for regular -expression conversion. If a test case is found that produces a wrong conversion, it is -shrinked to the shortest test case possible that still produces a wrong result. -This is a very useful tool for finding bugs. If you want to run these tests, say: - -``` -cargo test -- --ignored -``` - ## 7. How does it work? [Top ▲](#table-of-contents) 1. A [deterministic finite automaton](https://en.wikipedia.org/wiki/Deterministic_finite_automaton) (DFA) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index d3651e2..d90e820 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,3 +1,15 @@ +## grex 1.3.0 (released on 15 Sep 2021) + +### Features +- anchors can now be disabled so that the generated expression can be used as part of a larger one (#30) +- the command-line tool can now be used within Unix pipelines (#45) + +### Changes +- Additional methods have been added to `RegExpBuilder` in order to replace the enum `Feature` and make the library API more consistent. (#47) + +### Bug Fixes +- Under rare circumstances, the conversion of repetitions did not work. This has been fixed. 
(#36) + ## grex 1.2.0 (released on 28 Mar 2021) ### Features diff --git a/src/ast/expression.rs b/src/ast/expression.rs index 006fa72..494fedf 100644 --- a/src/ast/expression.rs +++ b/src/ast/expression.rs @@ -212,15 +212,8 @@ impl Expression { expr: &Option, config: &RegExpConfig, ) -> Option { - if let Some(value) = expr { - Some(Expression::new_repetition( - value.clone(), - Quantifier::KleeneStar, - config, - )) - } else { - None - } + expr.as_ref() + .map(|value| Expression::new_repetition(value.clone(), Quantifier::KleeneStar, config)) } fn concatenate( diff --git a/src/ast/format.rs b/src/ast/format.rs index 8977f70..d1ef68e 100644 --- a/src/ast/format.rs +++ b/src/ast/format.rs @@ -26,17 +26,17 @@ impl Display for Expression { fn fmt(&self, f: &mut Formatter<'_>) -> Result { match self { Expression::Alternation(options, config) => { - format_alternation(f, &self, options, config) + format_alternation(f, self, options, config) } Expression::CharacterClass(char_set, config) => { format_character_class(f, char_set, config) } Expression::Concatenation(expr1, expr2, config) => { - format_concatenation(f, &self, expr1, expr2, config) + format_concatenation(f, self, expr1, expr2, config) } Expression::Literal(cluster, config) => format_literal(f, cluster, config), Expression::Repetition(expr, quantifier, config) => { - format_repetition(f, &self, expr, quantifier, config) + format_repetition(f, self, expr, quantifier, config) } } } @@ -77,11 +77,11 @@ fn format_character_class( char_set: &BTreeSet, config: &RegExpConfig, ) -> Result { - let chars_to_escape = ['[', ']', '\\', '-', '^']; + let chars_to_escape = ['[', ']', '\\', '-', '^', '$']; let escaped_char_set = char_set .iter() .map(|c| { - if chars_to_escape.contains(&c) { + if chars_to_escape.contains(c) { format!("{}{}", "\\", c) } else if c == &'\n' { "\\n".to_string() diff --git a/src/char/cluster.rs b/src/char/cluster.rs index 29d4941..7a2edbe 100644 --- a/src/char/cluster.rs +++ b/src/char/cluster.rs @@ 
-167,7 +167,7 @@ fn convert_repetitions( config: &RegExpConfig, ) { let repeated_substrings = collect_repeated_substrings(graphemes); - let ranges_of_repetitions = create_ranges_of_repetitions(repeated_substrings); + let ranges_of_repetitions = create_ranges_of_repetitions(repeated_substrings, config); let coalesced_repetitions = coalesce_repetitions(ranges_of_repetitions); replace_graphemes_with_repetitions(coalesced_repetitions, graphemes, repetitions, config) } @@ -190,43 +190,38 @@ fn collect_repeated_substrings(graphemes: &[Grapheme]) -> HashMap, V fn create_ranges_of_repetitions( repeated_substrings: HashMap, Vec>, + config: &RegExpConfig, ) -> Vec<(Range, Vec)> { let mut repetitions = Vec::<(Range, Vec)>::new(); for (prefix_length, group) in &repeated_substrings .iter() - .filter(|&(_, indices)| indices.len() > 1) + .filter(|&(prefix, indices)| { + indices + .iter() + .tuple_windows() + .all(|(first, second)| (second - first) >= prefix.len()) + }) .sorted_by_key(|&(prefix, _)| prefix.len()) .rev() .group_by(|&(prefix, _)| prefix.len()) { for (prefix, indices) in group.sorted_by_key(|&(_, indices)| indices[0]) { - let all_even = indices - .iter() - .all(|it| it % prefix_length == 0 || it % 2 == 0); - let all_odd = indices + indices .iter() - .all(|it| it % prefix_length == 1 || it % 2 == 1); - - if all_even || all_odd { - let ranges = indices - .iter() - .cloned() - .map(|it| it..it + prefix_length) - .coalesce(|x, y| { - if x.end == y.start { - Ok(x.start..y.end) - } else { - Err((x, y)) - } - }) - .filter(|it| (it.end - it.start) > prefix_length) - .collect_vec(); - - for range in ranges { - repetitions.push((range, prefix.clone())); - } - } + .map(|it| *it..it + prefix_length) + .coalesce(|x, y| { + if x.end == y.start { + Ok(x.start..y.end) + } else { + Err((x, y)) + } + }) + .filter(|range| { + let count = ((range.end - range.start) / prefix_length) as u32; + count > config.minimum_repetitions + }) + .for_each(|range| repetitions.push((range, 
prefix.clone()))); } } repetitions @@ -281,22 +276,10 @@ fn replace_graphemes_with_repetitions( let count = ((range.end - range.start) / substr.len()) as u32; - if count <= config.minimum_repetitions - || substr.len() < config.minimum_substring_length as usize - { + if substr.len() < config.minimum_substring_length as usize { continue; } - let joined_substr = substr.iter().join("").repeat(count as usize); - let graphemes_slice = repetitions[range.clone()] - .iter() - .map(|it| it.value()) - .join(""); - - if graphemes_slice != joined_substr { - break; - } - repetitions.splice( range.clone(), [Grapheme::new(substr.clone(), count, count, config)] diff --git a/src/fsm/dfa.rs b/src/fsm/dfa.rs index 90e02ed..62b9c30 100644 --- a/src/fsm/dfa.rs +++ b/src/fsm/dfa.rs @@ -37,12 +37,18 @@ pub struct Dfa { } impl Dfa { - pub(crate) fn from(grapheme_clusters: Vec, config: &RegExpConfig) -> Self { + pub(crate) fn from( + grapheme_clusters: &[GraphemeCluster], + is_minimized: bool, + config: &RegExpConfig, + ) -> Self { let mut dfa = Self::new(config); for cluster in grapheme_clusters { dfa.insert(cluster); } - dfa.minimize(); + if is_minimized { + dfa.minimize(); + } dfa } @@ -79,17 +85,17 @@ impl Dfa { } } - fn insert(&mut self, cluster: GraphemeCluster) { + fn insert(&mut self, cluster: &GraphemeCluster) { let mut current_state = self.initial_state; for grapheme in cluster.graphemes() { self.alphabet.insert(grapheme.clone()); - current_state = self.get_next_state(current_state, grapheme); + current_state = self.return_next_state(current_state, grapheme); } self.final_state_indices.insert(current_state.index()); } - fn get_next_state(&mut self, current_state: State, edge_label: &Grapheme) -> State { + fn return_next_state(&mut self, current_state: State, edge_label: &Grapheme) -> State { match self.find_next_state(current_state, edge_label) { Some(next_state) => next_state, None => self.add_new_state(current_state, edge_label), @@ -267,7 +273,7 @@ mod tests { let mut dfa = 
Dfa::new(&config); assert_eq!(dfa.state_count(), 1); - dfa.insert(GraphemeCluster::from("abcd", &RegExpConfig::new())); + dfa.insert(&GraphemeCluster::from("abcd", &RegExpConfig::new())); assert_eq!(dfa.state_count(), 5); } @@ -275,7 +281,8 @@ mod tests { fn test_is_final_state() { let config = RegExpConfig::new(); let dfa = Dfa::from( - vec![GraphemeCluster::from("abcd", &RegExpConfig::new())], + &vec![GraphemeCluster::from("abcd", &RegExpConfig::new())], + true, &config, ); @@ -290,10 +297,11 @@ mod tests { fn test_outgoing_edges() { let config = RegExpConfig::new(); let dfa = Dfa::from( - vec![ + &vec![ GraphemeCluster::from("abcd", &RegExpConfig::new()), GraphemeCluster::from("abxd", &RegExpConfig::new()), ], + true, &config, ); let state = State::new(2); @@ -321,10 +329,11 @@ mod tests { fn test_states_in_depth_first_order() { let config = RegExpConfig::new(); let dfa = Dfa::from( - vec![ + &vec![ GraphemeCluster::from("abcd", &RegExpConfig::new()), GraphemeCluster::from("axyz", &RegExpConfig::new()), ], + true, &config, ); let states = dfa.states_in_depth_first_order(); @@ -394,11 +403,11 @@ mod tests { assert_eq!(dfa.graph.node_count(), 1); assert_eq!(dfa.graph.edge_count(), 0); - dfa.insert(GraphemeCluster::from("abcd", &RegExpConfig::new())); + dfa.insert(&GraphemeCluster::from("abcd", &RegExpConfig::new())); assert_eq!(dfa.graph.node_count(), 5); assert_eq!(dfa.graph.edge_count(), 4); - dfa.insert(GraphemeCluster::from("abxd", &RegExpConfig::new())); + dfa.insert(&GraphemeCluster::from("abxd", &RegExpConfig::new())); assert_eq!(dfa.graph.node_count(), 7); assert_eq!(dfa.graph.edge_count(), 6); @@ -411,10 +420,11 @@ mod tests { fn test_dfa_constructor() { let config = RegExpConfig::new(); let dfa = Dfa::from( - vec![ + &vec![ GraphemeCluster::from("abcd", &RegExpConfig::new()), GraphemeCluster::from("abxd", &RegExpConfig::new()), ], + true, &config, ); assert_eq!(dfa.graph.node_count(), 5); diff --git a/src/lib.rs b/src/lib.rs index cc5883c..7266386 100644 
--- a/src/lib.rs +++ b/src/lib.rs @@ -99,10 +99,11 @@ //! ### 4.2 Convert to character classes //! //! ``` -//! use grex::{Feature, RegExpBuilder}; +//! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["a", "aa", "123"]) -//! .with_conversion_of(&[Feature::Digit, Feature::Word]) +//! .with_conversion_of_digits() +//! .with_conversion_of_words() //! .build(); //! assert_eq!(regexp, "^(?:\\d\\d\\d|\\w(?:\\w)?)$"); //! ``` @@ -110,10 +111,10 @@ //! ### 4.3 Convert repeated substrings //! //! ``` -//! use grex::{Feature, RegExpBuilder}; +//! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) -//! .with_conversion_of(&[Feature::Repetition]) +//! .with_conversion_of_repetitions() //! .build(); //! assert_eq!(regexp, "^(?:a{2}|(?:bc){2}|(?:def){3})$"); //! ``` @@ -126,10 +127,10 @@ //! substring `a` has a length of 1, but the minimum substring length has been set to 2. //! //! ``` -//! use grex::{Feature, RegExpBuilder}; +//! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) -//! .with_conversion_of(&[Feature::Repetition]) +//! .with_conversion_of_repetitions() //! .with_minimum_substring_length(2) //! .build(); //! assert_eq!(regexp, "^(?:aa|(?:bc){2}|(?:def){3})$"); @@ -139,10 +140,10 @@ //! will be converted because it is the only one that is repeated twice. //! //! ``` -//! use grex::{Feature, RegExpBuilder}; +//! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) -//! .with_conversion_of(&[Feature::Repetition]) +//! .with_conversion_of_repetitions() //! .with_minimum_repetitions(2) //! .build(); //! assert_eq!(regexp, "^(?:bcbc|aa|(?:def){3})$"); @@ -180,10 +181,10 @@ //! Case-insensitive matching can be enabled like so: //! //! ``` -//! use grex::{Feature, RegExpBuilder}; +//! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["big", "BIGGER"]) -//! 
.with_conversion_of(&[Feature::CaseInsensitivity]) +//! .with_case_insensitive_matching() //! .build(); //! assert_eq!(regexp, "(?i)^big(?:ger)?$"); //! ``` @@ -194,10 +195,11 @@ //! Extending the previous example, you can switch to capturing groups instead. //! //! ``` -//! use grex::{Feature, RegExpBuilder}; +//! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["big", "BIGGER"]) -//! .with_conversion_of(&[Feature::CaseInsensitivity, Feature::CapturingGroup]) +//! .with_case_insensitive_matching() +//! .with_capturing_groups() //! .build(); //! assert_eq!(regexp, "(?i)^big(ger)?$"); //! ``` @@ -231,6 +233,22 @@ //! )); //! ``` //! +//! ### 4.8 Disable anchors +//! +//! By default, the anchors `^` and `$` are put around every generated regular expression in order +//! to ensure that it matches only the test cases given as input. Often enough, however, it is +//! desired to use the generated pattern as part of a larger one. For this purpose, the anchors +//! can be disabled, either separately or both of them. +//! +//! ``` +//! use grex::RegExpBuilder; +//! +//! let regexp = RegExpBuilder::from(&["a", "aa", "aaa"]) +//! .without_anchors() +//! .build(); +//! assert_eq!(regexp, "a(?:aa?)?"); +//! ``` +//! //! ### 5. How does it work? //! //! 1. A [deterministic finite automaton](https://en.wikipedia.org/wiki/Deterministic_finite_automaton) (DFA) @@ -252,5 +270,6 @@ mod fsm; mod regexp; mod unicode_tables; +#[allow(deprecated)] pub use regexp::Feature; pub use regexp::RegExpBuilder; diff --git a/src/main.rs b/src/main.rs index 338d01d..a644dfa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; use itertools::Itertools; -use std::io::{Error, ErrorKind}; +use std::io::{BufRead, Error, ErrorKind, Read}; use std::path::PathBuf; use structopt::clap::AppSettings::{AllowLeadingHyphen, ColoredHelp}; use structopt::StructOpt; @@ -178,12 +178,54 @@ struct Cli { )] is_verbose_mode_enabled: bool, + #[structopt( + name = "no-start-anchor", + long, + help = "Removes the caret anchor '^' from the resulting regular expression", + long_help = "Removes the caret anchor '^' from the resulting regular expression.\n\n\ + By default, the caret anchor is added to every generated regular\n\ + expression which guarantees that the expression matches the test cases\n\ + given as input only at the start of a string.\n\ + This flag removes the anchor, thereby allowing to match the test cases also\n\ + when they do not occur at the start of a string.", + display_order = 13 + )] + is_caret_anchor_disabled: bool, + + #[structopt( + name = "no-end-anchor", + long, + help = "Removes the dollar sign anchor '$' from the resulting regular expression", + long_help = "Removes the dollar sign anchor '$' from the resulting regular expression.\n\n\ + By default, the dollar sign anchor is added to every generated regular\n\ + expression which guarantees that the expression matches the test cases\n\ + given as input only at the end of a string.\n\ + This flag removes the anchor, thereby allowing to match the test cases also\n\ + when they do not occur at the end of a string.", + display_order = 14 + )] + is_dollar_sign_anchor_disabled: bool, + + #[structopt( + name = "no-anchors", + long, + help = "Removes the caret and dollar sign anchors from the resulting regular expression", + long_help = "Removes the caret and dollar sign anchors from the resulting regular expression.\n\n\ + By default, anchors are added to every generated regular expression\n\ + which guarantee that the expression exactly matches only the test cases\n\ + given 
as input and nothing else.\n\ + This flag removes the anchors, thereby allowing to match the test cases also\n\ + when they occur within a larger string that contains other content as well.", + display_order = 15 + )] + are_anchors_disabled: bool, + #[structopt( name = "colorize", short, long, help = "Provides syntax highlighting for the resulting regular expression", - display_order = 13 + display_order = 16 )] is_output_colorized: bool, @@ -234,10 +276,31 @@ fn main() { } fn obtain_input(cli: &Cli) -> Result<Vec<String>, Error> { + let is_stdin_available = atty::isnt(atty::Stream::Stdin); + if !cli.input.is_empty() { - Ok(cli.input.clone()) + let is_single_item = cli.input.len() == 1; + let is_hyphen = cli.input.get(0).unwrap() == "-"; + + if is_single_item && is_hyphen && is_stdin_available { + Ok(std::io::stdin() + .lock() + .lines() + .map(|line| line.unwrap()) + .collect_vec()) + } else { + Ok(cli.input.clone()) + } } else if let Some(file_path) = &cli.file_path { - match std::fs::read_to_string(&file_path) { + let is_hyphen = file_path.as_os_str() == "-"; + let path = if is_hyphen && is_stdin_available { + let mut stdin_file_path = String::new(); + std::io::stdin().read_to_string(&mut stdin_file_path)?; + PathBuf::from(stdin_file_path.trim()) + } else { + file_path.to_path_buf() + }; + match std::fs::read_to_string(&path) { Ok(file_content) => Ok(file_content.lines().map(|it| it.to_string()).collect_vec()), Err(error) => Err(error), } @@ -253,46 +316,41 @@ fn handle_input(cli: &Cli, input: Result<Vec<String>, Error>) { match input { Ok(test_cases) => { let mut builder = RegExpBuilder::from(&test_cases); - let mut conversion_features = vec![]; if cli.is_digit_converted { - conversion_features.push(Feature::Digit); + builder.with_conversion_of_digits(); } if cli.is_non_digit_converted { - conversion_features.push(Feature::NonDigit); + builder.with_conversion_of_non_digits(); } if cli.is_space_converted { - conversion_features.push(Feature::Space); +
builder.with_conversion_of_whitespace(); } if cli.is_non_space_converted { - conversion_features.push(Feature::NonSpace); + builder.with_conversion_of_non_whitespace(); } if cli.is_word_converted { - conversion_features.push(Feature::Word); + builder.with_conversion_of_words(); } if cli.is_non_word_converted { - conversion_features.push(Feature::NonWord); + builder.with_conversion_of_non_words(); } if cli.is_repetition_converted { - conversion_features.push(Feature::Repetition); + builder.with_conversion_of_repetitions(); } if cli.is_case_ignored { - conversion_features.push(Feature::CaseInsensitivity); + builder.with_case_insensitive_matching(); } if cli.is_group_captured { - conversion_features.push(Feature::CapturingGroup); - } - - if !conversion_features.is_empty() { - builder.with_conversion_of(&conversion_features); + builder.with_capturing_groups(); } if cli.is_non_ascii_char_escaped { @@ -305,6 +363,18 @@ fn handle_input(cli: &Cli, input: Result, Error>) { builder.with_verbose_mode(); } + if cli.is_caret_anchor_disabled { + builder.without_start_anchor(); + } + + if cli.is_dollar_sign_anchor_disabled { + builder.without_end_anchor(); + } + + if cli.are_anchors_disabled { + builder.without_anchors(); + } + if cli.is_output_colorized { builder.with_syntax_highlighting(); } diff --git a/src/regexp/builder.rs b/src/regexp/builder.rs index 99a8abb..8a6cdfd 100644 --- a/src/regexp/builder.rs +++ b/src/regexp/builder.rs @@ -14,6 +14,8 @@ * limitations under the License. */ +#![allow(deprecated)] + use crate::regexp::feature::Feature; use crate::regexp::{RegExp, RegExpConfig}; use itertools::Itertools; @@ -74,11 +76,107 @@ impl RegExpBuilder { } } + /// Tells `RegExpBuilder` to convert any Unicode decimal digit to character class `\d`. + /// + /// This method takes precedence over + /// [`with_conversion_of_words`](Self::with_conversion_of_words) if both are set. + /// Decimal digits are converted to `\d`, the remaining word characters to `\w`. 
+ /// + /// This method takes precedence over + /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. + /// Decimal digits are converted to `\d`, the remaining non-whitespace characters to `\S`. + pub fn with_conversion_of_digits(&mut self) -> &mut Self { + self.config.is_digit_converted = true; + self + } + + /// Tells `RegExpBuilder` to convert any character which is not + /// a Unicode decimal digit to character class `\D`. + /// + /// This method takes precedence over + /// [`with_conversion_of_non_words`](Self::with_conversion_of_non_words) if both are set. + /// Non-digits which are also non-word characters are converted to `\D`. + /// + /// This method takes precedence over + /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. + /// Non-digits which are also non-space characters are converted to `\D`. + pub fn with_conversion_of_non_digits(&mut self) -> &mut Self { + self.config.is_non_digit_converted = true; + self + } + + /// Tells `RegExpBuilder` to convert any Unicode whitespace character to character class `\s`. + /// + /// This method takes precedence over + /// [`with_conversion_of_non_digits`](Self::with_conversion_of_non_digits) if both are set. + /// Whitespace characters are converted to `\s`, the remaining non-digit characters to `\D`. + /// + /// This method takes precedence over + /// [`with_conversion_of_non_words`](Self::with_conversion_of_non_words) if both are set. + /// Whitespace characters are converted to `\s`, the remaining non-word characters to `\W`. + pub fn with_conversion_of_whitespace(&mut self) -> &mut Self { + self.config.is_space_converted = true; + self + } + + /// Tells `RegExpBuilder` to convert any character which is not + /// a Unicode whitespace character to character class `\S`. 
+ pub fn with_conversion_of_non_whitespace(&mut self) -> &mut Self { + self.config.is_non_space_converted = true; + self + } + + /// Tells `RegExpBuilder` to convert any Unicode word character to character class `\w`. + /// + /// This method takes precedence over + /// [`with_conversion_of_non_digits`](Self::with_conversion_of_non_digits) if both are set. + /// Word characters are converted to `\w`, the remaining non-digit characters to `\D`. + /// + /// This method takes precedence over + /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. + /// Word characters are converted to `\w`, the remaining non-space characters to `\S`. + pub fn with_conversion_of_words(&mut self) -> &mut Self { + self.config.is_word_converted = true; + self + } + + /// Tells `RegExpBuilder` to convert any character which is not + /// a Unicode word character to character class `\W`. + /// + /// This method takes precedence over + /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. + /// Non-words which are also non-space characters are converted to `\W`. + pub fn with_conversion_of_non_words(&mut self) -> &mut Self { + self.config.is_non_word_converted = true; + self + } + + /// Tells `RegExpBuilder` to detect repeated non-overlapping substrings and + /// to convert them to `{min,max}` quantifier notation. + pub fn with_conversion_of_repetitions(&mut self) -> &mut Self { + self.config.is_repetition_converted = true; + self + } + + /// Tells `RegExpBuilder` to enable case-insensitive matching of test cases + /// so that letters match both upper and lower case. + pub fn with_case_insensitive_matching(&mut self) -> &mut Self { + self.config.is_case_insensitive_matching = true; + self + } + + /// Tells `RegExpBuilder` to replace non-capturing groups by capturing ones. 
+ pub fn with_capturing_groups(&mut self) -> &mut Self { + self.config.is_capturing_group_enabled = true; + self + } + /// Tells `RegExpBuilder` which conversions should be performed during /// regular expression generation. The available conversion features /// are listed in the [`Feature`](./enum.Feature.html#variants) enum. /// /// ⚠ Panics if `features` is empty. + #[deprecated(since = "1.3.0", note = "This method will be removed in 1.4.0.")] pub fn with_conversion_of(&mut self, features: &[Feature]) -> &mut Self { if features.is_empty() { panic!("No conversion features have been provided for regular expression generation"); @@ -88,9 +186,7 @@ impl RegExpBuilder { } /// Specifies the minimum quantity of substring repetitions to be converted if - /// [`Feature::Repetition`](./enum.Feature.html#variant.Repetition) - /// is set as one of the features in method - /// [`with_conversion_of`](./struct.RegExpBuilder.html#method.with_conversion_of). + /// [`with_conversion_of_repetitions`](Self::with_conversion_of_repetitions) is set. /// /// If the quantity is not explicitly set with this method, a default value of 1 will be used. /// @@ -104,9 +200,7 @@ impl RegExpBuilder { } /// Specifies the minimum length a repeated substring must have in order to be converted if - /// [`Feature::Repetition`](./enum.Feature.html#variant.Repetition) - /// is set as one of the features in method - /// [`with_conversion_of`](./struct.RegExpBuilder.html#method.with_conversion_of). + /// [`with_conversion_of_repetitions`](Self::with_conversion_of_repetitions) is set. /// /// If the length is not explicitly set with this method, a default value of 1 will be used. /// @@ -128,11 +222,37 @@ impl RegExpBuilder { self } + /// Tells `RegExpBuilder` to produce a nicer looking regular expression in verbose mode. 
pub fn with_verbose_mode(&mut self) -> &mut Self { self.config.is_verbose_mode_enabled = true; self } + /// Tells `RegExpBuilder` to remove the caret anchor '^' from the resulting regular + /// expression, thereby allowing to match the test cases also when they do not occur + /// at the start of a string. + pub fn without_start_anchor(&mut self) -> &mut Self { + self.config.is_start_anchor_disabled = true; + self + } + + /// Tells `RegExpBuilder` to remove the dollar sign anchor '$' from the resulting regular + /// expression, thereby allowing to match the test cases also when they do not occur + /// at the end of a string. + pub fn without_end_anchor(&mut self) -> &mut Self { + self.config.is_end_anchor_disabled = true; + self + } + + /// Tells `RegExpBuilder` to remove the caret and dollar sign anchors from the resulting + /// regular expression, thereby allowing to match the test cases also when they occur + /// within a larger string that contains other content as well. + pub fn without_anchors(&mut self) -> &mut Self { + self.config.is_start_anchor_disabled = true; + self.config.is_end_anchor_disabled = true; + self + } + /// Tells `RegExpBuilder` to provide syntax highlighting for the resulting regular expression. /// /// ⚠ This method may only be used if the resulting regular expression is meant to diff --git a/src/regexp/config.rs b/src/regexp/config.rs index 95ff71e..2042fe6 100644 --- a/src/regexp/config.rs +++ b/src/regexp/config.rs @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#![allow(deprecated)] + use crate::regexp::Feature; #[derive(Clone, Debug, Hash, Ord, PartialOrd, Eq, PartialEq)] @@ -21,9 +23,20 @@ pub struct RegExpConfig { pub(crate) conversion_features: Vec<Feature>, pub(crate) minimum_repetitions: u32, pub(crate) minimum_substring_length: u32, + pub(crate) is_digit_converted: bool, + pub(crate) is_non_digit_converted: bool, + pub(crate) is_space_converted: bool, + pub(crate) is_non_space_converted: bool, + pub(crate) is_word_converted: bool, + pub(crate) is_non_word_converted: bool, + pub(crate) is_repetition_converted: bool, + pub(crate) is_case_insensitive_matching: bool, + pub(crate) is_capturing_group_enabled: bool, pub(crate) is_non_ascii_char_escaped: bool, pub(crate) is_astral_code_point_converted_to_surrogate: bool, pub(crate) is_verbose_mode_enabled: bool, + pub(crate) is_start_anchor_disabled: bool, + pub(crate) is_end_anchor_disabled: bool, pub(crate) is_output_colorized: bool, } @@ -33,51 +46,73 @@ impl RegExpConfig { conversion_features: vec![], minimum_repetitions: 1, minimum_substring_length: 1, + is_digit_converted: false, + is_non_digit_converted: false, + is_space_converted: false, + is_non_space_converted: false, + is_word_converted: false, + is_non_word_converted: false, + is_repetition_converted: false, + is_case_insensitive_matching: false, + is_capturing_group_enabled: false, is_non_ascii_char_escaped: false, is_astral_code_point_converted_to_surrogate: false, is_verbose_mode_enabled: false, + is_start_anchor_disabled: false, + is_end_anchor_disabled: false, is_output_colorized: false, } } pub(crate) fn is_digit_converted(&self) -> bool { - self.conversion_features.contains(&Feature::Digit) + self.is_digit_converted || self.conversion_features.contains(&Feature::Digit) } pub(crate) fn is_non_digit_converted(&self) -> bool { - self.conversion_features.contains(&Feature::NonDigit) + self.is_non_digit_converted || self.conversion_features.contains(&Feature::NonDigit) } pub(crate) fn is_space_converted(&self) ->
bool { - self.conversion_features.contains(&Feature::Space) + self.is_space_converted || self.conversion_features.contains(&Feature::Space) } pub(crate) fn is_non_space_converted(&self) -> bool { - self.conversion_features.contains(&Feature::NonSpace) + self.is_non_space_converted || self.conversion_features.contains(&Feature::NonSpace) } pub(crate) fn is_word_converted(&self) -> bool { - self.conversion_features.contains(&Feature::Word) + self.is_word_converted || self.conversion_features.contains(&Feature::Word) } pub(crate) fn is_non_word_converted(&self) -> bool { - self.conversion_features.contains(&Feature::NonWord) + self.is_non_word_converted || self.conversion_features.contains(&Feature::NonWord) } pub(crate) fn is_repetition_converted(&self) -> bool { - self.conversion_features.contains(&Feature::Repetition) + self.is_repetition_converted || self.conversion_features.contains(&Feature::Repetition) } pub(crate) fn is_case_insensitive_matching(&self) -> bool { - self.conversion_features - .contains(&Feature::CaseInsensitivity) + self.is_case_insensitive_matching + || self + .conversion_features + .contains(&Feature::CaseInsensitivity) } pub(crate) fn is_capturing_group_enabled(&self) -> bool { - self.conversion_features.contains(&Feature::CapturingGroup) + self.is_capturing_group_enabled + || self.conversion_features.contains(&Feature::CapturingGroup) } pub(crate) fn is_char_class_feature_enabled(&self) -> bool { - self.conversion_features.iter().any(|it| it.is_char_class()) + self.is_digit_converted + || self.is_non_digit_converted + || self.is_space_converted + || self.is_non_space_converted + || self.is_word_converted + || self.is_non_word_converted + || self.is_case_insensitive_matching + || self.is_capturing_group_enabled + || self.conversion_features.iter().any(|it| it.is_char_class()) } } diff --git a/src/regexp/feature.rs b/src/regexp/feature.rs index 1ffc739..e6c9bad 100644 --- a/src/regexp/feature.rs +++ b/src/regexp/feature.rs @@ -14,18 +14,21 @@ 
* limitations under the License. */ +#![allow(deprecated)] + /// This enum specifies the supported conversion features which can be passed to method /// [`RegExpBuilder.with_conversion_of`](./struct.RegExpBuilder.html#method.with_conversion_of). #[derive(Clone, Debug, Hash, Ord, PartialOrd, Eq, PartialEq)] +#[deprecated(since = "1.3.0", note = "This enum will be removed in 1.4.0.")] pub enum Feature { /// This feature converts any Unicode decimal digit to character class `\d`. /// /// It takes precedence over the - /// [`Word`](./enum.Feature.html#variant.Word) feature if both are set. + /// [`Word`](Feature::Word) feature if both are set. /// Decimal digits are converted to `\d`, the remaining word characters to `\w`. /// /// It takes precedence over the - /// [`NonSpace`](./enum.Feature.html#variant.NonSpace) feature if both are set. + /// [`NonSpace`](Feature::NonSpace) feature if both are set. /// Decimal digits are converted to `\d`, the remaining non-whitespace characters to `\S`. Digit, @@ -33,22 +36,22 @@ pub enum Feature { /// a Unicode decimal digit to character class `\D`. /// /// It takes precedence over the - /// [`NonWord`](./enum.Feature.html#variant.NonWord) feature if both are set. + /// [`NonWord`](Feature::NonWord) feature if both are set. /// Non-digits which are also non-word characters are converted to `\D`. /// /// It takes precedence over the - /// [`NonSpace`](./enum.Feature.html#variant.NonSpace) feature if both are set. + /// [`NonSpace`](Feature::NonSpace) feature if both are set. /// Non-digits which are also non-space characters are converted to `\D`. NonDigit, /// This feature converts any Unicode whitespace character to character class `\s`. /// /// It takes precedence over the - /// [`NonDigit`](./enum.Feature.html#variant.NonDigit) feature if both are set. + /// [`NonDigit`](Feature::NonDigit) feature if both are set. /// Whitespace characters are converted to `\s`, the remaining non-digit characters to `\D`. 
/// /// It takes precedence over the - /// [`NonWord`](./enum.Feature.html#variant.NonWord) feature if both are set. + /// [`NonWord`](Feature::NonWord) feature if both are set. /// Whitespace characters are converted to `\s`, the remaining non-word characters to `\W`. Space, @@ -59,11 +62,11 @@ pub enum Feature { /// This feature converts any Unicode word character to character class `\w`. /// /// It takes precedence over the - /// [`NonDigit`](./enum.Feature.html#variant.NonDigit) feature if both are set. + /// [`NonDigit`](Feature::NonDigit) feature if both are set. /// Word characters are converted to `\w`, the remaining non-digit characters to `\D`. /// /// It takes precedence over the - /// [`NonSpace`](./enum.Feature.html#variant.NonSpace) feature if both are set. + /// [`NonSpace`](Feature::NonSpace) feature if both are set. /// Word characters are converted to `\w`, the remaining non-space characters to `\S`. Word, @@ -71,7 +74,7 @@ pub enum Feature { /// a Unicode word character to character class `\W`. /// /// It takes precedence over the - /// [`NonSpace`](./enum.Feature.html#variant.NonSpace) feature if both are set. + /// [`NonSpace`](Feature::NonSpace) feature if both are set. /// Non-words which are also non-space characters are converted to `\W`. NonWord, diff --git a/src/regexp/mod.rs b/src/regexp/mod.rs index a4132bf..9b46185 100644 --- a/src/regexp/mod.rs +++ b/src/regexp/mod.rs @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#![allow(deprecated)] + mod builder; mod component; mod config; diff --git a/src/regexp/regexp.rs b/src/regexp/regexp.rs index a2b0d4d..e394d03 100644 --- a/src/regexp/regexp.rs +++ b/src/regexp/regexp.rs @@ -36,9 +36,18 @@ impl RegExp { Self::convert_to_lowercase(test_cases); } Self::sort(test_cases); - let grapheme_clusters = Self::grapheme_clusters(&test_cases, config); - let dfa = Dfa::from(grapheme_clusters, config); - let ast = Expression::from(dfa, config); + let grapheme_clusters = Self::grapheme_clusters(test_cases, config); + let mut dfa = Dfa::from(&grapheme_clusters, true, config); + let mut ast = Expression::from(dfa, config); + + if config.is_start_anchor_disabled + && config.is_end_anchor_disabled + && !Self::is_each_test_case_matched(&mut ast, test_cases, config) + { + dfa = Dfa::from(&grapheme_clusters, false, config); + ast = Expression::from(dfa, config); + } + Self { ast, config: config.clone(), @@ -53,7 +62,7 @@ impl RegExp { test_cases.sort(); test_cases.dedup(); test_cases.sort_by(|a, b| match a.len().cmp(&b.len()) { - Ordering::Equal => a.cmp(&b), + Ordering::Equal => a.cmp(b), other => other, }); } @@ -78,6 +87,40 @@ impl RegExp { clusters } + + fn is_each_test_case_matched( + expr: &mut Expression, + test_cases: &[String], + config: &RegExpConfig, + ) -> bool { + let regex = if config.is_output_colorized { + let color_replace_regex = Regex::new("\u{1b}\\[(?:\\d+;\\d+|0)m").unwrap(); + Regex::new(&*color_replace_regex.replace_all(&expr.to_string(), "")).unwrap() + } else { + Regex::new(&expr.to_string()).unwrap() + }; + + for _ in 1..test_cases.len() { + if test_cases + .iter() + .all(|test_case| regex.find_iter(test_case).count() == 1) + { + return true; + } else if let Expression::Alternation(options, _) = expr { + options.rotate_right(1); + } else if let Expression::Concatenation(first, second, _) = expr { + let a: &mut Expression = first; + let b: &mut Expression = second; + + if let Expression::Alternation(options, _) = a { + 
options.rotate_right(1); + } else if let Expression::Alternation(options, _) = b { + options.rotate_right(1); + } + } + } + false + } } impl Display for RegExp { @@ -87,8 +130,16 @@ impl Display for RegExp { } else { String::new() }; - let caret = Component::Caret.to_repr(self.config.is_output_colorized); - let dollar_sign = Component::DollarSign.to_repr(self.config.is_output_colorized); + let caret = if self.config.is_start_anchor_disabled { + String::new() + } else { + Component::Caret.to_repr(self.config.is_output_colorized) + }; + let dollar_sign = if self.config.is_end_anchor_disabled { + String::new() + } else { + Component::DollarSign.to_repr(self.config.is_output_colorized) + }; let mut regexp = match self.ast { Expression::Alternation(_, _) => { format!( @@ -238,7 +289,11 @@ fn apply_verbose_mode(regexp: String, config: &RegExpConfig) -> String { }; let mut verbose_regexp = vec![verbose_mode_flag]; - let mut nesting_level = 0; + let mut nesting_level = if config.is_start_anchor_disabled { + 1 + } else { + 0 + }; let regexp_with_replacements = regexp .replace( @@ -254,6 +309,7 @@ fn apply_verbose_mode(regexp: String, config: &RegExpConfig) -> String { .replace(" ", "\\s") .replace(" ", "\\s") .replace("\u{85}", "\\s") + .replace("\u{2005}", "\\s") .replace("\u{2028}", "\\s") .replace(" ", "\\ "); diff --git a/tests/cli_integration_tests.rs b/tests/cli_integration_tests.rs index e810208..f9e0463 100644 --- a/tests/cli_integration_tests.rs +++ b/tests/cli_integration_tests.rs @@ -14,11 +14,10 @@ * limitations under the License. 
*/ -use assert_cmd::prelude::*; +use assert_cmd::Command; use indoc::indoc; use predicates::prelude::*; use std::io::Write; -use std::process::Command; use tempfile::NamedTempFile; const TEST_CASE: &str = "I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."; @@ -128,6 +127,27 @@ mod no_conversion { .stdout(predicate::eq("^(?:b\\\\n|äöü|[ac♥])$\n")); } + #[test] + fn succeeds_with_test_cases_from_stdin() { + let mut grex = init_command(); + grex.write_stdin("a\nb\\n\n\nc\näöü\n♥") + .arg("-") + .assert() + .stdout(predicate::eq("^(?:b\\\\n|äöü|[ac♥])$\n")); + } + + #[test] + fn succeeds_with_file_from_stdin() { + let mut file = NamedTempFile::new().unwrap(); + writeln!(file, "a\nb\\n\n\nc\näöü\n♥").unwrap(); + + let mut grex = init_command(); + grex.write_stdin(file.path().to_str().unwrap()) + .args(&["-f", "-"]) + .assert() + .stdout(predicate::eq("^(?:b\\\\n|äöü|[ac♥])$\n")); + } + #[test] fn fails_with_surrogate_but_without_escape_option() { let mut grex = init_command(); @@ -855,7 +875,7 @@ mod word_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\w {3}♥{3} \\w{2} \\w{3} \\w \\w{3} \\w{4} \\w{3} 💩{2}\\.$\n", + "^\\w {3}♥{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}💩{2}\\.$\n", )); } @@ -864,7 +884,7 @@ mod word_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\w {3}\\u{2665}{3} \\w{2} \\w{3} \\w \\w{3} \\w{4} \\w{3} \\u{1f4a9}{2}\\.$\n", + "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$\n", )); } @@ -879,7 +899,7 @@ mod word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w {3}\\u{2665}{3} \\w{2} \\w{3} \\w \\w{3} \\w{4} \\w{3} (?:\\u{d83d}\\u{dca9}){2}\\.$\n", + "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } @@ -891,7 +911,14 @@ mod word_conversion { r#" (?x) ^ - 
\w\ {3}♥{3}\ \w{2}\ \w{3}\ \w\ \w{3}\ \w{4}\ \w{3}\ 💩{2}\. + \w\ {3}♥{3}\ \w{2} + (?: + \ \w{3}\ \w + ){2} + (?: + \w{3}\ + ){2} + 💩{2}\. $ "# ))); @@ -911,7 +938,14 @@ mod word_conversion { r#" (?x) ^ - \w\ {3}\u{2665}{3}\ \w{2}\ \w{3}\ \w\ \w{3}\ \w{4}\ \w{3}\ \u{1f4a9}{2}\. + \w\ {3}\u{2665}{3}\ \w{2} + (?: + \ \w{3}\ \w + ){2} + (?: + \w{3}\ + ){2} + \u{1f4a9}{2}\. $ "# ))); @@ -932,7 +966,13 @@ mod word_conversion { r#" (?x) ^ - \w\ {3}\u{2665}{3}\ \w{2}\ \w{3}\ \w\ \w{3}\ \w{4}\ \w{3}\ + \w\ {3}\u{2665}{3}\ \w{2} + (?: + \ \w{3}\ \w + ){2} + (?: + \w{3}\ + ){2} (?: \u{d83d}\u{dca9} ){2} @@ -1263,7 +1303,7 @@ mod digit_word_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--digits", "--words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\w {3}♥{3} \\d(?:\\d \\w{3} ){2}\\w{4} \\w{3} 💩{2}\\.$\n", + "^\\w {3}♥{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}💩{2}\\.$\n", )); } @@ -1278,7 +1318,7 @@ mod digit_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w{4} \\w{3} \\u{1f4a9}{2}\\.$\n", + "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$\n", )); } @@ -1294,7 +1334,7 @@ mod digit_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w{4} \\w{3} (?:\\u{d83d}\\u{dca9}){2}\\.$\n", + "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } @@ -1316,7 +1356,11 @@ mod digit_word_conversion { (?: \d\ \w{3}\ ){2} - \w{4}\ \w{3}\ 💩{2}\. + \w + (?: + \w{3}\ + ){2} + 💩{2}\. $ "# ))); @@ -1341,7 +1385,11 @@ mod digit_word_conversion { (?: \d\ \w{3}\ ){2} - \w{4}\ \w{3}\ \u{1f4a9}{2}\. + \w + (?: + \w{3}\ + ){2} + \u{1f4a9}{2}\. 
$ "# ))); @@ -1367,7 +1415,10 @@ mod digit_word_conversion { (?: \d\ \w{3}\ ){2} - \w{4}\ \w{3}\ + \w + (?: + \w{3}\ + ){2} (?: \u{d83d}\u{dca9} ){2} @@ -1476,7 +1527,7 @@ mod space_word_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--words", "--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\w\\s{3}♥{3}\\s\\w{2}\\s\\w{3}\\s\\w\\s\\w{3}\\s\\w{4}\\s\\w{3}\\s💩{2}\\.$\n", + "^\\w\\s{3}♥{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}💩{2}\\.$\n", )); } @@ -1491,7 +1542,7 @@ mod space_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}\\s\\w{3}\\s\\w\\s\\w{3}\\s\\w{4}\\s\\w{3}\\s\\u{1f4a9}{2}\\.$\n", + "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$\n", )); } @@ -1507,7 +1558,7 @@ mod space_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}\\s\\w{3}\\s\\w\\s\\w{3}\\s\\w{4}\\s\\w{3}\\s(?:\\u{d83d}\\u{dca9}){2}\\.$\n", + "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } @@ -1525,7 +1576,14 @@ mod space_word_conversion { r#" (?x) ^ - \w\s{3}♥{3}\s\w{2}\s\w{3}\s\w\s\w{3}\s\w{4}\s\w{3}\s💩{2}\. + \w\s{3}♥{3}\s\w{2} + (?: + \s\w{3}\s\w + ){2} + (?: + \w{3}\s + ){2} + 💩{2}\. $ "# ))); @@ -1546,7 +1604,14 @@ mod space_word_conversion { r#" (?x) ^ - \w\s{3}\u{2665}{3}\s\w{2}\s\w{3}\s\w\s\w{3}\s\w{4}\s\w{3}\s\u{1f4a9}{2}\. + \w\s{3}\u{2665}{3}\s\w{2} + (?: + \s\w{3}\s\w + ){2} + (?: + \w{3}\s + ){2} + \u{1f4a9}{2}\. 
$ "# ))); @@ -1568,7 +1633,13 @@ mod space_word_conversion { r#" (?x) ^ - \w\s{3}\u{2665}{3}\s\w{2}\s\w{3}\s\w\s\w{3}\s\w{4}\s\w{3}\s + \w\s{3}\u{2665}{3}\s\w{2} + (?: + \s\w{3}\s\w + ){2} + (?: + \w{3}\s + ){2} (?: \u{d83d}\u{dca9} ){2} @@ -1692,7 +1763,7 @@ mod digit_space_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w\\s{3}♥{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w{4}\\s\\w{3}\\s💩{2}\\.$\n", + "^\\w\\s{3}♥{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}💩{2}\\.$\n", )); } @@ -1708,7 +1779,7 @@ mod digit_space_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w{4}\\s\\w{3}\\s\\u{1f4a9}{2}\\.$\n", + "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$\n", )); } @@ -1725,7 +1796,7 @@ mod digit_space_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w{4}\\s\\w{3}\\s(?:\\u{d83d}\\u{dca9}){2}\\.$\n", + "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } @@ -1748,7 +1819,11 @@ mod digit_space_word_conversion { (?: \d\s\w{3}\s ){2} - \w{4}\s\w{3}\s💩{2}\. + \w + (?: + \w{3}\s + ){2} + 💩{2}\. $ "# ))); @@ -1774,7 +1849,11 @@ mod digit_space_word_conversion { (?: \d\s\w{3}\s ){2} - \w{4}\s\w{3}\s\u{1f4a9}{2}\. + \w + (?: + \w{3}\s + ){2} + \u{1f4a9}{2}\. 
$ "# ))); @@ -1801,7 +1880,10 @@ mod digit_space_word_conversion { (?: \d\s\w{3}\s ){2} - \w{4}\s\w{3}\s + \w + (?: + \w{3}\s + ){2} (?: \u{d83d}\u{dca9} ){2} @@ -2078,7 +2160,7 @@ mod non_space_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--non-spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\S {3}\\S{3} \\S{2} \\S{3} \\S \\S{3} \\S{4} \\S{3} \\S{3}$\n", + "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$\n", )); } @@ -2087,7 +2169,7 @@ mod non_space_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--non-spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\S {3}\\S{3} \\S{2} \\S{3} \\S \\S{3} \\S{4} \\S{3} \\S{3}$\n", + "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$\n", )); } @@ -2102,7 +2184,7 @@ mod non_space_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\S {3}\\S{3} \\S{2} \\S{3} \\S \\S{3} \\S{4} \\S{3} \\S{3}$\n", + "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$\n", )); } @@ -2114,7 +2196,17 @@ mod non_space_conversion { r#" (?x) ^ - \S\ {3}\S{3}\ \S{2}\ \S{3}\ \S\ \S{3}\ \S{4}\ \S{3}\ \S{3} + \S\ {3}\S + (?: + \S{2}\ + ){2} + \S{3}\ + (?: + \S + (?: + \ \S{3} + ){2} + ){2} $ "# ))); @@ -2134,7 +2226,17 @@ mod non_space_conversion { r#" (?x) ^ - \S\ {3}\S{3}\ \S{2}\ \S{3}\ \S\ \S{3}\ \S{4}\ \S{3}\ \S{3} + \S\ {3}\S + (?: + \S{2}\ + ){2} + \S{3}\ + (?: + \S + (?: + \ \S{3} + ){2} + ){2} $ "# ))); @@ -2155,7 +2257,17 @@ mod non_space_conversion { r#" (?x) ^ - \S\ {3}\S{3}\ \S{2}\ \S{3}\ \S\ \S{3}\ \S{4}\ \S{3}\ \S{3} + \S\ {3}\S + (?: + \S{2}\ + ){2} + \S{3}\ + (?: + \S + (?: + \ \S{3} + ){2} + ){2} $ "# ))); @@ -2859,7 +2971,7 @@ mod non_space_non_word_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--non-spaces", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\S\\W{7}\\S{2}\\W\\S{3}\\W\\S\\W\\S{3}\\W\\S{4}\\W\\S{3}\\W{4}$\n", + 
"^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$\n", )); } @@ -2874,7 +2986,7 @@ mod non_space_non_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\S\\W{7}\\S{2}\\W\\S{3}\\W\\S\\W\\S{3}\\W\\S{4}\\W\\S{3}\\W{4}$\n", + "^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$\n", )); } @@ -2890,7 +3002,7 @@ mod non_space_non_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\S\\W{7}\\S{2}\\W\\S{3}\\W\\S\\W\\S{3}\\W\\S{4}\\W\\S{3}\\W{4}$\n", + "^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$\n", )); } @@ -2908,7 +3020,11 @@ mod non_space_non_word_conversion { r#" (?x) ^ - \S\W{7}\S{2}\W\S{3}\W\S\W\S{3}\W\S{4}\W\S{3}\W{4} + \S\W{7}\S + (?: + \S\W\S{3}\W + ){2} + \S{4}\W\S{3}\W{4} $ "# ))); @@ -2929,7 +3045,11 @@ mod non_space_non_word_conversion { r#" (?x) ^ - \S\W{7}\S{2}\W\S{3}\W\S\W\S{3}\W\S{4}\W\S{3}\W{4} + \S\W{7}\S + (?: + \S\W\S{3}\W + ){2} + \S{4}\W\S{3}\W{4} $ "# ))); @@ -2951,7 +3071,11 @@ mod non_space_non_word_conversion { r#" (?x) ^ - \S\W{7}\S{2}\W\S{3}\W\S\W\S{3}\W\S{4}\W\S{3}\W{4} + \S\W{7}\S + (?: + \S\W\S{3}\W + ){2} + \S{4}\W\S{3}\W{4} $ "# ))); @@ -3494,7 +3618,7 @@ mod space_non_space_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--spaces", "--non-spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\S\\s{3}\\S{3}\\s\\S{2}\\s\\S{3}\\s\\S\\s\\S{3}\\s\\S{4}\\s\\S{3}\\s\\S{3}$\n", + "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$\n", )); } @@ -3509,7 +3633,7 @@ mod space_non_space_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\S\\s{3}\\S{3}\\s\\S{2}\\s\\S{3}\\s\\S\\s\\S{3}\\s\\S{4}\\s\\S{3}\\s\\S{3}$\n", + "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$\n", )); } @@ -3525,7 +3649,7 @@ mod space_non_space_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - 
"^\\S\\s{3}\\S{3}\\s\\S{2}\\s\\S{3}\\s\\S\\s\\S{3}\\s\\S{4}\\s\\S{3}\\s\\S{3}$\n", + "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$\n", )); } @@ -3543,7 +3667,17 @@ mod space_non_space_conversion { r#" (?x) ^ - \S\s{3}\S{3}\s\S{2}\s\S{3}\s\S\s\S{3}\s\S{4}\s\S{3}\s\S{3} + \S\s{3}\S + (?: + \S{2}\s + ){2} + \S{3}\s + (?: + \S + (?: + \s\S{3} + ){2} + ){2} $ "# ))); @@ -3564,7 +3698,17 @@ mod space_non_space_conversion { r#" (?x) ^ - \S\s{3}\S{3}\s\S{2}\s\S{3}\s\S\s\S{3}\s\S{4}\s\S{3}\s\S{3} + \S\s{3}\S + (?: + \S{2}\s + ){2} + \S{3}\s + (?: + \S + (?: + \s\S{3} + ){2} + ){2} $ "# ))); @@ -3586,7 +3730,17 @@ mod space_non_space_conversion { r#" (?x) ^ - \S\s{3}\S{3}\s\S{2}\s\S{3}\s\S\s\S{3}\s\S{4}\s\S{3}\s\S{3} + \S\s{3}\S + (?: + \S{2}\s + ){2} + \S{3}\s + (?: + \S + (?: + \s\S{3} + ){2} + ){2} $ "# ))); @@ -3691,7 +3845,7 @@ mod word_non_word_conversion { let mut grex = init_command(); grex.args(&["--repetitions", "--words", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( - "^\\w\\W{7}\\w{2}\\W\\w{3}\\W\\w\\W\\w{3}\\W\\w{4}\\W\\w{3}\\W{4}$\n", + "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$\n", )); } @@ -3706,7 +3860,7 @@ mod word_non_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w\\W{7}\\w{2}\\W\\w{3}\\W\\w\\W\\w{3}\\W\\w{4}\\W\\w{3}\\W{4}$\n", + "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$\n", )); } @@ -3722,7 +3876,7 @@ mod word_non_word_conversion { TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( - "^\\w\\W{7}\\w{2}\\W\\w{3}\\W\\w\\W\\w{3}\\W\\w{4}\\W\\w{3}\\W{4}$\n", + "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$\n", )); } @@ -3740,7 +3894,11 @@ mod word_non_word_conversion { r#" (?x) ^ - \w\W{7}\w{2}\W\w{3}\W\w\W\w{3}\W\w{4}\W\w{3}\W{4} + \w\W{7}\w + (?: + \w\W\w{3}\W + ){2} + \w{4}\W\w{3}\W{4} $ "# ))); @@ -3761,7 +3919,11 @@ mod word_non_word_conversion { r#" (?x) ^ - \w\W{7}\w{2}\W\w{3}\W\w\W\w{3}\W\w{4}\W\w{3}\W{4} + 
\w\W{7}\w + (?: + \w\W\w{3}\W + ){2} + \w{4}\W\w{3}\W{4} $ "# ))); @@ -3783,7 +3945,11 @@ mod word_non_word_conversion { r#" (?x) ^ - \w\W{7}\w{2}\W\w{3}\W\w\W\w{3}\W\w{4}\W\w{3}\W{4} + \w\W{7}\w + (?: + \w\W\w{3}\W + ){2} + \w{4}\W\w{3}\W{4} $ "# ))); @@ -3791,6 +3957,83 @@ mod word_non_word_conversion { } } +mod anchor_conversion { + use super::*; + + mod no_verbose { + use super::*; + + #[test] + fn succeeds_with_no_start_anchor_option() { + let mut grex = init_command(); + grex.args(&["--no-start-anchor", TEST_CASE]); + grex.assert() + .success() + .stdout(predicate::eq("I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.$\n")); + } + + #[test] + fn succeeds_with_no_end_anchor_option() { + let mut grex = init_command(); + grex.args(&["--no-end-anchor", TEST_CASE]); + grex.assert() + .success() + .stdout(predicate::eq("^I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.\n")); + } + + #[test] + fn succeeds_with_no_anchors_option() { + let mut grex = init_command(); + grex.args(&["--no-anchors", TEST_CASE]); + grex.assert() + .success() + .stdout(predicate::eq("I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.\n")); + } + } + + mod verbose { + use super::*; + + #[test] + fn succeeds_with_verbose_mode_and_no_start_anchor_option() { + let mut grex = init_command(); + grex.args(&["--verbose", "--no-start-anchor", TEST_CASE]); + grex.assert().success().stdout(predicate::eq(indoc!( + r#" + (?x) + I\ \ \ ♥♥♥\ 36\ and\ ٣\ and\ y̆y̆\ and\ 💩💩\. + $ + "#, + ))); + } + + #[test] + fn succeeds_with_verbose_mode_and_no_end_anchor_option() { + let mut grex = init_command(); + grex.args(&["--verbose", "--no-end-anchor", TEST_CASE]); + grex.assert().success().stdout(predicate::eq(indoc!( + r#" + (?x) + ^ + I\ \ \ ♥♥♥\ 36\ and\ ٣\ and\ y̆y̆\ and\ 💩💩\. 
+ "#, + ))); + } + + #[test] + fn succeeds_with_verbose_mode_and_no_anchors_option() { + let mut grex = init_command(); + grex.args(&["--verbose", "--no-anchors", TEST_CASE]); + grex.assert().success().stdout(predicate::eq(indoc!( + r#" + (?x) + I\ \ \ ♥♥♥\ 36\ and\ ٣\ and\ y̆y̆\ and\ 💩💩\. + "#, + ))); + } + } +} + fn init_command() -> Command { Command::cargo_bin("grex").unwrap() } diff --git a/tests/lib_integration_tests.rs b/tests/lib_integration_tests.rs index 1ce6133..970343b 100644 --- a/tests/lib_integration_tests.rs +++ b/tests/lib_integration_tests.rs @@ -14,7 +14,7 @@ * limitations under the License. */ -use grex::{Feature, RegExpBuilder}; +use grex::RegExpBuilder; use indoc::indoc; use regex::Regex; use rstest::rstest; @@ -105,7 +105,7 @@ mod no_conversion { )] fn succeeds_with_ignore_case_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::CaseInsensitivity]) + .with_case_insensitive_matching() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -150,7 +150,7 @@ mod no_conversion { )] fn succeeds_with_capturing_groups_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::CapturingGroup]) + .with_capturing_groups() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -287,7 +287,7 @@ mod no_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::CaseInsensitivity]) + .with_case_insensitive_matching() .with_verbose_mode() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -306,6 +306,21 @@ mod no_conversion { assert_that_regexp_is_correct(regexp, expected_output, &test_cases); 
assert_that_regexp_matches_test_cases(expected_output, test_cases); } + + #[rstest(test_cases, expected_output, + case(vec!["bab", "b", "cb", "bba"], "(?:b(?:ba|ab)?|cb)"), + case(vec!["a", "aba", "baaa", "aaab"], "(?:baaa|a(?:aab|ba)?)"), + case(vec!["a", "abab", "bbb", "aaac"], "(?:a(?:bab|aac)?|bbb)"), + case( + // https://github.com/pemistahl/grex/issues/31 + vec!["agbhd", "eibcd", "egbcd", "fbjbf", "agbh", "eibc", "egbc", "ebc", "fbc", "cd", "f", "c", "abcd", "ebcd", "fbcd"], + "(?:a(?:gbhd?|bcd)|e(?:ibcd?|gbcd?|bcd?)|f(?:b(?:jbf|cd?))?|cd?)") + )] + fn succeeds_without_anchors(test_cases: Vec<&str>, expected_output: &str) { + let regexp = RegExpBuilder::from(&test_cases).without_anchors().build(); + assert_that_regexp_is_correct(regexp, expected_output, &test_cases); + assert_that_regexp_matches_test_cases(expected_output, test_cases); + } } mod repetition { @@ -318,6 +333,12 @@ mod no_conversion { case(vec!["a"], "^a$"), case(vec!["aa"], "^a{2}$"), case(vec!["aaa"], "^a{3}$"), + case(vec!["aaa aaa"], "^a{3} a{3}$"), + case(vec!["ababab ababab"], "^(?:ab){3} (?:ab){3}$"), + case(vec!["ababab ababab"], "^(?:ab){3} {2}(?:ab){3}$"), + case(vec!["a ababab ababab"], "^a(?: (?:ab){3}){2}$"), + case(vec!["ababab ababab a"], "^a(?:b(?:ab){2} a){2}$"), + case(vec!["ababababab abab ababab"], "^ababab(?:(?:ab){2} ){2}(?:ab){3}$"), case(vec!["a", "aa"], "^a{1,2}$"), case(vec!["aaa", "a", "aa"], "^a{1,3}$"), case(vec!["aaaa", "a", "aa"], "^(?:a{1,2}|a{4})$"), @@ -331,9 +352,9 @@ mod no_conversion { case(vec!["aababab"], "^a(?:ab){3}$"), case(vec!["abababaa"], "^(?:ab){3}a{2}$"), case(vec!["aaaaaabbbbb"], "^a{6}b{5}$"), - case(vec!["aabaababab"], "^(?:a{2}b){2}abab$"), // goal: ^(a{2}b){2}(ab){2}$ + case(vec!["aabaababab"], "^a{2}ba(?:ab){3}$"), case(vec!["aaaaaaabbbbbba"], "^a{7}b{6}a$"), - case(vec!["abaaaabaaba"], "^abaa(?:a{2}b){2}a$"), + case(vec!["abaaaabaaba"], "^abaaa(?:aba){2}$"), case(vec!["bbaababb"], "^b{2}a{2}bab{2}$"), case(vec!["b", "ba"], "^ba?$"), 
case(vec!["b", "ba", "baa"], "^b(?:a{1,2})?$"), @@ -366,7 +387,7 @@ mod no_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -378,7 +399,8 @@ mod no_conversion { )] fn succeeds_with_ignore_case_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::CaseInsensitivity]) + .with_conversion_of_repetitions() + .with_case_insensitive_matching() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -394,7 +416,7 @@ mod no_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -411,7 +433,7 @@ mod no_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -483,21 +505,20 @@ mod no_conversion { r#" (?x) ^ + a{2}ba (?: - a{2}b - ){2} - abab + ab + ){3} $"# )), case(vec!["abaaaabaaba"], indoc!( r#" (?x) ^ - abaa + abaaa (?: - a{2}b + aba ){2} - a $"# )), case(vec!["xy̆y̆z", "xy̆y̆y̆y̆z"], indoc!( @@ -547,7 +568,7 @@ mod no_conversion { )] fn succeeds_with_verbose_mode_option(test_cases: Vec<&str>, 
expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_verbose_mode() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -565,6 +586,7 @@ mod no_conversion { case(vec!["aaa"], "^aaa$"), case(vec!["aaaa"], "^a{4}$"), case(vec!["aaaaa"], "^a{5}$"), + case(vec!["ababababab abab ababab"], "^(?:ab){5} abab ababab$"), case(vec!["aabbaaaabbbabbbbba"], "^aabba{4}bbbab{5}a$"), case(vec!["baabaaaaaabb"], "^baaba{6}bb$"), case(vec!["ababab"], "^ababab$"), @@ -583,7 +605,7 @@ mod no_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_minimum_repetitions(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -595,14 +617,15 @@ mod no_conversion { case(vec!["ababab"], "^ababab$"), case(vec!["abcabcabc"], "^(?:abc){3}$"), case(vec!["abcabcabc", "dede"], "^(?:dede|(?:abc){3})$"), - case(vec!["abcabcabc", "defgdefg"], "^(?:(?:defg){2}|(?:abc){3})$") + case(vec!["abcabcabc", "defgdefg"], "^(?:(?:defg){2}|(?:abc){3})$"), + case(vec!["ababababab abab ababab"], "^ababab(?:abab ){2}ababab$") )] fn succeeds_with_increased_minimum_substring_length( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_minimum_substring_length(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -614,14 +637,15 @@ mod no_conversion { case(vec!["abcabcabc"], "^abcabcabc$"), case(vec!["abcabcabcabc"], "^(?:abc){4}$"), case(vec!["aaaaaaaaaaaa"], "^aaaaaaaaaaaa$"), - case(vec!["abababab", "abcabcabcabc"], "^(?:abababab|(?:abc){4})$") + case(vec!["abababab", "abcabcabcabc"], "^(?:abababab|(?:abc){4})$"), + case(vec!["ababababab abab ababab"], "^ababababab abab ababab$") 
)] fn succeeds_with_increased_minimum_repetitions_and_substring_length( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_minimum_repetitions(3) .with_minimum_substring_length(3) .build(); @@ -659,7 +683,7 @@ mod digit_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit]) + .with_conversion_of_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -673,7 +697,7 @@ mod digit_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit]) + .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -688,7 +712,7 @@ mod digit_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit]) + .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -706,7 +730,8 @@ mod digit_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -720,7 +745,8 @@ mod digit_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = 
RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -735,7 +761,8 @@ mod digit_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -755,7 +782,8 @@ mod digit_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -786,7 +814,7 @@ mod space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Space]) + .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -800,7 +828,7 @@ mod space_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Space]) + .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -815,7 +843,7 @@ mod space_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - 
.with_conversion_of(&[Feature::Space]) + .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -833,7 +861,8 @@ mod space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -847,7 +876,8 @@ mod space_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -862,7 +892,8 @@ mod space_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -885,7 +916,8 @@ mod space_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -920,7 +952,7 @@ mod word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - 
.with_conversion_of(&[Feature::Word]) + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -934,7 +966,7 @@ mod word_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Word]) + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -949,7 +981,7 @@ mod word_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Word]) + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -962,12 +994,13 @@ mod word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w {3}♥{3} \\w{2} \\w{3} \\w \\w{3} \\w{4} \\w{3} 💩{2}\\.$" + "^\\w {3}♥{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -976,12 +1009,13 @@ mod word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w {3}\\u{2665}{3} \\w{2} \\w{3} \\w \\w{3} \\w{4} \\w{3} \\u{1f4a9}{2}\\.$" + "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = 
RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -991,12 +1025,13 @@ mod word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w {3}\\u{2665}{3} \\w{2} \\w{3} \\w \\w{3} \\w{4} \\w{3} (?:\\u{d83d}\\u{dca9}){2}\\.$" + "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1019,7 +1054,8 @@ mod word_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_words() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1042,7 +1078,8 @@ mod digit_space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Space]) + .with_conversion_of_digits() + .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1056,7 +1093,8 @@ mod digit_space_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, 
Feature::Space]) + .with_conversion_of_digits() + .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1071,7 +1109,8 @@ mod digit_space_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Space]) + .with_conversion_of_digits() + .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1089,7 +1128,9 @@ mod digit_space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1103,7 +1144,9 @@ mod digit_space_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1118,7 +1161,9 @@ mod digit_space_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_whitespace() 
.with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1141,7 +1186,9 @@ mod digit_space_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::Space]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_whitespace() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1157,7 +1204,7 @@ mod digit_space_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_minimum_substring_length(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1175,7 +1222,7 @@ mod digit_space_conversion { expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition]) + .with_conversion_of_repetitions() .with_minimum_repetitions(2) .with_minimum_substring_length(3) .build(); @@ -1199,7 +1246,8 @@ mod digit_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Word]) + .with_conversion_of_digits() + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1213,7 +1261,8 @@ mod digit_word_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Word]) + .with_conversion_of_digits() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1228,7 +1277,8 @@ mod 
digit_word_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Word]) + .with_conversion_of_digits() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1241,12 +1291,14 @@ mod digit_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w {3}♥{3} \\d(?:\\d \\w{3} ){2}\\w{4} \\w{3} 💩{2}\\.$" + "^\\w {3}♥{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1255,12 +1307,14 @@ mod digit_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w{4} \\w{3} \\u{1f4a9}{2}\\.$" + "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1270,12 +1324,14 @@ mod digit_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} 
){2}\\w{4} \\w{3} (?:\\u{d83d}\\u{dca9}){2}\\.$" + "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1297,7 +1353,8 @@ mod space_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Space, Feature::Word]) + .with_conversion_of_whitespace() + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1311,7 +1368,8 @@ mod space_word_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Space, Feature::Word]) + .with_conversion_of_whitespace() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1326,7 +1384,8 @@ mod space_word_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Space, Feature::Word]) + .with_conversion_of_whitespace() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1339,12 +1398,14 @@ mod space_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - 
"^\\w\\s{3}♥{3}\\s\\w{2}\\s\\w{3}\\s\\w\\s\\w{3}\\s\\w{4}\\s\\w{3}\\s💩{2}\\.$" + "^\\w\\s{3}♥{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1353,12 +1414,14 @@ mod space_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}\\s\\w{3}\\s\\w\\s\\w{3}\\s\\w{4}\\s\\w{3}\\s\\u{1f4a9}{2}\\.$" + "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1368,12 +1431,14 @@ mod space_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}\\s\\w{3}\\s\\w\\s\\w{3}\\s\\w{4}\\s\\w{3}\\s(?:\\u{d83d}\\u{dca9}){2}\\.$" + "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space, Feature::Word]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() + 
.with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1395,7 +1460,9 @@ mod digit_space_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Space, Feature::Word]) + .with_conversion_of_digits() + .with_conversion_of_whitespace() + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1409,7 +1476,9 @@ mod digit_space_word_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Space, Feature::Word]) + .with_conversion_of_digits() + .with_conversion_of_whitespace() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1424,7 +1493,9 @@ mod digit_space_word_conversion { )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::Space, Feature::Word]) + .with_conversion_of_digits() + .with_conversion_of_whitespace() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1437,17 +1508,15 @@ mod digit_space_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w\\s{3}♥{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w{4}\\s\\w{3}\\s💩{2}\\.$" + "^\\w\\s{3}♥{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - 
.with_conversion_of(&[ - Feature::Repetition, - Feature::Digit, - Feature::Space, - Feature::Word, - ]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_whitespace() + .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1456,17 +1525,15 @@ mod digit_space_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w{4}\\s\\w{3}\\s\\u{1f4a9}{2}\\.$" + "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[ - Feature::Repetition, - Feature::Digit, - Feature::Space, - Feature::Word, - ]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_whitespace() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1476,17 +1543,15 @@ mod digit_space_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w{4}\\s\\w{3}\\s(?:\\u{d83d}\\u{dca9}){2}\\.$" + "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[ - Feature::Repetition, - Feature::Digit, - Feature::Space, - Feature::Word, - ]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_whitespace() + .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); 
assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1508,7 +1573,7 @@ mod non_digit_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonDigit]) + .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1522,7 +1587,7 @@ mod non_digit_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonDigit]) + .with_conversion_of_non_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1538,7 +1603,8 @@ mod non_digit_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::NonDigit]) + .with_conversion_of_repetitions() + .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1549,7 +1615,8 @@ mod non_digit_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::NonDigit]) + .with_conversion_of_repetitions() + .with_conversion_of_non_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1572,7 +1639,7 @@ mod non_space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonSpace]) + .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, 
expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1585,12 +1652,13 @@ mod non_space_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\S {3}\\S{3} \\S{2} \\S{3} \\S \\S{3} \\S{4} \\S{3} \\S{3}$" + "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::NonSpace]) + .with_conversion_of_repetitions() + .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1612,7 +1680,7 @@ mod non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonWord]) + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1626,7 +1694,7 @@ mod non_word_conversion { )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonWord]) + .with_conversion_of_non_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1645,7 +1713,8 @@ mod non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::NonWord]) + .with_conversion_of_repetitions() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1659,7 +1728,8 @@ mod non_word_conversion 
{ )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::NonWord]) + .with_conversion_of_repetitions() + .with_conversion_of_non_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); @@ -1682,7 +1752,8 @@ mod non_digit_non_space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonDigit, Feature::NonSpace]) + .with_conversion_of_non_digits() + .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1697,7 +1768,9 @@ mod non_digit_non_space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::NonDigit, Feature::NonSpace]) + .with_conversion_of_repetitions() + .with_conversion_of_non_digits() + .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1719,7 +1792,8 @@ mod non_digit_non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonDigit, Feature::NonWord]) + .with_conversion_of_non_digits() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1734,7 +1808,9 @@ mod non_digit_non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - 
.with_conversion_of(&[Feature::Repetition, Feature::NonDigit, Feature::NonWord]) + .with_conversion_of_repetitions() + .with_conversion_of_non_digits() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1756,7 +1832,8 @@ mod non_space_non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonSpace, Feature::NonWord]) + .with_conversion_of_non_whitespace() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1769,12 +1846,14 @@ mod non_space_non_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\S\\W{7}\\S{2}\\W\\S{3}\\W\\S\\W\\S{3}\\W\\S{4}\\W\\S{3}\\W{4}$" + "^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::NonSpace, Feature::NonWord]) + .with_conversion_of_repetitions() + .with_conversion_of_non_whitespace() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1796,7 +1875,9 @@ mod non_digit_non_space_non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::NonDigit, Feature::NonSpace, Feature::NonWord]) + .with_conversion_of_non_digits() + .with_conversion_of_non_whitespace() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); 
assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1811,12 +1892,10 @@ mod non_digit_non_space_non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[ - Feature::Repetition, - Feature::NonDigit, - Feature::NonSpace, - Feature::NonWord, - ]) + .with_conversion_of_repetitions() + .with_conversion_of_non_digits() + .with_conversion_of_non_whitespace() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1838,7 +1917,8 @@ mod digit_non_digit_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Digit, Feature::NonDigit]) + .with_conversion_of_digits() + .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1853,7 +1933,9 @@ mod digit_non_digit_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Digit, Feature::NonDigit]) + .with_conversion_of_repetitions() + .with_conversion_of_digits() + .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1875,7 +1957,8 @@ mod space_non_space_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Space, Feature::NonSpace]) + .with_conversion_of_whitespace() + .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); 
assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1888,12 +1971,14 @@ mod space_non_space_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\S\\s{3}\\S{3}\\s\\S{2}\\s\\S{3}\\s\\S\\s\\S{3}\\s\\S{4}\\s\\S{3}\\s\\S{3}$" + "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Space, Feature::NonSpace]) + .with_conversion_of_repetitions() + .with_conversion_of_whitespace() + .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1915,7 +2000,8 @@ mod word_non_word_conversion { )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Word, Feature::NonWord]) + .with_conversion_of_words() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1928,12 +2014,14 @@ mod word_non_word_conversion { #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], - "^\\w\\W{7}\\w{2}\\W\\w{3}\\W\\w\\W\\w{3}\\W\\w{4}\\W\\w{3}\\W{4}$" + "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) - .with_conversion_of(&[Feature::Repetition, Feature::Word, Feature::NonWord]) + .with_conversion_of_repetitions() + .with_conversion_of_words() + .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); @@ -1941,6 +2029,102 @@ mod 
word_non_word_conversion { } } +mod anchor_conversion { + use super::*; + + mod no_verbose { + use super::*; + + #[rstest(test_cases, expected_output, + case(vec!["My ♥♥♥ and 💩💩 is yours."], "My ♥♥♥ and 💩💩 is yours\\.$"), + )] + fn succeeds_with_no_start_anchor_option(test_cases: Vec<&str>, expected_output: &str) { + let regexp = RegExpBuilder::from(&test_cases) + .without_start_anchor() + .build(); + assert_that_regexp_is_correct(regexp, expected_output, &test_cases); + } + + #[rstest(test_cases, expected_output, + case(vec!["My ♥♥♥ and 💩💩 is yours."], "^My ♥♥♥ and 💩💩 is yours\\."), + )] + fn succeeds_with_no_end_anchor_option(test_cases: Vec<&str>, expected_output: &str) { + let regexp = RegExpBuilder::from(&test_cases) + .without_end_anchor() + .build(); + assert_that_regexp_is_correct(regexp, expected_output, &test_cases); + } + + #[rstest(test_cases, expected_output, + case(vec!["My ♥♥♥ and 💩💩 is yours."], "My ♥♥♥ and 💩💩 is yours\\."), + )] + fn succeeds_with_no_match_line_option(test_cases: Vec<&str>, expected_output: &str) { + let regexp = RegExpBuilder::from(&test_cases).without_anchors().build(); + assert_that_regexp_is_correct(regexp, expected_output, &test_cases); + } + } + + mod verbose { + use super::*; + + #[rstest(test_cases, expected_output, + case(vec!["My ♥♥♥ and 💩💩 is yours."], indoc!( + r#" + (?x) + My\ ♥♥♥\ and\ 💩💩\ is\ yours\. 
+ $"# + )) + )] + fn succeeds_with_verbose_mode_and_no_start_anchor_option( + test_cases: Vec<&str>, + expected_output: &str, + ) { + let regexp = RegExpBuilder::from(&test_cases) + .with_verbose_mode() + .without_start_anchor() + .build(); + assert_that_regexp_is_correct(regexp, expected_output, &test_cases); + } + + #[rstest(test_cases, expected_output, + case(vec!["My ♥♥♥ and 💩💩 is yours."], indoc!( + r#" + (?x) + ^ + My\ ♥♥♥\ and\ 💩💩\ is\ yours\."# + )) + )] + fn succeeds_with_verbose_mode_and_no_end_anchor_option( + test_cases: Vec<&str>, + expected_output: &str, + ) { + let regexp = RegExpBuilder::from(&test_cases) + .with_verbose_mode() + .without_end_anchor() + .build(); + assert_that_regexp_is_correct(regexp, expected_output, &test_cases); + } + + #[rstest(test_cases, expected_output, + case(vec!["My ♥♥♥ and 💩💩 is yours."], indoc!( + r#" + (?x) + My\ ♥♥♥\ and\ 💩💩\ is\ yours\."# + )) + )] + fn succeeds_with_verbose_mode_and_no_anchors_option( + test_cases: Vec<&str>, + expected_output: &str, + ) { + let regexp = RegExpBuilder::from(&test_cases) + .with_verbose_mode() + .without_anchors() + .build(); + assert_that_regexp_is_correct(regexp, expected_output, &test_cases); + } + } +} + fn assert_that_regexp_is_correct(regexp: String, expected_output: &str, test_cases: &[&str]) { assert_eq!( regexp, expected_output, @@ -1952,8 +2136,9 @@ fn assert_that_regexp_is_correct(regexp: String, expected_output: &str, test_cas fn assert_that_regexp_matches_test_cases(expected_output: &str, test_cases: Vec<&str>) { let re = Regex::new(expected_output).unwrap(); for test_case in test_cases { - assert!( - re.is_match(test_case), + assert_eq!( + re.find_iter(test_case).count(), + 1, "\n\n\"{}\" does not match regex {}\n\n", test_case, expected_output diff --git a/tests/property_tests.rs b/tests/property_tests.rs index c8ec407..7cce08a 100644 --- a/tests/property_tests.rs +++ b/tests/property_tests.rs @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#![allow(deprecated)] + use grex::{Feature, RegExpBuilder}; use proptest::prelude::*; use regex::{Error, Regex, RegexBuilder}; @@ -22,9 +24,8 @@ proptest! { #![proptest_config(ProptestConfig::with_cases(500))] #[test] - #[ignore] fn valid_regexes_with_default_settings( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).build(); @@ -32,9 +33,8 @@ proptest! { } #[test] - #[ignore] fn valid_regexes_with_escape_sequences( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) @@ -44,9 +44,8 @@ proptest! { } #[test] - #[ignore] fn valid_regexes_with_verbose_mode( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) @@ -56,9 +55,8 @@ proptest! { } #[test] - #[ignore] fn valid_regexes_with_escape_sequences_and_verbose_mode( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) @@ -69,9 +67,8 @@ proptest! { } #[test] - #[ignore] fn valid_regexes_with_conversion_features( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), conversion_features in prop::collection::hash_set(conversion_feature_strategy(), 1..=9), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 @@ -86,9 +83,8 @@ proptest! 
{ } #[test] - #[ignore] fn valid_regexes_with_conversion_features_and_escape_sequences( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), conversion_features in prop::collection::hash_set(conversion_feature_strategy(), 1..=9), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 @@ -104,9 +100,8 @@ proptest! { } #[test] - #[ignore] fn valid_regexes_with_conversion_features_and_verbose_mode( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), conversion_features in prop::collection::hash_set(conversion_feature_strategy(), 1..=9), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 @@ -122,9 +117,8 @@ proptest! { } #[test] - #[ignore] fn matching_regexes_with_default_settings( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).build(); @@ -134,9 +128,8 @@ proptest! { } #[test] - #[ignore] fn matching_regexes_with_escape_sequences( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) @@ -148,9 +141,8 @@ proptest! { } #[test] - #[ignore] fn matching_regexes_with_verbose_mode( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) @@ -162,9 +154,8 @@ proptest! 
{ } #[test] - #[ignore] fn matching_regexes_with_escape_sequences_and_verbose_mode( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) @@ -177,9 +168,8 @@ proptest! { } #[test] - #[ignore] fn matching_regexes_with_conversion_features( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), conversion_features in prop::collection::hash_set(conversion_feature_strategy(), 1..=9), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 @@ -196,9 +186,8 @@ proptest! { } #[test] - #[ignore] fn matching_regexes_with_conversion_features_and_escape_sequences( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), conversion_features in prop::collection::hash_set(conversion_feature_strategy(), 1..=9), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 @@ -216,9 +205,8 @@ proptest! { } #[test] - #[ignore] fn matching_regexes_with_conversion_features_and_verbose_mode( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), conversion_features in prop::collection::hash_set(conversion_feature_strategy(), 1..=9), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 @@ -236,10 +224,75 @@ proptest! 
{ } #[test] - #[ignore] + fn matching_regexes_without_start_anchor( + test_cases in prop::collection::hash_set("[A-C]{1,10}", 1..=5) + ) { + let test_cases_vec = test_cases.iter().cloned().collect::>(); + let regexp = RegExpBuilder::from(&test_cases_vec).without_start_anchor().build(); + if let Ok(compiled_regexp) = compile_regexp(®exp) { + for test_case in test_cases_vec { + let substrings = compiled_regexp.find_iter(&test_case).map(|m| m.as_str()).collect::>(); + prop_assert_eq!( + substrings.len(), + 1, + "expression '{}' does not match test case '{}' entirely but {} of its substrings: {:?}", + regexp, + test_case, + substrings.len(), + substrings + ); + } + } + } + + #[test] + fn matching_regexes_without_end_anchor( + test_cases in prop::collection::hash_set("[A-C]{1,10}", 1..=5) + ) { + let test_cases_vec = test_cases.iter().cloned().collect::>(); + let regexp = RegExpBuilder::from(&test_cases_vec).without_end_anchor().build(); + if let Ok(compiled_regexp) = compile_regexp(®exp) { + for test_case in test_cases_vec { + let substrings = compiled_regexp.find_iter(&test_case).map(|m| m.as_str()).collect::>(); + prop_assert_eq!( + substrings.len(), + 1, + "expression '{}' does not match test case '{}' entirely but {} of its substrings: {:?}", + regexp, + test_case, + substrings.len(), + substrings + ); + } + } + } + + #[test] + fn matching_regexes_without_anchors( + test_cases in prop::collection::hash_set("[A-C]{1,10}", 1..=5) + ) { + let test_cases_vec = test_cases.iter().cloned().collect::>(); + let regexp = RegExpBuilder::from(&test_cases_vec).without_anchors().build(); + if let Ok(compiled_regexp) = compile_regexp(®exp) { + for test_case in test_cases_vec { + let substrings = compiled_regexp.find_iter(&test_case).map(|m| m.as_str()).collect::>(); + prop_assert_eq!( + substrings.len(), + 1, + "expression '{}' does not match test case '{}' entirely but {} of its substrings: {:?}", + regexp, + test_case, + substrings.len(), + substrings + ); + } + } + } + + 
#[test] fn regexes_not_matching_other_strings_with_default_settings( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), - other_strings in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), + other_strings in prop::collection::hash_set(".{1,10}", 1..=5) ) { if test_cases.is_disjoint(&other_strings) { let test_cases_vec = test_cases.iter().cloned().collect::>(); @@ -251,10 +304,9 @@ proptest! { } #[test] - #[ignore] fn regexes_not_matching_other_strings_with_escape_sequences( - test_cases in prop::collection::hash_set(".{1,20}", 1..=10), - other_strings in prop::collection::hash_set(".{1,20}", 1..=10) + test_cases in prop::collection::hash_set(".{1,10}", 1..=5), + other_strings in prop::collection::hash_set(".{1,10}", 1..=5) ) { if test_cases.is_disjoint(&other_strings) { let test_cases_vec = test_cases.iter().cloned().collect::>();