From 58657e0d1490249f7bbe81c005da568cafcb3316 Mon Sep 17 00:00:00 2001
From: Bingran Hu <bingran.hu@yscope.com>
Date: Sat, 13 Jul 2024 22:25:55 -0400
Subject: [PATCH] Add regex utils including regex to wildcard translation

---
 components/core/CMakeLists.txt                |   3 +
 .../core/src/clp/regex_utils/CMakeLists.txt   |  22 +
 .../core/src/clp/regex_utils/ErrorCode.cpp    |  93 +++
 .../core/src/clp/regex_utils/ErrorCode.hpp    |  46 ++
 .../RegexToWildcardTranslatorConfig.hpp       |  42 ++
 .../core/src/clp/regex_utils/constants.hpp    |  48 ++
 .../core/src/clp/regex_utils/regex_utils.hpp  |  49 ++
 .../clp/regex_utils/regex_utils_anchors.cpp   |  64 ++
 .../regex_utils_regex_to_wildcard.cpp         | 614 ++++++++++++++++++
 components/core/tests/test-regex_utils.cpp    | 297 +++++++++
 10 files changed, 1278 insertions(+)
 create mode 100644 components/core/src/clp/regex_utils/CMakeLists.txt
 create mode 100644 components/core/src/clp/regex_utils/ErrorCode.cpp
 create mode 100644 components/core/src/clp/regex_utils/ErrorCode.hpp
 create mode 100644 components/core/src/clp/regex_utils/RegexToWildcardTranslatorConfig.hpp
 create mode 100644 components/core/src/clp/regex_utils/constants.hpp
 create mode 100644 components/core/src/clp/regex_utils/regex_utils.hpp
 create mode 100644 components/core/src/clp/regex_utils/regex_utils_anchors.cpp
 create mode 100644 components/core/src/clp/regex_utils/regex_utils_regex_to_wildcard.cpp
 create mode 100644 components/core/tests/test-regex_utils.cpp
diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt
index 7cba49acb..e3d73843a 100644
--- a/components/core/CMakeLists.txt
+++ b/components/core/CMakeLists.txt
@@ -210,6 +210,7 @@ include(cmake/Modules/FindLibraryDependencies.cmake)
 FindDynamicLibraryDependencies(sqlite "${sqlite_DYNAMIC_LIBS}")
 
 add_subdirectory(src/clp/string_utils)
+add_subdirectory(src/clp/regex_utils)
 
 add_subdirectory(src/clp/clg)
 add_subdirectory(src/clp/clo)
@@ -475,6 +476,7 @@ set(SOURCE_FILES_unitTest
         tests/test-Stopwatch.cpp
         tests/test-StreamingCompression.cpp
         tests/test-string_utils.cpp
+        tests/test-regex_utils.cpp
         tests/test-TimestampPattern.cpp
         tests/test-utf8_utils.cpp
         tests/test-Utils.cpp
@@ -498,6 +500,7 @@ target_link_libraries(unitTest
         ${sqlite_LIBRARY_DEPENDENCIES}
         ${STD_FS_LIBS}
         clp::string_utils
+        clp::regex_utils
         yaml-cpp::yaml-cpp
         ZStd::ZStd
         )
diff --git a/components/core/src/clp/regex_utils/CMakeLists.txt b/components/core/src/clp/regex_utils/CMakeLists.txt
new file mode 100644
index 000000000..39a290a9c
--- /dev/null
+++ b/components/core/src/clp/regex_utils/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(
+        REGEX_UTILS_HEADER_LIST  # Headers of this library; added to the target's sources below.
+        "ErrorCode.hpp"
+        "RegexToWildcardTranslatorConfig.hpp"
+        "constants.hpp"
+        "regex_utils.hpp"
+)
+add_library(
+        regex_utils
+        regex_utils_regex_to_wildcard.cpp
+        regex_utils_anchors.cpp
+        ErrorCode.cpp
+        ${REGEX_UTILS_HEADER_LIST}
+)
+add_library(clp::regex_utils ALIAS regex_utils)  # Namespaced name that other targets link to.
+target_include_directories(regex_utils
+        PUBLIC
+        ../  # Parent directory, so headers are included as "regex_utils/<name>.hpp".
+        PRIVATE
+        "${PROJECT_SOURCE_DIR}/submodules"  # Presumably resolves <boost-outcome/...>; confirm.
+)
+target_compile_features(regex_utils PRIVATE cxx_std_20)  # Compile this target as C++20.
diff --git a/components/core/src/clp/regex_utils/ErrorCode.cpp b/components/core/src/clp/regex_utils/ErrorCode.cpp
new file mode 100644
index 000000000..acc59abeb
--- /dev/null
+++ b/components/core/src/clp/regex_utils/ErrorCode.cpp
@@ -0,0 +1,93 @@
+#include "regex_utils/ErrorCode.hpp"
+
+#include <string>
+#include <string_view>
+#include <system_error>
+
+using std::error_category;
+using std::error_code;
+using std::string;
+using std::string_view;
+
+namespace clp::regex_utils {
+
+/**
+ * Error category that gives each `ErrorCode` value a more detailed string description.
+ * This class does not need to be seen outside the std error code wrapper implementation.
+ */
+class ErrorCodeCategory : public error_category {
+public:
+    /**
+     * @return The name of this error category.
+     */
+    [[nodiscard]] char const* name() const noexcept override;
+
+    /**
+     * @param ev The error code encoded as an int.
+     * @return The descriptive message for the error.
+     */
+    [[nodiscard]] string message(int ev) const override;
+};
+
+auto ErrorCodeCategory::name() const noexcept -> char const* {
+    return "regex utility";  // Fixed name identifying this category of errors.
+}
+
+auto ErrorCodeCategory::message(int ev) const -> string {  // Describes the ErrorCode held in `ev`.
+    switch (static_cast<ErrorCode>(ev)) {
+        case ErrorCode::Success:
+            return "Success.";
+
+        case ErrorCode::IllegalState:
+            return "Unrecognized state.";
+
+        case ErrorCode::Star:
+            return "Failed to translate due to metachar `*` (zero or more occurrences).";
+
+        case ErrorCode::Plus:
+            return "Failed to translate due to metachar `+` (one or more occurrences).";
+
+        case ErrorCode::Question:
+            return "Currently does not support returning a list of wildcard translations. The "
+                   "metachar `?` (lazy match) may be supported in the future.";
+
+        case ErrorCode::Pipe:
+            return "Currently does not support returning a list of wildcard translations. The "
+                   "regex OR condition feature may be supported in the future.";
+
+        case ErrorCode::Caret:
+            return "Failed to translate due to start anchor `^` in the middle of the string.";
+
+        case ErrorCode::Dollar:
+            return "Failed to translate due to end anchor `$` in the middle of the string.";
+
+        case ErrorCode::DisallowedEscapeSequence:
+            return "Disallowed escape sequence.";
+
+        case ErrorCode::UnmatchedParenthesis:
+            return "Unmatched opening `(` or closing `)`.";
+
+        case ErrorCode::UnsupportedCharsets:
+            return "Currently only supports case-insensitive single-char charset (i.e. [aA] [bB]).";
+
+        case ErrorCode::IncompleteCharsetStructure:
+            return "Unmatched closing `]` at the end of the string.";
+
+        case ErrorCode::UnsupportedQuantifier:
+            return "Currently only supports exact positive number of repetitions in regex syntax.";
+
+        case ErrorCode::TokenUnquantifiable:
+            return "The preceding token is not quantifiable.";
+
+        default:  // Any value that does not name an enumerator.
+            return "(unrecognized error)";
+    }
+}
+
+ErrorCodeCategory const cTheErrorCodeCategory{};  // The category instance used by every code.
+
+auto make_error_code(ErrorCode e) -> error_code {
+    return {static_cast<int>(e), cTheErrorCodeCategory};  // Enum value paired with the category.
+}
+
+}  // namespace clp::regex_utils
diff --git a/components/core/src/clp/regex_utils/ErrorCode.hpp b/components/core/src/clp/regex_utils/ErrorCode.hpp
new file mode 100644
index 000000000..4fa9204fc
--- /dev/null
+++ b/components/core/src/clp/regex_utils/ErrorCode.hpp
@@ -0,0 +1,46 @@
+#ifndef CLP_REGEX_UTILS_ERRORCODE_HPP
+#define CLP_REGEX_UTILS_ERRORCODE_HPP
+
+#include <cstdint>
+#include <system_error>
+#include <type_traits>
+
+namespace clp::regex_utils {
+
+/**
+ * Enum class for propagating and handling various regex utility errors.
+ * More detailed descriptions can be found in ErrorCode.cpp.
+ */
+enum class ErrorCode : uint8_t {
+    Success = 0,
+    IllegalState,  // Unrecognized translator state
+    Star,  // Untranslatable `*`
+    Plus,  // Untranslatable `+`
+    Question,  // `?` is not supported
+    Pipe,  // `|` (OR) is not supported
+    Caret,  // `^` where a start anchor is not accepted
+    Dollar,  // `$` where an end anchor is not accepted
+    DisallowedEscapeSequence,  // Backslash followed by a char that may not be escaped
+    UnmatchedParenthesis,  // `(` or `)` without its counterpart
+    UnsupportedCharsets,  // Charset other than the few supported forms
+    IncompleteCharsetStructure,  // Charset still open at the end of the string
+    UnsupportedQuantifier,  // Quantifier other than an exact repetition count
+    TokenUnquantifiable,  // Quantifier with no quantifiable token before it
+};
+
+/**
+ * Wrapper function to turn an `ErrorCode` enum value into an std::error_code.
+ *
+ * @param ec An error code enum.
+ * @return The corresponding std::error_code type variable.
+ */
+[[nodiscard]] auto make_error_code(ErrorCode ec) -> std::error_code;
+
+}  // namespace clp::regex_utils
+
+namespace std {
+template <>  // Opts ErrorCode into implicit conversion to std::error_code.
+struct is_error_code_enum<clp::regex_utils::ErrorCode> : true_type {};
+}  // namespace std
+
+#endif  // CLP_REGEX_UTILS_ERRORCODE_HPP
diff --git a/components/core/src/clp/regex_utils/RegexToWildcardTranslatorConfig.hpp b/components/core/src/clp/regex_utils/RegexToWildcardTranslatorConfig.hpp
new file mode 100644
index 000000000..379b327e5
--- /dev/null
+++ b/components/core/src/clp/regex_utils/RegexToWildcardTranslatorConfig.hpp
@@ -0,0 +1,42 @@
+#ifndef CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP
+#define CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP
+
+namespace clp::regex_utils {
+
+class RegexToWildcardTranslatorConfig {  // Options that control regex to wildcard translation.
+public:
+    // Constructors
+    RegexToWildcardTranslatorConfig() = default;
+
+    // Getters
+    [[nodiscard]] auto case_insensitive_wildcard() const -> bool {
+        return m_case_insensitive_wildcard;
+    }
+
+    [[nodiscard]] auto allow_anchors() const -> bool { return m_allow_anchors; }
+
+    [[nodiscard]] auto add_prefix_suffix_wildcards() const -> bool {
+        return m_add_prefix_suffix_wildcards;
+    }
+
+    // Setters
+    void set_case_insensitive_wildcard(bool case_insensitive_wildcard) {
+        m_case_insensitive_wildcard = case_insensitive_wildcard;
+    }
+
+    void set_allow_anchors(bool allow_anchors) { m_allow_anchors = allow_anchors; }
+
+    void set_add_prefix_suffix_wildcards(bool add_prefix_suffix_wildcards) {
+        m_add_prefix_suffix_wildcards = add_prefix_suffix_wildcards;
+    }
+
+private:
+    // Variables
+    bool m_case_insensitive_wildcard = false;  // Accept charsets like `[aA]` as one lower-case char
+    bool m_allow_anchors = true;  // If false, `^` and `$` anchors are reported as errors
+    bool m_add_prefix_suffix_wildcards = false;  // Emit a leading `*` when there is no `^` anchor
+};
+
+}  // namespace clp::regex_utils
+
+#endif  // CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP
diff --git a/components/core/src/clp/regex_utils/constants.hpp b/components/core/src/clp/regex_utils/constants.hpp
new file mode 100644
index 000000000..e05ccfe83
--- /dev/null
+++ b/components/core/src/clp/regex_utils/constants.hpp
@@ -0,0 +1,48 @@
+#ifndef CLP_REGEX_UTILS_CONSTANTS_HPP
+#define CLP_REGEX_UTILS_CONSTANTS_HPP
+
+#include <array>
+#include <cstddef>
+#include <string_view>
+
+namespace clp::regex_utils {
+
+constexpr size_t cCharBitarraySize = 128;
+
+/**
+ * Builds, at compile time, an ASCII lookup table that flags a given set of characters.
+ *
+ * @param char_str A string that contains the characters to flag.
+ * @return An array indexed by character code whose entry is true iff the char is in `char_str`.
+ */
+[[nodiscard]] constexpr auto create_char_bit_array(std::string_view char_str
+) -> std::array<bool, cCharBitarraySize> {
+    // Value-initialization already sets every entry to false.
+    std::array<bool, cCharBitarraySize> lookup_table{};
+    for (size_t idx{0}; idx < char_str.size(); ++idx) {
+        lookup_table.at(char_str[idx]) = true;
+    }
+    return lookup_table;
+}
+
+constexpr char cZeroOrMoreCharsWildcard{'*'};  // Wildcard syntax: zero or more chars
+constexpr char cSingleCharWildcard{'?'};  // Wildcard syntax: exactly one char
+constexpr char cRegexZeroOrMore{'*'};  // Regex quantifier
+constexpr char cRegexOneOrMore{'+'};  // Regex quantifier
+constexpr char cRegexZeroOrOne{'?'};  // Regex quantifier; must differ from cRegexOneOrMore
+constexpr char cRegexStartAnchor{'^'};
+constexpr char cRegexEndAnchor{'$'};
+constexpr char cEscapeChar{'\\'};
+constexpr char cCharsetNegate{'^'};  // Leading char of a negated charset
+
+// Characters accepted after a backslash outside a charset. The set is deliberately broader than
+// the real regex meta characters, since users may escape characters that don't need escaping.
+constexpr auto cRegexEscapeSeqAcceptedMetaChars = create_char_bit_array("^$.*{}[]()+|?<>-_/=!\\");
+// Characters that are also special in the wildcard syntax, so their backslash is kept on output.
+constexpr auto cRegexEscapeSeqWildcardOnlyMetaChars = create_char_bit_array("?*\\");
+// Characters accepted after a backslash inside a character set (`[...]`).
+constexpr auto cRegexCharsetEscapeSeqMetaChars = create_char_bit_array("^-]\\");
+
+}  // namespace clp::regex_utils
+
+#endif  // CLP_REGEX_UTILS_CONSTANTS_HPP
diff --git a/components/core/src/clp/regex_utils/regex_utils.hpp b/components/core/src/clp/regex_utils/regex_utils.hpp
new file mode 100644
index 000000000..2d1bf43f0
--- /dev/null
+++ b/components/core/src/clp/regex_utils/regex_utils.hpp
@@ -0,0 +1,49 @@
+#ifndef CLP_REGEX_UTILS_REGEX_UTILS_HPP
+#define CLP_REGEX_UTILS_REGEX_UTILS_HPP
+
+#include <string>
+#include <string_view>
+
+#include <boost-outcome/include/boost/outcome/config.hpp>
+#include <boost-outcome/include/boost/outcome/std_result.hpp>
+
+#include "regex_utils/RegexToWildcardTranslatorConfig.hpp"
+
+namespace clp::regex_utils {
+// Translates `regex_str` into a wildcard string using a default-constructed config.
+[[nodiscard]] auto regex_to_wildcard(std::string_view regex_str
+) -> BOOST_OUTCOME_V2_NAMESPACE::std_result<std::string>;
+// Translates `regex_str` into a wildcard string per `config`; yields an error code on failure.
+[[nodiscard]] auto regex_to_wildcard(
+        std::string_view regex_str,
+        RegexToWildcardTranslatorConfig const& config
+) -> BOOST_OUTCOME_V2_NAMESPACE::std_result<std::string>;
+
+/**
+ * If a regex expression contains multiple starting or ending anchors, remove the duplicates.
+ *
+ * @param regex_str
+ * @return The trimmed regex string, leaving at most one starting or ending anchor.
+ */
+[[nodiscard]] auto regex_trim_line_anchors(std::string_view regex_str) -> std::string;
+
+/**
+ * Check if a regex string has a starting anchor character `^` (caret).
+ *
+ * @param regex_str
+ * @return True if the regex string begins with `^`, false otherwise (also when it is empty).
+ */
+[[nodiscard]] auto regex_has_start_anchor(std::string_view regex_str) -> bool;
+
+/**
+ * Check if a regex string has an ending anchor character `$` (dollar sign).
+ * Note that the regex string may end with an escaped `$`, in which case the `$` character retains
+ * its literal meaning.
+ *
+ * @param regex_str
+ * @return True if the regex string ends with an unescaped `$`, false otherwise.
+ */
+[[nodiscard]] auto regex_has_end_anchor(std::string_view regex_str) -> bool;
+}  // namespace clp::regex_utils
+
+#endif  // CLP_REGEX_UTILS_REGEX_UTILS_HPP
diff --git a/components/core/src/clp/regex_utils/regex_utils_anchors.cpp b/components/core/src/clp/regex_utils/regex_utils_anchors.cpp
new file mode 100644
index 000000000..a204a3cfc
--- /dev/null
+++ b/components/core/src/clp/regex_utils/regex_utils_anchors.cpp
@@ -0,0 +1,64 @@
+#include <string>
+#include <string_view>
+
+#include "regex_utils/constants.hpp"
+#include "regex_utils/regex_utils.hpp"
+
+using std::string;
+using std::string_view;
+
+namespace clp::regex_utils {
+
+auto regex_trim_line_anchors(string_view regex_str) -> string {
+    string_view::const_iterator left(regex_str.begin());
+    string_view::const_iterator right(regex_str.end());
+
+    // Skip the whole run of leading `^` characters.
+    while (left != right && cRegexStartAnchor == *left) {
+        ++left;
+    }
+    // Step back one char so that exactly one start anchor is kept, if there was any.
+    if (left != regex_str.begin()) {
+        --left;
+    }
+
+    // Skip the run of trailing `$` characters, never moving past `left`.
+    while (left != right && cRegexEndAnchor == *(right - 1)) {
+        --right;
+    }
+    if (left != right && right != regex_str.end()) {
+        // At least one `$` was skipped, so advance one char to keep a single one.
+        ++right;
+    }
+
+    // If trailing `$` chars were dropped, the result must still end with a real end anchor. That
+    // is not the case when the kept `$` is escaped (a literal `\$`) or when none could be kept,
+    // so append one more `$` then.
+    string trimmed_regex_str(left, right);
+    if (right != regex_str.end() && !regex_has_end_anchor(trimmed_regex_str)) {
+        trimmed_regex_str += cRegexEndAnchor;
+    }
+    return trimmed_regex_str;
+}
+
+auto regex_has_start_anchor(string_view regex_str) -> bool {
+    return false == regex_str.empty() && regex_str.front() == cRegexStartAnchor;
+}
+
+auto regex_has_end_anchor(string_view regex_str) -> bool {
+    if (regex_str.empty() || regex_str.back() != cRegexEndAnchor) {
+        return false;
+    }
+
+    // The trailing dollar sign is a real anchor only when it is unescaped. Escape chars can
+    // escape themselves, so count the unbroken run of them right before the `$`: an even count
+    // (including zero) leaves the `$` unescaped.
+    size_t num_preceding_escapes{0};
+    // `pos` starts at the index of the `$` and walks left while the previous char is a backslash.
+    for (auto pos{regex_str.size() - 1}; pos > 0 && cEscapeChar == regex_str[pos - 1]; --pos) {
+        ++num_preceding_escapes;
+    }
+    return 0 == num_preceding_escapes % 2;
+}
+
+}  // namespace clp::regex_utils
diff --git a/components/core/src/clp/regex_utils/regex_utils_regex_to_wildcard.cpp b/components/core/src/clp/regex_utils/regex_utils_regex_to_wildcard.cpp
new file mode 100644
index 000000000..5435c9ab6
--- /dev/null
+++ b/components/core/src/clp/regex_utils/regex_utils_regex_to_wildcard.cpp
@@ -0,0 +1,614 @@
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <system_error>
+#include <utility>
+#include <variant>
+
+#include <boost-outcome/include/boost/outcome/config.hpp>
+#include <boost-outcome/include/boost/outcome/std_result.hpp>
+#include <string_utils/string_utils.hpp>
+
+#include "regex_utils/constants.hpp"
+#include "regex_utils/ErrorCode.hpp"
+#include "regex_utils/regex_utils.hpp"
+#include "regex_utils/RegexToWildcardTranslatorConfig.hpp"
+
+using clp::string_utils::is_alphabet;
+using clp::string_utils::is_decimal_digit;
+using std::error_code;
+using std::get;
+using std::make_pair;
+using std::monostate;
+using std::pair;
+using std::string;
+using std::string_view;
+using std::variant;
+
+namespace clp::regex_utils {
+
+/**
+ * Holds the config, parser state, marked position, preceding token, quantifier and group depth.
+ */
+class TranslatorState {
+public:
+    enum class RegexPatternState : uint8_t {
+        // The initial state: literal chars are copied through and meta chars switch the state.
+        NORMAL = 0,
+        // Encountered a period `.`. Expecting wildcard expression.
+        DOT,
+        // Encountered a backslash `\`, used to suppress special meanings of regex meta characters.
+        ESCAPED,
+        // Enclosed by parenthesis `()`, used to specify a capture group.
+        GROUP,
+        // Encountered a backslash `\` in the capture group.
+        GROUPESCAPED,
+        // Enclosed by square brackets `[]`, used to specify a character set.
+        CHARSET,
+        // Encountered a backslash `\` in the character set.
+        CHARSETESCAPED,
+        // Enclosed by curly brackets `{}`, used to specify a quantity to repeat.
+        QUANTIFIER,
+        // Encountered a dollar sign `$`, meaning the regex string has reached the end anchor.
+        END,
+    };
+
+    // Constructor: copies the config and marks the beginning of the regex string.
+    TranslatorState(RegexToWildcardTranslatorConfig const& config, string_view regex_str)
+            : m_config(config),
+              m_it(regex_str.begin()) {}
+
+    // Getters
+    [[nodiscard]] auto get_config() const -> RegexToWildcardTranslatorConfig const& {
+        return m_config;
+    }
+
+    [[nodiscard]] auto get_state() const -> RegexPatternState const& { return m_state; }
+
+    [[nodiscard]] auto get_marked_iterator() const -> string_view::const_iterator const& {
+        return m_it;
+    }
+
+    [[nodiscard]] auto get_preceding_token(
+    ) const -> BOOST_OUTCOME_V2_NAMESPACE::std_result<string>;
+    [[nodiscard]] auto get_quantifier() const -> BOOST_OUTCOME_V2_NAMESPACE::std_result<size_t>;
+
+    [[nodiscard]] auto get_quantifier_as_str() const -> string { return m_quantifier_str; }
+
+    [[nodiscard]] auto quantifier_number_start() const -> bool {  // No digit read for this number
+        return m_quantifier_str.empty() || ',' == m_quantifier_str.back();
+    }
+
+    // Setters
+    void set_next_state(RegexPatternState const& state) { m_state = state; }
+
+    void mark_iterator(string_view::const_iterator const& it) { m_it = it; }
+
+    void invalidate_preceding_token() { m_preceding_token = monostate{}; }
+
+    void set_preceding_token(char ch) { m_preceding_token = ch; }
+
+    void set_preceding_token(string const& s) { m_preceding_token = s; }
+
+    void reset_quantifiers() {  // Back to a single count of 0 with no recorded text
+        m_quantifier = size_t{0};
+        m_quantifier_str.clear();
+    }
+
+    void add_to_quantifier(char ch);
+
+    void switch_to_second_quantifier() {  // Turns the single count `n` into the pair (n, 0)
+        m_quantifier = make_pair(get<size_t>(m_quantifier), 0);
+        m_quantifier_str += ',';  // NOTE(review): get<> above throws if already a pair; confirm
+    }
+
+    void inc_nested_group_count() { ++m_nested_group_count; }
+
+    [[nodiscard]] auto dec_nested_group_count() -> BOOST_OUTCOME_V2_NAMESPACE::std_result<size_t>;
+
+private:
+    // Variables
+    RegexToWildcardTranslatorConfig m_config;
+    RegexPatternState m_state = RegexPatternState::NORMAL;
+    string_view::const_iterator m_it;  // Marked position, e.g. the start of a group or charset
+    variant<monostate, char, string> m_preceding_token;  // monostate: no quantifiable token
+    variant<size_t, pair<size_t, size_t>> m_quantifier;  // A single count, or a pair of numbers
+    string m_quantifier_str;  // Digits and comma recorded for the current quantifier
+    size_t m_nested_group_count = 0;  // Number of `(` that are still unclosed
+};
+
+auto TranslatorState::get_preceding_token(
+) const -> BOOST_OUTCOME_V2_NAMESPACE::std_result<string> {
+    if (std::holds_alternative<monostate>(m_preceding_token)) {
+        return ErrorCode::TokenUnquantifiable;  // There is no token that could be quantified.
+    }
+    if (auto const* single_char = std::get_if<char>(&m_preceding_token)) {
+        return string{*single_char};
+    }
+    if (auto const* token_str = std::get_if<string>(&m_preceding_token)) {
+        return *token_str;
+    }
+    return ErrorCode::IllegalState;  // The variant holds none of its alternatives.
+}
+
+auto TranslatorState::get_quantifier() const -> BOOST_OUTCOME_V2_NAMESPACE::std_result<size_t> {
+    // Only an exact repetition count can be returned as a single number.
+    if (auto const* exact_count = std::get_if<size_t>(&m_quantifier)) {
+        return *exact_count;
+    }
+    if (std::holds_alternative<pair<size_t, size_t>>(m_quantifier)) {
+        // Maybe we can support a ranged pair of quantifiers in the future
+        return ErrorCode::UnsupportedQuantifier;
+    }
+    return ErrorCode::IllegalState;  // The variant holds none of its alternatives.
+}
+
+void TranslatorState::add_to_quantifier(char ch) {
+    // Shift the number currently being read by one decimal place and add the new digit. `ch` is
+    // taken to be a decimal digit; this function does not check it.
+    int const base = 10;
+    int const num{ch - '0'};
+    if (auto* exact_count = std::get_if<0>(&m_quantifier)) {
+        // Still reading the first (or only) number.
+        *exact_count = *exact_count * base + num;
+    } else if (auto* range = std::get_if<1>(&m_quantifier)) {
+        // Reading the second number of a pair.
+        range->second = range->second * base + num;
+    }
+    // The raw text is recorded no matter which number the digit belongs to.
+    m_quantifier_str += ch;
+}
+
+auto TranslatorState::dec_nested_group_count() -> BOOST_OUTCOME_V2_NAMESPACE::std_result<size_t> {
+    if (m_nested_group_count > 0) {
+        return --m_nested_group_count;  // The remaining number of open groups.
+    }
+    // Nothing is open, so there is no `(` that this `)` could close.
+    return ErrorCode::UnmatchedParenthesis;
+}
+
+// Common signature of the state transition functions. Each one receives the translator state,
+// the scan position (which it may move), and the wildcard string built so far, and returns an
+// error code (ErrorCode::Success when the current char was handled).
+
+using StateTransitionFunc
+        = auto(TranslatorState&, string_view::const_iterator&, string&) -> error_code;
+
+// State transition functions, one per RegexPatternState, plus the end-of-input cleanup.
+[[nodiscard]] StateTransitionFunc normal_state_transition;
+[[nodiscard]] StateTransitionFunc dot_state_transition;
+[[nodiscard]] StateTransitionFunc escaped_state_transition;
+[[nodiscard]] StateTransitionFunc group_state_transition;
+[[nodiscard]] StateTransitionFunc group_escaped_state_transition;
+[[nodiscard]] StateTransitionFunc charset_state_transition;
+[[nodiscard]] StateTransitionFunc charset_escaped_state_transition;
+[[nodiscard]] StateTransitionFunc quantifier_state_transition;
+[[nodiscard]] StateTransitionFunc end_state_transition;
+[[nodiscard]] StateTransitionFunc final_state_cleanup;
+
+// Helper functions
+void append_incomplete_quantifier_structure(TranslatorState& state, string& wildcard_str);
+[[nodiscard]] auto matching_upper_lower_case_char_pair(char ch0, char ch1) -> bool;
+
+// Main API
+auto regex_to_wildcard(string_view regex_str) -> BOOST_OUTCOME_V2_NAMESPACE::std_result<string> {
+    // Delegate to the two-argument overload with a default-constructed config.
+    return regex_to_wildcard(regex_str, RegexToWildcardTranslatorConfig{});
+}
+
+auto regex_to_wildcard(string_view regex_str, RegexToWildcardTranslatorConfig const& config)
+        -> BOOST_OUTCOME_V2_NAMESPACE::std_result<string> {
+    if (regex_str.empty()) {  // An empty regex translates to an empty wildcard string.
+        return string();
+    }
+
+    // Set up a char-by-char state machine over `regex_str` that builds `wildcard_str`.
+    TranslatorState state{config, regex_str};
+    string_view::const_iterator it = regex_str.cbegin();
+    string wildcard_str;
+
+    // Consume a leading `^` if anchors are allowed; without one, optionally emit a `*` prefix.
+    if (cRegexStartAnchor == *it) {
+        if (config.allow_anchors()) {
+            ++it;
+        } else {
+            return ErrorCode::Caret;
+        }
+    } else if (config.add_prefix_suffix_wildcards()) {
+        wildcard_str += cZeroOrMoreCharsWildcard;
+    }
+
+    error_code ec{};
+    while (it != regex_str.end()) {
+        switch (state.get_state()) {  // Dispatch the current char to the active state's handler.
+            case TranslatorState::RegexPatternState::NORMAL:
+                ec = normal_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::DOT:
+                ec = dot_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::ESCAPED:
+                ec = escaped_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::GROUP:
+                ec = group_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::GROUPESCAPED:
+                ec = group_escaped_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::CHARSET:
+                ec = charset_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::CHARSETESCAPED:
+                ec = charset_escaped_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::QUANTIFIER:
+                ec = quantifier_state_transition(state, it, wildcard_str);
+                break;
+            case TranslatorState::RegexPatternState::END:
+                ec = end_state_transition(state, it, wildcard_str);
+                break;
+            default:
+                ec = ErrorCode::IllegalState;
+                break;
+        }
+
+        if (ec) {  // Any non-success code aborts the translation.
+            return ec;
+        }
+        ++it;  // A handler may have stepped `it` back to have a char processed again.
+    }
+
+    // Do the final state check and clean up
+    ec = final_state_cleanup(state, it, wildcard_str);
+    if (ec) {
+        return ec;
+    }
+
+    return wildcard_str;
+}
+
+auto normal_state_transition(  // Handles one char while no special construct is open.
+        TranslatorState& state,
+        string_view::const_iterator& it,
+        string& wildcard_str
+) -> error_code {
+    char const ch = *it;
+    auto const& config = state.get_config();
+    switch (ch) {
+        case '.':  // Nothing is emitted yet; the DOT state decides based on the next char.
+            state.set_next_state(TranslatorState::RegexPatternState::DOT);
+            break;
+        case cEscapeChar:
+            state.set_next_state(TranslatorState::RegexPatternState::ESCAPED);
+            break;
+        case '(':
+            state.inc_nested_group_count();
+            state.mark_iterator(it + 1);  // Mark the beginning of group expression
+            state.set_next_state(TranslatorState::RegexPatternState::GROUP);
+            break;
+        case '[':
+            state.mark_iterator(it + 1);  // Mark the beginning of charset expression
+            state.set_next_state(TranslatorState::RegexPatternState::CHARSET);
+            break;
+        case '{':
+            state.reset_quantifiers();
+            state.set_next_state(TranslatorState::RegexPatternState::QUANTIFIER);
+            break;
+        case cRegexEndAnchor:
+            if (!config.allow_anchors()) {
+                return ErrorCode::Dollar;
+            }
+            state.set_next_state(TranslatorState::RegexPatternState::END);
+            break;
+        case '*':  // `*` and `+` are only translated right after `.` (see the DOT state).
+            return ErrorCode::Star;
+        case '+':
+            return ErrorCode::Plus;
+        case '?':
+            return ErrorCode::Question;
+        case '|':
+            return ErrorCode::Pipe;
+        case cRegexStartAnchor:
+            return ErrorCode::Caret;
+        case ')':  // No group is open in this state.
+            return ErrorCode::UnmatchedParenthesis;
+        default:  // Literal char: copied through and recorded as the preceding token.
+            wildcard_str += ch;
+            state.set_preceding_token(ch);
+            break;
+    }
+    return ErrorCode::Success;
+}
+
+auto dot_state_transition(  // Handles the char that follows a `.` seen in the NORMAL state.
+        TranslatorState& state,
+        string_view::const_iterator& it,
+        string& wildcard_str
+) -> error_code {
+    switch (*it) {
+        case '*':
+            // .* gets translated to *
+            wildcard_str += cZeroOrMoreCharsWildcard;
+            state.invalidate_preceding_token();
+            break;
+        case '+':
+            // .+ gets translated to ?* (appended in place rather than rebuilding the string)
+            wildcard_str.append(1, cSingleCharWildcard).append(1, cZeroOrMoreCharsWildcard);
+            state.invalidate_preceding_token();
+            break;
+        default:
+            // . gets translated to ?
+            wildcard_str += cSingleCharWildcard;
+            state.set_preceding_token(cSingleCharWildcard);
+            // Backtrack the scan by one position to handle the current char in the next iteration.
+            --it;
+            break;
+    }
+    state.set_next_state(TranslatorState::RegexPatternState::NORMAL);
+    return ErrorCode::Success;
+}
+
+auto escaped_state_transition(  // Handles the char that follows a backslash in the NORMAL state.
+        TranslatorState& state,
+        string_view::const_iterator& it,
+        string& wildcard_str
+) -> error_code {
+    char const ch = *it;
+    bool const is_ascii = static_cast<unsigned char>(ch) < cCharBitarraySize;  // else at() throws
+    if (!is_ascii || !cRegexEscapeSeqAcceptedMetaChars.at(ch)) {
+        return ErrorCode::DisallowedEscapeSequence;
+    }
+    if (cRegexEscapeSeqWildcardOnlyMetaChars.at(ch)) {
+        string const escape_seq = string{cEscapeChar} + ch;  // Still special in wildcard syntax
+        wildcard_str += escape_seq;
+        state.set_preceding_token(escape_seq);
+    } else {
+        wildcard_str += ch;
+        state.set_preceding_token(ch);
+    }
+    state.set_next_state(TranslatorState::RegexPatternState::NORMAL);
+    return ErrorCode::Success;
+}
+
+auto group_state_transition(  // Scans inside `(...)`; translates the group at its closing `)`.
+        TranslatorState& state,
+        string_view::const_iterator& it,
+        string& wildcard_str
+) -> error_code {
+    char const ch = *it;
+    if (cEscapeChar == ch) {
+        state.set_next_state(TranslatorState::RegexPatternState::GROUPESCAPED);
+        return ErrorCode::Success;
+    }
+    // TODO: make the group unrolling iterative
+    if ('(' == ch) {
+        state.inc_nested_group_count();
+        return ErrorCode::Success;
+    }
+    if (')' != ch) {  // Any other char is only skipped here; it is translated with the group.
+        return ErrorCode::Success;
+    }
+    auto num_nested_group = state.dec_nested_group_count();
+    if (num_nested_group.has_error()) {
+        return num_nested_group.error();
+    }
+    if (num_nested_group.value() > 0) {
+        // Still within nested group
+        return ErrorCode::Success;
+    }
+
+    // End of the outermost group: translate the text between the marked position and this `)`.
+    // The nested translation must not accept anchors or add prefix/suffix wildcards.
+    string const captured_group(state.get_marked_iterator(), it);
+    auto config{state.get_config()};
+    config.set_allow_anchors(false);
+    config.set_add_prefix_suffix_wildcards(false);
+
+    // Perform translation by calling the main API recursively on the group's content.
+    auto translated_group = regex_to_wildcard(captured_group, config);
+    if (translated_group.has_error()) {
+        return translated_group.error();
+    }
+
+    wildcard_str += translated_group.value();
+    state.set_preceding_token(translated_group.value());
+    state.set_next_state(TranslatorState::RegexPatternState::NORMAL);
+    return ErrorCode::Success;
+}
+
+auto group_escaped_state_transition(  // Consumes the char that follows a `\` inside a group.
+        TranslatorState& state,
+        string_view::const_iterator& /*it*/,
+        string& /*wildcard_str*/
+) -> error_code {
+    // Skip the escaped char; the escape sequence is translated with the whole captured group.
+    state.set_next_state(TranslatorState::RegexPatternState::GROUP);
+    return ErrorCode::Success;
+}
+
+auto charset_state_transition(  // Scans inside `[...]`; translates the charset at its `]`.
+        TranslatorState& state,
+        string_view::const_iterator& it,
+        string& wildcard_str
+) -> error_code {
+    char const ch = *it;
+    string_view::const_iterator const& charset_start = state.get_marked_iterator();
+    size_t const charset_len = it - charset_start;
+    if (cEscapeChar == ch) {
+        state.set_next_state(TranslatorState::RegexPatternState::CHARSETESCAPED);
+        return ErrorCode::Success;
+    }
+    if (charset_len > 2) {
+        // Short circuit: the currently accepted charset is at most 2-char long.
+        return ErrorCode::UnsupportedCharsets;
+    }
+    if (']' != ch) {
+        return ErrorCode::Success;
+    }
+    if (0 == charset_len) {  // Empty charset
+        return ErrorCode::UnsupportedCharsets;
+    }
+
+    // End of charset: perform analysis on accepted charset patterns.
+    char const ch0 = *charset_start;
+    char const ch1 = *(charset_start + 1);  // For a 1-char charset this is the closing `]`.
+    // The lookup tables only cover ASCII and at() throws for any other index, so check first.
+    auto const is_ascii = [](char c) { return static_cast<unsigned char>(c) < cCharBitarraySize; };
+    char parsed_char{};
+
+    if (1 == charset_len) {
+        if (cCharsetNegate == ch0 || cEscapeChar == ch0) {
+            return ErrorCode::UnsupportedCharsets;
+        }
+        parsed_char = ch0;
+    } else {  // 2 == charset_len
+        if (cEscapeChar == ch0 && is_ascii(ch1) && cRegexCharsetEscapeSeqMetaChars.at(ch1)) {
+            // 2-char escape sequence
+            parsed_char = ch1;
+        } else if (state.get_config().case_insensitive_wildcard()
+                   && matching_upper_lower_case_char_pair(ch0, ch1))
+        {
+            // case-insensitive patterns like [aA] [Bb] etc.
+            parsed_char = ch0 > ch1 ? ch0 : ch1;  // Get the lower case char
+        } else {
+            return ErrorCode::UnsupportedCharsets;
+        }
+    }
+
+    // Add the parsed character to the string, escaped if it is special in the wildcard syntax.
+    if (is_ascii(parsed_char) && cRegexEscapeSeqWildcardOnlyMetaChars.at(parsed_char)) {
+        auto escaped_char = string{cEscapeChar} + parsed_char;
+        wildcard_str += escaped_char;
+        state.set_preceding_token(escaped_char);
+    } else {
+        wildcard_str += parsed_char;
+        state.set_preceding_token(parsed_char);
+    }
+    state.set_next_state(TranslatorState::RegexPatternState::NORMAL);
+    return ErrorCode::Success;
+}
+
+// True iff both chars pass `is_alphabet` and differ by exactly the upper/lower-case ASCII offset.
+auto matching_upper_lower_case_char_pair(char ch0, char ch1) -> bool {
+    constexpr int cCaseOffset{'a' - 'A'};
+    int const diff{ch0 - ch1};
+    return is_alphabet(ch0) && is_alphabet(ch1) && (cCaseOffset == diff || -cCaseOffset == diff);
+}
+
+auto charset_escaped_state_transition(
+        TranslatorState& state,
+        string_view::const_iterator& /*it*/,
+        string& /*wildcard_str*/
+) -> error_code {
+    // Skip the escaped char unexamined: it is interpreted when the closing `]` ends the charset.
+    state.set_next_state(TranslatorState::RegexPatternState::CHARSET);  // resume charset scanning
+    return ErrorCode::Success;
+}
+
+// Handles one char of a `{...}` quantifier. On the closing `}` the preceding token, expected at
+// the end of `wildcard_str`, is repeated to the requested count (or removed for a count of 0).
+auto quantifier_state_transition(
+        TranslatorState& state,
+        string_view::const_iterator& it,
+        string& wildcard_str
+) -> error_code {
+    char const cur = *it;
+    if ('-' == cur && state.quantifier_number_start()) {
+        return ErrorCode::UnsupportedQuantifier;  // Negative quantifiers are rejected
+    }
+    if (',' == cur) {
+        state.switch_to_second_quantifier();  // A pair of quantifier numbers is expected
+        return ErrorCode::Success;
+    }
+    if (is_decimal_digit(cur)) {
+        state.add_to_quantifier(cur);
+        return ErrorCode::Success;
+    }
+    if ('}' != cur) {
+        // Not a valid quantifier, so `{` loses its special meaning: emit what was scanned as-is,
+        // then step back one position so the current char is handled in the next iteration.
+        append_incomplete_quantifier_structure(state, wildcard_str);
+        --it;
+        state.set_next_state(TranslatorState::RegexPatternState::NORMAL);
+        return ErrorCode::Success;
+    }
+    // Closing `}`: the quantifier expression is complete, so apply the repetition.
+    auto repeat_count = state.get_quantifier();
+    if (repeat_count.has_error()) {
+        return repeat_count.error();
+    }
+    auto preceding = state.get_preceding_token();
+    if (preceding.has_error()) {
+        return preceding.error();
+    }
+    size_t const num_copies = repeat_count.value();
+    string const unit = preceding.value();
+    if (0 == num_copies) {
+        // Zero repetitions: remove the copy of the token that is already in the output.
+        wildcard_str.erase(wildcard_str.length() - unit.length());
+    }
+    // One copy is already in place, so only `num_copies - 1` more are appended.
+    for (size_t copy_idx{1}; copy_idx < num_copies; ++copy_idx) {
+        wildcard_str += unit;
+    }
+    // Compound repetition (quantifying an already quantified token) is not allowed.
+    state.invalidate_preceding_token();
+    state.set_next_state(TranslatorState::RegexPatternState::NORMAL);
+    return ErrorCode::Success;
+}
+
+// Accepts only `cRegexEndAnchor` chars; any other char is reported as `ErrorCode::Dollar`.
+// NOTE(review): presumably reached once an end anchor has been consumed - confirm with the caller.
+auto end_state_transition(
+        TranslatorState& /*state*/,
+        string_view::const_iterator& it,
+        string& /*wildcard_str*/
+) -> error_code {
+    bool const is_end_anchor = cRegexEndAnchor == *it;
+    return is_end_anchor ? ErrorCode::Success : ErrorCode::Dollar;
+}
+
+auto final_state_cleanup(
+        TranslatorState& state,
+        string_view::const_iterator& /*it*/,
+        string& wildcard_str
+) -> error_code {
+    switch (state.get_state()) {
+        case TranslatorState::RegexPatternState::DOT:
+            // The last character is a single `.`, without the possibility of becoming a
+            // multichar wildcard
+            wildcard_str += cSingleCharWildcard;
+            break;
+        case TranslatorState::RegexPatternState::ESCAPED:
+            return ErrorCode::DisallowedEscapeSequence;
+        case TranslatorState::RegexPatternState::GROUP:
+        case TranslatorState::RegexPatternState::GROUPESCAPED:
+            return ErrorCode::UnmatchedParenthesis;
+        case TranslatorState::RegexPatternState::CHARSET:
+            return ErrorCode::IncompleteCharsetStructure;
+        case TranslatorState::RegexPatternState::QUANTIFIER:
+            append_incomplete_quantifier_structure(state, wildcard_str);
+            break;
+        default:
+            break;
+    }
+
+    auto const& config = state.get_config();
+    if (TranslatorState::RegexPatternState::END != state.get_state()
+        && config.add_prefix_suffix_wildcards())
+    {
+        wildcard_str += cZeroOrMoreCharsWildcard;
+    }
+    return ErrorCode::Success;
+}
+
+// Emits an abandoned quantifier literally: `{` followed by `state.get_quantifier_as_str()`.
+void append_incomplete_quantifier_structure(TranslatorState& state, string& wildcard_str) {
+    wildcard_str.append(1, '{').append(state.get_quantifier_as_str());
+    // The last emitted char becomes the preceding token, i.e. what a later quantifier repeats.
+    state.set_preceding_token(wildcard_str.back());
+}
+
+}  // namespace clp::regex_utils
diff --git a/components/core/tests/test-regex_utils.cpp b/components/core/tests/test-regex_utils.cpp
new file mode 100644
index 000000000..5e02c2fb0
--- /dev/null
+++ b/components/core/tests/test-regex_utils.cpp
@@ -0,0 +1,297 @@
+#include <regex_utils/ErrorCode.hpp>
+#include <regex_utils/regex_utils.hpp>
+#include <regex_utils/RegexToWildcardTranslatorConfig.hpp>
+
+#include <Catch2/single_include/catch2/catch.hpp>
+
+using clp::regex_utils::regex_has_end_anchor;
+using clp::regex_utils::regex_has_start_anchor;
+using clp::regex_utils::regex_to_wildcard;
+using clp::regex_utils::regex_trim_line_anchors;
+
+TEST_CASE("regex_to_wildcard", "[regex_utils][regex_to_wildcard]") {
+    // An empty regex translates to an empty wildcard string
+    REQUIRE(regex_to_wildcard("").value().empty());
+
+    // Simple translations: anchors are dropped, `.` -> `?`, `.*` -> `*`, `.+` -> `?*`
+    REQUIRE((regex_to_wildcard("^xyz$").value() == "xyz"));
+    REQUIRE((regex_to_wildcard("xyz").value() == "xyz"));
+    REQUIRE((regex_to_wildcard(". xyz .* zyx .").value() == "? xyz * zyx ?"));
+    REQUIRE((regex_to_wildcard(". xyz .+ zyx .*").value() == "? xyz ?* zyx *"));
+
+    // Unsupported uses of unescaped meta characters are rejected with a per-character error code
+    REQUIRE((regex_to_wildcard(".? xyz .* zyx .").error() == clp::regex_utils::ErrorCode::Question)
+    );
+    REQUIRE((regex_to_wildcard(". xyz .** zyx .").error() == clp::regex_utils::ErrorCode::Star));
+    REQUIRE((regex_to_wildcard(". xyz .*+ zyx .").error() == clp::regex_utils::ErrorCode::Plus));
+    REQUIRE((regex_to_wildcard(". xyz |.* zyx .").error() == clp::regex_utils::ErrorCode::Pipe));
+    REQUIRE((regex_to_wildcard(". xyz ^.* zyx .").error() == clp::regex_utils::ErrorCode::Caret));
+
+    // Escaped meta characters become literals; only `*`, `?` and the backslash stay escaped
+    REQUIRE(
+            (regex_to_wildcard("\\^\\$\\.\\*\\{\\}\\[\\]\\(\\)\\+\\|\\?\\<\\>\\-\\_\\/\\=\\!\\\\")
+                     .value()
+             == "^$.\\*{}[]()+|\\?<>-_/=!\\\\")
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc\\Qdefghi\\Ejkl").error()
+             == clp::regex_utils::ErrorCode::DisallowedEscapeSequence)
+    );
+
+    // `{n}` repeats the preceding token (`{0}` removes it); an unterminated `{...` stays literal
+    REQUIRE((regex_to_wildcard("abc{3}").value() == "abccc"));
+    REQUIRE((regex_to_wildcard("abc{4}").value() == "abcccc"));
+    REQUIRE((regex_to_wildcard("abc{0}").value() == "ab"));
+    REQUIRE((regex_to_wildcard("abc.{4}").value() == "abc????"));
+    REQUIRE((regex_to_wildcard("abc\\[{4}").value() == "abc[[[["));
+    REQUIRE((regex_to_wildcard("abc\\^{4}").value() == "abc^^^^"));
+    REQUIRE((regex_to_wildcard("abc\\*{4}").value() == "abc\\*\\*\\*\\*"));
+    REQUIRE((regex_to_wildcard("abc\\?{4}").value() == "abc\\?\\?\\?\\?"));
+    REQUIRE((regex_to_wildcard("abc{123").value() == "abc{123"));
+    REQUIRE((regex_to_wildcard("abc{123,456").value() == "abc{123,456"));
+    REQUIRE((regex_to_wildcard("abc{00123\\*").value() == "abc{00123\\*"));
+    REQUIRE((regex_to_wildcard("abc{3,4{{{{3}").value() == "abc{3,4{{{{{"));
+    REQUIRE((regex_to_wildcard("abc{3,4{3,4{3,{3}").value() == "abc{3,4{3,4{3,,,"));
+    REQUIRE((regex_to_wildcard("abc{3,4{3,4{3,4{3}").value() == "abc{3,4{3,4{3,444"));
+    REQUIRE((regex_to_wildcard("abc{3,4{3,4{3,4.*").value() == "abc{3,4{3,4{3,4*"));
+    REQUIRE((regex_to_wildcard("abc{3,4{3,4{3,4\\[a-z]").value() == "abc{3,4{3,4{3,4[a-z]"));
+    REQUIRE((regex_to_wildcard("abc{3,4{3,4{3,4\\*{4}").value() == "abc{3,4{3,4{3,4\\*\\*\\*\\*"));
+    // Negative and range quantifiers are unsupported
+    REQUIRE(
+            (regex_to_wildcard("abc{-3}").error()
+             == clp::regex_utils::ErrorCode::UnsupportedQuantifier)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc{3,4}").error()
+             == clp::regex_utils::ErrorCode::UnsupportedQuantifier)
+    );
+    // Nothing to repeat: no preceding token, an already quantified one, or a multichar wildcard
+    REQUIRE((
+            regex_to_wildcard("{3}abc").error() == clp::regex_utils::ErrorCode::TokenUnquantifiable
+    ));
+    REQUIRE(
+            (regex_to_wildcard("abc{3}{3}").error()
+             == clp::regex_utils::ErrorCode::TokenUnquantifiable)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc.*{3}").error()
+             == clp::regex_utils::ErrorCode::TokenUnquantifiable)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc.+{3}").error()
+             == clp::regex_utils::ErrorCode::TokenUnquantifiable)
+    );
+
+    // Groups are translated as a unit and can then be quantified as a whole
+    REQUIRE((regex_to_wildcard("(xyz)").value() == "xyz"));
+    REQUIRE((regex_to_wildcard("abc (xyz) def").value() == "abc xyz def"));
+    REQUIRE((regex_to_wildcard("abc () def").value() == "abc  def"));
+    REQUIRE(
+            (regex_to_wildcard("abc (. xyz .+ zyx .*){2} def").value()
+             == "abc ? xyz ?* zyx *? xyz ?* zyx * def")
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc (.{3} xyz .+ zyx .*){2} def").value()
+             == "abc ??? xyz ?* zyx *??? xyz ?* zyx * def")
+    );
+    REQUIRE((regex_to_wildcard("abc (\\)){2} def").value() == "abc )) def"));
+    REQUIRE((regex_to_wildcard("abc (\\)\\*){2} def").value() == "abc )\\*)\\* def"));
+    REQUIRE((
+            regex_to_wildcard("abc (x(\\*){3}z){2} def").value() == "abc x\\*\\*\\*zx\\*\\*\\*z def"
+    ));
+    // Unbalanced parentheses and untranslatable group contents are errors
+    REQUIRE(
+            (regex_to_wildcard("abc (. xyz .+ zyx .*{2} def").error()
+             == clp::regex_utils::ErrorCode::UnmatchedParenthesis)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc (x(\\*{3}z){2} def").error()
+             == clp::regex_utils::ErrorCode::UnmatchedParenthesis)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc (x(\\*){3}z{2} def").error()
+             == clp::regex_utils::ErrorCode::UnmatchedParenthesis)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc x(\\*){3}z){2} def").error()
+             == clp::regex_utils::ErrorCode::UnmatchedParenthesis)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc (x\\*){3}z){2} def").error()
+             == clp::regex_utils::ErrorCode::UnmatchedParenthesis)
+    );
+    REQUIRE((
+            regex_to_wildcard("abc (abc | def){2} def").error() == clp::regex_utils::ErrorCode::Pipe
+    ));
+    REQUIRE(
+            (regex_to_wildcard("abc (* xyz .+ zyx .*){2} def").error()
+             == clp::regex_utils::ErrorCode::Star)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc (+ xyz .+ zyx .*){2} def").error()
+             == clp::regex_utils::ErrorCode::Plus)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc (.{3}{3} xyz .+ zyx .*){2} def").error()
+             == clp::regex_utils::ErrorCode::TokenUnquantifiable)
+    );
+    REQUIRE(
+            (regex_to_wildcard("abc (. xyz .+{3} zyx .*){2} def").error()
+             == clp::regex_utils::ErrorCode::TokenUnquantifiable)
+    );
+
+    // Charsets holding one (optionally escaped) char become that char and can be quantified
+    REQUIRE((regex_to_wildcard("x[y]z").value() == "xyz"));
+    REQUIRE((regex_to_wildcard("x[y]{2}z").value() == "xyyz"));
+    REQUIRE((regex_to_wildcard("x[+]{2}z").value() == "x++z"));
+    REQUIRE((regex_to_wildcard("x[-]{2}z").value() == "x--z"));
+    REQUIRE((regex_to_wildcard("x[|]{2}z").value() == "x||z"));
+    REQUIRE((regex_to_wildcard("x[\\-]{2}z").value() == "x--z"));
+    REQUIRE((regex_to_wildcard("x[\\^]{2}z").value() == "x^^z"));
+    REQUIRE((regex_to_wildcard("x[\\]]{2}z").value() == "x]]z"));
+    REQUIRE((regex_to_wildcard("x[*]{2}z").value() == "x\\*\\*z"));
+    REQUIRE((regex_to_wildcard("x[?]{2}z").value() == "x\\?\\?z"));
+    REQUIRE((regex_to_wildcard("x[\\\\]{2}z").value() == "x\\\\\\\\z"));
+
+    REQUIRE((regex_to_wildcard("abc (x[*]{2}z){2} def").value() == "abc x\\*\\*zx\\*\\*z def"));
+    REQUIRE((regex_to_wildcard("abc (x[\\]]{2}z){2} def").value() == "abc x]]zx]]z def"));
+    // Unterminated, empty, negation-only and escape-only charsets are rejected
+    REQUIRE(
+            (regex_to_wildcard("x[aA").error()
+             == clp::regex_utils::ErrorCode::IncompleteCharsetStructure)
+    );
+    REQUIRE((
+            regex_to_wildcard("x[]{2}z").error() == clp::regex_utils::ErrorCode::UnsupportedCharsets
+    ));
+    REQUIRE(
+            (regex_to_wildcard("x[^]{2}z").error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+    REQUIRE(
+            (regex_to_wildcard("x[\\]{2}z").error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+
+    // Upper/lower-case pair charsets need the case-insensitive wildcard config (not set here)
+    REQUIRE((regex_to_wildcard("[aA]").error() == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+    REQUIRE((regex_to_wildcard("[Aa]").error() == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+    REQUIRE(
+            (regex_to_wildcard("[Ee][Xx][Cc][Ee][Pp][Tt][Ii][Oo][Nn]").error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+    REQUIRE(
+            (regex_to_wildcard("[eE][Xx][cC][eE][pP][Tt][iI][Oo]{2}[Nn]").error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+}
+
+TEST_CASE(
+        "regex_to_wildcard_case_insensitive_wildcard",
+        "[regex_utils][regex_to_wildcard][case_insensitive_wildcard]"
+) {
+    clp::regex_utils::RegexToWildcardTranslatorConfig config;
+    config.set_case_insensitive_wildcard(true);
+    // With the option on, an upper/lower-case pair charset becomes the lower-case char
+    REQUIRE((regex_to_wildcard("[aA]", config).value() == "a"));
+    REQUIRE((regex_to_wildcard("[Aa]", config).value() == "a"));
+    REQUIRE((regex_to_wildcard("[Aa][pP]{2}[Ll][eE]", config).value() == "apple"));
+    REQUIRE((
+            regex_to_wildcard("[Ee][Xx][Cc][Ee][Pp][Tt][Ii][Oo][Nn]", config).value() == "exception"
+    ));
+    REQUIRE(
+            (regex_to_wildcard("[eE][Xx][cC][eE][pP][Tt][iI][Oo]{2}[Nn]", config).value()
+             == "exceptioon")
+    );
+    // `[Tk]` is not an upper/lower-case pair of the same letter
+    REQUIRE(
+            (regex_to_wildcard("[eE][Xx][cC][eE][pP][Tk][iI][Oo][Nn]", config).error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+
+    // Charsets that are not case pairs behave exactly as without the option
+    REQUIRE((regex_to_wildcard("x[y]z", config).value() == "xyz"));
+    REQUIRE((regex_to_wildcard("x[y]{2}z", config).value() == "xyyz"));
+    REQUIRE((regex_to_wildcard("x[+]{2}z", config).value() == "x++z"));
+    REQUIRE((regex_to_wildcard("x[-]{2}z", config).value() == "x--z"));
+    REQUIRE((regex_to_wildcard("x[|]{2}z", config).value() == "x||z"));
+    REQUIRE((regex_to_wildcard("x[\\-]{2}z", config).value() == "x--z"));
+    REQUIRE((regex_to_wildcard("x[\\^]{2}z", config).value() == "x^^z"));
+    REQUIRE((regex_to_wildcard("x[\\]]{2}z", config).value() == "x]]z"));
+    REQUIRE((regex_to_wildcard("x[*]{2}z", config).value() == "x\\*\\*z"));
+    REQUIRE((regex_to_wildcard("x[?]{2}z", config).value() == "x\\?\\?z"));
+    REQUIRE((regex_to_wildcard("x[\\\\]{2}z", config).value() == "x\\\\\\\\z"));
+
+    REQUIRE((
+            regex_to_wildcard("abc (x[*]{2}z){2} def", config).value() == "abc x\\*\\*zx\\*\\*z def"
+    ));
+    REQUIRE((regex_to_wildcard("abc (x[\\]]{2}z){2} def", config).value() == "abc x]]zx]]z def"));
+
+    REQUIRE(
+            (regex_to_wildcard("x[]{2}z", config).error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+    REQUIRE(
+            (regex_to_wildcard("x[^]{2}z", config).error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+    REQUIRE(
+            (regex_to_wildcard("x[\\]{2}z", config).error()
+             == clp::regex_utils::ErrorCode::UnsupportedCharsets)
+    );
+}
+
+TEST_CASE("regex_to_wildcard_anchor_config", "[regex_utils][regex_to_wildcard][anchor_config]") {
+    // With prefix/suffix wildcards on, `*` is added on every side that has no anchor
+    clp::regex_utils::RegexToWildcardTranslatorConfig config;
+    config.set_add_prefix_suffix_wildcards(true);
+    REQUIRE(((regex_to_wildcard("^", config).value() == "*")));
+    REQUIRE((regex_to_wildcard("$", config).value() == "*"));
+    REQUIRE((regex_to_wildcard("^xyz$", config).value() == "xyz"));
+    REQUIRE((regex_to_wildcard("xyz", config).value() == "*xyz*"));
+
+    // Groups get no extra wildcards of their own, and anchors inside a group are errors
+    REQUIRE((regex_to_wildcard("xyz(. xyz .* zyx .)zyx", config).value() == "*xyz? xyz * zyx ?zyx*")
+    );
+    REQUIRE(
+            (regex_to_wildcard("xyz(^. xyz .* zyx .)zyx", config).error()
+             == clp::regex_utils::ErrorCode::Caret)
+    );
+    REQUIRE(
+            (regex_to_wildcard("xyz(. xyz .* zyx .$)zyx", config).error()
+             == clp::regex_utils::ErrorCode::Dollar)
+    );
+}
+
+TEST_CASE("regex_trim_line_anchors", "[regex_utils][regex_trim_line_anchors]") {
+    REQUIRE(regex_trim_line_anchors("").empty());  // Empty input stays empty
+    REQUIRE((regex_trim_line_anchors("^^^hello$$$") == "^hello$"));  // Runs of anchors collapse
+    REQUIRE((regex_trim_line_anchors("^^\\^hello$$$") == "^\\^hello$"));  // Escaped caret is kept
+    REQUIRE((regex_trim_line_anchors("^^^hello\\$$$") == "^hello\\$$"));  // Escaped dollar is kept
+    REQUIRE((regex_trim_line_anchors("^^\\^hello\\$$$") == "^\\^hello\\$$"));
+    REQUIRE((regex_trim_line_anchors("^^^hello\\\\\\\\\\\\\\$$$") == "^hello\\\\\\\\\\\\\\$$"));
+    REQUIRE((regex_trim_line_anchors("^^^\\\\goodbye\\\\\\\\$$$") == "^\\\\goodbye\\\\\\\\$"));
+}
+
+TEST_CASE("regex_has_start_anchor", "[regex_utils][regex_has_start_anchor]") {
+    REQUIRE_FALSE(regex_has_start_anchor(""));  // Empty regex has no start anchor
+    REQUIRE(regex_has_start_anchor("^hello$"));
+    REQUIRE_FALSE(regex_has_start_anchor("\\^hello$"));  // An escaped caret is not an anchor
+    REQUIRE(regex_has_start_anchor("^hello\\$"));  // What happens at the end is irrelevant here
+    REQUIRE_FALSE(regex_has_start_anchor("\\^hello\\$"));
+    REQUIRE(regex_has_start_anchor("^hello\\\\\\\\\\\\\\$"));
+    REQUIRE(regex_has_start_anchor("^\\\\goodbye\\\\\\\\\\\\$"));
+}
+
+TEST_CASE("regex_has_end_anchor", "[regex_utils][regex_has_end_anchor]") {
+    REQUIRE_FALSE(regex_has_end_anchor(""));  // Empty regex has no end anchor
+    REQUIRE(regex_has_end_anchor("^hello$"));
+    REQUIRE(regex_has_end_anchor("\\^hello$"));  // What happens at the start is irrelevant here
+    REQUIRE_FALSE(regex_has_end_anchor("^hello\\$"));  // An escaped dollar is not an anchor
+    REQUIRE_FALSE(regex_has_end_anchor("\\^hello\\$"));
+    REQUIRE_FALSE(regex_has_end_anchor("^hello\\\\\\\\\\\\\\$"));  // Odd backslash run: escaped
+    REQUIRE(regex_has_end_anchor("^\\\\goodbye\\\\\\\\\\\\$"));  // Even backslash run: anchor
+    REQUIRE(regex_has_end_anchor("\\\\\\\\\\\\$"));  // Even backslash run: anchor
+    REQUIRE_FALSE(regex_has_end_anchor("\\\\\\\\\\\\\\$"));  // Odd backslash run: escaped
+}