From f4f73356ff49bfd871e0b586861a3f3d9c185035 Mon Sep 17 00:00:00 2001
From: Bingran Hu <bingran.hu@yscope.com>
Date: Mon, 9 Dec 2024 18:30:14 +0000
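Subject: [PATCH] Remove duplicate string utils functions in clp-s and use
 clp::string_utils

Delete the clp_s::StringUtils helpers that duplicate clp::string_utils
(is_alphabet, is_decimal_digit, is_delim, find_first_of,
replace_characters, to_lower, is_wildcard,
clean_up_wildcard_search_string, wildcard_match_unsafe,
wildcard_match_unsafe_case_sensitive, convert_string_to_int64 and
tokenize_column_descriptor) and switch the clp-s callers touched here to
the clp::string_utils equivalents.

Also link the kql library against clp::string_utils and update the
clp-cpp submodule pointer.
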
---
 .../core/src/clp_s/DictionaryReader.hpp       |   6 +-
 components/core/src/clp_s/JsonParser.cpp      |   3 +-
 .../src/clp_s/TimestampDictionaryReader.cpp   |   5 +-
 components/core/src/clp_s/Utils.cpp           | 283 +-----------------
 components/core/src/clp_s/Utils.hpp           | 137 ---------
 components/core/src/clp_s/clp-s.cpp           |   6 +-
 components/core/src/clp_s/search/Output.cpp   |   8 +-
 .../core/src/clp_s/search/clp_search/Grep.cpp |  35 ++-
 .../core/src/clp_s/search/kql/CMakeLists.txt  |   8 +-
 components/core/src/clp_s/search/kql/kql.cpp  |   9 +-
 components/core/submodules/clp-cpp            |   2 +-
 11 files changed, 56 insertions(+), 446 deletions(-)
diff --git a/components/core/src/clp_s/DictionaryReader.hpp b/components/core/src/clp_s/DictionaryReader.hpp
index 175214d88..52de582ac 100644
--- a/components/core/src/clp_s/DictionaryReader.hpp
+++ b/components/core/src/clp_s/DictionaryReader.hpp
@@ -6,11 +6,11 @@
 #include <unordered_set>
 
 #include <boost/algorithm/string/case_conv.hpp>
+#include <clp/string_utils/string_utils.hpp>
 
 #include "DictionaryEntry.hpp"
-#include "Utils.hpp"
 
 namespace clp_s {
 template <typename DictionaryIdType, typename EntryType>
 class DictionaryReader {
 public:
@@ -200,7 +200,12 @@ void DictionaryReader<DictionaryIdType, EntryType>::get_entries_matching_wildcar
         std::unordered_set<EntryType const*>& entries
 ) const {
     for (auto const& entry : m_entries) {
-        if (StringUtils::wildcard_match_unsafe(entry.get_value(), wildcard_string, !ignore_case)) {
+        if (clp::string_utils::wildcard_match_unsafe(
+                    entry.get_value(),
+                    wildcard_string,
+                    !ignore_case
+            ))
+        {
             entries.insert(&entry);
         }
     }
diff --git a/components/core/src/clp_s/JsonParser.cpp b/components/core/src/clp_s/JsonParser.cpp
index 9e8293510..aee9e819b 100644
--- a/components/core/src/clp_s/JsonParser.cpp
+++ b/components/core/src/clp_s/JsonParser.cpp
@@ -3,6 +3,7 @@
 #include <iostream>
 #include <stack>
 
+#include <clp/string_utils/string_utils.hpp>
 #include <simdjson.h>
 #include <spdlog/spdlog.h>
 
@@ -23,7 +24,7 @@ JsonParser::JsonParser(JsonParserOption const& option)
 
     if (false == m_timestamp_key.empty()) {
         if (false
-            == clp_s::StringUtils::tokenize_column_descriptor(m_timestamp_key, m_timestamp_column))
+            == clp::string_utils::tokenize_column_descriptor(m_timestamp_key, m_timestamp_column))
         {
             SPDLOG_ERROR("Can not parse invalid timestamp key: \"{}\"", m_timestamp_key);
             throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
diff --git a/components/core/src/clp_s/TimestampDictionaryReader.cpp b/components/core/src/clp_s/TimestampDictionaryReader.cpp
index 15685a97e..fbcfacb87 100644
--- a/components/core/src/clp_s/TimestampDictionaryReader.cpp
+++ b/components/core/src/clp_s/TimestampDictionaryReader.cpp
@@ -1,9 +1,9 @@
 #include "TimestampDictionaryReader.hpp"
 
 #include <unordered_set>
 
-#include "Utils.hpp"
+#include <clp/string_utils/string_utils.hpp>
 
 namespace clp_s {
 void TimestampDictionaryReader::open(std::string const& dictionary_path) {
     if (m_is_open) {
@@ -44,7 +44,7 @@ void TimestampDictionaryReader::read_new_entries() {
         TimestampEntry entry;
         std::vector<std::string> tokens;
         entry.try_read_from_file(m_dictionary_decompressor);
-        if (false == StringUtils::tokenize_column_descriptor(entry.get_key_name(), tokens)) {
+        if (false == clp::string_utils::tokenize_column_descriptor(entry.get_key_name(), tokens)) {
             throw OperationFailed(ErrorCodeCorrupt, __FILENAME__, __LINE__);
         }
         m_entries.emplace_back(std::move(entry));
diff --git a/components/core/src/clp_s/Utils.cpp b/components/core/src/clp_s/Utils.cpp
index acee48851..fa1f7e603 100644
--- a/components/core/src/clp_s/Utils.cpp
+++ b/components/core/src/clp_s/Utils.cpp
@@ -1,12 +1,16 @@
 #include "Utils.hpp"
 
 #include <boost/filesystem.hpp>
+#include <clp/string_utils/string_utils.hpp>
 #include <spdlog/spdlog.h>
 
+namespace clp_s {
 using std::string;
 using std::string_view;
+using clp::string_utils::is_alphabet;
+using clp::string_utils::is_decimal_digit;
+using clp::string_utils::is_delim;
 
-namespace clp_s {
 bool FileUtils::find_all_files(std::string const& path, std::vector<std::string>& file_paths) {
     try {
         if (false == boost::filesystem::is_directory(path)) {
@@ -110,68 +114,6 @@ bool StringUtils::get_bounds_of_next_var(string const& msg, size_t& begin_pos, s
     return (msg_length != begin_pos);
 }
 
-size_t StringUtils::find_first_of(
-        string const& haystack,
-        char const* needles,
-        size_t search_start_pos,
-        size_t& needle_ix
-) {
-    size_t haystack_length = haystack.length();
-    size_t needles_length = strlen(needles);
-    for (size_t i = search_start_pos; i < haystack_length; ++i) {
-        for (needle_ix = 0; needle_ix < needles_length; ++needle_ix) {
-            if (haystack[i] == needles[needle_ix]) {
-                return i;
-            }
-        }
-    }
-
-    return string::npos;
-}
-
-string StringUtils::replace_characters(
-        char const* characters_to_escape,
-        char const* replacement_characters,
-        string const& value,
-        bool escape
-) {
-    string new_value;
-    size_t search_start_pos = 0;
-    while (true) {
-        size_t replace_char_ix;
-        size_t char_to_replace_pos
-                = find_first_of(value, characters_to_escape, search_start_pos, replace_char_ix);
-        if (string::npos == char_to_replace_pos) {
-            new_value.append(value, search_start_pos, string::npos);
-            break;
-        } else {
-            new_value.append(value, search_start_pos, char_to_replace_pos - search_start_pos);
-            if (escape) {
-                new_value += "\\";
-            }
-            new_value += replacement_characters[replace_char_ix];
-            search_start_pos = char_to_replace_pos + 1;
-        }
-    }
-    return new_value;
-}
-
-void StringUtils::to_lower(string& str) {
-    std::transform(str.cbegin(), str.cend(), str.begin(), [](unsigned char c) {
-        return std::tolower(c);
-    });
-}
-
-bool StringUtils::is_wildcard(char c) {
-    static constexpr char cWildcards[] = "?*";
-    for (size_t i = 0; i < strlen(cWildcards); ++i) {
-        if (cWildcards[i] == c) {
-            return true;
-        }
-    }
-    return false;
-}
-
 bool StringUtils::has_unescaped_wildcards(std::string const& str) {
     for (size_t i = 0; i < str.size(); ++i) {
         if ('*' == str[i] || '?' == str[i]) {
@@ -184,42 +126,6 @@ bool StringUtils::has_unescaped_wildcards(std::string const& str) {
     return false;
 }
 
-string StringUtils::clean_up_wildcard_search_string(string_view str) {
-    string cleaned_str;
-
-    bool is_escaped = false;
-    auto str_end = str.cend();
-    for (auto current = str.cbegin(); current != str_end;) {
-        auto c = *current;
-        if (is_escaped) {
-            is_escaped = false;
-
-            if (is_wildcard(c) || '\\' == c) {
-                // Keep escaping if c is a wildcard character or an escape character
-                cleaned_str += '\\';
-            }
-            cleaned_str += c;
-            ++current;
-        } else if ('*' == c) {
-            cleaned_str += c;
-
-            // Skip over all '*' to find the next non-'*'
-            do {
-                ++current;
-            } while (current != str_end && '*' == *current);
-        } else {
-            if ('\\' == c) {
-                is_escaped = true;
-            } else {
-                cleaned_str += c;
-            }
-            ++current;
-        }
-    }
-
-    return cleaned_str;
-}
-
 bool StringUtils::advance_tame_to_next_match(
         char const*& tame_current,
         char const*& tame_bookmark,
@@ -261,154 +167,6 @@ bool StringUtils::advance_tame_to_next_match(
     return true;
 }
 
-bool StringUtils::wildcard_match_unsafe(
-        string_view tame,
-        string_view wild,
-        bool case_sensitive_match
-) {
-    if (case_sensitive_match) {
-        return wildcard_match_unsafe_case_sensitive(tame, wild);
-    } else {
-        // We convert to lowercase (rather than uppercase) anticipating that
-        // callers use lowercase more frequently, so little will need to change.
-        string lowercase_tame(tame);
-        to_lower(lowercase_tame);
-        string lowercase_wild(wild);
-        to_lower(lowercase_wild);
-        return wildcard_match_unsafe_case_sensitive(lowercase_tame, lowercase_wild);
-    }
-}
-
-/**
- * The algorithm basically works as follows:
- * Given a wild string "*abc*def*ghi*", it can be broken into groups of
- * characters delimited by one or more '*' characters. The goal of the
- * algorithm is then to determine whether the tame string contains each of
- * those groups in the same order.
- *
- * Thus, the algorithm:
- * 1. searches for the start of one of these groups in wild,
- * 2. searches for a group in tame starting with the same character, and then
- * 3. checks if the two match. If not, the search repeats with the next group in
- *    tame.
- */
-bool StringUtils::wildcard_match_unsafe_case_sensitive(string_view tame, string_view wild) {
-    auto const tame_length = tame.length();
-    auto const wild_length = wild.length();
-    char const* tame_current = tame.data();
-    char const* wild_current = wild.data();
-    char const* tame_bookmark = nullptr;
-    char const* wild_bookmark = nullptr;
-    char const* tame_end = tame_current + tame_length;
-    char const* wild_end = wild_current + wild_length;
-
-    // Handle wild or tame being empty
-    if (0 == wild_length) {
-        return 0 == tame_length;
-    } else {
-        if (0 == tame_length) {
-            return "*" == wild;
-        }
-    }
-
-    char w;
-    char t;
-    bool is_escaped = false;
-    while (true) {
-        w = *wild_current;
-        if ('*' == w) {
-            ++wild_current;
-            if (wild_end == wild_current) {
-                // Trailing '*' means everything remaining in tame will match
-                return true;
-            }
-
-            // Set wild and tame bookmarks
-            wild_bookmark = wild_current;
-            if (!advance_tame_to_next_match(
-                        tame_current,
-                        tame_bookmark,
-                        tame_end,
-                        wild_current,
-                        wild_bookmark
-                ))
-            {
-                return false;
-            }
-        } else {
-            // Handle escaped characters
-            if ('\\' == w) {
-                is_escaped = true;
-                ++wild_current;
-                // This is safe without a bounds check since this the caller
-                // ensures there are no dangling escape characters
-                w = *wild_current;
-            }
-
-            // Handle a mismatch
-            t = *tame_current;
-            if (false == ((false == is_escaped && '?' == w) || t == w)) {
-                if (nullptr == wild_bookmark) {
-                    // No bookmark to return to
-                    return false;
-                }
-
-                wild_current = wild_bookmark;
-                tame_current = tame_bookmark + 1;
-                if (!advance_tame_to_next_match(
-                            tame_current,
-                            tame_bookmark,
-                            tame_end,
-                            wild_current,
-                            wild_bookmark
-                    ))
-                {
-                    return false;
-                }
-            }
-        }
-
-        ++tame_current;
-        ++wild_current;
-
-        // Handle reaching the end of tame or wild
-        if (tame_end == tame_current) {
-            return (wild_end == wild_current
-                    || ('*' == *wild_current && (wild_current + 1) == wild_end));
-        } else {
-            if (wild_end == wild_current) {
-                if (nullptr == wild_bookmark) {
-                    // No bookmark to return to
-                    return false;
-                } else {
-                    wild_current = wild_bookmark;
-                    tame_current = tame_bookmark + 1;
-                    if (!advance_tame_to_next_match(
-                                tame_current,
-                                tame_bookmark,
-                                tame_end,
-                                wild_current,
-                                wild_bookmark
-                        ))
-                    {
-                        return false;
-                    }
-                }
-            }
-        }
-    }
-}
-
-bool StringUtils::convert_string_to_int64(std::string_view raw, int64_t& converted) {
-    auto raw_end = raw.cend();
-    auto result = std::from_chars(raw.cbegin(), raw_end, converted);
-    if (raw_end != result.ptr) {
-        return false;
-    } else {
-        return result.ec == std::errc();
-    }
-}
-
 bool StringUtils::convert_string_to_double(std::string const& raw, double& converted) {
     if (raw.empty()) {
         // Can't convert an empty string
@@ -426,35 +184,4 @@ bool StringUtils::convert_string_to_double(std::string const& raw, double& conve
     converted = raw_as_double;
     return true;
 }
-
-bool StringUtils::tokenize_column_descriptor(
-        std::string const& descriptor,
-        std::vector<std::string>& tokens
-) {
-    // TODO: add support for unicode sequences e.g. \u263A
-    std::string cur_tok;
-    for (size_t cur = 0; cur < descriptor.size(); ++cur) {
-        if ('\\' == descriptor[cur]) {
-            ++cur;
-            if (cur >= descriptor.size()) {
-                return false;
-            }
-        } else if ('.' == descriptor[cur]) {
-            if (cur_tok.empty()) {
-                return false;
-            }
-            tokens.push_back(cur_tok);
-            cur_tok.clear();
-            continue;
-        }
-        cur_tok.push_back(descriptor[cur]);
-    }
-
-    if (cur_tok.empty()) {
-        return false;
-    }
-
-    tokens.push_back(cur_tok);
-    return true;
-}
 }  // namespace clp_s
diff --git a/components/core/src/clp_s/Utils.hpp b/components/core/src/clp_s/Utils.hpp
index d6deb3280..f7800fdc0 100644
--- a/components/core/src/clp_s/Utils.hpp
+++ b/components/core/src/clp_s/Utils.hpp
@@ -28,34 +28,6 @@ class FileUtils {
 
 class StringUtils {
 public:
-    /**
-     * Checks if the given character is an alphabet
-     * @param c
-     * @return true if c is an alphabet, false otherwise
-     */
-    static inline bool is_alphabet(char c) {
-        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
-    }
-
-    /**
-     * Checks if character is a decimal (base-10) digit
-     * @param c
-     * @return true if c is a decimal digit, false otherwise
-     */
-    static inline bool is_decimal_digit(char c) { return '0' <= c && c <= '9'; }
-
-    /**
-     * Checks if character is a hexadecimal (base-16) digit
-     * @param c
-     * @return true if c is a hexadecimal digit, false otherwise
-     */
-    static inline bool is_delim(char c) {
-        return !(
-                '+' == c || ('-' <= c && c <= '9') || ('A' <= c && c <= 'Z') || '\\' == c
-                || '_' == c || ('a' <= c && c <= 'z')
-        );
-    }
-
     /**
      * Checks if the string could be a hexadecimal value
      * @param str
@@ -92,62 +64,6 @@ class StringUtils {
      */
     static bool get_bounds_of_next_var(std::string const& msg, size_t& begin_pos, size_t& end_pos);
 
-    /**
-     * Searches haystack starting at the given position for one of the given needles
-     * @param haystack
-     * @param needles
-     * @param search_start_pos
-     * @param needle_ix The index of the needle found
-     * @return The position of the match or string::npos if none
-     */
-    static size_t find_first_of(
-            std::string const& haystack,
-            char const* needles,
-            size_t search_start_pos,
-            size_t& needle_ix
-    );
-
-    /**
-     * Replaces the given characters in the given value with the given replacements
-     * @param characters_to_escape
-     * @param replacement_characters
-     * @param value
-     * @param escape Whether to precede the replacement with a '\' (e.g., so that a
-     * line-feed character is output as "\n")
-     * @return The string with replacements
-     */
-    static std::string replace_characters(
-            char const* characters_to_escape,
-            char const* replacement_characters,
-            std::string const& value,
-            bool escape
-    );
-
-    /**
-     * Converts a string to lowercase
-     * @param str
-     */
-    static void to_lower(std::string& str);
-
-    /**
-     * Cleans wildcard search string
-     * <ul>
-     *   <li>Removes consecutive '*'</li>
-     *   <li>Removes escaping from non-wildcard characters</li>
-     *   <li>Removes dangling escape character from the end of the string</li>
-     * </ul>
-     * @param str Wildcard search string to clean
-     * @return Cleaned wildcard search string
-     */
-    static std::string clean_up_wildcard_search_string(std::string_view str);
-
-    /**
-     * Checks if character is a wildcard
-     * @param c
-     * @return true if c is a wildcard, false otherwise
-     */
-    static bool is_wildcard(char c);
-
     /**
      * Checks if the given string has unescaped wildcards
      * @param str
@@ -155,50 +71,6 @@ class StringUtils {
      */
     static bool has_unescaped_wildcards(std::string const& str);
 
-    /**
-     * Same as ``wildcard_match_unsafe_case_sensitive`` except this method
-     * allows the caller to specify whether the match should be case sensitive.
-     *
-     * @param tame The literal string
-     * @param wild The wildcard string
-     * @param case_sensitive_match Whether to consider case when matching
-     * @return Whether the two strings match
-     */
-    static bool wildcard_match_unsafe(
-            std::string_view tame,
-            std::string_view wild,
-            bool case_sensitive_match = true
-    );
-
-    /**
-     * Checks if a string matches a wildcard string. Two wildcards are currently
-     * supported: '*' to match 0 or more characters, and '?' to match any single
-     * character. Each can be escaped using a preceding '\'. Other characters which
-     * are escaped are treated as normal characters.
-     * <br/>
-     * This method is optimized for performance by omitting some checks on the
-     * wildcard string that are unnecessary if the caller cleans up the wildcard
-     * string as follows:
-     * <ul>
-     *   <li>The wildcard string should not contain consecutive '*'.</li>
-     *   <li>The wildcard string should not contain an escape character without a
-     *   character following it.</li>
-     * </ul>
-     *
-     * @param tame The literal string
-     * @param wild The wildcard string
-     * @return Whether the two strings match
-     */
-    static bool wildcard_match_unsafe_case_sensitive(std::string_view tame, std::string_view wild);
-
-    /**
-     * Converts the given string to a 64-bit integer if possible
-     * @param raw
-     * @param converted
-     * @return true if the conversion was successful, false otherwise
-     */
-    static bool convert_string_to_int64(std::string_view raw, int64_t& converted);
-
     /**
      * Converts the given string to a double if possible
      * @param raw
@@ -207,15 +79,6 @@ class StringUtils {
      */
     static bool convert_string_to_double(std::string const& raw, double& converted);
 
-    /**
-     * Converts a string column descriptor delimited by '.' into a list of tokens
-     * @param descriptor
-     * @param tokens
-     * @return true if the descriptor was tokenized successfully, false otherwise
-     */
-    [[nodiscard]] static bool
-    tokenize_column_descriptor(std::string const& descriptor, std::vector<std::string>& tokens);
-
 private:
     /**
      * Helper for ``wildcard_match_unsafe_case_sensitive`` to advance the
diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp
index a74693e33..377e0fc80 100644
--- a/components/core/src/clp_s/clp-s.cpp
+++ b/components/core/src/clp_s/clp-s.cpp
@@ -6,6 +6,7 @@
 #include <string>
 #include <utility>
 
+#include <clp/string_utils/string_utils.hpp>
 #include <json/single_include/nlohmann/json.hpp>
 #include <mongocxx/instance.hpp>
 #include <spdlog/sinks/stdout_sinks.h>
@@ -33,14 +34,13 @@
 #include "search/SchemaMatch.hpp"
 #include "TimestampPattern.hpp"
 #include "TraceableException.hpp"
-#include "Utils.hpp"
 
 using namespace clp_s::search;
+using clp::string_utils::tokenize_column_descriptor;
 using clp_s::cArchiveFormatDevelopmentVersionFlag;
 using clp_s::cEpochTimeMax;
 using clp_s::cEpochTimeMin;
 using clp_s::CommandLineArguments;
-using clp_s::StringUtils;
 
 namespace {
 /**
@@ -192,7 +192,7 @@ bool search_archive(
     try {
         for (auto const& column : command_line_arguments.get_projection_columns()) {
             std::vector<std::string> descriptor_tokens;
-            if (false == StringUtils::tokenize_column_descriptor(column, descriptor_tokens)) {
+            if (false == tokenize_column_descriptor(column, descriptor_tokens)) {
                 SPDLOG_ERROR("Can not tokenize invalid column: \"{}\"", column);
                 return false;
             }
diff --git a/components/core/src/clp_s/search/Output.cpp b/components/core/src/clp_s/search/Output.cpp
index c9954779b..ef2b6eae8 100644
--- a/components/core/src/clp_s/search/Output.cpp
+++ b/components/core/src/clp_s/search/Output.cpp
@@ -3,6 +3,8 @@
 #include <memory>
 #include <vector>
 
+#include <clp/string_utils/string_utils.hpp>
+
 #include "../../clp/type_utils.hpp"
 #include "../Utils.hpp"
 #include "AndExpr.hpp"
@@ -17,6 +19,8 @@
 #define eval(op, a, b) (((op) == FilterOperation::EQ) ? ((a) == (b)) : ((a) != (b)))
 
 namespace clp_s::search {
+using clp::string_utils::wildcard_match_unsafe;
+
 bool Output::filter() {
     auto top_level_expr = m_expr;
 
@@ -484,7 +488,7 @@ bool Output::evaluate_clp_string_filter(
             for (auto const& subquery : q->get_sub_queries()) {
                 if (subquery.matches_logtype(id) && subquery.matches_vars(vars)) {
                     if (subquery.wildcard_match_required()) {
-                        matched = StringUtils::wildcard_match_unsafe(
+                        matched = wildcard_match_unsafe(
                                 std::get<std::string>(reader->extract_value(m_cur_message)),
                                 q->get_search_string(),
                                 !q->get_ignore_case()
@@ -496,7 +500,7 @@ bool Output::evaluate_clp_string_filter(
                 }
             }
         } else {
-            matched = StringUtils::wildcard_match_unsafe(
+            matched = wildcard_match_unsafe(
                     std::get<std::string>(reader->extract_value(m_cur_message)),
                     q->get_search_string(),
                     !q->get_ignore_case()
diff --git a/components/core/src/clp_s/search/clp_search/Grep.cpp b/components/core/src/clp_s/search/clp_search/Grep.cpp
index 40ffb456f..f2b39c300 100644
--- a/components/core/src/clp_s/search/clp_search/Grep.cpp
+++ b/components/core/src/clp_s/search/clp_search/Grep.cpp
@@ -6,14 +6,21 @@
 #include <string>
 #include <utility>
 
-#include "../../Utils.hpp"
+#include <clp/string_utils/string_utils.hpp>
+
 #include "../../VariableEncoder.hpp"
 #include "EncodedVariableInterpreter.hpp"
 
+namespace clp_s::search::clp_search {
+using clp::string_utils::clean_up_wildcard_search_string;
+using clp::string_utils::is_alphabet;
+using clp::string_utils::is_decimal_digit;
+using clp::string_utils::is_delim;
+using clp::string_utils::is_wildcard;
+using clp::string_utils::wildcard_match_unsafe;
 using std::string;
 using std::vector;
 
-namespace clp_s::search::clp_search {
 // Local types
 enum class SubQueryMatchabilityResult {
     MayMatch,  // The subquery might match a message
@@ -422,13 +429,13 @@ std::optional<Query> Grep::process_raw_query(
     }
 
     // Clean-up search string
-    processed_search_string = StringUtils::clean_up_wildcard_search_string(processed_search_string);
+    processed_search_string = clean_up_wildcard_search_string(processed_search_string);
 
     // Replace non-greedy wildcards with greedy wildcards since we currently have no support for
     // searching compressed files with non-greedy wildcards
     std::replace(processed_search_string.begin(), processed_search_string.end(), '?', '*');
     // Clean-up in case any instances of "?*" or "*?" were changed into "**"
-    processed_search_string = StringUtils::clean_up_wildcard_search_string(processed_search_string);
+    processed_search_string = clean_up_wildcard_search_string(processed_search_string);
 
     // Split search_string into tokens with wildcards
     vector<QueryToken> query_tokens;
@@ -528,7 +535,7 @@ bool Grep::get_bounds_of_next_potential_var(
             if (is_escaped) {
                 is_escaped = false;
 
-                if (StringUtils::is_delim(c)) {
+                if (is_delim(c)) {
                     // Found escaped non-delimiter, so reverse the index to retain the escape
                     // character
                     --begin_pos;
@@ -538,11 +545,11 @@ bool Grep::get_bounds_of_next_potential_var(
                 // Escape character
                 is_escaped = true;
             } else {
-                if (StringUtils::is_wildcard(c)) {
+                if (is_wildcard(c)) {
                     contains_wildcard = true;
                     break;
                 }
-                if (false == StringUtils::is_delim(c)) {
+                if (false == is_delim(c)) {
                     break;
                 }
             }
@@ -560,7 +567,7 @@ bool Grep::get_bounds_of_next_potential_var(
             if (is_escaped) {
                 is_escaped = false;
 
-                if (StringUtils::is_delim(c)) {
+                if (is_delim(c)) {
                     // Found escaped delimiter, so reverse the index to retain the escape
                     // character
                     --end_pos;
@@ -570,17 +577,17 @@ bool Grep::get_bounds_of_next_potential_var(
                 // Escape character
                 is_escaped = true;
             } else {
-                if (StringUtils::is_wildcard(c)) {
+                if (is_wildcard(c)) {
                     contains_wildcard = true;
-                } else if (StringUtils::is_delim(c)) {
+                } else if (is_delim(c)) {
                     // Found delimiter that's not also a wildcard
                     break;
                 }
             }
 
-            if (StringUtils::is_decimal_digit(c)) {
+            if (is_decimal_digit(c)) {
                 contains_decimal_digit = true;
-            } else if (StringUtils::is_alphabet(c)) {
+            } else if (is_alphabet(c)) {
                 contains_alphabet = true;
             }
         }
@@ -604,13 +611,13 @@ bool Grep::get_bounds_of_next_potential_var(
                 if (is_escaped) {
                     is_escaped = false;
 
-                    if (StringUtils::is_alphabet(c)) {
+                    if (is_alphabet(c)) {
                         break;
                     }
                 } else if ('\\' == c) {
                     // Escape character
                     is_escaped = true;
-                } else if (StringUtils::is_wildcard(c)) {
+                } else if (is_wildcard(c)) {
                     found_wildcard_before_alphabet = true;
                     break;
                 }
diff --git a/components/core/src/clp_s/search/kql/CMakeLists.txt b/components/core/src/clp_s/search/kql/CMakeLists.txt
index ee36ee124..a8593bd37 100644
--- a/components/core/src/clp_s/search/kql/CMakeLists.txt
+++ b/components/core/src/clp_s/search/kql/CMakeLists.txt
@@ -7,7 +7,6 @@ ANTLR_TARGET(
 
 add_library(
         kql
-        ../../Utils.hpp
         ../AndExpr.hpp
         ../BooleanLiteral.hpp
         ../ColumnDescriptor.hpp
@@ -25,4 +24,9 @@ add_library(
 )
 target_compile_features(kql PRIVATE cxx_std_20)
 target_include_directories(kql PRIVATE ${ANTLR_KqlParser_OUTPUT_DIR})
-target_link_libraries(kql PRIVATE antlr4_static Boost::filesystem)
+target_link_libraries(kql
+    PRIVATE
+    antlr4_static
+    Boost::filesystem
+    clp::string_utils
+)
diff --git a/components/core/src/clp_s/search/kql/kql.cpp b/components/core/src/clp_s/search/kql/kql.cpp
index 972e44ad7..312dad9c7 100644
--- a/components/core/src/clp_s/search/kql/kql.cpp
+++ b/components/core/src/clp_s/search/kql/kql.cpp
@@ -3,6 +3,7 @@
 #include <vector>
 
 #include <antlr4-runtime.h>
+#include <clp/string_utils/string_utils.hpp>
 #include <spdlog/spdlog.h>
 
 #include "KqlBaseVisitor.h"
@@ -11,7 +12,6 @@
 // If redlining may want to add ${workspaceFolder}/build/**
 // to include path for vscode C/C++ utils
 
-#include "../../Utils.hpp"
 #include "../AndExpr.hpp"
 #include "../BooleanLiteral.hpp"
 #include "../ColumnDescriptor.hpp"
@@ -27,6 +27,9 @@ using namespace antlr4;
 using namespace kql;
 
 namespace clp_s::search::kql {
+using clp::string_utils::clean_up_wildcard_search_string;
+using clp::string_utils::tokenize_column_descriptor;
+
 class ErrorListener : public BaseErrorListener {
 public:
     void syntaxError(
@@ -92,7 +95,7 @@ class ParseTreeVisitor : public KqlBaseVisitor {
         } else if (auto ret = NullLiteral::create_from_string(token)) {
             return ret;
         } else {
-            return StringLiteral::create(StringUtils::clean_up_wildcard_search_string(token));
+            return StringLiteral::create(clean_up_wildcard_search_string(token));
         }
     }
 
@@ -112,7 +115,7 @@ class ParseTreeVisitor : public KqlBaseVisitor {
         std::string column = unquote_string(ctx->LITERAL()->getText());
 
         std::vector<std::string> descriptor_tokens;
-        if (false == StringUtils::tokenize_column_descriptor(column, descriptor_tokens)) {
+        if (false == tokenize_column_descriptor(column, descriptor_tokens)) {
             SPDLOG_ERROR("Can not tokenize invalid column: \"{}\"", column);
             return nullptr;
         }
diff --git a/components/core/submodules/clp-cpp b/components/core/submodules/clp-cpp
index 852b07533..e39f7688f 160000
--- a/components/core/submodules/clp-cpp
+++ b/components/core/submodules/clp-cpp
@@ -1 +1 @@
-Subproject commit 852b075335ae5fcfb4e2843798b16cd2e1209a26
+Subproject commit e39f7688fc3798e87bd3372995c04a00b4508ed0