Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Bingran Hu committed Jul 18, 2024
1 parent f640e28 commit 876e9f9
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 103 deletions.
10 changes: 5 additions & 5 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ set(sqlite_DYNAMIC_LIBS "dl;m;pthread")
include(cmake/Modules/FindLibraryDependencies.cmake)
FindDynamicLibraryDependencies(sqlite "${sqlite_DYNAMIC_LIBS}")

add_subdirectory(src/clp/string_utils)
add_subdirectory(src/clp/regex_utils)
add_subdirectory(src/clp/string_utils)

add_subdirectory(src/clp/clg)
add_subdirectory(src/clp/clo)
Expand Down Expand Up @@ -305,11 +305,11 @@ set(SOURCE_FILES_unitTest
src/clp/ffi/ir_stream/decoding_methods.inc
src/clp/ffi/ir_stream/encoding_methods.cpp
src/clp/ffi/ir_stream/encoding_methods.hpp
src/clp/ffi/ir_stream/protocol_constants.hpp
src/clp/ffi/ir_stream/Serializer.cpp
src/clp/ffi/ir_stream/Serializer.hpp
src/clp/ffi/ir_stream/utils.cpp
src/clp/ffi/ir_stream/utils.hpp
src/clp/ffi/ir_stream/protocol_constants.hpp
src/clp/ffi/SchemaTree.cpp
src/clp/ffi/SchemaTree.hpp
src/clp/ffi/SchemaTreeNode.hpp
Expand Down Expand Up @@ -436,10 +436,10 @@ set(SOURCE_FILES_unitTest
src/clp/StringReader.hpp
src/clp/Thread.cpp
src/clp/Thread.hpp
src/clp/time_types.hpp
src/clp/TimestampPattern.cpp
src/clp/TimestampPattern.hpp
src/clp/TraceableException.hpp
src/clp/time_types.hpp
src/clp/type_utils.hpp
src/clp/utf8_utils.cpp
src/clp/utf8_utils.hpp
Expand Down Expand Up @@ -471,12 +471,12 @@ set(SOURCE_FILES_unitTest
tests/test-NetworkReader.cpp
tests/test-ParserWithUserSchema.cpp
tests/test-query_methods.cpp
tests/test-regex_utils.cpp
tests/test-Segment.cpp
tests/test-SQLiteDB.cpp
tests/test-Stopwatch.cpp
tests/test-StreamingCompression.cpp
tests/test-string_utils.cpp
tests/test-regex_utils.cpp
tests/test-TimestampPattern.cpp
tests/test-utf8_utils.cpp
tests/test-Utils.cpp
Expand All @@ -499,8 +499,8 @@ target_link_libraries(unitTest
spdlog::spdlog
${sqlite_LIBRARY_DEPENDENCIES}
${STD_FS_LIBS}
clp::string_utils
clp::regex_utils
clp::string_utils
yaml-cpp::yaml-cpp
ZStd::ZStd
)
Expand Down
9 changes: 4 additions & 5 deletions components/core/src/clp/regex_utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
set(
REGEX_UTILS_HEADER_LIST
"ErrorCode.hpp"
"RegexToWildcardTranslatorConfig.hpp"
"constants.hpp"
"ErrorCode.hpp"
"regex_translation_utils.hpp"
"RegexToWildcardTranslatorConfig.hpp"
)
add_library(
regex_utils
regex_translation_utils.cpp
ErrorCode.cpp
regex_translation_utils.cpp
${REGEX_UTILS_HEADER_LIST}
)
add_library(clp::regex_utils ALIAS regex_utils)
target_include_directories(regex_utils
PUBLIC
../
PRIVATE
../
"${PROJECT_SOURCE_DIR}/submodules"
)
target_compile_features(regex_utils PRIVATE cxx_std_20)
13 changes: 6 additions & 7 deletions components/core/src/clp/regex_utils/ErrorCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,22 @@ using std::string;
using std::string_view;

namespace clp::regex_utils {

namespace {
/**
* Class for giving the error codes more detailed string descriptions.
* This class does not need to be seen outside the std error code wrapper implementation.
*/
class ErrorCodeCategory : public error_category {
public:
/**
* @return The class of errors.
*/
[[nodiscard]] char const* name() const noexcept override;
[[nodiscard]] auto name() const noexcept -> char const* override;

/**
* @param ev The error code encoded as an int.
* @return The descriptive message for the error.
*/
[[nodiscard]] string message(int ev) const override;
[[nodiscard]] auto message(int ev) const -> string override;
};

auto ErrorCodeCategory::name() const noexcept -> char const* {
Expand Down Expand Up @@ -69,10 +68,10 @@ auto ErrorCodeCategory::message(int ev) const -> string {
}
}

ErrorCodeCategory const cTheErrorCodeCategory{};
ErrorCodeCategory const cErrorCodeCategoryInstance;
} // namespace

auto make_error_code(ErrorCode e) -> error_code {
return {static_cast<int>(e), cTheErrorCodeCategory};
return {static_cast<int>(e), cErrorCodeCategoryInstance};
}

} // namespace clp::regex_utils
2 changes: 0 additions & 2 deletions components/core/src/clp/regex_utils/ErrorCode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <type_traits>

namespace clp::regex_utils {

/**
* Enum class for propagating and handling various regex utility errors.
* More detailed descriptions can be found in ErrorCode.cpp.
Expand Down Expand Up @@ -35,7 +34,6 @@ enum class ErrorCode : uint8_t {
* @return The corresponding std::error_code type variable.
*/
[[nodiscard]] auto make_error_code(ErrorCode ec) -> std::error_code;

} // namespace clp::regex_utils

namespace std {
Expand Down
46 changes: 46 additions & 0 deletions components/core/src/clp/regex_utils/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Regex_utils

This library contains useful utilities to handle all regex-related tasks.

## Regex to Wildcard Translator

### Goal

Performs a best-effort translation of a regex string into an equivalent wildcard string.

CLP currently only recognizes three metacharacters in the wildcard syntax:

* `?` Matches any single character
* `*` Matches zero or more characters
* `\` Suppresses the special meaning of metacharacters (including itself)

If a regex query can be expressed as a wildcard query using only the three metacharacters above,
CLP should use the wildcard version.

### Includes

* To use the translator:

```cpp
#include <regex_utils/regex_translation_utils.hpp>
```

* To add custom configuration to the translator:

```cpp
#include <regex_utils/RegexToWildcardTranslatorConfig.hpp>
```

### Functionalities

* Wildcards
- Turn `.` into `?`
- Turn `.*` into `*`
- Turn `.+` into `?*`

### Custom configuration

* `add_prefix_suffix_wildcards`: in the absence of regex anchors, add prefix or suffix wildcards so
the query becomes a substring query.
- E.g. `info.*system` gets translated into `*info*system*` which makes the original query a
substring query.
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,35 @@
#define CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP

namespace clp::regex_utils {

/**
* Allows users to customize and fine tune how to translate a regex string to wildcard.
*
* This class won't affect the core logic and state transition mechanics of the regex to wildcard
* translator, but it can make the translator more versatile. For detailed descriptions of how each
* option should be used, see the getter function docstrings.
*/
class RegexToWildcardTranslatorConfig {
public:
// Constructors
RegexToWildcardTranslatorConfig(
bool case_insensitive_wildcard,
bool add_prefix_suffix_wildcards
)
: m_case_insensitive_wildcard{case_insensitive_wildcard},
m_add_prefix_suffix_wildcards{add_prefix_suffix_wildcards} {};

// Getters

/**
* @return True if the final translated wildcard string will be fed into
* a case-insensitive wildcard analyzer. In such cases, we can
* safely translate charset patterns such as [aA] [Bb] into singular
* lowercase characters a, b.
* @return True if the final translated wildcard string will be fed into a case-insensitive
* wildcard analyzer. In such cases, we can safely translate charset patterns such as [aA] [Bb]
* into singular lowercase characters a, b.
*/
[[nodiscard]] auto case_insensitive_wildcard() const -> bool {
return m_case_insensitive_wildcard;
}

/**
* @return True if in the absence of starting or ending anchors in the
* regex string, we append prefix or suffix zero or more characters
* wildcards. In other words, this config is true if the search
* is a substring search, and false if the search is an exact search.
* @return True if, in the absence of starting or ending anchors in the regex string, we add
* prefix or suffix zero-or-more-characters wildcards. In other words, this config is true if
* the search is a substring search, and false if the search is an exact search.
*/
[[nodiscard]] auto add_prefix_suffix_wildcards() const -> bool {
return m_add_prefix_suffix_wildcards;
Expand All @@ -40,7 +41,6 @@ class RegexToWildcardTranslatorConfig {
bool m_case_insensitive_wildcard;
bool m_add_prefix_suffix_wildcards;
};

} // namespace clp::regex_utils

#endif // CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP
2 changes: 0 additions & 2 deletions components/core/src/clp/regex_utils/constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#define CLP_REGEX_UTILS_CONSTANTS_HPP

namespace clp::regex_utils {

// Wildcard meta characters
constexpr char cZeroOrMoreCharsWildcard{'*'};
constexpr char cSingleCharWildcard{'?'};
Expand All @@ -15,7 +14,6 @@ constexpr char cRegexStartAnchor{'^'};
constexpr char cRegexEndAnchor{'$'};
constexpr char cEscapeChar{'\\'};
constexpr char cCharsetNegate{'^'};

} // namespace clp::regex_utils

#endif // CLP_REGEX_UTILS_CONSTANTS_HPP
Loading

0 comments on commit 876e9f9

Please sign in to comment.