Skip to content

Commit

Permalink
Merge branch 'remove-redundant-typenames' into remove-regex-prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
SharafMohamed committed Dec 8, 2024
2 parents 43aa3be + 0e2d593 commit cf5980d
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 67 deletions.
2 changes: 1 addition & 1 deletion examples/intersect-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ auto get_intersect_for_query(
}
Nfa<NfaByteState> nfa(std::move(rules));
auto dfa2 = ByteLexer::nfa_to_dfa(nfa);
auto schema_types = dfa1->get_intersect(dfa2);
auto schema_types = dfa1->get_intersect(dfa2.get());
std::cout << search_string << ":";
for (auto const& schema_type : schema_types) {
std::cout << m_id_symbol[schema_type] << ",";
Expand Down
1 change: 1 addition & 0 deletions src/log_surgeon/Lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/Dfa.hpp>
#include <log_surgeon/finite_automata/DfaState.hpp>
#include <log_surgeon/finite_automata/Nfa.hpp>
#include <log_surgeon/finite_automata/RegexAST.hpp>
#include <log_surgeon/LexicalRule.hpp>
Expand Down
26 changes: 11 additions & 15 deletions src/log_surgeon/finite_automata/Dfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,24 @@ template <typename DfaStateType>
class Dfa {
public:
/**
* Creates a new DFA state based on a set of NFA states and adds it to
* m_states
* @param nfa_state_set
* @return DfaStateType*
* Creates a new DFA state based on a set of NFA states and adds it to `m_states`.
* @param nfa_state_set The set of NFA states represented by this DFA state.
* @return A pointer to the new DFA state.
*/
template <typename NfaStateType>
auto new_state(std::set<NfaStateType*> const& nfa_state_set) -> DfaStateType*;

auto get_root() const -> DfaStateType const* { return m_states.at(0).get(); }

/**
* Compares this dfa with dfa_in to determine the set of schema types in
* this dfa that are reachable by any type in dfa_in. A type is considered
* reachable if there is at least one string for which: (1) this dfa returns
* a set of types containing the type, and (2) dfa_in returns any non-empty
* set of types.
* @param dfa_in
* @return The set of schema types reachable by dfa_in
* Compares this dfa with `dfa_in` to determine the set of schema types in this dfa that are
* reachable by any type in `dfa_in`. A type is considered reachable if there is at least one
* string for which: (1) this dfa returns a set of types containing the type, and (2) `dfa_in`
* returns any non-empty set of types.
* @param dfa_in The dfa with which to take the intersect.
* @return The set of schema types reachable by `dfa_in`.
*/
[[nodiscard]] auto get_intersect(std::unique_ptr<Dfa> const& dfa_in
) const -> std::set<uint32_t>;
[[nodiscard]] auto get_intersect(Dfa const* dfa_in) const -> std::set<uint32_t>;

private:
std::vector<std::unique_ptr<DfaStateType>> m_states;
Expand All @@ -53,8 +50,7 @@ auto Dfa<DfaStateType>::new_state(std::set<NfaStateType*> const& nfa_state_set)
}

template <typename DfaStateType>
auto Dfa<DfaStateType>::get_intersect(std::unique_ptr<Dfa> const& dfa_in
) const -> std::set<uint32_t> {
auto Dfa<DfaStateType>::get_intersect(Dfa const* dfa_in) const -> std::set<uint32_t> {
std::set<uint32_t> schema_types;
std::set<DfaStatePair<DfaStateType>> unvisited_pairs;
std::set<DfaStatePair<DfaStateType>> visited_pairs;
Expand Down
31 changes: 19 additions & 12 deletions src/log_surgeon/finite_automata/DfaState.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
#include <cstdint>
#include <memory>
#include <tuple>
#include <type_traits>
#include <vector>

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/DfaStateType.hpp>
#include <log_surgeon/finite_automata/UnicodeIntervalTree.hpp>

Expand All @@ -17,11 +19,15 @@ class DfaState;
using DfaByteState = DfaState<DfaStateType::Byte>;
using DfaUtf8State = DfaState<DfaStateType::Utf8>;

template <DfaStateType stateType>
template <DfaStateType state_type>
class DfaState {
public:
using Tree = UnicodeIntervalTree<DfaState*>;

DfaState() {
std::fill(std::begin(m_bytes_transition), std::end(m_bytes_transition), nullptr);
}

auto add_matching_variable_id(uint32_t const variable_id) -> void {
m_matching_variable_ids.push_back(variable_id);
}
Expand All @@ -30,30 +36,31 @@ class DfaState {
return m_matching_variable_ids;
}

[[nodiscard]] auto is_accepting() const -> bool { return !m_matching_variable_ids.empty(); }
[[nodiscard]] auto is_accepting() const -> bool {
return false == m_matching_variable_ids.empty();
}

auto add_byte_transition(uint8_t const& byte, DfaState* dest_state) -> void {
m_bytes_transition[byte] = dest_state;
}

/**
* Returns the next state the DFA transitions to on input character (byte or utf8).
* @param character
* @return DfaState<stateType>*
* @param character The character (byte or utf8) to transition on.
* @return A pointer to the DFA state reached after transitioning on `character`.
*/
[[nodiscard]] auto next(uint32_t character) const -> DfaState*;

private:
std::vector<uint32_t> m_matching_variable_ids;
DfaState* m_bytes_transition[cSizeOfByte];
// NOTE: We don't need m_tree_transitions for the `stateType == DfaStateType::Byte` case,
// so we use an empty class (`std::tuple<>`) in that case.
std::conditional_t<stateType == DfaStateType::Utf8, Tree, std::tuple<>> m_tree_transitions;
// NOTE: We don't need m_tree_transitions for the `state_type == DfaStateType::Byte` case, so we
// use an empty class (`std::tuple<>`) in that case.
std::conditional_t<state_type == DfaStateType::Utf8, Tree, std::tuple<>> m_tree_transitions;
};

template <DfaStateType stateType>
auto DfaState<stateType>::next(uint32_t character) const -> DfaState* {
if constexpr (DfaStateType::Byte == stateType) {
template <DfaStateType state_type>
auto DfaState<state_type>::next(uint32_t character) const -> DfaState* {
if constexpr (DfaStateType::Byte == state_type) {
return m_bytes_transition[character];
} else {
if (character < cSizeOfByte) {
Expand All @@ -62,7 +69,7 @@ auto DfaState<stateType>::next(uint32_t character) const -> DfaState* {
std::unique_ptr<std::vector<typename Tree::Data>> result
= m_tree_transitions.find(Interval(character, character));
assert(result->size() <= 1);
if (!result->empty()) {
if (false == result->empty()) {
return result->front().m_value;
}
return nullptr;
Expand Down
3 changes: 2 additions & 1 deletion src/log_surgeon/finite_automata/DfaStatePair.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#ifndef LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE_PAIR
#define LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE_PAIR

#include <cstdint>
#include <set>
#include <vector>

#include <log_surgeon/finite_automata/DfaState.hpp>
#include <log_surgeon/Constants.hpp>

namespace log_surgeon::finite_automata {
/**
Expand Down
5 changes: 5 additions & 0 deletions src/log_surgeon/finite_automata/PrefixTree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ class PrefixTree {

[[nodiscard]] auto is_root() const -> bool { return false == m_parent_id.has_value(); }

/**
* Gets the parent ID without checking if it's `std::nullopt`.
* NOTE: This method should only be used if the caller has checked the node is not the root.
* @return The ID of the parent node in the prefix tree.
*/
[[nodiscard]] auto get_parent_id_unsafe() const -> id_t {
// NOLINTNEXTLINE(bugprone-unchecked-optional-access)
return m_parent_id.value();
Expand Down
63 changes: 33 additions & 30 deletions tests/test-prefix-tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,11 @@ using log_surgeon::finite_automata::PrefixTree;
using id_t = PrefixTree::id_t;
using position_t = PrefixTree::position_t;

constexpr auto cRootId{PrefixTree::cRootId};
constexpr id_t cInvalidNodeId{100};
constexpr position_t cInsertPos1{4};
constexpr position_t cInsertPos2{7};
constexpr position_t cInsertPos3{9};
constexpr position_t cMaxPos{std::numeric_limits<position_t>::max()};
constexpr position_t cNegativePos1{-1};
constexpr position_t cNegativePos2{-100};
constexpr position_t cSetPos1{10};
constexpr position_t cSetPos2{12};
constexpr position_t cSetPos3{15};
constexpr position_t cSetPos4{20};
constexpr position_t cTreeSize1{4};
constexpr position_t cTreeSize2{8};

TEST_CASE("`PrefixTree` operations", "[PrefixTree]") {
constexpr auto cRootId{PrefixTree::cRootId};
constexpr position_t cInitialPos1{4};
constexpr position_t cSetPos1{10};

SECTION("Newly constructed tree works correctly") {
PrefixTree const tree;

Expand All @@ -34,16 +23,24 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") {
}

SECTION("Inserting nodes into the prefix tree works correctly") {
constexpr position_t cInitialPos2{7};
constexpr position_t cInitialPos3{9};
constexpr position_t cMaxPos{std::numeric_limits<position_t>::max()};
constexpr position_t cNegativePos1{-1};
constexpr position_t cNegativePos2{-100};
constexpr position_t cTreeSize1{4};
constexpr position_t cTreeSize2{8};

PrefixTree tree;

// Test basic insertions
auto const node_id_1{tree.insert(cRootId, cInsertPos1)};
auto const node_id_2{tree.insert(node_id_1, cInsertPos2)};
auto const node_id_3{tree.insert(node_id_2, cInsertPos3)};
REQUIRE(std::vector<position_t>{cInsertPos1} == tree.get_reversed_positions(node_id_1));
REQUIRE(std::vector<position_t>{cInsertPos2, cInsertPos1}
auto const node_id_1{tree.insert(cRootId, cInitialPos1)};
auto const node_id_2{tree.insert(node_id_1, cInitialPos2)};
auto const node_id_3{tree.insert(node_id_2, cInitialPos3)};
REQUIRE(std::vector<position_t>{cInitialPos1} == tree.get_reversed_positions(node_id_1));
REQUIRE(std::vector<position_t>{cInitialPos2, cInitialPos1}
== tree.get_reversed_positions(node_id_2));
REQUIRE(std::vector<position_t>{cInsertPos3, cInsertPos2, cInsertPos1}
REQUIRE(std::vector<position_t>{cInitialPos3, cInitialPos2, cInitialPos1}
== tree.get_reversed_positions(node_id_3));
REQUIRE(cTreeSize1 == tree.size());

Expand All @@ -53,12 +50,12 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") {

// Test insertion with negative position values
auto const node_id_5{tree.insert(cRootId, cNegativePos1)};
auto const node_id_6{tree.insert(node_id_5, cInsertPos1)};
auto const node_id_6{tree.insert(node_id_5, cInitialPos1)};
auto const node_id_7{tree.insert(node_id_6, cNegativePos2)};
REQUIRE(std::vector<position_t>{cNegativePos1} == tree.get_reversed_positions(node_id_5));
REQUIRE(std::vector<position_t>{cInsertPos1, cNegativePos1}
REQUIRE(std::vector<position_t>{cInitialPos1, cNegativePos1}
== tree.get_reversed_positions(node_id_6));
REQUIRE(std::vector<position_t>{cNegativePos2, cInsertPos1, cNegativePos1}
REQUIRE(std::vector<position_t>{cNegativePos2, cInitialPos1, cNegativePos1}
== tree.get_reversed_positions(node_id_7));
REQUIRE(cTreeSize2 == tree.size());
}
Expand All @@ -67,7 +64,7 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") {
PrefixTree tree;
REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range);

tree.insert(cRootId, cInsertPos1);
tree.insert(cRootId, cInitialPos1);
REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range);

REQUIRE_THROWS_AS(
Expand All @@ -77,13 +74,17 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") {
}

SECTION("Set position for a valid index works correctly") {
constexpr position_t cSetPos2{12};
constexpr position_t cSetPos3{15};
constexpr position_t cSetPos4{20};

PrefixTree tree;
// Test that you can set the root node for sanity, although this value is not used
tree.set(cRootId, cSetPos1);

// Test updates to different nodes
auto const node_id_1{tree.insert(cRootId, cInsertPos1)};
auto const node_id_2{tree.insert(node_id_1, cInsertPos1)};
auto const node_id_1{tree.insert(cRootId, cInitialPos1)};
auto const node_id_2{tree.insert(node_id_1, cInitialPos1)};
tree.set(node_id_1, cSetPos1);
tree.set(node_id_2, cSetPos2);
REQUIRE(std::vector<position_t>{cSetPos1} == tree.get_reversed_positions(node_id_1));
Expand All @@ -105,13 +106,15 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") {
}

SECTION("Set position for an invalid index throws correctly") {
constexpr id_t cInvalidNodeId{100};

PrefixTree tree;

// Test setting position before any insertions
REQUIRE_THROWS_AS(tree.set(cInvalidNodeId, cSetPos4), std::out_of_range);
REQUIRE_THROWS_AS(tree.set(cInvalidNodeId, cSetPos1), std::out_of_range);

// Test setting position just beyond valid range
auto const node_id_1{tree.insert(cRootId, cInsertPos1)};
REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, cSetPos4), std::out_of_range);
auto const node_id_1{tree.insert(cRootId, cInitialPos1)};
REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, cSetPos1), std::out_of_range);
}
}
17 changes: 9 additions & 8 deletions tests/test-register-handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,19 @@ using position_t = log_surgeon::finite_automata::PrefixTree::position_t;

namespace {
/**
* @param handler The register handler that will contain the new registers.
* @param num_registers The number of registers to initialize.
* @param num_registers The number of registers managed by the handler.
* @return The newly initialized register handler.
*/
auto registers_init(RegisterHandler& handler, size_t num_registers) -> void;
[[nodiscard]] auto handler_init(size_t num_registers) -> RegisterHandler;

auto registers_init(RegisterHandler& handler, size_t const num_registers) -> void {
auto handler_init(size_t const num_registers) -> RegisterHandler {
constexpr position_t cDefaultPos{0};

RegisterHandler handler;
for (size_t i{0}; i < num_registers; ++i) {
handler.add_register(i, cDefaultPos);
}
return handler;
}
} // namespace

Expand All @@ -32,13 +34,12 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") {
constexpr size_t cRegId1{0};
constexpr size_t cRegId2{1};

RegisterHandler handler;

SECTION("Initial state is empty") {
REQUIRE_THROWS_AS(handler.get_reversed_positions(cRegId1), std::out_of_range);
RegisterHandler empty_handler{handler_init(0)};
REQUIRE_THROWS_AS(empty_handler.get_reversed_positions(cRegId1), std::out_of_range);
}

registers_init(handler, cNumRegisters);
RegisterHandler handler{handler_init(cNumRegisters)};

SECTION("Set register position correctly") {
handler.set_register(cRegId1, cInitialPos1);
Expand Down

0 comments on commit cf5980d

Please sign in to comment.