Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix transducer operations #405

Merged
merged 6 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions include/mata/alphabet.hh
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,10 @@ public:

std::string reverse_translate_symbol(Symbol symbol) const override;

private:
EnumAlphabet& operator=(const EnumAlphabet& rhs);

public:
EnumAlphabet& operator=(const EnumAlphabet& rhs) = default;
EnumAlphabet& operator=(EnumAlphabet&& rhs) = default;

/**
* @brief Expand alphabet by symbols from the passed @p symbols.
*
Expand Down Expand Up @@ -297,10 +297,11 @@ public:

std::string reverse_translate_symbol(Symbol symbol) const override;

private:
OnTheFlyAlphabet& operator=(const OnTheFlyAlphabet& rhs);

public:
OnTheFlyAlphabet& operator=(const OnTheFlyAlphabet& rhs) = default;
OnTheFlyAlphabet& operator=(OnTheFlyAlphabet&& rhs) = default;

/**
* @brief Expand alphabet by symbols from the passed @p symbol_names.
*
Expand Down
7 changes: 5 additions & 2 deletions include/mata/nft/builder.hh
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,19 @@ Nft create_empty_string_nft();

/**
* Create automaton accepting sigma star over the passed alphabet using DONT_CARE symbol.
*
* @param[in] num_of_levels Number of levels in the created NFT.
*/
Nft create_sigma_star_nft();
Nft create_sigma_star_nft(size_t num_of_levels = DEFAULT_NUM_OF_LEVELS);

/**
* Create automaton accepting sigma star over the passed alphabet.
*
* @param[in] alphabet Alphabet to construct sigma star automaton with. When alphabet is left empty, the default empty
* alphabet is used, creating an automaton accepting only the empty string.
* @param[in] num_of_levels Number of levels in the created NFT.
*/
Nft create_sigma_star_nft(Alphabet* alphabet = new OnTheFlyAlphabet{});
Nft create_sigma_star_nft(Alphabet* alphabet = new OnTheFlyAlphabet{}, size_t num_of_levels = DEFAULT_NUM_OF_LEVELS);

/** Loads an automaton from Parsed object */
// TODO this function should the same thing as the one taking IntermediateAut or be deleted
Expand Down
7 changes: 7 additions & 0 deletions include/mata/nft/nft.hh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ public:
using super::vector;
Levels(const std::vector<Level>& levels): super(levels) {}
Levels(std::vector<Level>&& levels): super(std::move(levels)) {}

/**
* @brief Append post vector to the delta.
*
* @param post_vector Vector of posts to be appended.
*/
void append(const Levels& levels_vector) { for (const Level& level: levels_vector) { push_back(level); } }
koniksedy marked this conversation as resolved.
Show resolved Hide resolved
};

/**
Expand Down
10 changes: 6 additions & 4 deletions src/nft/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -249,14 +249,16 @@ Nft builder::create_empty_string_nft() {
return Nft(mata::nfa::builder::create_empty_string_nfa());
}

Nft builder::create_sigma_star_nft() {
Nft nft{ 1, { 0 }, { 0 }, { 0 }, 1 };
Nft builder::create_sigma_star_nft(const size_t num_of_levels) {
Nft nft{ 1, { 0 }, { 0 }, { 0 }, num_of_levels };
nft.delta.add(0, DONT_CARE, 0);
return nft;
}

Nft builder::create_sigma_star_nft(mata::Alphabet* alphabet) {
return Nft(mata::nfa::builder::create_sigma_star_nfa(alphabet));
Nft builder::create_sigma_star_nft(mata::Alphabet* alphabet, const size_t num_of_levels) {
Nft nft{1, { 0 }, { 0 }, { 0 }, num_of_levels};
nft.insert_identity(0, alphabet->get_alphabet_symbols().ToVector());
return nft;
}

Nft builder::parse_from_mata(std::istream& nft_stream) {
Expand Down
2 changes: 2 additions & 0 deletions src/nft/concatenation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Nft concatenate(const Nft& lhs, const Nft& rhs, bool use_epsilon,
}

Nft& Nft::concatenate(const Nft& aut) {
assert(num_of_levels == aut.num_of_levels);
size_t n = this->num_of_states();
auto upd_fnc = [&](State st) {
return st + n;
Expand All @@ -27,6 +28,7 @@ Nft& Nft::concatenate(const Nft& aut) {

this->delta.allocate(n);
this->delta.append(aut.delta.renumber_targets(upd_fnc));
this->levels.append(aut.levels);

// set accepting states
utils::SparseSet<State> new_fin{};
Expand Down
23 changes: 9 additions & 14 deletions src/nft/strings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -404,20 +404,15 @@ Nft ReluctantReplace::reluctant_leftmost_nft(nfa::Nfa nfa, Alphabet* alphabet, S
const size_t regex_num_of_states{ nft_reluctant_leftmost.num_of_states() };
assert(nft_reluctant_leftmost.is_deterministic());
const utils::OrdVector<Symbol> alphabet_symbols{ alphabet->get_alphabet_symbols() };
nft_reluctant_leftmost.levels.resize(
regex_num_of_states + replacement.size() * 2 + alphabet_symbols.size() + 4);
nft_reluctant_leftmost.levels.resize(regex_num_of_states + replacement.size() * 2 + alphabet_symbols.size() + 4);

// Create self-loop on the new initial state.
const State initial{ nft_reluctant_leftmost.add_state_with_level(0) };
State curr_state{ regex_num_of_states + 1 };
StatePost& initial_state_post{ nft_reluctant_leftmost.delta.mutable_state_post(initial) };
for (const Symbol symbol: alphabet_symbols) {
initial_state_post.push_back({ symbol, curr_state });
nft_reluctant_leftmost.delta.add(curr_state, symbol, initial);
nft_reluctant_leftmost.levels[curr_state] = 1;
++curr_state;
}
nft_reluctant_leftmost.insert_identity(initial, alphabet_symbols.ToVector());

// Move to replace mode when begin marker is encountered.
initial_state_post.insert({ begin_marker, curr_state });
State curr_state{ nft_reluctant_leftmost.num_of_states() };
nft_reluctant_leftmost.delta.add(initial, begin_marker, curr_state);
nft_reluctant_leftmost.delta.mutable_state_post(curr_state).push_back(
SymbolPost{ EPSILON, StateSet{ nft_reluctant_leftmost.initial } }
);
Expand All @@ -436,11 +431,11 @@ Nft ReluctantReplace::reluctant_leftmost_nft(nfa::Nfa nfa, Alphabet* alphabet, S
nft_reluctant_leftmost.final.insert(next_state);
nft_reluctant_leftmost.final.clear();
switch (replace_mode) {
case ReplaceMode::All: {
nft_reluctant_leftmost.delta.add(next_state, EPSILON, initial);
case ReplaceMode::All: { // Return to beginning to possibly repeat replacement.
nft_reluctant_leftmost.insert_word_by_parts(next_state, { { EPSILON }, { EPSILON } }, initial);
break;
};
case ReplaceMode::Single: {
case ReplaceMode::Single: { // End the replacement mode.
nft_reluctant_leftmost.levels.resize(nft_reluctant_leftmost.levels.size() + alphabet_symbols.size() + 1);
const State final{ next_state };
nft_reluctant_leftmost.final.insert(final);
Expand Down
22 changes: 22 additions & 0 deletions tests/nft/strings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -925,4 +925,26 @@ TEST_CASE("mata::nft::strings::replace_reluctant_regex()") {
// ));
// CHECK(nft::are_equivalent(nft, expected));
}

SECTION("bugfix: replace from Noodler") {
nfa::Nfa regex = nfa::builder::parse_from_mata(std::string(
"@NFA-explicit\n%Alphabet-auto\n%Initial q0\n%Final q7\nq0 60 q1\nq1 83 q2\nq2 67 q3\nq3 82 q4\nq4 73 q5\nq5 80 q6\nq6 84 q7\nq7 48 q7\nq7 49 q7\nq7 50 q7\nq7 51 q7\nq7 52 q7\nq7 53 q7\nq7 54 q7\nq7 55 q7\nq7 56 q7\nq7 57 q7\nq7 60 q7\nq7 66 q7\nq7 67 q7\nq7 68 q7\nq7 69 q7\nq7 70 q7\nq7 71 q7\nq7 72 q7\nq7 73 q7\nq7 74 q7\nq7 75 q7\nq7 76 q7\nq7 78 q7\nq7 79 q7\nq7 80 q7\nq7 82 q7\nq7 83 q7\nq7 84 q7\nq7 86 q7\nq7 87 q7\nq7 92 q7\nq7 97 q7\nq7 98 q7\nq7 99 q7\nq7 100 q7\nq7 101 q7\nq7 102 q7\nq7 103 q7\nq7 104 q7\nq7 105 q7\nq7 106 q7\nq7 107 q7\nq7 108 q7\nq7 109 q7\nq7 110 q7\nq7 112 q7\nq7 114 q7\nq7 115 q7\nq7 116 q7\nq7 119 q7\nq7 120 q7\nq7 121 q7\nq7 196608 q7\n"
));
mata::Word replacement{ 66, 76, 79, 67, 75, 69, 68 };
alphabet = mata::EnumAlphabet{ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 82, 83, 84, 86, 87, 92, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 114, 115, 116, 119, 120, 121, 196608 };
nft = nft::strings::replace_reluctant_regex(regex, replacement, &alphabet, ReplaceMode::All);

CHECK(nft.is_tuple_in_lang({ { '<', 'S', 'C', 'R', 'I', 'P', 'T', '<', '<', '<', '<' },
{ 'B', 'L', 'O', 'C', 'K', 'E', 'D', '<', '<', '<', '<' } }));
CHECK(nft.is_tuple_in_lang({ { '<', '<', 'S', 'C', 'R', 'I', 'P', 'T', '<', '<', '<', '<' },
{ '<', 'B', 'L', 'O', 'C', 'K', 'E', 'D', '<', '<', '<', '<' } }));
CHECK(nft.is_tuple_in_lang({ { '<', 'S', '<', 'S', 'C', 'R', 'I', 'P', 'T', '<', '<', '<', '<' },
{ '<', 'S', 'B', 'L', 'O', 'C', 'K', 'E', 'D', '<', '<', '<', '<' } }));
CHECK(nft.is_tuple_in_lang({ { '<', 'S', '<', 'S', 'C', 'R', 'I', 'P', 'T', 'T', 'T', '<', '<' },
{ '<', 'S', 'B', 'L', 'O', 'C', 'K', 'E', 'D', 'T', 'T', '<', '<' } }));
CHECK(!nft.is_tuple_in_lang({ { '<', 'S', '<', 'S', 'C', 'R', 'I', 'P', 'T', 'T', 'T', '<', '<' },
{ '<', 'S', 'B', 'L', 'O', 'C', 'K', 'E', 'D', 'T', '<', '<' } }));
CHECK(!nft.is_tuple_in_lang({ { '<', 's', '<', 's', 'c', 'r', 'i', 'p', 't', 't', 't', '<', '<' },
{ '<', 's', 'B', 'L', 'O', 'C', 'K', 'E', 'D', 't', 't', '<', '<' } }));
}
}
Loading