Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tabakov-Vardi generator #468

Merged
merged 9 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions include/mata/nfa/builder.hh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,20 @@ Nfa create_empty_string_nfa();
*/
Nfa create_sigma_star_nfa(Alphabet* alphabet = new OnTheFlyAlphabet{});

/**
 * Creates Tabakov-Vardi random NFA.
 * The implementation is based on the paper "Experimental Evaluation of Classical Automata Constructions" by Tabakov and Vardi.
 *
 * @param num_of_states Number of states in the automaton. When it is 0, an empty automaton is returned.
 * @param alphabet_size Size of the alphabet.
 * @param states_trans_ratio_per_symbol Ratio between number of transitions and number of states for each symbol.
 *  The value must be in range [0, num_of_states]. A value of 1 means that there will be num_of_states transitions for each symbol.
 *  A value of num_of_states means that there will be a transition between every pair of states for each symbol.
 * @param final_state_density Density of final states in the automaton. The value must be in range [0, 1]. The state 0 is always final.
 *  If the density is 1, every state will be final.
 * @return The generated automaton.
 * @throws std::runtime_error When @p states_trans_ratio_per_symbol or @p final_state_density is rejected as out of range.
 */
Nfa create_random_nfa_tabakov_vardi(const size_t num_of_states, const size_t alphabet_size, const double states_trans_ratio_per_symbol, const double final_state_density);

/**
 * Loads an automaton from a Parsed object.
 *
 * @param parsec Parsed section the automaton is built from.
 * @param alphabet Alphabet handed to the construction. NOTE(review): how symbols are resolved against it is not visible here; confirm in the definition.
 * @param state_map Optional name-to-state map (defaults to nullptr). NOTE(review): presumably used for state names; confirm in the definition.
 * @return The constructed automaton.
 */
// TODO: this function should do the same thing as the one taking IntermediateAut, or be deleted.
Nfa construct(const mata::parser::ParsedSection& parsec, Alphabet* alphabet, NameStateMap* state_map = nullptr);
Expand Down
51 changes: 51 additions & 0 deletions src/nfa/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "mata/parser/mintermization.hh"

#include <fstream>
#include <random>
#include <cmath>

using namespace mata::nfa;
using mata::nfa::Nfa;
Expand Down Expand Up @@ -217,6 +219,55 @@ Nfa builder::create_sigma_star_nfa(mata::Alphabet* alphabet) {
return nfa;
}

/**
 * Creates a random NFA in the style of Tabakov and Vardi.
 *
 * State 0 is the single initial state and is always final. Further final states and the
 * transitions of every symbol are drawn without repetition from shuffled index vectors.
 *
 * @param num_of_states Number of states; 0 yields an empty automaton.
 * @param alphabet_size Number of symbols; symbols are numbered from 0.
 * @param states_trans_ratio_per_symbol Transitions per state for each symbol, in range [0, num_of_states].
 * @param final_state_density Fraction of final states, in range [0, 1].
 * @return The generated automaton.
 * @throws std::runtime_error If a ratio or density is outside its range (NaN included).
 */
Nfa builder::create_random_nfa_tabakov_vardi(const size_t num_of_states, const size_t alphabet_size, const double states_trans_ratio_per_symbol, const double final_state_density) {
    if (num_of_states == 0) {
        return Nfa();
    }
    // Validate in the floating-point domain. Truncating the ratio to size_t first would accept
    // values such as num_of_states + 0.5. The negated form also rejects NaN, which would
    // otherwise reach a float-to-integer conversion with undefined behaviour.
    if (!(states_trans_ratio_per_symbol >= 0 && states_trans_ratio_per_symbol <= static_cast<double>(num_of_states))) {
        // Maximum of num_of_states^2 unique transitions for one symbol can be created.
        throw std::runtime_error("Transition density must be in range [0, num_of_states]");
    }
    if (!(final_state_density >= 0 && final_state_density <= 1)) {
        // Maximum of num_of_states final states can be created.
        throw std::runtime_error("Final state density must be in range [0, 1]");
    }

    // NOTE(review): the alphabet is heap-allocated here and handed to the Nfa as a raw pointer;
    // who releases it is not visible in this function. Confirm ownership.
    Nfa nfa{ num_of_states, StateSet{ 0 }, StateSet{ 0 }, new OnTheFlyAlphabet{} };

    // Initialize the random number generator.
    std::random_device rd; // Seed for the random number engine.
    std::mt19937 gen(rd()); // Mersenne Twister engine.

    // Unique final state generator.
    std::vector<State> states(num_of_states);
    std::iota(states.begin(), states.end(), 0);
    std::shuffle(states.begin() + 1, states.end(), gen); // Starting from 1, because 0 is always a final state.

    // Create final states. states[0] is state 0, so it is counted among the requested final states.
    const size_t num_of_final_states{ static_cast<size_t>(std::round(static_cast<double>(num_of_states) * final_state_density)) };
    for (size_t i = 0; i < num_of_final_states; ++i) {
        nfa.final.insert(states[i]);
    }

    // Unique transition generator: index k encodes the pair (k / num_of_states, k % num_of_states).
    std::vector<State> one_dimensional_transition_matrix(num_of_states * num_of_states);
    std::iota(one_dimensional_transition_matrix.begin(), one_dimensional_transition_matrix.end(), 0);

    // Create transitions.
    // std::min guards against floating-point rounding pushing the count past the number of
    // possible transitions, which would index past the end of one_dimensional_transition_matrix.
    const size_t num_of_transitions_per_symbol{ std::min(static_cast<size_t>(std::round(static_cast<double>(num_of_states) * states_trans_ratio_per_symbol)), one_dimensional_transition_matrix.size()) };
    for (Symbol symbol{ 0 }; symbol < alphabet_size; ++symbol) {
        // A fresh permutation per symbol; its prefix gives distinct (source, target) pairs.
        std::shuffle(one_dimensional_transition_matrix.begin(), one_dimensional_transition_matrix.end(), gen);
        for (size_t i = 0; i < num_of_transitions_per_symbol; ++i) {
            const State source{ one_dimensional_transition_matrix[i] / num_of_states };
            const State target{ one_dimensional_transition_matrix[i] % num_of_states };
            nfa.delta.add(source, symbol, target);
        }
    }
    return nfa;
}

Nfa builder::parse_from_mata(std::istream& nfa_stream) {
const std::string nfa_str = "NFA";
parser::Parsed parsed{ parser::parse_mf(nfa_stream) };
Expand Down
142 changes: 142 additions & 0 deletions tests/nfa/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <unordered_set>
#include <fstream>
#include <cmath>
koniksedy marked this conversation as resolved.
Show resolved Hide resolved

#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_string.hpp>
Expand Down Expand Up @@ -141,3 +142,144 @@ TEST_CASE("parse_from_mata()") {
}
}
}

TEST_CASE("Create Tabakov-Vardi NFA") {
    // Thin wrapper so that each section only spells out the four generator arguments.
    const auto generate = [](const size_t state_count, const size_t symbol_count, const double trans_ratio, const double final_density) {
        return mata::nfa::builder::create_random_nfa_tabakov_vardi(state_count, symbol_count, trans_ratio, final_density);
    };

    // Dimensions shared by every section except "EMPTY" and "BIG".
    const size_t small_state_count{ 10 };
    const size_t small_symbol_count{ 5 };

    SECTION("EMPTY") {
        // Zero states must produce a completely empty automaton.
        auto aut = generate(0, 0, 0, 0);
        CHECK(aut.num_of_states() == 0);
        CHECK(aut.initial.size() == 0);
        CHECK(aut.final.size() == 0);
        CHECK(aut.delta.empty());
    }

    SECTION("10-5-0.5-0.5") {
        auto aut = generate(small_state_count, small_symbol_count, 0.5, 0.5);
        CHECK(aut.num_of_states() == small_state_count);
        CHECK(aut.initial.size() == 1);
        CHECK(aut.final.size() == 5);
        CHECK(aut.delta.get_used_symbols().size() == small_symbol_count);
        CHECK(aut.delta.num_of_transitions() == 25);
    }

    SECTION("Min final") {
        // A tiny density still leaves exactly one final state.
        auto aut = generate(small_state_count, small_symbol_count, 0.5, 0.0001);
        CHECK(aut.num_of_states() == small_state_count);
        CHECK(aut.initial.size() == 1);
        CHECK(aut.final.size() == 1);
        CHECK(aut.delta.get_used_symbols().size() == small_symbol_count);
        CHECK(aut.delta.num_of_transitions() == 25);
    }

    SECTION("Max final") {
        // Density 1 marks every state as final.
        auto aut = generate(small_state_count, small_symbol_count, 0.5, 1);
        CHECK(aut.num_of_states() == small_state_count);
        CHECK(aut.initial.size() == 1);
        CHECK(aut.final.size() == small_state_count);
        CHECK(aut.delta.get_used_symbols().size() == small_symbol_count);
        CHECK(aut.delta.num_of_transitions() == 25);
    }

    SECTION("Min transitions") {
        // Ratio 0 creates no transitions, hence no used symbols.
        auto aut = generate(small_state_count, small_symbol_count, 0, 0.5);
        CHECK(aut.num_of_states() == small_state_count);
        CHECK(aut.initial.size() == 1);
        CHECK(aut.final.size() == 5);
        CHECK(aut.delta.get_used_symbols().size() == 0);
        CHECK(aut.delta.num_of_transitions() == 0);
    }

    SECTION("Max transitions") {
        // Ratio equal to the state count connects every pair of states for each symbol.
        auto aut = generate(small_state_count, small_symbol_count, 10, 0.5);
        CHECK(aut.num_of_states() == small_state_count);
        CHECK(aut.initial.size() == 1);
        CHECK(aut.final.size() == 5);
        CHECK(aut.delta.get_used_symbols().size() == small_symbol_count);
        CHECK(aut.delta.num_of_transitions() == 500);
    }

    SECTION("BIG") {
        const size_t big_state_count{ 200 };
        const size_t big_symbol_count{ 100 };

        auto aut = generate(big_state_count, big_symbol_count, 5, 1);
        CHECK(aut.num_of_states() == big_state_count);
        CHECK(aut.initial.size() == 1);
        CHECK(aut.final.size() == big_state_count);
        CHECK(aut.delta.get_used_symbols().size() == big_symbol_count);
        CHECK(aut.delta.num_of_transitions() == 100000);
    }

    SECTION("Throw runtime_error. transition_density < 0") {
        CHECK_THROWS_AS(generate(small_state_count, small_symbol_count, -0.1, 0.5), std::runtime_error);
    }

    SECTION("Throw runtime_error. transition_density > num_of_states") {
        CHECK_THROWS_AS(generate(small_state_count, small_symbol_count, 11, 0.5), std::runtime_error);
    }

    SECTION("Throw runtime_error. final_state_density < 0") {
        CHECK_THROWS_AS(generate(small_state_count, small_symbol_count, 0.5, -0.1), std::runtime_error);
    }

    SECTION("Throw runtime_error. final_state_density > 1") {
        CHECK_THROWS_AS(generate(small_state_count, small_symbol_count, 0.5, 1.1), std::runtime_error);
    }
}
Loading