diff --git a/src/libfsm/Makefile b/src/libfsm/Makefile index d3714b3e7..ca056518a 100644 --- a/src/libfsm/Makefile +++ b/src/libfsm/Makefile @@ -7,7 +7,6 @@ SRC += src/libfsm/complete.c SRC += src/libfsm/consolidate.c SRC += src/libfsm/clone.c SRC += src/libfsm/closure.c -SRC += src/libfsm/eager_endid.c SRC += src/libfsm/eager_output.c SRC += src/libfsm/edge.c SRC += src/libfsm/empty.c diff --git a/src/libfsm/clone.c b/src/libfsm/clone.c index 88910df46..2161599ae 100644 --- a/src/libfsm/clone.c +++ b/src/libfsm/clone.c @@ -19,7 +19,6 @@ #include "internal.h" #include "capture.h" #include "endids.h" -#include "eager_endid.h" #include "eager_output.h" #define LOG_CLONE_ENDIDS 0 @@ -30,9 +29,6 @@ copy_capture_actions(struct fsm *dst, const struct fsm *src); static int copy_end_ids(struct fsm *dst, const struct fsm *src); -static int -copy_eager_end_ids(struct fsm *dst, const struct fsm *src); - static int copy_eager_output_ids(struct fsm *dst, const struct fsm *src); @@ -90,11 +86,6 @@ fsm_clone(const struct fsm *fsm) } /* does not copy callback */ - if (!copy_eager_end_ids(new, fsm)) { - fsm_free(new); - return NULL; - } - if (!copy_eager_output_ids(new, fsm)) { fsm_free(new); return NULL; @@ -179,32 +170,6 @@ copy_end_ids(struct fsm *dst, const struct fsm *src) return env.ok; } -static int -copy_eager_end_ids_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - struct copy_end_ids_env *env = opaque; - assert(env->tag == 'E'); - if (!fsm_eager_endid_insert_entry(env->dst, from, to, id)) { - env->ok = false; - return 0; - } - - return 1; -} - -static int -copy_eager_end_ids(struct fsm *dst, const struct fsm *src) -{ - struct copy_end_ids_env env; - env.tag = 'E'; /* for 'E'ager endids */ - env.dst = dst; - env.src = src; - env.ok = 1; - - fsm_eager_endid_iter_edges_all(src, copy_eager_end_ids_cb, &env); - return env.ok; -} - struct copy_eager_output_ids_env { bool ok; struct fsm *dst; diff --git a/src/libfsm/consolidate.c b/src/libfsm/consolidate.c index 59f34a40a..b7a8905b2 100644 --- a/src/libfsm/consolidate.c +++ b/src/libfsm/consolidate.c @@ -25,7 +25,6 @@ #include "internal.h" #include "capture.h" #include "endids.h" -#include "eager_endid.h" #include "eager_output.h" #define LOG_MAPPING 0 @@ -55,10 +54,6 @@ static int consolidate_end_ids(struct fsm *dst, const struct fsm *src, const fsm_state_t *mapping, size_t mapping_count); -static int -consolidate_eager_end_ids(struct fsm *dst, const struct fsm *src, - const fsm_state_t *mapping, size_t mapping_count); - static int consolidate_eager_output_ids(struct fsm *dst, const struct fsm *src, const fsm_state_t *mapping, size_t mapping_count); @@ -164,10 +159,6 @@ fsm_consolidate(const struct fsm *src, } } - if (!consolidate_eager_end_ids(dst, src, mapping, mapping_count)) { - goto cleanup; - } - if (!consolidate_eager_output_ids(dst, src, mapping, mapping_count)) { goto cleanup; } @@ -289,47 +280,6 @@ consolidate_end_ids(struct fsm *dst, const struct fsm *src, return ret; } -struct consolidate_eager_end_ids_env { - bool ok; - struct fsm *dst; - const fsm_state_t *mapping; - size_t mapping_count; -}; - -static int -consolidate_eager_end_ids_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - struct consolidate_eager_end_ids_env *env = opaque; - assert(from < env->mapping_count); - assert(to < env->mapping_count); - - const fsm_state_t dst_from = env->mapping[from]; - const fsm_state_t dst_to = env->mapping[to]; - - if (!fsm_eager_endid_insert_entry(env->dst, - dst_from, dst_to, id)) { - env->ok = false; - return 0; - } - - return 1; -} - -static int -consolidate_eager_end_ids(struct fsm *dst, const struct fsm *src, - const fsm_state_t *mapping, size_t mapping_count) -{ - struct consolidate_eager_end_ids_env env = { - .ok = true, - .dst = dst, - .mapping = mapping, - .mapping_count = mapping_count, - }; - fsm_eager_endid_iter_edges_all(src, - consolidate_eager_end_ids_cb, &env); - return env.ok; -} - struct consolidate_eager_output_ids_env { bool ok; struct fsm *dst; diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c index 7665860d7..e1ad12c0e 100644 --- a/src/libfsm/determinise.c +++ b/src/libfsm/determinise.c @@ -266,10 +266,6 @@ fsm_determinise(struct fsm *nfa) goto cleanup; } - if (!remap_eager_endids(&map, issp, dfa, nfa)) { - goto cleanup; - } - if (!remap_eager_outputs(&map, issp, dfa, nfa)) { goto cleanup; } @@ -754,380 +750,6 @@ remap_capture_actions(struct map *map, struct interned_state_set_pool *issp, return res; } -#define LOG_REMAP 0 -#if LOG_REMAP -#include -#endif - -struct remap_eager_endids_env { - enum remap_eager_endids_mode { - REMAP_EAGER_ENDIDS_NAIVE, - REMAP_EAGER_ENDIDS_FILTER_BY_CHECKED_T, - REMAP_EAGER_ENDIDS_FILTER_BY_CONNECTIVITY, - } mode; - bool ok; - const struct map *map; - struct interned_state_set_pool *issp; - struct fsm *dst; - const struct fsm *src; - fsm_state_t src_start; - fsm_state_t dst_start; - - struct state_set **eemap; - - size_t froms; - size_t tos; - size_t Fs; - size_t Ts; - size_t unconnected; - size_t hits; -}; - -static bool -is_connected(const struct fsm *dfa, fsm_state_t from, fsm_state_t to) -{ - struct edge_group_iter iter; - struct edge_group_iter_info info; - /* fprintf(stderr, "%s: %d -- %d ?\n", __func__, from, to); */ - - assert(from < dfa->statecount); - edge_set_group_iter_reset(dfa->states[from].edges, - EDGE_GROUP_ITER_ALL, &iter); - while (edge_set_group_iter_next(&iter, &info)) { - if (info.to == to) { return true; } - } - return false; -} - -static int -remap_eager_endids_cb_naive(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, struct remap_eager_endids_env *env) -{ - const struct map *map = env->map; - /* naive implementation. potentially very expensive. rework later. - * for every ee(From, To, Id) - * for every F' - * if F' contains from - * for every T' - * if T' contains to - * if edge F' -> T' exists on dfa - * add ee(F, T', Id) - * */ - for (size_t f_i = 0; f_i < map->count; f_i++) { /* F' */ - env->Fs++; - struct mapping *f_m = map->buckets[f_i]; - if (f_m == NULL) { continue; } - - struct state_set *f_ss = interned_state_set_get_state_set(env->issp, f_m->iss); - /* fprintf(stderr, "%s: from %d, to %d, id %d, F %zd\n", */ - /* __func__, from, to, id, f_i); */ - if (state_set_contains(f_ss, from)) { - env->froms++; - /* fprintf(stderr, "%s: from %d, to %d, id %d, F %zd (contained)\n", */ - /* __func__, from, to, id, f_i); */ - for (size_t t_i = 0; t_i < map->count; t_i++) { /* T' */ - env->Ts++; - struct mapping *t_m = map->buckets[t_i]; - if (t_m == NULL) { continue; } - struct state_set *t_ss = interned_state_set_get_state_set(env->issp, t_m->iss); - - /* fprintf(stderr, "%s: from %d, to %d, id %d, F %zd, T %zd\n", */ - /* __func__, from, to, id, f_i, t_i); */ - if (state_set_contains(t_ss, to)) { - env->tos++; - const fsm_state_t dfa_from = f_m->dfastate; - const fsm_state_t dfa_to = t_m->dfastate; - - /* fprintf(stderr, "%s: from %d, to %d, id %d, F %zd, T %zd (both contained)\n", */ - /* __func__, from, to, id, f_i, t_i); */ - - if (!is_connected(env->dst, dfa_from, dfa_to)) { - /* fprintf(stderr, "%s: from %d, to %d, id %d, F %zd, T %zd -- connected, HIT\n", */ - /* __func__, from, to, id, f_i, t_i); */ -#if LOG_REMAP - fprintf(stderr, "determinise:%s: (%d -> %d, %d) to (%d -> %d, %d) is not connected on dfa, skipping\n", - __func__, - from, to, id, - dfa_from, dfa_to, id); -#endif - - env->unconnected++; - continue; - } - env->hits++; -#if LOG_REMAP - fprintf(stderr, "determinise:%s: rewriting (%d -> %d, %d) to (%d -> %d, %d)\n", - __func__, - from, to, id, - dfa_from, dfa_to, id); -#endif - if (!fsm_eager_endid_insert_entry(env->dst, - dfa_from, dfa_to, id)) { - env->ok = false; - return 0; - } - } - } - } - } - - return 1; -} - -static int -remap_eager_endids_cb_filter_by_checked_T(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, struct remap_eager_endids_env *env) -{ - /* for every src ee(From, To, Id) - * for every F' with From as a member (via reverse map) - * for every T' with To as a member (via reverse map) - * if F' has an edge to T' - * add dst ee(F', T', Id) if not present - * */ - struct state_set **eemaps = env->eemap; - const size_t dfa_state_count = fsm_countstates(env->dst); - - for (size_t f_i = 0; f_i < dfa_state_count; f_i++) { - /* if F' contains from */ - if (!state_set_contains(eemaps[f_i], from)) { continue; } - const struct edge_set *f_edges = env->dst->states[f_i].edges; - - for (size_t t_i = 0; t_i < dfa_state_count; t_i++) { - if (!state_set_contains(eemaps[t_i], to)) { continue; } - - struct edge_group_iter iter; - struct edge_group_iter_info info; - edge_set_group_iter_reset(f_edges, EDGE_GROUP_ITER_ALL, &iter); - while (edge_set_group_iter_next(&iter, &info)) { - if (info.to == t_i) { - - env->hits++; - -#if LOG_REMAP - fprintf(stderr, "determinise:%s: rewriting (%d -> %d, %d) to (%zd -> %zd, %d)\n", - __func__, - from, to, id, - f_i, t_i, id); -#endif - if (!fsm_eager_endid_insert_entry(env->dst, - f_i, t_i, id)) { - env->ok = false; - return 0; - } - } - } - } - } - - return 1; -} - -static int -remap_eager_endids_cb_filter_by_connectivity(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, struct remap_eager_endids_env *env) -{ - /* for every src ee(From, To, Id) - * for every F' with From as a member (via reverse map) - * for every labeled edge F'->T' on F' - * if T' contains to - * add dst ee(F', T', Id) if not present - * */ - - struct state_set **eemaps = env->eemap; - const size_t dfa_state_count = fsm_countstates(env->dst); - - for (size_t f_i = 0; f_i < dfa_state_count; f_i++) { - if (!state_set_contains(eemaps[f_i], from)) { continue; } - const struct edge_set *f_edges = env->dst->states[f_i].edges; - - struct edge_group_iter iter; - struct edge_group_iter_info info; - edge_set_group_iter_reset(f_edges, EDGE_GROUP_ITER_ALL, &iter); - while (edge_set_group_iter_next(&iter, &info)) { - const size_t t_i = info.to; - if (!state_set_contains(eemaps[t_i], to)) { continue; } - env->hits++; - -#if LOG_REMAP - fprintf(stderr, "determinise:%s: rewriting (%d -> %d, %d) to (%zd -> %zd, %d)\n", - __func__, - from, to, id, - f_i, t_i, id); -#endif - if (!fsm_eager_endid_insert_entry(env->dst, - f_i, t_i, id)) { - env->ok = false; - return 0; - } - } - } - - return 1; -} - -static int -remap_eager_endids_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - struct remap_eager_endids_env *env = opaque; - -#if LOG_REMAP - fprintf(stderr, "%s: remapping (%d -> %d, %d)\n", __func__, from, to, id); - - for (size_t f_i = 0; f_i < map->count; f_i++) { /* from */ - struct mapping *f_m = map->buckets[f_i]; - if (f_m == NULL) { continue; } - - fprintf(stderr, " == dfastate %ld: ", f_m->dfastate); - interned_state_set_dump(stderr, env->issp, f_m->iss); - fprintf(stderr, "\n"); - } -#endif - - /* Live self-edges should only appear on the start state, they represent - * eager endids that match at start. Other self-edges present became - * garbage after epsilon removal, don't carry them on. */ - if (from == to) { - if (from != env->src_start) { - return 1; /* discard stale self-edge */ - } - if (!fsm_eager_endid_insert_entry(env->dst, - env->dst_start, env->dst_start, id)) { - env->ok = false; - return 0; - } - return 1; - } - - switch (env->mode) { - case REMAP_EAGER_ENDIDS_NAIVE: - return remap_eager_endids_cb_naive(from, to, id, env); - - /* need to check every from->F', can use bitset for checked-T' OR make it connectivity-based, - * one will probably be much quicker than the other but not sure which */ - case REMAP_EAGER_ENDIDS_FILTER_BY_CHECKED_T: - return remap_eager_endids_cb_filter_by_checked_T(from, to, id, env); - case REMAP_EAGER_ENDIDS_FILTER_BY_CONNECTIVITY: - return remap_eager_endids_cb_filter_by_connectivity(from, to, id, env); - default: - assert(!"match fail"); - return 0; - } -} - -static struct state_set ** -collect_remap_eager_endids_mapping(const struct map *map, struct interned_state_set_pool *issp, - const struct fsm *dfa) -{ - const size_t dfa_state_count = fsm_countstates(dfa); - struct state_set **res = f_calloc(dfa->alloc, dfa_state_count, sizeof(res[0])); - if (res == NULL) { return NULL; } - - /* copy the state set pointers from the interned state set so they are arranged by DFA state ID */ - struct map_iter iter; - for (struct mapping *b = map_first(map, &iter); b != NULL; b = map_next(&iter)) { - struct state_set *s = interned_state_set_get_state_set(issp, b->iss); - assert(s != NULL); - res[b->dfastate] = s; - } - - return res; -} - -/* For every existing eager endid edge on src_nfa, ee(From, To, Id): - * Add an eager endid edge on dst_dfa ee(F', T', Id) - * when: - * - src_nfa's From is in dst_dfa's F' state set - * - src_nfa's To is in dst_dfa's T' state set - * - a labeled F' -> T' edge exists on dst_dfa - * - ee(F', T', Id) is not already present */ -static int -remap_eager_endids(const struct map *map, struct interned_state_set_pool *issp, - struct fsm *dst_dfa, const struct fsm *src_nfa) -{ - if (!fsm_eager_endid_has_eager_endids(src_nfa)) { - return 1; /* nothing to do */ - } -#if LOG_REMAP - fprintf(stderr, "==== before determinisation\n"); - fsm_dump(stderr, src_nfa); - fsm_eager_endid_dump(stderr, src_nfa); -#endif - - fsm_state_t dst_start; - if (!fsm_getstart(dst_dfa, &dst_start)) { - return 0; - } - - fsm_state_t src_start; - if (!fsm_getstart(src_nfa, &src_start)) { - return 0; - } - - struct state_set **eemap = collect_remap_eager_endids_mapping(map, issp, dst_dfa); - if (eemap == NULL) { goto cleanup; } - - enum remap_eager_endids_mode mode = REMAP_EAGER_ENDIDS_FILTER_BY_CONNECTIVITY; /* default */ - { - const char *modestr = getenv("REEMODE"); - if (modestr != NULL) { - switch (modestr[0]) { - case 'c': - mode = REMAP_EAGER_ENDIDS_FILTER_BY_CONNECTIVITY; - break; - case 't': - mode = REMAP_EAGER_ENDIDS_FILTER_BY_CHECKED_T; - break; - case 'n': - mode = REMAP_EAGER_ENDIDS_NAIVE; /* default */ - break; - default: - assert(!"unknown REEMODE: must be 'c' or 't' or 'n'"); - } - } - } - - struct remap_eager_endids_env env = { - .mode = mode, - .ok = true, - .map = map, - .issp = issp, - .eemap = eemap, - .dst = dst_dfa, - .src = src_nfa, - .src_start = src_start, - .dst_start = dst_start, - }; - - INIT_TIMERS(); - TIME(&pre); - fsm_eager_endid_iter_edges_all(src_nfa, remap_eager_endids_cb, &env); - - fprintf(stderr, "%s: froms %zd, tos %zd, Fs %zd, Ts %zd, unconnected %zd, hits %zd\n", - __func__, - env.froms, - env.tos, - env.Fs, - env.Ts, - env.unconnected, - env.hits); - - if (env.ok) { - TIME(&post); - DIFF_MSEC_ALWAYS("det_remap_eager_endids", pre, post, NULL); - } - -#if LOG_REMAP - if (env.ok) { - fprintf(stderr, "==== after determinisation\n"); - fsm_dump(stderr, dst_dfa); - fsm_eager_endid_dump(stderr, dst_dfa); - } -#endif - -cleanup: - /* free the eemap array itself, but its state sets are managed by the issp. */ - f_free(dst_dfa->alloc, eemap); - - return env.ok; -} - static int group_labels_overlap(const struct ac_group *a, const struct ac_group *b) { diff --git a/src/libfsm/determinise_internal.h b/src/libfsm/determinise_internal.h index 9f93a65f1..f95852148 100644 --- a/src/libfsm/determinise_internal.h +++ b/src/libfsm/determinise_internal.h @@ -23,7 +23,6 @@ #include "internal.h" #include "capture.h" #include "endids.h" -#include "eager_endid.h" #include "eager_output.h" #include @@ -328,10 +327,6 @@ static int remap_capture_actions(struct map *map, struct interned_state_set_pool *issp, struct fsm *dst_dfa, struct fsm *src_nfa); -static int -remap_eager_endids(const struct map *map, struct interned_state_set_pool *issp, - struct fsm *dst_dfa, const struct fsm *src_nfa); - static int remap_eager_outputs(const struct map *map, struct interned_state_set_pool *issp, struct fsm *dst_dfa, const struct fsm *src_nfa); diff --git a/src/libfsm/eager_endid.c b/src/libfsm/eager_endid.c deleted file mode 100644 index f90f95fea..000000000 --- a/src/libfsm/eager_endid.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Copyright 2024 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include -#include - -#include "internal.h" - -#include -#include - -#include -#include - -#include "eager_endid.h" - -#define DEF_ENTRY_CEIL 32 - -#define LOG_LEVEL 1 - -struct eager_endid_info { - fsm_eager_endid_cb *cb; - void *opaque; - - /* janky vector impl, replace with something else later */ - size_t ceil; - size_t used; - - /* Edge with an eager endid. - * - * If from == to == the start state, then apply the IDs at the start of - * execution (they are always set), otherwise they must differ. - * */ - struct eager_endid_entry { - fsm_state_t from; - fsm_state_t to; - fsm_end_id_t id; - } *entries; -}; - -void -fsm_eager_endid_set_cb(struct fsm *fsm, fsm_eager_endid_cb *cb, void *opaque) -{ -#if LOG_LEVEL > 2 - fprintf(stderr, "-- fsm_eager_endid_set_cb %p\n", (void *)fsm); -#endif - assert(fsm != NULL); - assert(fsm->eager_endid_info != NULL); - fsm->eager_endid_info->cb = cb; - fsm->eager_endid_info->opaque = opaque; -} - -void -fsm_eager_endid_get_cb(const struct fsm *fsm, fsm_eager_endid_cb **cb, void **opaque) -{ - *cb = fsm->eager_endid_info->cb; - *opaque = fsm->eager_endid_info->opaque; -} - -int -fsm_eager_endid_init(struct fsm *fsm) -{ - struct eager_endid_info *ei = f_calloc(fsm->alloc, 1, sizeof(*ei)); - struct eager_endid_entry *entries = f_calloc(fsm->alloc, DEF_ENTRY_CEIL, sizeof(entries[0])); - - if (ei == NULL || entries == NULL) { - f_free(fsm->alloc, ei); - f_free(fsm->alloc, entries); - return 0; - } - - ei->ceil = DEF_ENTRY_CEIL; - ei->entries = entries; - -#if LOG_LEVEL > 2 - fprintf(stderr, "-- fsm_eager_endid_init %p\n", (void *)fsm); -#endif - fsm->eager_endid_info = ei; - return 1; -} - -void -fsm_eager_endid_free(struct fsm *fsm) -{ - if (fsm == NULL || fsm->eager_endid_info == NULL) { return; } - - f_free(fsm->alloc, fsm->eager_endid_info->entries); - f_free(fsm->alloc, fsm->eager_endid_info); -#if LOG_LEVEL > 2 - fprintf(stderr, "-- fsm_eager_endid_free %p\n", (void *)fsm); -#endif - fsm->eager_endid_info = NULL; -} - -bool -fsm_eager_endid_has_eager_endids(const struct fsm *fsm) -{ - return fsm->eager_endid_info && fsm->eager_endid_info->used > 0; -} - -static int -insert_eager_endid_entry(const struct fsm_alloc *alloc, struct eager_endid_info *info, - fsm_state_t from, fsm_state_t to, fsm_end_id_t id) -{ -#if LOG_LEVEL > 1 - fprintf(stderr, "%s: %d, %d, %d\n", __func__, from, to, id); -#endif - if (info->ceil == info->used) { - const size_t nceil = 2*info->used; - struct eager_endid_entry *nentries = f_realloc(alloc, - info->entries, nceil * sizeof(info->entries[0])); - if (nentries == NULL) { - return 0; - } - -#if LOG_LEVEL > 1 - fprintf(stderr, "%s: grew %zd -> %zd\n", __func__, info->ceil, nceil); -#endif - - info->ceil = nceil; - info->entries = nentries; - } - - /* FIXME linear scan */ - for (size_t i = 0; i < info->used; i++) { - struct eager_endid_entry *e = &info->entries[i]; - if (e->from == from && e->to == to && e->id == id) { - return 1; /* already present, discarding duplicate */ - } - } - - info->entries[info->used] = (struct eager_endid_entry){ - .from = from, - .to = to, - .id = id, - }; - info->used++; - return 1; -} - -int -fsm_eager_endid_insert_entry(struct fsm *fsm, - fsm_state_t from, fsm_state_t to, fsm_end_id_t id) -{ - fsm_state_t start; - if (!fsm_getstart(fsm, &start)) { - assert(!"no start"); - return 0; - } - - /* FIXME: don't reject self-edges here, reachable self-edges appear in the epsilon closure - * after combining DFAs */ - if (from == to && from != start && 0) { -#if LOG_LEVEL > 1 || 1 - fprintf(stderr, "%s: skipping adding entry (%d -> %d, %d) with self-edge \n", - __func__, from, to, id); -#endif - return 1; - } - - const int res = insert_eager_endid_entry(fsm->alloc, fsm->eager_endid_info, - from, to, id); - if (res) { - assert(from < fsm->statecount); - fsm->states[from].has_eager_endids = 1; - } - return res; -} - -int -fsm_seteagerendid(struct fsm *fsm, fsm_end_id_t id) -{ - const size_t scount = fsm_countstates(fsm); - - fsm_state_t start; - if (!fsm_getstart(fsm, &start)) { - return 0; - } - -#if LOG_LEVEL > 1 - fprintf(stderr, "%s: id %d\n", __func__, id); - fsm_dump(stderr, fsm); -#endif - - if (fsm_isend(fsm, start)) { - /* Special case: The start state is an end, so add - * an edge of . This will be the - * only possible self-edge in a DFA, and should be - * checked for at the start of FSM execution. */ - if (!insert_eager_endid_entry(fsm->alloc, fsm->eager_endid_info, - start, start, id)) { - return 0; - } - - fsm->states[start].has_eager_endids = 1; - } - - /* For every non-self edge leading to an end state, mark the - * edge with the eager endid. */ - for (fsm_state_t s_i = 0; s_i < scount; s_i++) { - struct edge_group_iter iter; - struct edge_group_iter_info info; - struct state_iter epsilon_iter; - fsm_state_t to; - -#if LOG_LEVEL > 1 - fprintf(stderr, "%s: s_i %d, is_end %d\n", __func__, s_i, fsm_isend(fsm, s_i)); -#endif - - struct fsm_state *s = &fsm->states[s_i]; - - /* mark epsilon edges to end states */ - state_set_reset(s->epsilons, &epsilon_iter); - while (state_set_next(&epsilon_iter, &to)) { - /* fprintf(stderr, "??? %d --eps--> %d: %d\n", s_i, to, fsm_isend(fsm, to)); */ - if (to != s_i && fsm_isend(fsm, to)) { - if (!insert_eager_endid_entry(fsm->alloc, fsm->eager_endid_info, - s_i, to, id)) { - return 0; - } - s->has_eager_endids = 1; - } - } - - /* mark labeled edges to end states */ - edge_set_group_iter_reset(s->edges, EDGE_GROUP_ITER_ALL, &iter); - while (edge_set_group_iter_next(&iter, &info)) { - /* fprintf(stderr, "??? %d -> %d: %d\n", s_i, info.to, fsm_isend(fsm, info.to)); */ - if (info.to != s_i && fsm_isend(fsm, info.to)) { - if (!insert_eager_endid_entry(fsm->alloc, fsm->eager_endid_info, - s_i, info.to, id)) { - return 0; - } - s->has_eager_endids = 1; - } - } - } - - return 1; -} - -void -fsm_eager_endid_iter_edges_from_state(const struct fsm *fsm, - fsm_state_t from, fsm_eager_endid_iter_edges_cb *cb, void *opaque) -{ - assert(fsm != NULL); - assert(fsm->eager_endid_info != NULL); - assert(cb != NULL); - - const struct eager_endid_info *info = fsm->eager_endid_info; - for (size_t i = 0; i < info->used; i++) { - const struct eager_endid_entry *e = &info->entries[i]; - if (e->from == from) { - if (!cb(e->from, e->to, e->id, opaque)) { return; } - } - } -} - -void -fsm_eager_endid_iter_edges_between_states(const struct fsm *fsm, - fsm_state_t from, fsm_state_t to, fsm_eager_endid_iter_edges_cb *cb, void *opaque) -{ - assert(fsm != NULL); - assert(fsm->eager_endid_info != NULL); - assert(cb != NULL); - - const struct eager_endid_info *info = fsm->eager_endid_info; - for (size_t i = 0; i < info->used; i++) { - const struct eager_endid_entry *e = &info->entries[i]; - if (e->from == from && e->to == to) { - if (!cb(e->from, e->to, e->id, opaque)) { return; } - } - } -} - -void -fsm_eager_endid_iter_edges_all(const struct fsm *fsm, - fsm_eager_endid_iter_edges_cb *cb, void *opaque) -{ - assert(fsm != NULL); - assert(fsm->eager_endid_info != NULL); - assert(cb != NULL); - - const struct eager_endid_info *info = fsm->eager_endid_info; - for (size_t i = 0; i < info->used; i++) { - const struct eager_endid_entry *e = &info->entries[i]; - -#if 1 - assert(fsm->states[e->from].has_eager_endids); -#endif - - if (!cb(e->from, e->to, e->id, opaque)) { return; } - } -} - -static int -dump_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - FILE *f = opaque; - fprintf(f, "-- %d -> %d: id %d\n", from, to, id); - return 1; -} - -void -fsm_eager_endid_dump(FILE *f, const struct fsm *fsm) -{ - if (!fsm_eager_endid_has_eager_endids(fsm)) { return; } - - fprintf(f, "%s:\n", __func__); - fsm_eager_endid_iter_edges_all(fsm, dump_cb, (void *)f); -} diff --git a/src/libfsm/eager_endid.h b/src/libfsm/eager_endid.h deleted file mode 100644 index 8def0c118..000000000 --- a/src/libfsm/eager_endid.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef EAGER_ENDID_H -#define EAGER_ENDID_H - -#include -#include -#include - -struct eager_endid_info; - -int -fsm_eager_endid_init(struct fsm *fsm); - -void -fsm_eager_endid_free(struct fsm *fsm); - -bool -fsm_eager_endid_has_eager_endids(const struct fsm *fsm); - -void -fsm_eager_endid_dump(FILE *f, const struct fsm *fsm); - -/* Internal interface, used during epsilon removal, - * determinisation, and minimisation. */ -int -fsm_eager_endid_insert_entry(struct fsm *fsm, - fsm_state_t from, fsm_state_t to, fsm_end_id_t id); - -/* Callback for fsm_eager_endid_iter_*. - * The return value indicates whether iteration should continue. - * The results may not be sorted in any particular order. */ -typedef int -fsm_eager_endid_iter_edges_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque); - -void -fsm_eager_endid_iter_edges_from_state(const struct fsm *fsm, - fsm_state_t from, fsm_eager_endid_iter_edges_cb *cb, void *opaque); - -void -fsm_eager_endid_iter_edges_between_states(const struct fsm *fsm, - fsm_state_t from, fsm_state_t to, fsm_eager_endid_iter_edges_cb *cb, void *opaque); - -void -fsm_eager_endid_iter_edges_all(const struct fsm *fsm, - fsm_eager_endid_iter_edges_cb *cb, void *opaque); - -#endif diff --git a/src/libfsm/epsilons.c b/src/libfsm/epsilons.c index 66fa23f60..6522d0e2c 100644 --- a/src/libfsm/epsilons.c +++ b/src/libfsm/epsilons.c @@ -24,7 +24,6 @@ #include "internal.h" #include "capture.h" #include "endids.h" -#include "eager_endid.h" #include "eager_output.h" #define DUMP_EPSILON_CLOSURES 0 @@ -76,9 +75,6 @@ static int carry_endids(struct fsm *fsm, struct state_set *states, fsm_state_t s); -static int -remap_eager_endids(struct fsm *nfa, struct state_set **eclosures); - static void mark_states_reachable_by_label(const struct fsm *nfa, uint64_t *reachable_by_label); @@ -282,16 +278,6 @@ fsm_remove_epsilons(struct fsm *nfa) eager_output_buf.used = 0; /* clear */ } - /* Remap edge metadata for eagerly matching endids. - * - * This depends on the previous phase finishing (because it pulls end states forward to - * the last labeled edges) but has to happen before the epsilon-edge state sets are - * removed (because it has to explore those to copy over edge metadata). */ - if (!remap_eager_endids(nfa, eclosures)) { - fprintf(stderr, "%s: remap_eager_endids failed\n", __func__); - goto cleanup; - } - /* Remove the epsilon-edge state sets from everything. * This can make states unreachable. */ for (s = 0; s < state_count; s++) { @@ -326,111 +312,6 @@ fsm_remove_epsilons(struct fsm *nfa) return res; } -#define DEF_EDGES_CEIL 4 - -struct ee_cache { - unsigned ceil; - unsigned used; - struct ee_edges { - fsm_state_t to; - fsm_end_id_t id; - } *edges; -}; - -#define DEF_STATE_STACK_CEIL 8 -#define DEF_DATA_STACK_CEIL 4 - -struct ee_cache_env { - bool ok; - struct fsm *nfa; - struct ee_cache **cache; - - struct state_set **eclosures; - - /* Start state for all the epsilon closure paths being analyzed. */ - fsm_state_t start_of_path; - - struct ee_stack { - struct ee_state_stack { - unsigned ceil; - unsigned used; - struct ee_state_stack_frame { - /* fsm_state_t labeled_edge_from; */ - fsm_state_t state; - - /* FIXME: is it actually necessary to track this? */ - unsigned id_count; /* how many IDs were pushed on the data stack */ - } *frames; - } state; - - struct ee_data_stack { - unsigned ceil; - unsigned used; - struct ee_data_stack_frame { - fsm_end_id_t id; - } *frames; - } data; - } stack; -}; - -static int -save_edges_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - LOG(1, "%s: from %d, to %d, id %d\n", __func__, from, to, id); - - struct ee_cache_env *env = opaque; - if (env->cache[from] == NULL) { - struct ee_cache *c = f_calloc(env->nfa->alloc, 1, sizeof(*c)); - if (c == NULL) { goto fail; } - - struct ee_edges *edges = f_calloc(env->nfa->alloc, DEF_EDGES_CEIL, sizeof(edges[0])); - if (edges == NULL) { - f_free(env->nfa->alloc, c); - goto fail; - } - - c->edges = edges; - c->ceil = DEF_EDGES_CEIL; - env->cache[from] = c; - } - - struct ee_cache *c = env->cache[from]; - if (c->used == c->ceil) { - const size_t nceil = 2*c->ceil; - struct ee_edges *nedges = f_realloc(env->nfa->alloc, c->edges, - nceil * sizeof(nedges[0])); - if (nedges == NULL) { goto fail; } - - c->edges = nedges; - c->ceil = nceil; - } - - struct ee_edges *e = &c->edges[c->used]; - e->to = to; - e->id = id; - c->used++; - return 1; - -fail: - env->ok = false; - return 0; -} - -/* These macros just exist so logging can use __func__ and __LINE__. */ -#define MARK_VISITED(NFA, S_ID) \ - do { \ - LOG(1, "%s(%d): marking visited: %d\n", \ - __func__, __LINE__, S_ID); \ - NFA->states[S_ID].visited = 1; \ - } while(0) - -#define CLEAR_VISITED(NFA, S_ID) \ - do { \ - LOG(1, "%s(%d): clearing visited: %d\n", \ - __func__, __LINE__, S_ID); \ - NFA->states[S_ID].visited = 0; \ - } while(0) - /* For every state, mark every state reached by a labeled edge as * reachable. This doesn't check that the FROM state is reachable from * the start state (trim will do that soon enough), it's just used to @@ -468,582 +349,6 @@ mark_states_reachable_by_label(const struct fsm *nfa, uint64_t *reachable_by_lab } } -static bool -remap_eager_endids__push_state_stack(struct ee_cache_env *env, /*fsm_state_t labeled_edge_from, */ fsm_state_t s_id) -{ - LOG(2, "%s: s_id %d\n", __func__, s_id); - struct ee_state_stack *sstack = &env->stack.state; - if (sstack->used == sstack->ceil) { - const size_t nceil = sstack->ceil == 0 - ? DEF_STATE_STACK_CEIL - : 2*sstack->ceil; - struct ee_state_stack_frame *nframes = f_realloc(env->nfa->alloc, - sstack->frames, nceil * sizeof(nframes[0])); - if (nframes == NULL) { return false; } - - sstack->ceil = nceil; - sstack->frames = nframes; - } - - struct ee_state_stack_frame *f = &sstack->frames[sstack->used]; - /* f->labeled_edge_from = labeled_edge_from; */ - f->state = s_id; - f->id_count = 0; - sstack->used++; - return true; -} - -static bool -remap_eager_endids__pop_state_stack(struct ee_cache_env *env, /*fsm_state_t *labeled_edge_from, */ fsm_state_t *state_id, unsigned *id_count) -{ - struct ee_state_stack *sstack = &env->stack.state; - if (sstack->used == 0) { return false; } - - sstack->used--; - struct ee_state_stack_frame *f = &sstack->frames[sstack->used]; - /* *labeled_edge_from = f->labeled_edge_from; */ - *state_id = f->state; - *id_count = f->id_count; - // labeled_edge_from %d, , *labeled_edge_from - LOG(1, "%s: => state_id %d, id_count %d\n", __func__, *state_id, *id_count); - return true; -} - -static bool -remap_eager_endids__push_data_stack(struct ee_cache_env *env, fsm_end_id_t id) -{ - LOG(1, "%s: id %d\n", __func__, id); - struct ee_data_stack *dstack = &env->stack.data; - if (dstack->used == dstack->ceil) { - const size_t nceil = dstack->ceil == 0 - ? DEF_STATE_STACK_CEIL - : 2*dstack->ceil; - struct ee_data_stack_frame *nframes = f_realloc(env->nfa->alloc, - dstack->frames, nceil * sizeof(nframes[0])); - if (nframes == NULL) { return false; } - - dstack->ceil = nceil; - dstack->frames = nframes; - } - - struct ee_data_stack_frame *f = &dstack->frames[dstack->used]; - f->id = id; - dstack->used++; - - /* FIXME: the caller should manage this */ - if (false) { - struct ee_state_stack *sstack = &env->stack.state; - assert(sstack->used > 0); - sstack->frames[sstack->used - 1].id_count++; - } - - return true; -} - -static bool -remap_eager_endids__pop_data_stack(struct ee_cache_env *env, unsigned id_count) -{ - if (id_count > 0) { - LOG(1, "%s: count %d\n", __func__, id_count); - } - - struct ee_data_stack *dstack = &env->stack.data; - if (dstack->used < id_count) { - LOG(1, "%s: expected count %d, dstack->used %d\n", __func__, id_count, dstack->used); - return false; - } - - dstack->used -= id_count; - return true; -} - -static int -push_on_data_stack_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - struct ee_cache_env *env = opaque; - - if (from != to) { /* ignore self-edges, because they're always skippable */ - if (!remap_eager_endids__push_data_stack(env, id)) { - env->ok = false; - return 0; - } - } - return 1; -} - -static bool -remap_eager_endids__push_any_endids_on_epsilon_edge(struct ee_cache_env *env, fsm_state_t from, fsm_state_t to, size_t *pushed) -{ - *pushed = 0; - const size_t before = env->stack.data.used; - - fsm_eager_endid_iter_edges_between_states(env->nfa, from, to, push_on_data_stack_cb, env); - const size_t after = env->stack.data.used; - assert(after >= before); - if (env->ok) { - *pushed = after - before; - } - - return env->ok; -} - -static bool -collect_labeled_endid_edges(struct ee_cache_env *env, fsm_state_t from, fsm_state_t to, size_t *count) -{ - *count = 0; - const unsigned used_before = env->stack.data.used; - fsm_eager_endid_iter_edges_between_states(env->nfa, from, to, push_on_data_stack_cb, env); - if (env->ok) { - *count = env->stack.data.used - used_before; - LOG(1, "%s: collected %zd\n", __func__, *count); - } - - return env->ok; -} - -struct after_labeled_edge_info { - fsm_state_t from; - fsm_state_t to; - unsigned data_stack_floor; -}; - -/* For each state, step through its epsilon closure and remap eager endid edge metadata to any new - * labeled edges that have been added to the state. A stack is used to track edge metadata on the path - * between the original state and states in the epsilon closure with labeled edges. The states' visited - * flag is used to avoid cycles. - * - * - s_id: The current state being evaluated. - * - * - after_labeled_edge_info: FIXME - * */ -static bool -remap_eager_endids__step_for_state(struct ee_cache_env *env, fsm_state_t s_id, - struct after_labeled_edge_info *after_labeled_edge_info) -{ - struct state_iter eps_iter; - fsm_state_t eps_id; - - if (LOG_LEVEL > 1) { - LOG(2, "//// %s: s_id %d (start of step_for_state)\n", __func__, s_id); - fprintf(stderr, "%s: current stacks:\n", __func__); - for (size_t i = 0; i < env->stack.state.used; i++) { - fprintf(stderr, "-- state %zd: id %d, count %u\n", - i, - //env->stack.state.frames[i].labeled_edge_from, - env->stack.state.frames[i].state, env->stack.state.frames[i].id_count); - } - for (size_t i = 0; i < env->stack.data.used; i++) { - fprintf(stderr, "-- data %zd: %d\n", i, env->stack.data.frames[i].id); - } - LOG(2, "////\n"); - } - - /* Explore each non-visited epsilon edge, pushing any edges with - * eager endid metadata to the stack. */ - /* state_set_reset(env->eclosures[s_id], &eps_iter); */ - const struct state_set *epsilons = env->nfa->states[s_id].epsilons; - state_set_reset(epsilons, &eps_iter); - LOG(1, "-- %s: checking epsilon edges on %d\n", __func__, s_id); - while (state_set_next(&eps_iter, &eps_id)) { - LOG(1, "%s: state_set_next s_id %d => eps_id %d\n", __func__, s_id, eps_id); - struct fsm_state *es = &env->nfa->states[eps_id]; - if (es->visited) { - LOG(1, "%s: already visited %d, skipping\n", __func__, eps_id); - continue; - } - MARK_VISITED(env->nfa, eps_id); - - if (!remap_eager_endids__push_state_stack(env, s_id)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - /* If there is any endid edge metadata on this epsilon edge, save it - * on the stack, so it can be added to labeled edges later. */ - size_t pushed; - if (!remap_eager_endids__push_any_endids_on_epsilon_edge(env, s_id, eps_id, &pushed)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - if (!remap_eager_endids__step_for_state(env, eps_id, after_labeled_edge_info)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - fsm_state_t popped_s_id; - unsigned id_count; - if (!remap_eager_endids__pop_state_stack(env, &popped_s_id, &id_count)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - assert(popped_s_id == s_id); - assert(id_count == 0); /* no longer updated, remove this */ - - id_count = pushed; - LOG(0, "<<< calling pop_data_stack with %u from line %d, s_id %d, eps_id %d\n", id_count, __LINE__, s_id, eps_id); - if (!remap_eager_endids__pop_data_stack(env, id_count)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - assert(es->visited); - CLEAR_VISITED(env->nfa, eps_id); - } - - /* For every end state EcS in the epsilon closure of the current state S - * where S is an end state - * directly after a labeled edge Prev->S - * - * for every eager endid edge (X->Y, Id) in the epsilon closure between S and EcS - * - * add an eager endid edge (Prev -> S, Id) - * */ - if (after_labeled_edge_info != NULL && fsm_isend(env->nfa, s_id)) { - LOG(1, "%s: after_labeled_edge_info && is_end, data_stack_floor %d now %d\n", - __func__, after_labeled_edge_info->data_stack_floor, env->stack.data.used); - - for (size_t i = after_labeled_edge_info->data_stack_floor; i < env->stack.data.used; i++) { - const fsm_end_id_t id = env->stack.data.frames[i].id; - LOG(1, "%s: adding eager endid md for edge (%d -> %d, %d) in epsilon closure between labeled edge and end\n", - __func__, after_labeled_edge_info->from, after_labeled_edge_info->to, id); - if (!fsm_eager_endid_insert_entry(env->nfa, - after_labeled_edge_info->from, after_labeled_edge_info->to, id)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - } - } - - if (after_labeled_edge_info != NULL) { - LOG(1 -1, "%s: after_labeled_edge_info != NULL, skipping labeled edge checks on %d\n", __func__, s_id); - - LOG(2, "//// %s: s_id %d (early return from step_for_state)\n", __func__, s_id); - return true; - } - - /* For each labeled edge (s_id -> Dst): - * - If there is an endid edge (s_id -> Dst, Id), add an endid edge (env->start_of_path -> Dst, Id) - * - For every id X on the data stack, add an endid edge (env->start_of_path -> Dst, X) - * */ - struct edge_group_iter egi; - struct edge_group_iter_info info; - LOG(1, "-- %s: checking labeled edges on %d\n", __func__, s_id); - edge_set_group_iter_reset(env->nfa->states[s_id].edges, EDGE_GROUP_ITER_ALL, &egi); - while (edge_set_group_iter_next(&egi, &info)) { - const size_t used_before = env->stack.data.used; - const fsm_state_t dst = info.to; - - LOG(1, "%s: edge_set_group_iter_next => %d -> dst %d\n", __func__, s_id, dst); - if (info.to == s_id) { - LOG(1, "%s: skipping self-edge\n", __func__); - continue; - } - - /* Collect any eager endids associated with this labeled edge on the data stack. */ - size_t endid_labeled_edges; - assert(env->ok); - if (!collect_labeled_endid_edges(env, s_id, dst, &endid_labeled_edges)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - /* If the destination state for the labeled edge is an end state (because - * the first phase of epsilon removal processing has carried any end-state-ness - * forward from its epsilon closure), then copy any eager endid metadata that - * appears on epsilon edges within its epsilon closure to the labeled edge. */ - if (fsm_isend(env->nfa, dst)) { - /* Continue exploring (only) the epsilon closure after the labeled - * edge, so that this labeled edge can be associated with any - * eager endids on epsilon edges that lead to an end state. */ - struct after_labeled_edge_info labeled_edge = { - .from = s_id, - .to = dst, - .data_stack_floor = env->stack.data.used, - }; - - LOG(0, "-- %s: %d is an end state, exploring its epsilon closure, labeled_edge { from = %d, to = %d, data_stack_floor = %d } \n", - __func__, dst, labeled_edge.from, labeled_edge.to, labeled_edge.data_stack_floor); - - if (!remap_eager_endids__step_for_state(env, dst, &labeled_edge)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - /* Drop the data stack back to where it was before. */ - const size_t new_discoveries = env->stack.data.used - labeled_edge.data_stack_floor; - LOG(0, "%s: dropping %zd new_discoveries\n", __func__, new_discoveries); - LOG(0, "<<< calling pop_data_stack with %zd from line %d, s_id %d\n", new_discoveries, __LINE__, s_id); - if (!remap_eager_endids__pop_data_stack(env, new_discoveries)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - } - - const size_t used_after = env->stack.data.used; - - LOG(1, "### endid_labeled_edges %zd, used_before %zd, used_after %zd\n", - endid_labeled_edges, used_before, used_after); - assert(used_before == used_after - endid_labeled_edges); - - /* Now that iteration is done, add the collected edges. */ - for (size_t i = 0; i < endid_labeled_edges; i++) { - LOG(1, "%s: adding labeled edge (%d -> %d, %d)\n", - __func__, env->start_of_path, dst, env->stack.data.frames[used_before + i].id); - if (!fsm_eager_endid_insert_entry(env->nfa, - env->start_of_path, dst, env->stack.data.frames[used_before + i].id)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - } - - /* Drop the data stack back to where it was before. */ - LOG(0, "<<< calling pop_data_stack with %zd from line %d, s_id %d\n", endid_labeled_edges, __LINE__, s_id); - if (!remap_eager_endids__pop_data_stack(env, endid_labeled_edges)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - - /* Add any other eager endid IDs on the data stack, these represent edges passed - * through as the transitive closure's epsilon edges were collapsed into a - * single labeled edge. - * - * (This could be done by iterating over the entire data stack before dropping - * the labeled edge endids, rather than adding the labeled and epsilon edges' - * endids separately.) */ - for (size_t i = 0; i < env->stack.data.used; i++) { - LOG(1, "%s: adding intermediate epsilon edge (%d -> %d, %d)\n", - __func__, env->start_of_path, dst, env->stack.data.frames[i].id); - if (!fsm_eager_endid_insert_entry(env->nfa, - env->start_of_path, dst, env->stack.data.frames[i].id)) { - LOG(0, "%s %d: returning false\n", __func__, __LINE__); - return false; - } - } - } - - LOG(2, "//// %s: s_id %d (normal return from step_for_state)\n", __func__, s_id); - return true; -} - -struct remap_eager_endids_for_start_env { - bool ok; - struct fsm *nfa; - fsm_state_t start; - struct state_set *eclosure; - -#define DEF_ADD_START_ENDIDS_CEIL 4 - struct { - size_t ceil; - size_t used; - fsm_end_id_t *ids; - } add; -}; - -static int -remap_eager_endids_for_start_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - struct remap_eager_endids_for_start_env *env = opaque; - - if (state_set_contains(env->eclosure, from) && state_set_contains(env->eclosure, to)) { - if (env->add.used == env->add.ceil) { - const size_t nceil = env->add.ceil == 0 - ? DEF_ADD_START_ENDIDS_CEIL : 2*env->add.ceil; - fsm_end_id_t *nids = f_realloc(env->nfa->alloc, - env->add.ids, nceil * sizeof(nids[0])); - if (nids == NULL) { - env->ok = false; - return 0; - } - env->add.ids = nids; - env->add.ceil = nceil; - } - env->add.ids[env->add.used++] = id; - } - - return 1; -} - - -/* This could potentially be integrated into epsilon_closure_single. */ -static int -remap_eager_endids(struct fsm *nfa, struct state_set **eclosures) -{ -#define DROP_EAGER_ENDIDS 1 - if (DROP_EAGER_ENDIDS) { return 1; } - - if (!fsm_eager_endid_has_eager_endids(nfa)) { - return 1; /* nothing to do */ - } - - const fsm_state_t state_count = fsm_countstates(nfa); - const size_t state_words = u64bitset_words(state_count); - int res = 0; - - struct ee_cache **cache = NULL; - uint64_t *ends = NULL; - uint64_t *enqueued = NULL; - uint64_t *reachable_by_label = NULL; - fsm_end_id_t *start_ids = NULL; - - ends = f_calloc(nfa->alloc, state_words, sizeof(ends[0])); - if (ends == NULL) { goto cleanup; } - - enqueued = f_calloc(nfa->alloc, state_words, sizeof(enqueued[0])); - if (enqueued == NULL) { goto cleanup; } - - cache = f_calloc(nfa->alloc, state_count, sizeof(*cache)); - if (cache == NULL) { goto cleanup; } - - reachable_by_label = f_calloc(nfa->alloc, state_words, sizeof(reachable_by_label[0])); - if (reachable_by_label == NULL) { goto cleanup; } - - INIT_TIMERS(); - - TIME(&pre); - mark_states_reachable_by_label(nfa, reachable_by_label); - TIME(&post); - DIFF_MSEC("epsilon_mark_states_reachable_by_label", pre, post, NULL); - - struct ee_cache_env env = { - .ok = true, - .nfa = nfa, - .cache = cache, - .eclosures = eclosures, - }; - - /* First pass: build edge cache. This avoids a great deal of redundant lookup. */ - /* FIXME: remove -- no longer used */ - if (false) for (fsm_state_t s_id = 0; s_id < state_count; s_id++) { - const struct fsm_state *s = &nfa->states[s_id]; - if (!s->has_eager_endids) { continue; } - - fsm_eager_endid_iter_edges_from_state(nfa, s_id, save_edges_cb, &env); - } - - /* dump cache */ - if (false) { - fprintf(stderr, "# caches:\n"); - for (fsm_state_t s_id = 0; s_id < state_count; s_id++) { - const struct ee_cache *c = cache[s_id]; - if (c == NULL) { continue; } - for (size_t i = 0; i < c->used; i++) { - fprintf(stderr, "-- %d -> %d: %d\n", - s_id, c->edges[i].to, c->edges[i].id); - } - } - } - - /* Special case for the start state -- if there are any actions on epsilon edges - * within the start state's closure, they need to be applied at start. */ - fsm_state_t start; - if (fsm_getstart(nfa, &start)) { - struct remap_eager_endids_for_start_env start_env = { - .ok = true, - .nfa = nfa, - .start = start, - .eclosure = eclosures[start], - }; - /* collect IDs from the start state's epsilon closure */ - fsm_eager_endid_iter_edges_all(nfa, remap_eager_endids_for_start_cb, &start_env); - - start_ids = start_env.add.ids; /* for cleanup */ - - if (!start_env.ok) { - goto cleanup; - } - - /* now that iteration is done, add them */ - for (size_t i = 0; i < start_env.add.used; i++) { - if (!fsm_eager_endid_insert_entry(nfa, start, start, start_env.add.ids[i])) { - goto cleanup; - } - } - } else { - LOG(1, "no start\n"); - goto cleanup; - } - - /* For each state, carry over eager endid edge metadata from its epsilon closure. - * The state order in which this happens shouldn't matter, but it depends on the - * previous epsilon removal pass carrying end-ness around. */ - for (fsm_state_t s_id = 0; s_id < state_count; s_id++) { - /* must start and finish with empty stacks */ - assert(env.stack.state.used == 0); - assert(env.stack.data.used == 0); - - if (!u64bitset_get(reachable_by_label, s_id)) { - LOG(1, "\n%s: skipping state not directly reachable by label: %d\n", __func__, s_id); - continue; - } - - LOG(1, "\n%s: start_of_path = %d\n", __func__, s_id); - assert(nfa->states[s_id].visited == 0); - MARK_VISITED(nfa, s_id); - - env.start_of_path = s_id; - - if (!remap_eager_endids__push_state_stack(&env, s_id)) { - fprintf(stderr, "%s: fail %s:%d\n", __FILE__, __func__, __LINE__); - goto cleanup; - } - - if (!remap_eager_endids__step_for_state(&env, s_id, NULL)) { - fprintf(stderr, "%s: fail %s:%d\n", __FILE__, __func__, __LINE__); - goto cleanup; - } - - CLEAR_VISITED(nfa, s_id); - unsigned count; - if (!remap_eager_endids__pop_state_stack(&env, &s_id, &count)) { - fprintf(stderr, "%s: fail %s:%d\n", __FILE__, __func__, __LINE__); - goto cleanup; - } - - - assert(env.stack.state.used == 0); - assert(env.stack.data.used == 0); - } - - res = 1; - - if (LOG_LEVEL >= 2) { - LOG(2, "%s: finishing up... [[\n", __func__); - fsm_eager_endid_dump(stderr, nfa); - LOG(2, "]]\n"); - } - -cleanup: - f_free(nfa->alloc, ends); - f_free(nfa->alloc, enqueued); - f_free(nfa->alloc, reachable_by_label); - f_free(nfa->alloc, start_ids); - - f_free(nfa->alloc, env.stack.state.frames); - f_free(nfa->alloc, env.stack.data.frames); - - if (cache != NULL) { - for (size_t i = 0; i < state_count; i++) { - if (cache[i] == NULL) { continue; } - f_free(nfa->alloc, cache[i]->edges); - f_free(nfa->alloc, cache[i]); - } - } - f_free(nfa->alloc, cache); - - for (size_t s_i = 0; s_i < state_count; s_i++) { - if (res == 1) { - /* These should be cleared during normal - * operation, but may remain set on error. */ - assert(nfa->states[s_i].visited == 0); - } - nfa->states[s_i].visited = 0; - } - - return res; -} - static int remap_capture_actions(struct fsm *nfa, struct state_set **eclosures) { diff --git a/src/libfsm/exec.c b/src/libfsm/exec.c index d15d4ee1c..e74cf7c7c 100644 --- a/src/libfsm/exec.c +++ b/src/libfsm/exec.c @@ -20,7 +20,6 @@ #include "internal.h" #include "capture.h" -#include "eager_endid.h" #include "eager_output.h" #define LOG_EXEC 0 @@ -47,73 +46,6 @@ transition(const struct fsm *fsm, fsm_state_t state, int c, return 1; } -struct check_eager_endids_for_edge_env { - const struct fsm *fsm; - fsm_eager_endid_cb *cb; - void *opaque; -}; - -static int -set_for_label_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - /* HACK update the types here once it's working */ - - (void)from; - (void)to; - struct check_eager_endids_for_edge_env *env = opaque; - env->cb(id, env->opaque); - return 1; -} - -static void -check_eager_endids_for_edge(const struct fsm *fsm, fsm_state_t from, fsm_state_t to, int label) -{ - fsm_eager_endid_cb *cb = NULL; - void *opaque = NULL; - - /* FIXME: this isn't specific to the label set. maybe it should be? */ - (void)label; - - fsm_eager_endid_get_cb(fsm, &cb, &opaque); - - struct check_eager_endids_for_edge_env env = { - .fsm = fsm, - .cb = cb, - .opaque = opaque, - }; - fsm_eager_endid_iter_edges_between_states(fsm, from, to, set_for_label_cb, &env); -} - -static int -match_eager_endids_at_start_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - /* HACK update the types here once it's working */ - (void)from; - (void)to; - struct check_eager_endids_for_edge_env *env = opaque; - env->cb(id, env->opaque); - return 1; -} - -static int -match_eager_endids_at_start(const struct fsm *fsm, fsm_state_t start) -{ - /* HACK update the types here once it's working */ - fsm_eager_endid_cb *cb = NULL; - void *opaque = NULL; - fsm_eager_endid_get_cb(fsm, &cb, &opaque); - struct check_eager_endids_for_edge_env env = { - .fsm = fsm, - .cb = cb, - .opaque = opaque, - }; - fsm_eager_endid_iter_edges_between_states(fsm, - start, start, match_eager_endids_at_start_cb, &env); - return 1; -} - - - struct check_eager_outputs_for_state_env { const struct fsm *fsm; fsm_eager_output_cb *cb; @@ -195,19 +127,6 @@ fsm_exec(const struct fsm *fsm, fprintf(stderr, "fsm_exec: starting at %d\n", state); #endif -#if LOG_EAGER - if (fsm_eager_endid_has_eager_endids(fsm)) { - fprintf(stderr, "%s: HAS EAGER ENDIDS\n", __func__); - fsm_eager_endid_dump(stderr, fsm); - } -#endif - - /* if (fsm->states[start].has_eager_endids) { */ - /* if (!match_eager_endids_at_start(fsm, start)) { */ - /* return 0; */ - /* } */ - /* } */ - if (fsm->states[start].has_eager_outputs) { if (!match_eager_outputs_for_state(fsm, start)) { return 0; @@ -215,7 +134,6 @@ fsm_exec(const struct fsm *fsm, } while (c = fsm_getc(opaque), c != EOF) { - const fsm_state_t prev_state = state; if (!transition(fsm, state, c, offset, captures, &state)) { #if LOG_EXEC fprintf(stderr, "fsm_exec: edge not found\n"); @@ -223,19 +141,6 @@ fsm_exec(const struct fsm *fsm, return 0; } -/* #if LOG_EAGER */ -/* fprintf(stderr, "%s: %d -> %d\n", __func__, prev_state, state); */ -/* #endif */ -/* if (fsm->states[prev_state].has_eager_endids) { */ -/* check_eager_endids_for_edge(fsm, prev_state, state, c); */ -/* } */ - (void)prev_state; - (void)check_eager_endids_for_edge; - (void)match_eager_endids_at_start; - -#if LOG_EAGER > 1 - fprintf(stderr, "%s: %d -> %d\n", __func__, prev_state, state); -#endif if (fsm->states[state].has_eager_outputs) { if (!match_eager_outputs_for_state(fsm, state)) { return 0; @@ -249,7 +154,6 @@ fsm_exec(const struct fsm *fsm, offset++; } - /* FIXME: eager output also counts */ if (!fsm_isend(fsm, state)) { return 0; } diff --git a/src/libfsm/fsm.c b/src/libfsm/fsm.c index 2f28a6d55..c442c8262 100644 --- a/src/libfsm/fsm.c +++ b/src/libfsm/fsm.c @@ -21,7 +21,6 @@ #include "internal.h" #include "capture.h" #include "endids.h" -#include "eager_endid.h" #include "eager_output.h" /* guess for default state allocation */ @@ -41,7 +40,6 @@ free_contents(struct fsm *fsm) fsm_capture_free(fsm); fsm_endid_free(fsm); - fsm_eager_endid_free(fsm); fsm_eager_output_free(fsm); f_free(fsm->alloc, fsm->states); @@ -96,14 +94,6 @@ fsm_new_statealloc(const struct fsm_alloc *alloc, size_t statealloc) return NULL; } - if (!fsm_eager_endid_init(new)) { - f_free(new->alloc, new->states); - f_free(new->alloc, new); - fsm_capture_free(new); - fsm_endid_free(new); - return NULL; - } - if (!fsm_eager_output_init(new)) { f_free(new->alloc, new->states); f_free(new->alloc, new); @@ -153,7 +143,6 @@ fsm_move(struct fsm *dst, struct fsm *src) dst->capture_info = src->capture_info; dst->endid_info = src->endid_info; - dst->eager_endid_info = src->eager_endid_info; dst->eager_output_info = src->eager_output_info; f_free(src->alloc, src); diff --git a/src/libfsm/internal.h b/src/libfsm/internal.h index 48cb64277..46997c82a 100644 --- a/src/libfsm/internal.h +++ b/src/libfsm/internal.h @@ -61,10 +61,6 @@ struct fsm_state { /* meaningful within one particular transformation only */ unsigned int visited:1; - /* If 0, then this state has no need for checking - * the fsm->eager_endid_info struct. */ - unsigned int has_eager_endids:1; - /* If 0, then this state has no need for checking * the fsm->eager_output_info struct. */ unsigned int has_eager_outputs:1; @@ -83,7 +79,6 @@ struct fsm { struct fsm_capture_info *capture_info; struct endid_info *endid_info; - struct eager_endid_info *eager_endid_info; struct eager_output_info *eager_output_info; }; diff --git a/src/libfsm/libfsm.syms b/src/libfsm/libfsm.syms index 0876f4b1a..a1a8c72a9 100644 --- a/src/libfsm/libfsm.syms +++ b/src/libfsm/libfsm.syms @@ -96,12 +96,7 @@ fsm_setendid fsm_mapendids fsm_increndids -fsm_seteagerendid -# debug: should it be added to public API? -fsm_eager_endid_dump fsm_endid_dump -# short term hack -fsm_eager_endid_set_cb fsm_seteageroutput fsm_seteageroutputonends diff --git a/src/libfsm/merge.c b/src/libfsm/merge.c index b693b2b8b..ccc1568ff 100644 --- a/src/libfsm/merge.c +++ b/src/libfsm/merge.c @@ -22,7 +22,6 @@ #include "capture.h" #include "internal.h" #include "endids.h" -#include "eager_endid.h" #include "eager_output.h" #define LOG_MERGE_ENDIDS 0 @@ -41,9 +40,6 @@ copy_capture_actions(struct fsm *dst, struct fsm *src); static int copy_end_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src); -static int -copy_eager_end_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src); - static int copy_eager_output_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src); @@ -121,11 +117,6 @@ merge(struct fsm *dst, struct fsm *src, return NULL; } - if (!copy_eager_end_ids(dst, src, *base_src)) { - /* non-recoverable -- destructive operation */ - return NULL; - } - if (!copy_eager_output_ids(dst, src, *base_src)) { /* non-recoverable -- destructive operation */ return NULL; @@ -212,41 +203,6 @@ copy_end_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src) return fsm_endid_iter_bulk(src, copy_end_ids_cb, &env); } -struct copy_eager_end_ids_env { - bool ok; - char tag; - struct fsm *dst; - struct fsm *src; - fsm_state_t base_src; -}; - -static int -copy_eager_end_ids_cb(fsm_state_t from, fsm_state_t to, fsm_end_id_t id, void *opaque) -{ - struct copy_eager_end_ids_env *env = opaque; - assert(env->tag == 'E'); - if (!fsm_eager_endid_insert_entry(env->dst, from + env->base_src, to + env->base_src, id)) { - env->ok = false; - return 0; - } - - return 1; -} - -static int -copy_eager_end_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src) -{ - struct copy_eager_end_ids_env env = { - .tag = 'E', /* for Eager */ - .ok = true, - .dst = dst, - .src = src, - .base_src = base_src, - }; - fsm_eager_endid_iter_edges_all(src, copy_eager_end_ids_cb, &env); - return env.ok; -} - struct copy_eager_output_ids_env { bool ok; struct fsm *dst; diff --git a/tests/eager_endids/Makefile b/tests/eager_endids/Makefile deleted file mode 100644 index e75982ea1..000000000 --- a/tests/eager_endids/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -.include "../../share/mk/top.mk" - -TEST.tests/eager_endids != ls -1 tests/eager_endids/eager_endids*.c -TEST_SRCDIR.tests/eager_endids = tests/eager_endids -TEST_OUTDIR.tests/eager_endids = ${BUILD}/tests/eager_endids - -.for n in ${TEST.tests/eager_endids:T:R:C/^eager_endids//} -INCDIR.${TEST_SRCDIR.tests/eager_endids}/eager_endids${n}.c += src/adt -.endfor - -SRC += ${TEST_SRCDIR.tests/eager_endids}/utils.c - -.for n in ${TEST.tests/eager_endids:T:R:C/^eager_endids//} -test:: ${TEST_OUTDIR.tests/eager_endids}/res${n} -SRC += ${TEST_SRCDIR.tests/eager_endids}/eager_endids${n}.c -CFLAGS.${TEST_SRCDIR.tests/eager_endids}/eager_endids${n}.c += -UNDEBUG - -${TEST_OUTDIR.tests/eager_endids}/run${n}: ${TEST_OUTDIR.tests/eager_endids}/eager_endids${n}.o ${TEST_OUTDIR.tests/eager_endids}/utils.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a - ${CC} ${CFLAGS} ${CFLAGS.${TEST_SRCDIR.tests/eager_endids}/eager_endids${n}.c} -o ${TEST_OUTDIR.tests/eager_endids}/run${n} ${TEST_OUTDIR.tests/eager_endids}/eager_endids${n}.o ${TEST_OUTDIR.tests/eager_endids}/utils.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a -${TEST_OUTDIR.tests/eager_endids}/res${n}: ${TEST_OUTDIR.tests/eager_endids}/run${n} - ( ${TEST_OUTDIR.tests/eager_endids}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/eager_endids}/res${n} -.endfor diff --git a/tests/eager_endids/eager_endids1.c b/tests/eager_endids/eager_endids1.c deleted file mode 100644 index e4019fae5..000000000 --- a/tests/eager_endids/eager_endids1.c +++ /dev/null @@ -1,12 +0,0 @@ -#include "utils.h" - -int main(void) -{ - struct eager_endid_test test = { - .patterns = { "abc" }, - .inputs = { - { .input = "abc", .expected_ids = { 1 } }, - }, - }; - return run_test(&test, false, true); -} diff --git a/tests/eager_endids/eager_endids2.c b/tests/eager_endids/eager_endids2.c deleted file mode 100644 index 7ac3638cd..000000000 --- a/tests/eager_endids/eager_endids2.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "utils.h" - -int main(void) -{ - struct eager_endid_test test = { - .patterns = { "ab(c|d|e)" }, - .inputs = { - { .input = "abc", .expected_ids = { 1 } }, - { .input = "abd", .expected_ids = { 1 } }, - { .input = "abe", .expected_ids = { 1 } }, - { .input = "Xabe", .expected_ids = { 1 } }, - { .input = "abeX", .expected_ids = { 1 } }, - { .input = "XabeX", .expected_ids = { 1 } }, - }, - }; - return run_test(&test, false, true); -} diff --git a/tests/eager_endids/eager_endids3.c b/tests/eager_endids/eager_endids3.c deleted file mode 100644 index db16019a7..000000000 --- a/tests/eager_endids/eager_endids3.c +++ /dev/null @@ -1,16 +0,0 @@ -#include "utils.h" - -/* test that eager endids are correctly propagated through fsm_determinise() and fsm_minimise() */ -int main(void) -{ - struct eager_endid_test test = { - .patterns = { "ab(c|d|e)?" }, - .inputs = { - { .input = "ab", .expected_ids = { 1 } }, - { .input = "abc", .expected_ids = { 1 } }, - { .input = "abd", .expected_ids = { 1 } }, - { .input = "abe", .expected_ids = { 1 } }, - }, - }; - return run_test(&test, false, true); -} diff --git a/tests/eager_endids/eager_endids4.c b/tests/eager_endids/eager_endids4.c deleted file mode 100644 index d32f5bf8d..000000000 --- a/tests/eager_endids/eager_endids4.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "utils.h" - -int main(void) -{ - struct eager_endid_test test = { - .patterns = { "abcde$" }, - .inputs = { - { .input = "abcde", .expected_ids = { 1 } }, - { .input = "Xabcde", .expected_ids = { 1 } }, - }, - }; - return run_test(&test, false, true); -} diff --git a/tests/eager_endids/eager_endids5.c b/tests/eager_endids/eager_endids5.c deleted file mode 100644 index bf49b125c..000000000 --- a/tests/eager_endids/eager_endids5.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "utils.h" - -int main(void) -{ - struct eager_endid_test test = { - .patterns = { "^abc$", "^ab*c$" }, - .inputs = { - { .input = "ac", .expected_ids = { 2 } }, - { .input = "abc", .expected_ids = { 1, 2 } }, - { .input = "abbc", .expected_ids = { 2 } }, - }, - }; - return run_test(&test, false, true); -} diff --git a/tests/eager_endids/eager_endids6.c b/tests/eager_endids/eager_endids6.c deleted file mode 100644 index 92dada20d..000000000 --- a/tests/eager_endids/eager_endids6.c +++ /dev/null @@ -1,44 +0,0 @@ -#include "utils.h" - -#define ALL 1 - -int main(void) -{ - /* assert(!"fixme: revisit once remap_eager_endids is more efficient"); */ - - struct eager_endid_test test = { - .patterns = { - "apple", - "banana", - "carrot", - "durian", - "eggplant", - "fig", - "grapefruit", -#if ALL - "hazelnut", - "iceberg lettuce", - "jicama", -#endif - }, - .inputs = { - { .input = "apple", .expected_ids = { 1 } }, - { .input = "banana", .expected_ids = { 2 } }, - { .input = "carrot", .expected_ids = { 3 } }, - { .input = "durian", .expected_ids = { 4 } }, - { .input = "eggplant", .expected_ids = { 5 } }, - { .input = "fig", .expected_ids = { 6 } }, - { .input = "grapefruit", .expected_ids = { 7 } }, -#if ALL - { .input = "hazelnut", .expected_ids = { 8 } }, - { .input = "iceberg lettuce", .expected_ids = { 9 } }, - { .input = "jicama", .expected_ids = { 10 } }, -#endif - { .input = "apple banana fig", .expected_ids = { 1, 2, 6 } }, - }, - }; - - const bool min = getenv("MIN") != NULL; - /* return run_test(&test, false, true); */ - return run_test(&test, min, true); -} diff --git a/tests/eager_endids/utils.c b/tests/eager_endids/utils.c deleted file mode 100644 index 548ea8f64..000000000 --- a/tests/eager_endids/utils.c +++ /dev/null @@ -1,200 +0,0 @@ -#include "utils.h" - -void -fsm_eager_endid_dump(FILE *f, const struct fsm *fsm); - -void -append_eager_endid_cb(fsm_end_id_t id, void *opaque) -{ - struct cb_info *info = (struct cb_info *)opaque; - assert(info->used < MAX_IDS); - - for (size_t i = 0; i < info->used; i++) { - if (info->ids[i] == id) { - return; /* already present */ - } - } - - info->ids[info->used++] = id; -} - -static int -cmp_endid(const void *pa, const void *pb) -{ - const fsm_end_id_t a = *(fsm_end_id_t *)pa; - const fsm_end_id_t b = *(fsm_end_id_t *)pb; - return a < b ? -1 : a > b ? 1 : 0; -} - -struct fsm_options print_options = { - .consolidate_edges = 1, - .comments = 0, - .group_edges = 1, -}; - -static void -dump(const struct fsm *fsm) -{ - fsm_print(stderr, fsm, - &print_options, NULL, FSM_PRINT_DOT); -} - -int -run_test(const struct eager_endid_test *test, bool minimise, bool allow_extra_endids) -{ - struct fsm *fsms[MAX_PATTERNS] = {0}; - size_t fsms_used = 0; - int ret; - - int log = 0; - { - const char *logstr = getenv("LOG"); - if (logstr != NULL) { - log = atoi(logstr); - } - } - - for (size_t i = 0; i < MAX_PATTERNS; i++) { - const char *p = test->patterns[i]; - if (test->patterns[i] == NULL) { break; } - - struct fsm *fsm = re_comp(RE_NATIVE, fsm_sgetc, &p, NULL, 0, NULL); - assert(fsm != NULL); - - /* Zero is used to terminate expected_ids, so don't use it here. */ - const fsm_end_id_t endid = (fsm_end_id_t) (i + 1); - ret = fsm_seteagerendid(fsm, endid); - assert(ret == 1); - - if (log) { - fprintf(stderr, "==== source DFA %zd (pre det+min)\\n", i); - if (log > 1) { dump(fsm); } - fsm_eager_endid_dump(stderr, fsm); - fprintf(stderr, "====\n"); - } - - // consolidate_edges - - ret = fsm_determinise(fsm); - assert(ret == 1); - - if (minimise) { - ret = fsm_minimise(fsm); - assert(ret == 1); - } - - /* TODO: assert that it doesn't match the empty string? - * Eager endids will always report true for those, no matter the input. */ - - if (log) { - fprintf(stderr, "==== source DFA %zd (post det+min)\\n", i); - if (log > 1) { dump(fsm); } - fsm_eager_endid_dump(stderr, fsm); - fprintf(stderr, "====\n"); - } - - fsms[fsms_used++] = fsm; - } - - /* If there's only one pattern this just returns fsms[0]. */ - struct fsm *fsm = fsm_union_array(fsms_used, fsms, NULL); - assert(fsm != NULL); - - if (log) { - fprintf(stderr, "==== combined (pre det+min)\\n"); - if (log > 1) { dump(fsm); } - fsm_eager_endid_dump(stderr, fsm); - fprintf(stderr, "====\n"); - } - - fprintf(stderr, "=== determinising combined... NFA has %u states\n", fsm_countstates(fsm)); - ret = fsm_determinise(fsm); - assert(ret == 1); - fprintf(stderr, "=== determinising combined...done, DFA has %u states\n", fsm_countstates(fsm)); - - if (minimise) { - ret = fsm_minimise(fsm); - fprintf(stderr, "=== minimised combined...done, DFA has %u states\n", fsm_countstates(fsm)); - assert(ret == 1); - } - - if (log) { - fprintf(stderr, "==== combined (post det+min)\n"); - if (log > 1) { dump(fsm); } - fsm_eager_endid_dump(stderr, fsm); - fprintf(stderr, "====\n"); - } - - - struct cb_info endids = { 0 }; - fsm_eager_endid_set_cb(fsm, append_eager_endid_cb, &endids); - - for (size_t i_i = 0; i_i < MAX_INPUTS; i_i++) { - endids.used = 0; - const char *input = test->inputs[i_i].input; - if (input == NULL) { break; } - - size_t expected_id_count = 0; - for (size_t id_i = 0; id_i < MAX_ENDIDS; id_i++) { - const fsm_end_id_t id = test->inputs[i_i].expected_ids[id_i]; - if (id == 0) { break; } - expected_id_count++; - - /* must be ascending */ - if (id_i > 0) { - assert(id > test->inputs[i_i].expected_ids[id_i - 1]); - } - } - - if (log) { - fprintf(stderr, "%s: input %zd: \"%s\", expecting %zd ids:", - __func__, i_i, input, expected_id_count); - for (size_t i = 0; i < expected_id_count; i++) { - fprintf(stderr, " %d", test->inputs[i_i].expected_ids[i]); - } - fprintf(stderr, "\n"); - } - - fsm_state_t end; - ret = fsm_exec(fsm, fsm_sgetc, &input, &end, NULL); - if (expected_id_count == 0) { - assert(ret == 0); /* no match */ - } else { - assert(ret == 1); - } - - /* NEXT match IDs, sort endids[] buffer first */ - qsort(endids.ids, endids.used, sizeof(endids.ids[0]), cmp_endid); - - if (log) { - fprintf(stderr, "-- got %zd:", endids.used); - for (size_t i = 0; i < endids.used; i++) { - fprintf(stderr, " %d", endids.ids[i]); - } - fprintf(stderr, "\n"); - } - - if (!allow_extra_endids) { - assert(endids.used == expected_id_count); - } else { - assert(endids.used >= expected_id_count); - } - - size_t floor = 0; - for (size_t exp_i = 0; exp_i < endids.used; exp_i++) { - bool found = false; - for (size_t got_i = floor; got_i < endids.used; got_i++) { - if (endids.ids[got_i] == test->inputs[i_i].expected_ids[exp_i]) { - floor = got_i + 1; - found = true; - break; - } - } - assert(found); - } - } - - fsm_free(fsm); - - return EXIT_SUCCESS;; -} diff --git a/tests/eager_endids/utils.h b/tests/eager_endids/utils.h deleted file mode 100644 index 7fa1eb616..000000000 --- a/tests/eager_endids/utils.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef UTILS_H -#define UTILS_H - -#include -#include -#include -#include -#include - -#include - -#include - -#include - -#include -#include -#include -#include -#include -#include - -#define MAX_IDS 10 - -#include - -#include - -#define MAX_PATTERNS 10 -#define MAX_INPUTS 16 -#define MAX_ENDIDS 8 - -struct eager_endid_test { - const char *patterns[MAX_PATTERNS]; - - struct { - const char *input; - /* Terminated by 0. pattern[i] => id of i+1. Must be sorted. */ - fsm_end_id_t expected_ids[MAX_ENDIDS]; - } inputs[MAX_INPUTS]; -}; - -int -run_test(const struct eager_endid_test *test, bool minimise, bool allow_extra_endids); - -struct cb_info { - size_t used; - fsm_end_id_t ids[MAX_IDS]; -}; - -void -append_eager_endid_cb(fsm_end_id_t id, void *opaque); - -#endif