Skip to content

Commit

Permalink
Merge branch 'f/main' into sv/add-cdata-codegen-with-eager-outputs
Browse files Browse the repository at this point in the history
Addressed merge conflicts.
  • Loading branch information
silentbicycle committed Oct 12, 2024
2 parents 7a935b4 + c7b9e1d commit f0e58bc
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 54 deletions.
21 changes: 12 additions & 9 deletions fuzz/target.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,11 @@ fsm_eager_output_dump(FILE *f, const struct fsm *fsm);
static int
fuzz_eager_output(const uint8_t *data, size_t size)
{
if (size > 0) {
const unsigned seed = data[0];
srand(seed);
}

struct feo_env env = {
.ok = true,
.pattern_count = 0,
Expand All @@ -446,9 +451,6 @@ fuzz_eager_output(const uint8_t *data, size_t size)

size_t max_pattern_length = 0;

const unsigned seed = size == 0 ? 0 : data[0];
srand(seed);

/* chop data into a series of patterns */
{
size_t prev = 0;
Expand Down Expand Up @@ -506,13 +508,14 @@ fuzz_eager_output(const uint8_t *data, size_t size)
}
}

struct re_anchoring_info anchorage[MAX_PATTERNS] = {0};
enum re_is_anchored_res anchorage[MAX_PATTERNS] = {0};

/* for each pattern, attempt to compile to a DFA */
for (size_t p_i = 0; p_i < env.pattern_count; p_i++) {
const char *p = env.patterns[p_i];

if (!re_is_anchored(RE_PCRE, fsm_sgetc, &p, 0, NULL, &anchorage[p_i])) {
enum re_is_anchored_res a = re_is_anchored(RE_PCRE, fsm_sgetc, &p, 0, NULL);
if (a == RE_IS_ANCHORED_ERROR) {
continue; /* unsupported regex */
}

Expand Down Expand Up @@ -602,8 +605,8 @@ fuzz_eager_output(const uint8_t *data, size_t size)
}

entries[used].fsm = cp;
entries[used].anchored_start = anchorage[i].start;
entries[used].anchored_end = anchorage[i].end;
entries[used].anchored_start = anchorage[i] & RE_IS_ANCHORED_START;
entries[used].anchored_end = anchorage[i] & RE_IS_ANCHORED_END;
used++;
}

Expand Down Expand Up @@ -648,7 +651,7 @@ fuzz_eager_output(const uint8_t *data, size_t size)
* Use the combined DFA to generate matches, check that the
* match behavior agrees with the individual DFA copies. */
env.current_pattern = (size_t)-1;
if (!fsm_generate_matches(env.combined, max_pattern_length, seed, gen_combined_check_individual_cb, &env)) {
if (!fsm_generate_matches(env.combined, max_pattern_length, 1, gen_combined_check_individual_cb, &env)) {
goto cleanup;
}

Expand All @@ -658,7 +661,7 @@ fuzz_eager_output(const uint8_t *data, size_t size)
/* check behavior against the combined DFA. */
for (size_t i = 0; i < env.pattern_count; i++) {
env.current_pattern = i;
if (!fsm_generate_matches(env.combined, max_pattern_length, seed, gen_individual_check_combined_cb, &env)) {
if (!fsm_generate_matches(env.combined, max_pattern_length, 1, gen_individual_check_combined_cb, &env)) {
goto cleanup;
}
}
Expand Down
2 changes: 1 addition & 1 deletion include/adt/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
/* If non-zero, expand the timer macros defined below, otherwise
* they compile away. */
#ifndef TRACK_TIMES
#define TRACK_TIMES 0
#define TRACK_TIMES (0 && !BUILD_FOR_FUZZER)
#endif

#if EXPENSIVE_CHECKS && TRACK_TIMES
Expand Down
15 changes: 15 additions & 0 deletions include/fsm/fsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,21 @@ fsm_remove_epsilons(struct fsm *fsm);
int
fsm_determinise(struct fsm *fsm);

/* Determinise, with a passed in configuration
* and a distinct return value for reaching
* the state limit. */
/* Configuration passed to fsm_determinise_with_config().
 * A NULL config pointer is treated the same as state_limit == 0. */
struct fsm_determinise_config {
/* Upper bound on state counts during determinisation; 0 disables
 * the check. When non-zero, the input's state count and the
 * running count of constructed DFA states are each compared
 * against it (strictly greater-than), and exceeding it ends the
 * call with FSM_DETERMINISE_WITH_CONFIG_STATE_LIMIT_REACHED. */
size_t state_limit; /* 0: no limit */
};
/* Result of fsm_determinise_with_config(). */
enum fsm_determinise_with_config_res {
/* Determinisation completed (also returned when the FSM has
 * no start state, in which case there is nothing to do). */
FSM_DETERMINISE_WITH_CONFIG_OK,
/* A non-zero state_limit was exceeded, either by the input's
 * state count or while constructing new DFA states. */
FSM_DETERMINISE_WITH_CONFIG_STATE_LIMIT_REACHED,
/* Any other failure, e.g. epsilon removal or an internal
 * allocation failed. NOTE(review): the name suggests errno
 * describes the cause -- confirm against the callees. */
FSM_DETERMINISE_WITH_CONFIG_ERRNO,
};
/* Determinise fsm in place, honouring the optional config.
 *
 * config may be NULL, which means no state limit; this is how
 * fsm_determinise() calls it. Unlike fsm_determinise(), which
 * collapses every failure to 0, the result distinguishes hitting
 * the state limit from other errors. */
enum fsm_determinise_with_config_res
fsm_determinise_with_config(struct fsm *fsm,
const struct fsm_determinise_config *config);

/*
* Make a DFA complete, as per fsm_iscomplete.
*/
Expand Down
9 changes: 5 additions & 4 deletions include/fsm/walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,10 @@ fsm_walk_edges(const struct fsm *fsm, void *opaque,
* functionally equivalent cases makes testing dramatically faster,
* but exploring every edge could be added later.
*
* If seed is zero then it will generate the first label in the label
* set, otherwise a label from the set will be chosen using rand()
* (favoring printable characters).
* If randomized is zero then it will generate the first label in the
* label set, otherwise a label from the set will be chosen using rand()
* (favoring printable characters). The caller can use srand()
* beforehand to set a PRNG seed.
*
* Note: fsm is non-const because it calls fsm_trim on the FSM
* internally. This records the shortest distance from each state to an
Expand All @@ -118,7 +119,7 @@ fsm_generate_matches_cb(const struct fsm *fsm,
const char *input, size_t input_length,
fsm_state_t end_state, void *opaque);
int
fsm_generate_matches(struct fsm *fsm, size_t max_length, unsigned seed,
fsm_generate_matches(struct fsm *fsm, size_t max_length, int randomized,
fsm_generate_matches_cb *cb, void *opaque);

/* Callback provided for the most basic use case for
Expand Down
25 changes: 13 additions & 12 deletions include/re/re.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,20 +136,21 @@ re_comp(enum re_dialect dialect,
const struct fsm_alloc *alloc,
enum re_flags flags, struct re_err *err);

struct re_anchoring_info {
int start;
int end;
/* FIXME: this could also check for AST_FLAG_NULLABLE, AST_FLAG_UNSATISFIABLE,
* AST_FLAG_ALWAYS_CONSUMES, AST_FLAG_CAN_CONSUME */
};

/* Parse and analyze the regex enough to determine whether it is
* anchored at the start and/or end. Returns 0 if the regex is not
* supported, otherwise returns 1 and writes anchoring flags into *info. */
int
* anchored at the start and/or end.
*
* As long as the result is checked for RE_IS_ANCHORED_ERROR first,
* the result can be used like a bitset. */
/* Result of re_is_anchored(). Apart from ERROR, the values are bit
 * flags: BOTH is START | END. ERROR (0xFFFF) has the START and END
 * bits set as well, so masking an unchecked ERROR result would look
 * like "anchored at both ends" -- compare against ERROR before
 * testing individual bits. */
enum re_is_anchored_res {
/* Neither anchoring flag is set on the parsed expression. */
RE_IS_ANCHORED_NONE = 0x00,
/* The expression is anchored at the start. */
RE_IS_ANCHORED_START = 0x01,
/* The expression is anchored at the end. */
RE_IS_ANCHORED_END = 0x02,
/* Anchored at both start and end (START | END). */
RE_IS_ANCHORED_BOTH = 0x03,
/* The dialect was not recognised or the regex failed to parse. */
RE_IS_ANCHORED_ERROR = 0xFFFF,
};
enum re_is_anchored_res
re_is_anchored(enum re_dialect dialect, re_getchar_fun *f, void *opaque,
enum re_flags flags, struct re_err *err,
struct re_anchoring_info *info);
enum re_flags flags, struct re_err *err);

/*
* Return a human-readable string describing a given error code. The string
Expand Down
46 changes: 37 additions & 9 deletions src/libfsm/determinise.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,20 @@ dump_labels(FILE *f, const uint64_t labels[4])
}
}

int
fsm_determinise(struct fsm *nfa)
enum fsm_determinise_with_config_res
fsm_determinise_with_config(struct fsm *nfa,
const struct fsm_determinise_config *config)
{
int res = 0;
enum fsm_determinise_with_config_res res = FSM_DETERMINISE_WITH_CONFIG_ERRNO;
struct mappingstack *stack = NULL;

struct interned_state_set_pool *issp = NULL;
struct map map = { NULL, 0, 0, NULL };
struct mapping *curr = NULL;
size_t dfacount = 0;
const size_t state_limit = config == NULL
? 0
: config->state_limit;

struct analyze_closures_env ac_env = { 0 };

Expand All @@ -43,7 +47,7 @@ fsm_determinise(struct fsm *nfa)
*/
if (fsm_has(nfa, fsm_hasepsilons)) {
if (!fsm_remove_epsilons(nfa)) {
return 0;
return FSM_DETERMINISE_WITH_CONFIG_ERRNO;
}
}

Expand All @@ -55,7 +59,12 @@ fsm_determinise(struct fsm *nfa)

issp = interned_state_set_pool_alloc(nfa->alloc);
if (issp == NULL) {
return 0;
return FSM_DETERMINISE_WITH_CONFIG_ERRNO;
}

if (state_limit != 0 && fsm_countstates(nfa) > state_limit) {
res = FSM_DETERMINISE_WITH_CONFIG_STATE_LIMIT_REACHED;
goto cleanup;
}

{
Expand All @@ -77,7 +86,7 @@ fsm_determinise(struct fsm *nfa)
*/

if (!fsm_getstart(nfa, &start)) {
res = 1;
res = FSM_DETERMINISE_WITH_CONFIG_OK;
goto cleanup;
}

Expand Down Expand Up @@ -153,6 +162,11 @@ fsm_determinise(struct fsm *nfa)
assert(m->dfastate < dfacount);
} else {
/* not found -- add a new one and push it to the stack for processing */

if (state_limit != 0 && dfacount > state_limit) {
res = FSM_DETERMINISE_WITH_CONFIG_STATE_LIMIT_REACHED;
goto cleanup;
}
if (!map_add(&map, dfacount, iss, &m)) {
goto cleanup;
}
Expand All @@ -174,8 +188,6 @@ fsm_determinise(struct fsm *nfa)
}

ac_env.output_count = 0;

/* All elements in sclosures[] are interned, so they will be freed later. */
} while ((curr = stack_pop(stack)));

{
Expand Down Expand Up @@ -267,7 +279,7 @@ fsm_determinise(struct fsm *nfa)
assert(fsm_all(nfa, fsm_isdfa));
#endif

res = 1;
res = FSM_DETERMINISE_WITH_CONFIG_OK;

cleanup:
map_free(&map);
Expand Down Expand Up @@ -318,6 +330,22 @@ fsm_determinise(struct fsm *nfa)
return res;
}

int
fsm_determinise(struct fsm *nfa)
{
enum fsm_determinise_with_config_res res = fsm_determinise_with_config(nfa, NULL);
switch (res) {
case FSM_DETERMINISE_WITH_CONFIG_OK:
return 1;
case FSM_DETERMINISE_WITH_CONFIG_STATE_LIMIT_REACHED:
/* unreachable */
return 0;
case FSM_DETERMINISE_WITH_CONFIG_ERRNO:
default:
return 0;
}
}

/* Add DFA_state to the list for NFA_state. */
static int
add_reverse_mapping(const struct fsm_alloc *alloc,
Expand Down
12 changes: 4 additions & 8 deletions src/libfsm/gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ struct gen_ctx {
static bool
gen_init_outer(struct fsm *fsm, size_t max_length,
fsm_generate_matches_cb *cb, void *opaque,
bool randomized, unsigned seed);
bool randomized);

static bool
gen_init(struct gen_ctx *ctx, struct fsm *fsm);
Expand Down Expand Up @@ -140,7 +140,7 @@ static bool
grow_stack(struct gen_ctx *ctx);

int
fsm_generate_matches(struct fsm *fsm, size_t max_length, unsigned seed,
fsm_generate_matches(struct fsm *fsm, size_t max_length, int randomized,
fsm_generate_matches_cb *cb, void *opaque)
{
if (max_length == 0) {
Expand All @@ -154,7 +154,7 @@ fsm_generate_matches(struct fsm *fsm, size_t max_length, unsigned seed,

INIT_TIMERS();
TIME(&pre);
int res = gen_init_outer(fsm, max_length, cb, opaque, seed != 0, seed);
int res = gen_init_outer(fsm, max_length, cb, opaque, randomized != 0);
TIME(&post);

DIFF_MSEC("fsm_generate_matches", pre, post, NULL);
Expand Down Expand Up @@ -204,7 +204,7 @@ fsm_generate_cb_printf(const struct fsm *fsm,
static bool
gen_init_outer(struct fsm *fsm, size_t max_length,
fsm_generate_matches_cb *cb, void *opaque,
bool randomized, unsigned seed)
bool randomized)
{
int res = false;
if (fsm == NULL || cb == NULL || max_length == 0) {
Expand All @@ -213,10 +213,6 @@ gen_init_outer(struct fsm *fsm, size_t max_length,

assert(fsm_all(fsm, fsm_isdfa)); /* DFA-only */

if (randomized) {
srand(seed);
}

#if LOG_GEN > 1
fprintf(stderr, "%s: %u states\n", __func__, fsm_countstates(fsm));
#endif
Expand Down
1 change: 1 addition & 0 deletions src/libfsm/libfsm.syms
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ fsm_countstates
fsm_trim
fsm_reverse
fsm_determinise
fsm_determinise_with_config
fsm_remove_epsilons
fsm_complete
fsm_minimise
Expand Down
4 changes: 4 additions & 0 deletions src/libfsm/trim.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,10 @@ integrity_check(const char *descr, const struct fsm *fsm)
return;
#endif

#if !EXPENSIVE_CHECKS
return;
#endif

if (LOG_TRIM > 1) {
fprintf(stderr, "integrity check: %s...\n", descr);
}
Expand Down
25 changes: 14 additions & 11 deletions src/libre/re.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,37 +335,40 @@ re_is_literal(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque,
return -1;
}

/* FIXME: placeholder interface */
int
enum re_is_anchored_res
re_is_anchored(enum re_dialect dialect, re_getchar_fun *getc, void *opaque,
enum re_flags flags, struct re_err *err,
struct re_anchoring_info *info)
enum re_flags flags, struct re_err *err)
{
/* FIXME: copy/pasted from above, factor out common */
/* FIXME: copy/pasted from above, factor out common code later. */

struct ast *ast;
const struct dialect *m;
int unsatisfiable;

assert(getc != NULL);
assert(info != NULL);

m = re_dialect(dialect);
if (m == NULL) {
if (err != NULL) { err->e = RE_EBADDIALECT; }
return 0;
return RE_IS_ANCHORED_ERROR;
}

flags |= m->flags;

ast = re_parse(dialect, getc, opaque, flags, err, &unsatisfiable);
if (ast == NULL) {
return 0;
return RE_IS_ANCHORED_ERROR;
}

info->start = (ast->expr->flags & AST_FLAG_ANCHORED_START) != 0;
info->end = (ast->expr->flags & AST_FLAG_ANCHORED_END) != 0;
/* Copy anchoring flags, ending up with NONE, START, END, or BOTH. */
enum re_is_anchored_res res = RE_IS_ANCHORED_NONE;
if (ast->expr->flags & AST_FLAG_ANCHORED_START) {
res |= RE_IS_ANCHORED_START;
}
if (ast->expr->flags & AST_FLAG_ANCHORED_END) {
res |= RE_IS_ANCHORED_END;
}

ast_free(ast);
return 1;
return res;
}

0 comments on commit f0e58bc

Please sign in to comment.