Skip to content

Commit

Permalink
Merge branch 'sv/reject-unsupported-pcre-extensions-during-ast-analys…
Browse files Browse the repository at this point in the history
…is' into sv/integrate-combinable-DFA-capture-resolution--fuzzing-branch

Integrate #447's changes for continuing local fuzzing. It hasn't been reviewed yet.
  • Loading branch information
silentbicycle committed Oct 19, 2023
2 parents 239927c + 928a696 commit 2f3e6d9
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 22 deletions.
1 change: 1 addition & 0 deletions src/libre/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ enum ast_flags {
AST_FLAG_ANCHORED_START = 1 << 6,
AST_FLAG_ANCHORED_END = 1 << 7,
AST_FLAG_END_NL = 1 << 8,
AST_FLAG_MATCHES_1NEWLINE= 1 << 9,

AST_FLAG_NONE = 0x00
};
Expand Down
122 changes: 105 additions & 17 deletions src/libre/ast_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,46 @@ set_flags_subtree(struct ast_expr *n, enum ast_flags flags)
}
}

/*
 * Report whether the node n can consume a single '\n' character.
 *
 * Returns 1 when:
 *  - AST_FLAG_MATCHES_1NEWLINE is already set on n, or
 *  - n is a literal or codepoint equal to '\n', or
 *  - n is a range whose endpoints are both literals or both
 *    codepoints and which spans '\n', or
 *  - n is a subtraction whose left operand can consume a newline
 *    while its right operand cannot.
 *
 * Returns 0 for everything else, including any node for which
 * can_consume_input() is false. Other node types are only
 * recognised through the flag check.
 */
static int
can_consume_single_newline(struct ast_expr *n)
{
	/* A node that cannot consume input cannot consume a newline. */
	if (!can_consume_input(n)) {
		return 0;
	}

	/* The flag is already set on this node; nothing more to inspect. */
	if (n->flags & AST_FLAG_MATCHES_1NEWLINE) {
		return 1;
	}

	if (n->type == AST_EXPR_LITERAL) {
		return n->u.literal.c == '\n';
	}

	if (n->type == AST_EXPR_CODEPOINT) {
		return n->u.codepoint.u == (uint32_t)'\n';
	}

	if (n->type == AST_EXPR_SUBTRACT) {
		/* a - b: the newline must be in a and must not be removed by b. */
		if (!can_consume_single_newline(n->u.subtract.a)) {
			return 0;
		}
		return !can_consume_single_newline(n->u.subtract.b);
	}

	if (n->type == AST_EXPR_RANGE) {
		const int literal_pair =
		    n->u.range.from.type == AST_ENDPOINT_LITERAL
		    && n->u.range.to.type == AST_ENDPOINT_LITERAL;
		const int codepoint_pair =
		    n->u.range.from.type == AST_ENDPOINT_CODEPOINT
		    && n->u.range.to.type == AST_ENDPOINT_CODEPOINT;

		if (literal_pair) {
			return n->u.range.from.u.literal.c <= '\n'
			    && n->u.range.to.u.literal.c >= '\n';
		}

		if (codepoint_pair) {
			return n->u.range.from.u.codepoint.u <= '\n'
			    && n->u.range.to.u.codepoint.u >= '\n';
		}

		/*
		 * Named endpoints (TODO: unreachable?) and mixed
		 * literal/codepoint pairs are reported as not matching.
		 * NOTE(review): confirm that a mixed-endpoint range can
		 * never span '\n' by the time this runs.
		 */
		return 0;
	}

	return 0;
}

struct anchoring_env {
enum re_flags re_flags;

Expand Down Expand Up @@ -977,6 +1017,9 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
break;
case AST_EXPR_CODEPOINT:
case AST_EXPR_RANGE:
if (can_consume_single_newline(n)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}
break; /* handled outside switch/case */

case AST_EXPR_CONCAT: {
Expand Down Expand Up @@ -1145,6 +1188,13 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
}
}

for (i = 0; i < n->u.concat.count; i++) {
struct ast_expr *child = n->u.concat.n[i];
if (can_consume_single_newline(child)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}
}

break;
}

Expand Down Expand Up @@ -1183,7 +1233,6 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
any_sat = 1;
} else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE
|| res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) {
assert(child->flags & AST_FLAG_UNSATISFIABLE);
continue;
} else {
return res;
Expand All @@ -1197,6 +1246,10 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
all_end_anchored = 0;
}
}

if (child->flags & AST_FLAG_MATCHES_1NEWLINE) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}
}

if (!env->past_always_consuming && all_set_past_always_consuming) {
Expand Down Expand Up @@ -1285,6 +1338,21 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
n->u.repeat.max = 1;
}
}

if (can_consume_single_newline(n->u.repeat.e)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}

if (n->u.repeat.e->flags & AST_FLAG_ANCHORED_END && n->u.repeat.min > 0) {
/* FIXME: if repeating something that is always
* anchored at the end, repeat.max could be
* capped at 1, but I have not yet found any
* inputs where that change is necessary to
* produce a correct result. */
LOG(3 - LOG_ANCHORING,
"%s: REPEAT: repeating ANCHORED_END subtree >0 times -> ANCHORED_END\n", __func__);
set_flags(n, n->u.repeat.e->flags & END_ANCHOR_FLAG_MASK);
}
break;

case AST_EXPR_GROUP:
Expand All @@ -1302,13 +1370,18 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
}
if (res == AST_ANALYSIS_UNSATISFIABLE) {
LOG(3 - LOG_ANCHORING,
"%s: GROUP: setting UNSATISFIABLE due to unsatisfiable childn",
"%s: GROUP: setting UNSATISFIABLE due to unsatisfiable child",
__func__);
set_flags(n, AST_FLAG_UNSATISFIABLE);
}
if (res != AST_ANALYSIS_OK) {
return res;
}

if (n->u.group.e->flags & AST_FLAG_MATCHES_1NEWLINE) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}

break;

case AST_EXPR_SUBTRACT:
Expand Down Expand Up @@ -1345,6 +1418,10 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
}
return res;
}
if (can_consume_single_newline(n->u.repeat.e)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}

break;

default:
Expand Down Expand Up @@ -1412,18 +1489,19 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
* have reached it. */
set_flags(n, AST_FLAG_ANCHORED_END);

if (env->followed_by_consuming_newline) {
LOG(3 - LOG_ANCHORING,
"%s: RANGE: rejecting possible newline match after $ as unsupported\n",
__func__);
set_flags(n, AST_FLAG_UNSATISFIABLE);
return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE;
} else if (env->followed_by_consuming) {
LOG(3 - LOG_ANCHORING,
"%s: END anchor & followed_by_consuming, setting UNSATISFIABLE\n",
__func__);
set_flags(n, AST_FLAG_UNSATISFIABLE);
return AST_ANALYSIS_UNSATISFIABLE;
if (env->followed_by_consuming) {
if (env->followed_by_consuming_newline) {
LOG(3 - LOG_ANCHORING,
"%s: END anchor & followed_by_consuming, returning UNSUPPORTED_PCRE\n",
__func__);
return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE;
} else {
LOG(3 - LOG_ANCHORING,
"%s: END anchor & followed_by_consuming, setting UNSATISFIABLE\n",
__func__);
set_flags(n, AST_FLAG_UNSATISFIABLE);
return AST_ANALYSIS_UNSATISFIABLE;
}
}

break;
Expand Down Expand Up @@ -1484,7 +1562,8 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
set_flags(n, AST_FLAG_UNSATISFIABLE);
}
} else if (res != AST_ANALYSIS_OK) {
set_flags(n, AST_FLAG_UNSATISFIABLE);
LOG(3 - LOG_ANCHORING,
"%s: CONCAT: got res of %d, bubbling up\n", __func__, res);
return res;
}

Expand All @@ -1500,6 +1579,15 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
env->followed_by_consuming = 1;
}

if (!env->followed_by_consuming_newline &&
(child_env.followed_by_consuming_newline
|| child->flags & AST_FLAG_MATCHES_1NEWLINE)) {
LOG(3 - LOG_ANCHORING,
"%s: setting followed_by_consuming_newline due to child %p's analysis\n",
__func__, (void *)child);
env->followed_by_consuming_newline = 1;
}

if (!env->before_start_anchor && child_env.before_start_anchor
&& !is_nullable(child)) {
LOG(3 - LOG_ANCHORING,
Expand Down Expand Up @@ -1554,8 +1642,8 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
any_sat = 1;
} else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE
|| res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) {
assert(child->flags & AST_FLAG_UNSATISFIABLE);
continue;
LOG(3 - LOG_ANCHORING, "%s: got res of UNSUPPORTED_*, bubbling up\n", __func__);
return res;
} else {
return res;
}
Expand Down
1 change: 1 addition & 0 deletions src/libre/print/tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ fprintf_flags(FILE *f, enum ast_flags flags)
PR_FLAG(END_NL, "N");
PR_FLAG(CAN_CONSUME, "c");
PR_FLAG(ALWAYS_CONSUMES, "C");
PR_FLAG(MATCHES_1NEWLINE, "n");

#undef PR_FLAG

Expand Down
6 changes: 6 additions & 0 deletions tests/capture/captest.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,14 @@ captest_run_case(const struct captest_case_single *testcase,
if (testcase->match == SHOULD_REJECT_AS_UNSUPPORTED) {
if (fsm != NULL) {
fsm_free(fsm);
if (verbosity > 0) {
printf("FAIL (expected UNSUPPORTED)\n");
}
return CAPTEST_RUN_CASE_FAIL;
}
if (verbosity > 0) {
printf("pass\n");
}
return CAPTEST_RUN_CASE_PASS;
}

Expand Down
10 changes: 5 additions & 5 deletions tests/capture/capture_test_case_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -1370,11 +1370,11 @@ const struct captest_case_single single_cases[] = {

{
.regex = "a|_$[^b]",
.input = "a",
.count = 1,
.expected = {
{ .pos = {0, 1}, },
},
.match = SHOULD_REJECT_AS_UNSUPPORTED,
},
{
.regex = "^a|$[^x]b*",
.match = SHOULD_REJECT_AS_UNSUPPORTED,
},

{
Expand Down
1 change: 1 addition & 0 deletions tests/pcre/in48.re
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
^a|$[^x]b*
1 change: 1 addition & 0 deletions tests/pcre/out48.err
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tests/pcre/in48.re: Unsupported PCRE edge case

0 comments on commit 2f3e6d9

Please sign in to comment.