From 29ff3704dca97ef3099768776f681aef76da3b80 Mon Sep 17 00:00:00 2001
From: Timothy Gonzalez <105177619+Timothy-Gonzalez@users.noreply.github.com>
Date: Sun, 11 Feb 2024 17:46:20 -0600
Subject: [PATCH] Add match operator

---
Review notes (this section is ignored when the patch is applied):
the header names on system #include lines of pre-existing files (evaluators.c,
evaluators.h, output.h) were missing from this copy and are left as found;
restore them from the original commit before applying. Indentation was
reconstructed assuming 4 spaces.

 src/assertion-result.h |   6 +
 src/evaluators.c       |   6 +-
 src/evaluators.h       |   5 +
 src/match.c            | 264 +++++++++++++++++++++++++++++++++++++++++
 src/match.h            |  48 ++++++++
 src/output.h           |   1 +
 6 files changed, 329 insertions(+), 1 deletion(-)
 create mode 100644 src/match.c
 create mode 100644 src/match.h

diff --git a/src/assertion-result.h b/src/assertion-result.h
index 8a56a1a..eee37f5 100644
--- a/src/assertion-result.h
+++ b/src/assertion-result.h
@@ -13,6 +13,8 @@ enum caught_operator
     CAUGHT_OP_GREATER_THAN_EQ,
     CAUGHT_OP_IN,
     CAUGHT_OP_NOT_IN,
+    CAUGHT_OP_MATCH,
+    CAUGHT_OP_NOT_MATCH,
 };
 
 static char *CAUGHT_OPERATOR_STRS[] = {
@@ -24,6 +26,8 @@ static char *CAUGHT_OPERATOR_STRS[] = {
     ">=",
     "in",
     "not in",
+    "match",
+    "not match",
 };
 
 static char *CAUGHT_OPERATOR_TO_BES[] = {
@@ -35,6 +39,8 @@ static char *CAUGHT_OPERATOR_TO_BES[] = {
     "to be >= to",
     "to be in",
     "to not be in",
+    "to match",
+    "to not match",
 };
 
 enum caught_operator
diff --git a/src/evaluators.c b/src/evaluators.c
index 34f9a04..25e0119 100644
--- a/src/evaluators.c
+++ b/src/evaluators.c
@@ -1,7 +1,7 @@
 #include "evaluators.h"
 #include "output.h"
 #include
-#include
+#include "match.h"
 
 // Evaluators take in a left hand size, operator, and right hand side
 // they then evaluate the result of that expression
@@ -81,6 +81,10 @@ bool caught_internal_evaluator_str(char *lhs, enum caught_operator operator, cha
         return !null_exists && (strstr(rhs, lhs) != NULL);
     case CAUGHT_OP_NOT_IN:
         return null_exists || (strstr(rhs, lhs) == NULL);
+    case CAUGHT_OP_MATCH:
+        return !null_exists && caught_internal_match(lhs, rhs);
+    case CAUGHT_OP_NOT_MATCH:
+        return null_exists || !caught_internal_match(lhs, rhs);
     default:
         caught_output_errorf("Cannot compare strings with %s, only == and != are supported!", caught_operator_to_str(operator));
         return false;
diff --git a/src/evaluators.h b/src/evaluators.h
index 5a53296..68971f6 100644
--- a/src/evaluators.h
+++ b/src/evaluators.h
@@ -1,4 +1,7 @@
 #include
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
 #include
 #include "fork.h"
 #include "assertion-result.h"
@@ -22,6 +25,8 @@ bool caught_internal_evaluator_char(char lhs, enum caught_operator operator, cha
 bool caught_internal_evaluator_char_ptr(char *lhs, enum caught_operator operator, char * rhs);
 bool caught_internal_evaluator_char_array(char *lhs, enum caught_operator operator, char * rhs, ssize_t length);
 
+bool caught_internal_evaluator_str_match(char *lhs, char *fstr);
+
 bool caught_internal_evaluator_str(char *lhs, enum caught_operator operator, char * rhs);
 bool caught_internal_evaluator_str_ptr(char **lhs, enum caught_operator operator, char ** rhs);
 bool caught_internal_evaluator_str_array(char **lhs, enum caught_operator operator, char ** rhs, ssize_t length);
diff --git a/src/match.c b/src/match.c
new file mode 100644
index 0000000..e170bb8
--- /dev/null
+++ b/src/match.c
@@ -0,0 +1,264 @@
+#include "match.h"
+#include "output.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+// Table of supported specifiers and the character rules each one applies
+static caught_internal_matcher MATCHERS[] = {
+    {
+        .specifiers = "id",
+        .whitelist = "0123456789",
+        .blacklist = NULL,
+        .match_all = true,
+        .only_one = '\0',
+    },
+    {
+        .specifiers = "f",
+        .whitelist = "0123456789.",
+        .blacklist = NULL,
+        .only_one = '.',
+        .match_all = true,
+    },
+    {
+        .specifiers = "c",
+        .whitelist = NULL,
+        .blacklist = NULL,
+        .only_one = '\0',
+        .match_all = false,
+    },
+    {
+        .specifiers = "a",
+        .whitelist = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
+        .blacklist = NULL,
+        .only_one = '\0',
+        .match_all = true,
+    },
+    {
+        .specifiers = "w",
+        .whitelist = NULL,
+        .blacklist = "\n\r\v\f\t ",
+        .only_one = '\0',
+        .match_all = true,
+    },
+    {
+        .specifiers = "s",
+        .whitelist = NULL,
+        .blacklist = NULL,
+        .only_one = '\0',
+        .match_all = true,
+    },
+};
+
+// Looks up the matcher whose specifier list contains the given character;
+// reports an error and returns a zeroed matcher when none does
+static caught_internal_matcher get_matcher(char specifier)
+{
+    size_t i;
+    for (i = 0; i < sizeof(MATCHERS) / sizeof(MATCHERS[0]); i++)
+    {
+        size_t j;
+        char *specifiers = MATCHERS[i].specifiers;
+        for (j = 0; specifiers[j] != '\0'; ++j)
+        {
+            if (specifier == specifiers[j])
+            {
+                return MATCHERS[i];
+            }
+        }
+    }
+
+    caught_output_errorf("Invalid format specifier: $%c", specifier);
+    return (caught_internal_matcher){0};
+}
+
+// Splits fstr into literal segments and two-character "$x" specifier segments.
+// Returns a malloc'd array terminated by an entry whose .segment is NULL;
+// release it with break_fstr_segments_cleanup
+static caught_internal_matcher_fstr_segment *break_fstr_segments(char *fstr)
+{
+    size_t length = strlen(fstr);
+
+    size_t parts = 1;
+
+    size_t i;
+    for (i = 0; i < length; ++i)
+    {
+        if (fstr[i] == '$')
+        {
+            parts += 2; // Technically wrong for $$, but still a good enough heuristic
+        }
+    }
+
+    caught_internal_matcher_fstr_segment *result = malloc(sizeof(caught_internal_matcher_fstr_segment) * (parts + 1));
+    size_t result_size = 0;
+    caught_internal_matcher_fstr_segment segment = {0}; // zeroed so is_specifier is never read uninitialized
+
+    char *buffer = malloc(sizeof(char) * (length + 1));
+    buffer[0] = '\0';
+    int buffer_i = 0;
+    for (i = 0; i <= length; ++i)
+    {
+        // if end of string OR found delimiter
+        bool is_dollar = (i < length) && (fstr[i] == '$');
+        bool next_dollar = (i + 1 < length) && (fstr[i + 1] == '$'); // i + 1 avoids size_t wrap when length is 0
+        bool is_end_of_specifier = (strlen(buffer) >= 2) && (segment.is_specifier);
+        bool split = (is_dollar && !next_dollar) || (is_end_of_specifier);
+
+        if (i == length || split)
+        {
+            segment.segment = strdup(buffer);
+            if (segment.is_specifier)
+            {
+                segment.matcher = get_matcher(buffer[1]);
+            }
+            result[result_size++] = segment;
+            segment = (caught_internal_matcher_fstr_segment){0};
+            buffer[0] = '\0';
+            buffer_i = 0;
+        }
+
+        if (i == length)
+        {
+            break;
+        }
+
+        if (buffer_i == 0)
+        {
+            segment.is_specifier = is_dollar && !next_dollar;
+        }
+
+        buffer[buffer_i] = fstr[i];
+        buffer[buffer_i + 1] = '\0';
+        buffer_i++;
+        if (is_dollar && next_dollar)
+        {
+            i++;
+        }
+    }
+
+    free(buffer);
+
+    result[result_size] = (caught_internal_matcher_fstr_segment){
+        .segment = NULL,
+    };
+
+    return result;
+}
+
+// Frees every segment string and then the segment array itself
+static void break_fstr_segments_cleanup(caught_internal_matcher_fstr_segment *segments)
+{
+    size_t i;
+    for (i = 0; segments[i].segment != NULL; ++i)
+    {
+        free(segments[i].segment);
+    }
+    free(segments);
+}
+
+// Walks the segments of fstr in order, consuming characters of str as it goes
+bool caught_internal_match(char *str, char *fstr)
+{
+    assert(str);
+    assert(fstr);
+
+    caught_internal_matcher_fstr_segment *segments = break_fstr_segments(fstr);
+    size_t segments_len = 0;
+    while (segments[segments_len].segment != NULL)
+    {
+        segments_len++;
+    }
+
+    size_t i;
+    for (i = 0; i < segments_len; ++i)
+    {
+        caught_internal_matcher_fstr_segment segment = segments[i];
+        caught_internal_matcher matcher = segment.matcher;
+        char *segment_str = segment.segment;
+        // Literal matches
+        if (!segment.is_specifier)
+        {
+            if (strncmp(str, segment_str, strlen(segment_str)) != 0)
+            {
+                break_fstr_segments_cleanup(segments);
+                return false;
+            }
+            str += strlen(segment_str);
+            continue;
+        }
+
+        size_t match_size = 0;
+        ssize_t best_match_size = -1;
+        bool only_one_flag = false;
+        size_t str_len = strlen(str);
+        while (match_size < str_len)
+        {
+            char c = str[match_size];
+
+            // Apply whitelist
+            if (matcher.whitelist)
+            {
+                if (strchr(matcher.whitelist, c) == NULL)
+                {
+                    break;
+                }
+            }
+
+            // Apply blacklist
+            if (matcher.blacklist)
+            {
+                if (strchr(matcher.blacklist, c) != NULL)
+                {
+                    break;
+                }
+            }
+
+            // Only one condition - if more than one we fail
+            if (matcher.only_one != '\0' && c == matcher.only_one)
+            {
+                if (only_one_flag)
+                {
+                    break;
+                }
+                only_one_flag = true;
+            }
+
+            match_size += 1;
+
+            // Look ahead - if next non-specifier doesn't match - we can't do this
+            if (i < segments_len - 1)
+            {
+                caught_internal_matcher_fstr_segment next_segment = segments[i + 1];
+                char *next_segment_str = next_segment.segment;
+                if (!next_segment.is_specifier)
+                {
+                    if (strncmp(str + match_size, next_segment_str, strlen(next_segment_str)) != 0)
+                    {
+                        continue;
+                    }
+                }
+            }
+
+            // Match size worked
+            best_match_size = match_size;
+
+            if (match_size == 1 && !matcher.match_all)
+            {
+                break;
+            }
+        }
+
+        if (best_match_size == -1)
+        {
+            break_fstr_segments_cleanup(segments);
+            return false;
+        }
+
+        str += best_match_size;
+    }
+
+    // Every segment matched (NOTE(review): characters left over in str are not rejected - confirm intended)
+    break_fstr_segments_cleanup(segments);
+    return true;
+}
diff --git a/src/match.h b/src/match.h
new file mode 100644
index 0000000..577c081
--- /dev/null
+++ b/src/match.h
@@ -0,0 +1,48 @@
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdbool.h>
+#include <string.h>
+
+#ifndef CAUGHT_MATCH
+#define CAUGHT_MATCH
+
+struct caught_internal_matcher
+{
+    char *specifiers; // Specifiers for this matcher
+    char *whitelist;  // Whitelist characters
+    char *blacklist;  // Blacklist characters
+    char only_one;    // Character that can only be seen once, '\0' if not set
+    bool match_all;   // Only match one if false
+};
+
+typedef struct caught_internal_matcher caught_internal_matcher;
+
+struct caught_internal_matcher_fstr_segment
+{
+    bool is_specifier;
+    caught_internal_matcher matcher;
+    char *segment;
+};
+typedef struct caught_internal_matcher_fstr_segment caught_internal_matcher_fstr_segment;
+
+/*
+ * Returns true if str matches the format string fstr
+ *
+ * Match specifiers are specified with $
+ *
+ * $i, $d = match an integer (0-9)
+ * $c = match a single character
+ * $f = match a float (0-9, a single . allowed)
+ * $a = match alpha-chars (a-z and A-Z)
+ * $w = match a word (matches anything non-whitespace)
+ * $s = match a string (at least 1 character)
+ * $$ = a literal $
+ *
+ * Anything else = literal match
+ *
+ * Having two greedy operators like $s without a literal match between them ("$s$s") will fail
+ */
+bool caught_internal_match(char *str, char *fstr);
+
+#endif
diff --git a/src/output.h b/src/output.h
index e893ac5..eb7190d 100644
--- a/src/output.h
+++ b/src/output.h
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include "assertion-result.h"
 #ifndef CAUGHT_OUTPUT
 #define CAUGHT_OUTPUT