Skip to content

Commit

Permalink
Add match operator
Browse files Browse the repository at this point in the history
  • Loading branch information
Timothy-Gonzalez committed Feb 12, 2024
1 parent e26179b commit 29ff370
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 1 deletion.
6 changes: 6 additions & 0 deletions src/assertion-result.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ enum caught_operator
CAUGHT_OP_GREATER_THAN_EQ,
CAUGHT_OP_IN,
CAUGHT_OP_NOT_IN,
CAUGHT_OP_MATCH,
CAUGHT_OP_NOT_MATCH,
};

static char *CAUGHT_OPERATOR_STRS[] = {
Expand All @@ -24,6 +26,8 @@ static char *CAUGHT_OPERATOR_STRS[] = {
">=",
"in",
"not in",
"match",
"not match",
};

static char *CAUGHT_OPERATOR_TO_BES[] = {
Expand All @@ -35,6 +39,8 @@ static char *CAUGHT_OPERATOR_TO_BES[] = {
"to be >= to",
"to be in",
"to not be in",
"to match",
"to not match",
};

enum caught_operator
Expand Down
6 changes: 5 additions & 1 deletion src/evaluators.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "evaluators.h"
#include "output.h"
#include <stdlib.h>
#include <stdio.h>
#include "match.h"

// Evaluators take in a left hand side, operator, and right hand side
// they then evaluate the result of that expression
Expand Down Expand Up @@ -81,6 +81,10 @@ bool caught_internal_evaluator_str(char *lhs, enum caught_operator operator, cha
return !null_exists && (strstr(rhs, lhs) != NULL);
case CAUGHT_OP_NOT_IN:
return null_exists || (strstr(rhs, lhs) == NULL);
case CAUGHT_OP_MATCH:
return !null_exists && caught_internal_match(lhs, rhs);
case CAUGHT_OP_NOT_MATCH:
return null_exists || !caught_internal_match(lhs, rhs);
default:
caught_output_errorf("Cannot compare strings with %s, only == and != are supported!", caught_operator_to_str(operator));
return false;
Expand Down
5 changes: 5 additions & 0 deletions src/evaluators.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#include <stdbool.h>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <string.h>
#include "fork.h"
#include "assertion-result.h"
Expand All @@ -22,6 +25,8 @@ bool caught_internal_evaluator_char(char lhs, enum caught_operator operator, cha
bool caught_internal_evaluator_char_ptr(char *lhs, enum caught_operator operator, char * rhs);
bool caught_internal_evaluator_char_array(char *lhs, enum caught_operator operator, char * rhs, ssize_t length);

// NOTE(review): no definition of this function is visible in this change; evaluators.c
// calls caught_internal_match (declared in match.h) instead - confirm this declaration is still needed
bool caught_internal_evaluator_str_match(char *lhs, char *fstr);

bool caught_internal_evaluator_str(char *lhs, enum caught_operator operator, char * rhs);
bool caught_internal_evaluator_str_ptr(char **lhs, enum caught_operator operator, char ** rhs);
bool caught_internal_evaluator_str_array(char **lhs, enum caught_operator operator, char ** rhs, ssize_t length);
Expand Down
256 changes: 256 additions & 0 deletions src/match.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
#include "match.h"
#include "output.h"

#include <assert.h>
#include <stdlib.h>

// Table of the supported format specifiers and the character rules each one applies.
// get_matcher searches this table by specifier character.
static const caught_internal_matcher MATCHERS[] = {
    // $i, $d: one or more decimal digits
    {
        .specifiers = "id",
        .whitelist = "0123456789",
        .blacklist = NULL,
        .only_one = '\0',
        .match_all = true,
    },
    // $f: decimal digits with at most one '.'
    {
        .specifiers = "f",
        .whitelist = "0123456789.",
        .blacklist = NULL,
        .only_one = '.',
        .match_all = true,
    },
    // $c: any single character
    {
        .specifiers = "c",
        .whitelist = NULL,
        .blacklist = NULL,
        .only_one = '\0',
        .match_all = false,
    },
    // $a: alphabetic characters - the full a-z and A-Z ranges
    {
        .specifiers = "a",
        .whitelist = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
        .blacklist = NULL,
        .only_one = '\0',
        .match_all = true,
    },
    // $w: anything that is not whitespace
    {
        .specifiers = "w",
        .whitelist = NULL,
        .blacklist = "\n\r\v\f\t ",
        .only_one = '\0',
        .match_all = true,
    },
    // $s: any characters
    {
        .specifiers = "s",
        .whitelist = NULL,
        .blacklist = NULL,
        .only_one = '\0',
        .match_all = true,
    },
};

// Finds the MATCHERS entry whose specifier list contains `specifier`.
// When no entry is found, an error is reported through caught_output_errorf and,
// if that call returns, a zeroed matcher is handed back.
static caught_internal_matcher get_matcher(char specifier)
{
    const size_t matcher_count = sizeof(MATCHERS) / sizeof(MATCHERS[0]);

    for (size_t m = 0; m < matcher_count; m++)
    {
        // Walk this entry's NUL-terminated list of accepted specifier characters
        for (const char *candidate = MATCHERS[m].specifiers; *candidate != '\0'; candidate++)
        {
            if (*candidate == specifier)
            {
                return MATCHERS[m];
            }
        }
    }

    caught_output_errorf("Invalid format specifier: $%c", specifier);
    return (caught_internal_matcher){0};
}

static caught_internal_matcher_fstr_segment *break_fstr_segments(char *fstr)
{
size_t length = strlen(fstr);

size_t parts = 1;

size_t i;
for (i = 0; i < length; ++i)
{
if (fstr[i] == '$')
{
parts += 2; // Technically wrong for $$, but still a good enough heuristic
}
}

caught_internal_matcher_fstr_segment *result = malloc(sizeof(caught_internal_matcher_fstr_segment) * (parts + 1));
size_t result_size = 0;
caught_internal_matcher_fstr_segment segment;

char *buffer = malloc(sizeof(char) * (length + 1));
buffer[0] = '\0';
int buffer_i = 0;
for (i = 0; i <= length; ++i)
{
// if end of string OR found delimeter
bool is_$ = (i < length) && (fstr[i] == '$');
bool next_$ = (i < length - 1) && (fstr[i + 1] == '$');
bool is_end_of_specifier = (strlen(buffer) >= 2) && (segment.is_specifier);
bool split = (is_$ && !next_$) || (is_end_of_specifier);

if (i == length || split)
{
segment.segment = strdup(buffer);
if (segment.is_specifier)
{
segment.matcher = get_matcher(buffer[1]);
}
result[result_size++] = segment;
segment = (caught_internal_matcher_fstr_segment){};
buffer[0] = '\0';
buffer_i = 0;
}

if (i == length)
{
break;
}

if (buffer_i == 0)
{
segment.is_specifier = is_$ && !next_$;
}

buffer[buffer_i] = fstr[i];
buffer[buffer_i + 1] = '\0';
buffer_i++;
if (is_$ && next_$)
{
i++;
}
}

free(buffer);

result[result_size] = (caught_internal_matcher_fstr_segment){
.segment = NULL,
};

return result;
}

// Frees a segment array produced by break_fstr_segments:
// every segment string up to the NULL terminator entry, then the array itself.
static void break_fstr_segments_cleanup(caught_internal_matcher_fstr_segment *segments)
{
    caught_internal_matcher_fstr_segment *cursor = segments;
    while (cursor->segment != NULL)
    {
        free(cursor->segment);
        cursor++;
    }
    free(segments);
}

// Returns whether str matches the format string fstr.
//
// fstr is broken into literal and specifier segments, which are then matched against
// str from left to right. Literal segments must appear verbatim. Specifier segments
// consume at least one character allowed by their matcher; multi-character specifiers
// take the longest run after which a directly following literal segment still matches.
//
// Both str and fstr must be non-NULL (checked with assert).
//
// NOTE(review): str is not required to be fully consumed once the last segment has
// matched, so trailing characters are accepted - confirm this is intended
bool caught_internal_match(char *str, char *fstr)
{
    assert(str);
    assert(fstr);

    caught_internal_matcher_fstr_segment *segments = break_fstr_segments(fstr);
    size_t segments_len = 0;
    while (segments[segments_len].segment != NULL)
    {
        segments_len++;
    }

    bool matched = true;
    for (size_t i = 0; i < segments_len; ++i)
    {
        const caught_internal_matcher_fstr_segment *segment = &segments[i];

        // Literal segments must appear verbatim at the current position
        if (!segment->is_specifier)
        {
            size_t literal_len = strlen(segment->segment);
            if (strncmp(str, segment->segment, literal_len) != 0)
            {
                matched = false;
                break;
            }
            str += literal_len;
            continue;
        }

        const caught_internal_matcher *matcher = &segment->matcher;

        // Look ahead - a literal directly after this specifier limits what it may consume
        const char *next_literal = NULL;
        size_t next_literal_len = 0;
        if (i + 1 < segments_len && !segments[i + 1].is_specifier)
        {
            next_literal = segments[i + 1].segment;
            next_literal_len = strlen(next_literal);
        }

        size_t match_size = 0;
        size_t best_match_size = 0; // 0 means no acceptable length was found
        bool only_one_seen = false;
        while (str[match_size] != '\0')
        {
            char c = str[match_size];

            // Apply whitelist
            if (matcher->whitelist && strchr(matcher->whitelist, c) == NULL)
            {
                break;
            }

            // Apply blacklist
            if (matcher->blacklist && strchr(matcher->blacklist, c) != NULL)
            {
                break;
            }

            // Only one condition - a second occurrence ends the run
            if (matcher->only_one != '\0' && c == matcher->only_one)
            {
                if (only_one_seen)
                {
                    break;
                }
                only_one_seen = true;
            }

            match_size += 1;

            // This length only works if the following literal (if any) matches right after it
            if (next_literal == NULL || strncmp(str + match_size, next_literal, next_literal_len) == 0)
            {
                best_match_size = match_size;
            }

            // Single character matchers never consume more than one character,
            // even when the look ahead rejected that length
            if (!matcher->match_all)
            {
                break;
            }
        }

        if (best_match_size == 0)
        {
            matched = false;
            break;
        }

        str += best_match_size;
    }

    break_fstr_segments_cleanup(segments);
    return matched;
}
48 changes: 48 additions & 0 deletions src/match.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include <stdbool.h>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <string.h>

#ifndef CAUGHT_MATCH
#define CAUGHT_MATCH

// Character rules applied by one family of match specifiers
typedef struct caught_internal_matcher
{
    char *specifiers; // Specifier characters that select this matcher
    char *whitelist;  // If not NULL, only these characters are accepted
    char *blacklist;  // If not NULL, these characters are rejected
    char only_one;    // Character that may be seen at most once, '\0' if not set
    bool match_all;   // false = match only a single character
} caught_internal_matcher;

// One piece of a broken-up format string
typedef struct caught_internal_matcher_fstr_segment
{
    bool is_specifier;               // true for a $ specifier, false for literal text
    caught_internal_matcher matcher; // Matcher to apply when is_specifier is true
    char *segment;                   // Text of this segment
} caught_internal_matcher_fstr_segment;

/*
 * Returns whether str matches the format string fstr
 *
 * Both str and fstr must be non-NULL (checked with assert)
 *
 * Match specifiers are introduced with $
 *
 * $i, $d = match an integer (0-9)
 * $c = match a single character
 * $f = match a float (0-9, a single . allowed)
 * $a = match alpha-chars (a-z and A-Z)
 * $w = match a word (matches anything non-whitespace)
 * $s = match a string (at least 1 character)
 * $$ = a literal $
 *
 * Anything else = literal match
 *
 * Having two greedy operators like $s without a literal match between them ("$s$s") will fail
 */
bool caught_internal_match(char *str, char *fstr);

#endif
1 change: 1 addition & 0 deletions src/output.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "assertion-result.h"
#ifndef CAUGHT_OUTPUT
#define CAUGHT_OUTPUT
Expand Down

0 comments on commit 29ff370

Please sign in to comment.