From 227597589c82ec530c61031de2f81880e27f3e5a Mon Sep 17 00:00:00 2001
From: Sigmanificient
Date: Sat, 20 Apr 2024 02:33:43 +0200
Subject: [PATCH] Setup expr tokenizer

---
Review notes: the operator table now covers every unsigned char value,
isdigit() only receives unsigned char values, operators must be exactly
one byte long, an empty operand is no longer an integer, and the lexer
no longer walks past the NULL terminator of argv. The blob ids on the
"index" lines are those of the original submission.

 src/expr/Makefile     |  1 +
 src/expr/expr.c       | 58 ++++++++++++++++++++++++++--
 src/expr/expr.h       | 90 +++++++++++++++++++++++++++++++++++++++++++
 src/expr/expr_lexer.c | 85 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 230 insertions(+), 4 deletions(-)
 create mode 100644 src/expr/expr.h
 create mode 100644 src/expr/expr_lexer.c

diff --git a/src/expr/Makefile b/src/expr/Makefile
index 2dc2d7f..2f27afb 100644
--- a/src/expr/Makefile
+++ b/src/expr/Makefile
@@ -1,5 +1,6 @@
 OUT := expr
 
 SRC := expr.c
+SRC += expr_lexer.c
 
 include ../shared.mk
diff --git a/src/expr/expr.c b/src/expr/expr.c
index 6e6d777..0668af5 100644
--- a/src/expr/expr.c
+++ b/src/expr/expr.c
@@ -1,6 +1,56 @@
+#include <errno.h>
+#include <stdbool.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
-int main(void) {
-  printf("Hello, World!\n");
-  return 0;
-}
\ No newline at end of file
+#include "expr.h"
+
+#define NAME "expr (canoutils)"
+#define VERSION "1.0.0"
+#define AUTHOR "Yohann Boniface (Sigmanificient)"
+
+#include "version_info.h"
+
+/* Printable name of each token kind, indexed by token_type. */
+static char const *TOKEN_REPR[] = {
+    [TOK_UKN] = "???", [TOK_INT] = "int", [TOK_ADD] = "add", [TOK_SUB] = "sub",
+    [TOK_MUL] = "mul", [TOK_DIV] = "div", [TOK_EOF] = "eof", [TOK_WIP] = "wip",
+};
+
+/*
+ * Tokenize every operand of argv (NULL-terminated) and print one
+ * "T[kind](text)" line per token, the final EOF token included.
+ * Returns false if a token could not be allocated.
+ */
+static bool expr_run(char **argv) {
+  token *tokp;
+  lexer lex = {.argv = argv};
+
+  do {
+    tokp = expr_lex_get_next_token(&lex);
+    if (tokp == NULL) {
+      fprintf(stderr, "Failed to tokenize: %s\n", strerror(errno));
+      free(lex.tokens);
+      return false;
+    }
+    /* The EOF token has no text: never hand a NULL pointer to "%s". */
+    printf("T[%s](%.*s)\n", TOKEN_REPR[tokp->typ], (int)tokp->len,
+           tokp->val != NULL ? tokp->val : "");
+  } while (tokp->typ != TOK_EOF);
+  free(lex.tokens);
+  return true;
+}
+
+int main(int argc, char **argv) {
+  for (int i = 0; argv[i] != NULL; i++)
+    if (!strcmp(argv[i], "--version")) {
+      print_version();
+      return EXIT_SUCCESS;
+    }
+  if (argc < 2) {
+    fprintf(stderr, "expr: missing operand\n");
+    return EXIT_FAILURE;
+  }
+  return expr_run(&argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/expr/expr.h b/src/expr/expr.h
new file mode 100644
index 0000000..832728e
--- /dev/null
+++ b/src/expr/expr.h
@@ -0,0 +1,90 @@
+#ifndef EXPR_H
+#define EXPR_H
+
+#include <stddef.h>
+
+/* Number of token slots allocated the first time lexer.tokens grows. */
+#define FIRST_TOKEN_BATCH_SIZE 64
+
+typedef unsigned char uchar;
+
+/* Kind of a token. TOK_UKN is 0, the default of zero-initialized tables. */
+typedef enum {
+  TOK_UKN = 0,
+  TOK_INT,
+  TOK_ADD,
+  TOK_SUB,
+  TOK_MUL,
+  TOK_DIV,
+  TOK_EOF,
+  TOK_WIP,
+} token_type;
+
+/* A token: its kind plus the text it was lexed from and that text's length. */
+typedef struct {
+  token_type typ;
+  char *val;
+  size_t len;
+} token;
+
+/*
+ * Lexer state. argv is the NULL-terminated operand list still to be lexed;
+ * tokens is a heap array that the caller must free(). Growing tokens may
+ * move it, which invalidates token pointers returned earlier.
+ */
+typedef struct {
+  char **argv;
+  char prev;
+  token *tokens;
+  size_t token_count;
+  size_t token_capacity;
+} lexer;
+
+typedef struct {
+  token *tok;
+  lexer *lx;
+} parser;
+
+typedef enum {
+  AST_ANY,
+  AST_UNARY,
+  AST_BINOP,
+  AST_NUM,
+  AST_COUNT,
+} ast_type;
+
+union ast_;
+typedef union ast_ {
+  struct {
+    token *tok;
+    ast_type typ;
+  } any;
+
+  struct {
+    token *tok;
+    ast_type typ;
+    union ast_ *next;
+  } unary;
+
+  struct {
+    token *tok;
+    ast_type typ;
+    union ast_ *next;
+    union ast_ *prev;
+  } binop;
+
+  struct {
+    token *tok;
+    ast_type typ;
+    int val;
+  } num;
+} ast;
+
+/*
+ * Lex the next operand of lx->argv into a newly reserved token and return
+ * it. Returns NULL if the token array could not be grown. Once the NULL
+ * terminator of argv is reached, every call returns a TOK_EOF token.
+ */
+token *expr_lex_get_next_token(lexer *lx);
+
+#endif
diff --git a/src/expr/expr_lexer.c b/src/expr/expr_lexer.c
new file mode 100644
index 0000000..5166477
--- /dev/null
+++ b/src/expr/expr_lexer.c
@@ -0,0 +1,85 @@
+#include <ctype.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "expr.h"
+
+/*
+ * Operator lookup by byte value. Sized UCHAR_MAX + 1 so that every
+ * unsigned char value, UCHAR_MAX included, is a valid index. Entries
+ * that are not listed default to TOK_UKN (0).
+ */
+static const token_type CHAR_TOKENS[UCHAR_MAX + 1] = {
+    [(uchar)'+'] = TOK_ADD,
+    [(uchar)'-'] = TOK_SUB,
+    [(uchar)'*'] = TOK_MUL,
+    [(uchar)'/'] = TOK_DIV,
+};
+
+/*
+ * Reserve the next slot of lx->tokens, growing the array when it is full
+ * (FIRST_TOKEN_BATCH_SIZE slots at first, then doubling). Returns NULL,
+ * leaving the lexer unchanged, if the reallocation fails.
+ */
+static token *get_new_token(lexer *lx) {
+  if (lx->token_count >= lx->token_capacity) {
+    size_t new_capacity = (lx->token_capacity == 0)
+                              ? FIRST_TOKEN_BATCH_SIZE
+                              : lx->token_capacity << 1;
+    token *grown = realloc(lx->tokens, new_capacity * sizeof *lx->tokens);
+
+    if (grown == NULL)
+      return NULL;
+    lx->tokens = grown;
+    lx->token_capacity = new_capacity;
+  }
+  return &lx->tokens[lx->token_count++];
+}
+
+/* Store the given kind, text and length in *tokp and return tokp. */
+static inline token *mk_token(token *tokp, token_type typ, char *val,
+                              size_t len) {
+  *tokp = (token){.typ = typ, .val = val, .len = len};
+  return tokp;
+}
+
+/* Tell whether arg is a non-empty string made only of decimal digits. */
+static bool is_unsigned_int(char const *arg) {
+  if (*arg == '\0')
+    return false;
+  for (size_t i = 0; arg[i] != '\0'; i++)
+    /* <ctype.h> functions take an unsigned char value, not a plain char. */
+    if (!isdigit((uchar)arg[i]))
+      return false;
+  return true;
+}
+
+/*
+ * Lex the next operand of lx->argv: a lone '+', '-', '*' or '/' is an
+ * operator, a run of decimal digits is TOK_INT, anything else is TOK_UKN.
+ * Returns NULL if no token slot could be allocated; in that case the
+ * operand is not consumed.
+ */
+token *expr_lex_get_next_token(lexer *lx) {
+  token *tokp = get_new_token(lx);
+  token_type typ;
+  size_t len;
+  char *arg;
+
+  if (tokp == NULL)
+    return NULL;
+  arg = *lx->argv;
+  /* Stay on the NULL terminator so that later calls keep returning EOF. */
+  if (arg == NULL)
+    return mk_token(tokp, TOK_EOF, NULL, 0);
+  lx->argv++;
+
+  len = strlen(arg);
+  /* An operator is exactly one byte long: "-5" or "+x" is not one. */
+  typ = (len == 1) ? CHAR_TOKENS[(uchar)*arg] : TOK_UKN;
+  if (typ == TOK_UKN && is_unsigned_int(arg))
+    typ = TOK_INT;
+  return mk_token(tokp, typ, arg, len);
+}