Set up expr tokenizer

proh14 · Apr 20, 2024 · 2275975 · 2275975
1 parent 6c35700
commit 2275975
Show file tree

Hide file tree

Showing 4 changed files with 182 additions and 4 deletions.
diff --git a/src/expr/Makefile b/src/expr/Makefile
@@ -1,5 +1,6 @@
 OUT := expr
 
 SRC := expr.c
+SRC += expr_lexer.c
 
 include ../shared.mk
diff --git a/src/expr/expr.c b/src/expr/expr.c
@@ -1,6 +1,48 @@
+#include <errno.h>
+#include <stdbool.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
-int main(void) {
-  printf("Hello, World!\n");
-  return 0;
-}
+#include "expr.h"
+
+#define NAME "expr (canoutils)"
+#define VERSION "1.0.0"
+#define AUTHOR "Yohann Boniface (Sigmanificient)"
+
+#include "version_info.h"
+
+/* Printable name of every token_type, indexed by the enum value. */
+static const char *TOKEN_REPR[] = {
+    [TOK_UKN] = "???",
+    [TOK_INT] = "int",
+    [TOK_ADD] = "add",
+    [TOK_SUB] = "sub",
+    [TOK_MUL] = "mul",
+    [TOK_DIV] = "div",
+    [TOK_EOF] = "eof",
+    [TOK_WIP] = "wip",
+};
+
+/**
+ * Tokenize every operand in argv and print one line per token to stdout.
+ *
+ * @param argv NULL-terminated array of operand strings.
+ * @return true once the end-of-input token has been printed, false if the
+ *         lexer failed to deliver a token (a message is written to stderr).
+ */
+static bool expr_run(char **argv) {
+  lexer lex = {.argv = argv};
+  bool ok = false;
+
+  for (;;) {
+    token *tokp = expr_lex_get_next_token(&lex);
+
+    if (tokp == NULL) {
+      fprintf(stderr, "Failed to tokeinize: %s\n", strerror(errno));
+      break;
+    }
+    /* The end-of-input token carries a NULL value. "%s" needs a valid string
+     * even when the precision is 0, so print an empty one in that case. */
+    printf("T[%s](%.*s)\n", TOKEN_REPR[tokp->typ], (int)tokp->len,
+           tokp->val != NULL ? tokp->val : "");
+    if (tokp->typ == TOK_EOF) {
+      ok = true;
+      break;
+    }
+  }
+  /* Single exit: the token array is released on success and failure alike. */
+  free(lex.tokens);
+  return ok;
+}
+
+/**
+ * Program entry point.
+ *
+ * Prints the version and exits successfully if any argument equals
+ * "--version"; otherwise requires at least one operand and runs the
+ * tokenizer over the arguments that follow the program name.
+ */
+int main(int argc, char **argv) {
+  char **cursor = argv;
+
+  while (*cursor != NULL) {
+    if (strcmp(*cursor, "--version") == 0) {
+      print_version();
+      return EXIT_SUCCESS;
+    }
+    cursor++;
+  }
+
+  if (argc < 2) {
+    fprintf(stderr, "expr: missing operand\n");
+    return EXIT_FAILURE;
+  }
+
+  if (!expr_run(argv + 1))
+    return EXIT_FAILURE;
+  return EXIT_SUCCESS;
+}
diff --git a/src/expr/expr.h b/src/expr/expr.h
@@ -0,0 +1,77 @@
+#ifndef EXPR_H
+#define EXPR_H
+
+#include <stddef.h>
+
+#define FIRST_TOKEN_BATCH_SIZE 64 /* token slots allocated on the first growth of the token array */
+
+typedef unsigned char uchar; /* shorthand used when a char serves as a table index */
+
+typedef enum { /* Kinds of token the lexer can assign. */
+  TOK_UKN = 0, /* unrecognized operand; 0 so zero-initialized table entries mean "unknown" */
+  TOK_INT, /* operand made only of decimal digits */
+  TOK_ADD, /* operand whose first character is '+' */
+  TOK_SUB, /* operand whose first character is '-' */
+  TOK_MUL, /* operand whose first character is '*' */
+  TOK_DIV, /* operand whose first character is '/' */
+  TOK_EOF, /* end of the argument list */
+  TOK_WIP, /* NOTE(review): never produced by the visible lexer; purpose to be confirmed */
+} token_type;
+
+typedef struct { /* One token produced by the lexer. */
+  token_type typ; /* kind of token */
+  char *val; /* the argument string the token was made from; NULL for TOK_EOF */
+  size_t len; /* strlen of val; 0 for TOK_EOF */
+} token;
+
+typedef struct { /* Lexer state: an argument cursor plus a growable token array. */
+  char **argv; /* next argument to read; advanced by expr_lex_get_next_token */
+  char prev; /* NOTE(review): not referenced by the visible lexer code */
+  token *tokens; /* heap array grown with realloc (so it may move); freed by the caller */
+  size_t token_count; /* number of slots handed out so far */
+  size_t token_capacity; /* number of slots currently allocated in tokens */
+} lexer;
+
+typedef struct { /* Parser state. NOTE(review): not used anywhere in the visible code. */
+  token *tok; /* presumably the token currently being parsed — TODO confirm */
+  lexer *lx; /* presumably the lexer supplying tokens — TODO confirm */
+} parser;
+
+typedef enum { /* Type tag held in the typ member of each ast variant. */
+  AST_ANY, /* presumably pairs with the `any` variant — TODO confirm */
+  AST_UNARY, /* presumably pairs with the `unary` variant — TODO confirm */
+  AST_BINOP, /* presumably pairs with the `binop` variant — TODO confirm */
+  AST_NUM, /* presumably pairs with the `num` variant — TODO confirm */
+  AST_COUNT, /* presumably the number of tags above, not a real tag — TODO confirm */
+} ast_type;
+
+union ast_; /* forward declaration so the variants below can point to other nodes */
+typedef union ast_ { /* Tree node; every variant starts with the same tok and typ members. */
+  struct {
+    token *tok; /* token attached to the node */
+    ast_type typ; /* type tag of the node */
+  } any; /* holds only the shared members; presumably used to read typ before picking a variant */
+
+  struct {
+    token *tok;
+    ast_type typ;
+    union ast_ *next; /* presumably the single operand — TODO confirm */
+  } unary;
+
+  struct {
+    token *tok;
+    ast_type typ;
+    union ast_ *next; /* NOTE(review): which of next/prev is the left or right operand is not shown here */
+    union ast_ *prev;
+  } binop;
+
+  struct {
+    token *tok;
+    ast_type typ;
+    int val; /* presumably the parsed integer value — TODO confirm */
+  } num;
+} ast;
+
+token *expr_lex_get_next_token(lexer *lx); /* Turns the next argument of lx into
+   a token stored in lx->tokens and returns it (TOK_EOF once the NULL terminator
+   is reached); returns NULL if the token array cannot be grown. */
+
+#endif
diff --git a/src/expr/expr_lexer.c b/src/expr/expr_lexer.c
@@ -0,0 +1,58 @@
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "expr.h"
+
+/*
+ * Token kind selected by the first byte of an operand. Bytes that are not
+ * listed stay zero-initialized, i.e. TOK_UKN. The table has UCHAR_MAX + 1
+ * entries so that every unsigned char value, UCHAR_MAX included, is a valid
+ * index.
+ */
+static const token_type CHAR_TOKENS[UCHAR_MAX + 1] = {
+    [(uchar)'+'] = TOK_ADD,
+    [(uchar)'-'] = TOK_SUB,
+    [(uchar)'*'] = TOK_MUL,
+    [(uchar)'/'] = TOK_DIV,
+};
+
+/**
+ * Hand out the next unused slot of the lexer's token array.
+ *
+ * When every allocated slot is taken the array is enlarged first:
+ * FIRST_TOKEN_BATCH_SIZE slots for an empty array, twice the current
+ * capacity otherwise. Enlarging goes through realloc, so the array may move.
+ *
+ * @return pointer to the reserved (uninitialized) slot, or NULL if realloc
+ *         failed; in that case the lexer is left unchanged.
+ */
+static token *get_new_token(lexer *lx) {
+  while (lx->token_count >= lx->token_capacity) {
+    size_t grown = FIRST_TOKEN_BATCH_SIZE;
+    token *resized;
+
+    if (lx->token_capacity != 0)
+      grown = lx->token_capacity << 1;
+    resized = realloc(lx->tokens, grown * sizeof *lx->tokens);
+    if (resized == NULL)
+      return NULL;
+    lx->tokens = resized;
+    lx->token_capacity = grown;
+  }
+  return &lx->tokens[lx->token_count++];
+}
+
+/* Store kind, text and length into *tokp and return tokp for chaining. */
+static inline token *mk_token(token *tokp, token_type typ, char *val,
+                              size_t len) {
+  tokp->typ = typ;
+  tokp->val = val;
+  tokp->len = len;
+  return tokp;
+}
+
+/**
+ * Produce the token for the next command-line operand.
+ *
+ * Every operand yields exactly one token:
+ *  - first byte '+', '-', '*' or '/'   -> the matching operator token;
+ *  - one or more decimal digits only  -> TOK_INT;
+ *  - anything else, including ""       -> TOK_UKN;
+ *  - the NULL terminator               -> TOK_EOF (val NULL, len 0).
+ *
+ * @param lx lexer whose argv points at the next unread operand.
+ * @return the new token, stored in lx->tokens, or NULL if the token array
+ *         could not be grown; in that case no operand is consumed.
+ */
+token *expr_lex_get_next_token(lexer *lx) {
+  token *tokp = get_new_token(lx);
+  token_type typ = TOK_UKN;
+  char *arg;
+  size_t len;
+  uchar first;
+
+  if (tokp == NULL)
+    return NULL;
+
+  arg = *lx->argv;
+  if (arg == NULL) {
+    /* Stay on the terminator: a further call reports TOK_EOF again instead
+     * of reading past the end of the argument vector. */
+    return mk_token(tokp, TOK_EOF, NULL, 0);
+  }
+  lx->argv++;
+
+  len = strlen(arg);
+  first = (uchar)*arg;
+  /* Bounds-checked lookup: never index outside the table, whatever its size. */
+  if (first < sizeof CHAR_TOKENS / sizeof CHAR_TOKENS[0])
+    typ = CHAR_TOKENS[first];
+  if (typ != TOK_UKN)
+    return mk_token(tokp, typ, arg, len);
+
+  /* An empty operand contains no digit, so it is not an integer. */
+  if (len == 0)
+    return mk_token(tokp, TOK_UKN, arg, len);
+
+  for (size_t i = 0; i < len; i++) {
+    /* <ctype.h> functions need an unsigned char value; a plain char may be
+     * negative for bytes above 0x7F. */
+    if (!isdigit((uchar)arg[i]))
+      return mk_token(tokp, TOK_UKN, arg, len);
+  }
+  return mk_token(tokp, TOK_INT, arg, len);
+}