Skip to content

Commit

Permalink
Set up expr tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
Sigmanificient committed Apr 20, 2024
1 parent 6c35700 commit 2275975
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/expr/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Build settings for the expr utility. No rules are defined here; they are
# presumably supplied by ../shared.mk (not visible in this file) — confirm there.

# NOTE(review): looks like the name of the produced binary — verify in shared.mk.
OUT := expr

# Source files that make up the program.
SRC := expr.c
SRC += expr_lexer.c

include ../shared.mk
50 changes: 46 additions & 4 deletions src/expr/expr.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,48 @@
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Placeholder entry point: writes a fixed greeting to stdout and succeeds. */
int main(void) {
  fputs("Hello, World!\n", stdout);
  return 0;
}
#include "expr.h"

/*
 * Program identity strings. They are defined before "version_info.h" is
 * included; presumably that header reads them to implement print_version()
 * — confirm against version_info.h, which is not visible here.
 */
#define NAME "expr (canoutils)"
#define VERSION "1.0.0"
#define AUTHOR "Yohann Boniface (Sigmanificient)"

#include "version_info.h"

/*
 * Short printable name for each token_type, indexed by the enum value.
 * Used only for the debug dump of the token stream. Both the strings and the
 * array itself are const: the table is never written to.
 */
static char const *const TOKEN_REPR[] = {
    [TOK_UKN] = "???",
    [TOK_INT] = "int",
    [TOK_ADD] = "add",
    [TOK_SUB] = "sub",
    [TOK_MUL] = "mul",
    [TOK_DIV] = "div",
    [TOK_EOF] = "eof",
    [TOK_WIP] = "wip",
};

/*
 * Tokenize the NULL-terminated argument vector and print one line per token
 * to stdout, in the form "T[<kind>](<text>)".
 *
 * argv: first operand (not the program name), terminated by a NULL entry.
 *
 * Returns true once the end-of-input token has been printed, false if the
 * lexer fails to produce a token (the reason is reported on stderr using
 * errno). The lexer's token array is released on every path.
 */
static bool expr_run(char **argv) {
  lexer lex = {.argv = argv};
  bool ok = false;

  for (;;) {
    token *tokp = expr_lex_get_next_token(&lex);

    if (tokp == NULL) {
      fprintf(stderr, "Failed to tokeinize: %s\n", strerror(errno));
      break;
    }
    /* The end-of-input token carries a NULL text pointer; "%s" requires a
     * valid string even when the precision is 0, so substitute "". */
    printf("T[%s](%.*s)\n", TOKEN_REPR[tokp->typ], (int)tokp->len,
           tokp->val != NULL ? tokp->val : "");
    if (tokp->typ == TOK_EOF) {
      ok = true;
      break;
    }
  }
  free(lex.tokens);
  return ok;
}

/*
 * Entry point. If any argument equals "--version", print the version banner
 * and exit successfully. Otherwise require at least one operand and hand the
 * operands (everything after the program name) to expr_run().
 */
int main(int argc, char **argv) {
  char **argp = argv;

  while (*argp != NULL) {
    if (strcmp(*argp, "--version") == 0) {
      print_version();
      return EXIT_SUCCESS;
    }
    argp++;
  }

  if (argc < 2) {
    fprintf(stderr, "expr: missing operand\n");
    return EXIT_FAILURE;
  }

  if (!expr_run(argv + 1))
    return EXIT_FAILURE;
  return EXIT_SUCCESS;
}
77 changes: 77 additions & 0 deletions src/expr/expr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#ifndef EXPR_H
#define EXPR_H

#include <stddef.h>

/* Number of token slots allocated the first time the lexer's token array is
 * grown from empty. */
#define FIRST_TOKEN_BATCH_SIZE 64

/* Shorthand for unsigned char, used when a character is used as an index. */
typedef unsigned char uchar;

/*
 * Kinds of token the lexer can produce.
 * TOK_UKN is explicitly 0 so that zero-initialized storage (for example the
 * unset entries of a lookup table) reads as "unknown".
 */
typedef enum {
TOK_UKN = 0, /* unrecognized input */
TOK_INT,     /* integer literal */
TOK_ADD,     /* '+' operator */
TOK_SUB,     /* '-' operator */
TOK_MUL,     /* '*' operator */
TOK_DIV,     /* '/' operator */
TOK_EOF,     /* end of the argument list */
TOK_WIP,     /* NOTE(review): never produced by the lexer code visible here; purpose unclear */
} token_type;

/* A single lexed token. The text is referenced, not copied. */
typedef struct {
token_type typ; /* kind of token */
char *val;      /* token text (points at the lexed argument); NULL for TOK_EOF */
size_t len;     /* length of the text in bytes; 0 for TOK_EOF */
} token;

/*
 * Lexer state. A lexer initialized with only .argv set (everything else
 * zero) is valid: the token array is allocated on first use.
 * The owner must free() `tokens` when done.
 */
typedef struct {
char **argv;           /* next argument to lex; the vector is NULL-terminated */
char prev;             /* NOTE(review): not referenced by the code visible here */
token *tokens;         /* heap array of tokens, grown with realloc() */
size_t token_count;    /* number of slots of `tokens` already handed out */
size_t token_capacity; /* number of slots currently allocated in `tokens` */
} lexer;

/*
 * Parser state. Not used by any code visible here.
 * NOTE(review): presumably `tok` is the token currently being examined and
 * `lx` the lexer it is pulled from — confirm once the parser exists.
 */
typedef struct {
token *tok;
lexer *lx;
} parser;

/*
 * Discriminator for the variants of the `ast` union; the enumerator names
 * mirror the union's member names (any, unary, binop, num).
 */
typedef enum {
AST_ANY,
AST_UNARY,
AST_BINOP,
AST_NUM,
AST_COUNT, /* NOTE(review): presumably the number of node kinds, not a real kind — confirm */
} ast_type;

/*
 * AST node. Every variant starts with the same two members (`tok`, `typ`),
 * so they share a common initial sequence and `any` exposes just that
 * shared header.
 * NOTE(review): `typ` presumably selects which variant is active — no code
 * using this union is visible here.
 */
union ast_;
typedef union ast_ {
/* Header-only view: the members common to every variant. */
struct {
token *tok;
ast_type typ;
} any;

/* Node with a single child. */
struct {
token *tok;
ast_type typ;
union ast_ *next;
} unary;

/* Node with two children. */
/* NOTE(review): which of next/prev is the left operand is not shown here. */
struct {
token *tok;
ast_type typ;
union ast_ *next;
union ast_ *prev;
} binop;

/* Leaf node holding an integer value. */
struct {
token *tok;
ast_type typ;
int val;
} num;
} ast;

/*
 * Lex the next argument of lx->argv into a token.
 *
 * Returns a pointer to the new token, stored inside lx->tokens, or NULL if
 * the token array could not be grown. A TOK_EOF token is returned when the
 * NULL terminator of the argument vector is reached.
 *
 * The token array may be reallocated by a later call, so a pointer returned
 * earlier must not be kept across calls.
 */
token *expr_lex_get_next_token(lexer *lx);

#endif
58 changes: 58 additions & 0 deletions src/expr/expr_lexer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "expr.h"

/*
 * Maps a character, used as an unsigned char index, to the operator token it
 * denotes. Entries that are not listed are zero-initialized, i.e. TOK_UKN.
 *
 * The table needs UCHAR_MAX + 1 entries so that every unsigned char value,
 * including UCHAR_MAX itself, is a valid index.
 */
static const token_type CHAR_TOKENS[UCHAR_MAX + 1] = {
    [(uchar)'+'] = TOK_ADD,
    [(uchar)'-'] = TOK_SUB,
    [(uchar)'*'] = TOK_MUL,
    [(uchar)'/'] = TOK_DIV,
};

/*
 * Hand out the next unused slot of the lexer's token array, growing the
 * array first when it is full. The capacity starts at
 * FIRST_TOKEN_BATCH_SIZE and doubles on each growth.
 *
 * Returns the reserved slot (its contents are not initialized), or NULL if
 * realloc() fails; on failure the existing array is left untouched.
 */
static token *get_new_token(lexer *lx) {
  while (lx->token_count >= lx->token_capacity) {
    size_t grown_capacity = FIRST_TOKEN_BATCH_SIZE;
    token *grown;

    if (lx->token_capacity != 0)
      grown_capacity = lx->token_capacity << 1;

    grown = realloc(lx->tokens, grown_capacity * sizeof *lx->tokens);
    if (grown == NULL)
      return NULL;

    lx->tokens = grown;
    lx->token_capacity = grown_capacity;
  }
  return &lx->tokens[lx->token_count++];
}

/*
 * Fill in every field of *tokp and return tokp, so a token can be built and
 * returned in a single expression.
 */
static inline token *mk_token(token *tokp, token_type typ, char *val,
                              size_t len) {
  tokp->typ = typ;
  tokp->val = val;
  tokp->len = len;
  return tokp;
}

/*
 * Lex the next argument of lx->argv into a freshly reserved token.
 *
 * Classification of an argument:
 *   - exactly one character that is a known operator  -> that operator token
 *   - an optional leading '-' followed by one or more
 *     decimal digits                                   -> TOK_INT
 *   - anything else, including the empty string        -> TOK_UKN
 *   - the NULL terminator of the vector                -> TOK_EOF
 *
 * The token text points at the argument itself (nothing is copied).
 *
 * Returns the token, or NULL if no token slot could be allocated. The
 * argument cursor is only advanced when an argument was actually consumed:
 * it is left untouched on allocation failure and never moves past the NULL
 * terminator, so calling again after TOK_EOF yields TOK_EOF again.
 */
token *expr_lex_get_next_token(lexer *lx) {
  token *tokp = get_new_token(lx);
  char *arg;
  size_t len;
  size_t first_digit;
  uchar first;

  if (tokp == NULL)
    return NULL;

  arg = *lx->argv;
  if (arg == NULL)
    return mk_token(tokp, TOK_EOF, NULL, 0);
  lx->argv++;

  len = strlen(arg);
  first = (uchar)arg[0];

  /* An operator is a whole argument, not merely a first character: "-5" or
   * "+abc" must not be reported as operators. The index is checked against
   * the table's own size so no byte value can read past its end. */
  if (len == 1 && first < sizeof CHAR_TOKENS / sizeof CHAR_TOKENS[0] &&
      CHAR_TOKENS[first] != TOK_UKN)
    return mk_token(tokp, CHAR_TOKENS[first], arg, len);

  /* Integers may carry a unary minus, but need at least one digit. */
  first_digit = (arg[0] == '-') ? 1 : 0;
  if (first_digit == len)
    return mk_token(tokp, TOK_UKN, arg, len);

  for (size_t i = first_digit; i < len; i++) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    if (!isdigit((uchar)arg[i]))
      return mk_token(tokp, TOK_UKN, arg, len);
  }
  return mk_token(tokp, TOK_INT, arg, len);
}

0 comments on commit 2275975

Please sign in to comment.