Skip to content

Commit

Permalink
Fix memory issues in lexer and tokenise some extra stuff
Browse files Browse the repository at this point in the history
- changed VAR token to NAME so it doubles as the token for functions (also tokenising those now)
- replaced op tokens with BINARY_OP and UNARY_OP tokens (also parsing unary minus now)
- added null byte to end of string when reading file (lexer didn't properly know when to stop, causing invalid read)
- also made a failed (uncommitted) attempt at a parser
  • Loading branch information
tobyck committed Aug 21, 2024
1 parent e519e5a commit 94e05c1
Show file tree
Hide file tree
Showing 11 changed files with 62 additions and 95 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
build
result
experiments
old
5 changes: 2 additions & 3 deletions examples/a.bas
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
hello there!
this is some
text.
let a=abs(2+3-6)*-4
print a, "test"
3 changes: 1 addition & 2 deletions examples/e.bas
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
reM hello
REm this is a commentntntntttt
1+2*-3
4 changes: 2 additions & 2 deletions examples/s.bas
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
REM this is a remark
10 LET A=(2+3)*-4
REM another remark
LET B="missing end quotes
PRINT A, 567, "th1s i5 string"
LET B = "missing end quotes
PRINT A, 567, "th1s i5 a string"
PRINT invalid token 12.34
3 changes: 1 addition & 2 deletions examples/test.bas
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
REM this is a demo
1 let A=2
1 let A = 2
REM this is another comment
REM and so is this
REM

20 B=A+3*4 Rem 2+3*4=14
LET C=(A + 3) / a REM should be 5/2=2

66 changes: 41 additions & 25 deletions src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@
char *stringify_token_type(TokenType token_type) {
switch (token_type) {
case TOKEN_LET: return "LET";
case TOKEN_VAR: return "VAR";
case TOKEN_EQ: return "EQ";
case TOKEN_NAME: return "NAME";
case TOKEN_ASSIGN: return "ASSIGN";
case TOKEN_INT: return "INT";
case TOKEN_ADD: return "ADD";
case TOKEN_SUB: return "SUB";
case TOKEN_MUL: return "MUL";
case TOKEN_DIV: return "DIV";
case TOKEN_MOD: return "MOD";
case TOKEN_NEGATE: return "NEGATE";
case TOKEN_BINARY_OP: return "BINARY_OP";
case TOKEN_UNARY_OP: return "UNARY_OP";
case TOKEN_OPEN_PAREN: return "OPEN_PAREN";
case TOKEN_CLOSE_PAREN: return "CLOSE_PAREN";
case TOKEN_PRINT: return "PRINT";
Expand Down Expand Up @@ -173,14 +171,17 @@ inline char peek_nth(Lexer *lexer, size_t n) { return lexer->code[lexer->current
inline char consume(Lexer *lexer) { return lexer->code[lexer->current_index++]; }

void lexer_invalid_token(Lexer *lexer, size_t line, size_t column) {
printf("whaaaat\n");
Token previous_invalid = last_token(lexer->result.invalid);

if (
lexer->result.invalid.length > 0 && // if there's already an invalid token
// and its column + length = the current column, append to previous token
// i.e. this invalid char is directly next to the previous invalid token
previous_invalid.column + strlen(previous_invalid.literal) == column
) append_char(&previous_invalid.literal, consume(lexer));
)
// append the char to the previous invalid token
append_char(&lexer->result.invalid.tokens[lexer->result.invalid.length - 1].literal, consume(lexer));

// otherwise push a new invalid token
else push_token(&lexer->result.invalid, (Token){
TOKEN_INVALID, alloc_char_as_str(consume(lexer)), line, column
Expand Down Expand Up @@ -226,19 +227,38 @@ LexerResult lex(char *code) {

// simple single char tokens

// simple case where a single char is mapped to single token with no additional info
#define simple_token_case(char, type) \
case char: \
push_token(&lexer->result.valid, (Token){ type, NULL, l, c }); \
consume(lexer); \
continue;

switch (peek(lexer)) {
simple_token_case('=', TOKEN_EQ)
simple_token_case('+', TOKEN_ADD)
simple_token_case('-', TOKEN_SUB)
simple_token_case('*', TOKEN_MUL)
simple_token_case('/', TOKEN_DIV)
simple_token_case('%', TOKEN_MOD)
case '+':
case '*':
case '/':
case '%':
push_token(&lexer->result.valid, (Token){
TOKEN_BINARY_OP, alloc_char_as_str(consume(lexer)), l, c
});
continue;
case '-': {
char *literal = alloc_char_as_str(consume(lexer));
switch (last_token(lexer->result.valid).type) {
case TOKEN_CLOSE_PAREN:
case TOKEN_INT:
case TOKEN_NAME:
push_token(&lexer->result.valid, (Token){
TOKEN_BINARY_OP, literal, l, c
});
break;
default:
push_token(&lexer->result.valid, (Token){ TOKEN_UNARY_OP, literal, l, c });
}
continue;
}
simple_token_case('=', TOKEN_ASSIGN)
simple_token_case('(', TOKEN_OPEN_PAREN)
simple_token_case(')', TOKEN_CLOSE_PAREN)
simple_token_case(',', TOKEN_COMMA)
Expand Down Expand Up @@ -306,19 +326,15 @@ LexerResult lex(char *code) {
continue;
}

// vars
// names (vars/functions)

Token previous_token = last_token(lexer->result.valid);
if (
isalpha(peek(lexer)) &&
lexer->current_index > 0 &&
!isalpha(peek_nth(lexer, 1)) &&
!isalpha(peek_nth(lexer, -1))
) {
if (isalpha(peek(lexer))) {
char *name = alloc_char_as_str(consume(lexer));
while (isalpha(peek(lexer)))
append_char(&name, consume(lexer));
push_token(&lexer->result.valid, (Token){
TOKEN_VAR, alloc_char_as_str(toupper(consume(lexer))), l, c
TOKEN_NAME, name, l, c
});
consume(lexer);
continue;
}

Expand Down
12 changes: 5 additions & 7 deletions src/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@

typedef enum {
TOKEN_LET,
TOKEN_VAR,
TOKEN_EQ,
TOKEN_NAME,
TOKEN_ASSIGN,
TOKEN_INT,
TOKEN_ADD,
TOKEN_SUB,
TOKEN_MUL,
TOKEN_DIV,
TOKEN_MOD,
TOKEN_NEGATE,
TOKEN_BINARY_OP,
TOKEN_UNARY_OP,
TOKEN_OPEN_PAREN,
TOKEN_CLOSE_PAREN,
TOKEN_PRINT,
Expand Down
8 changes: 6 additions & 2 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
#include <stdio.h>
#include <stdbool.h>

#include "lexer.h"
#include "utils.h"
#include "lexer.h"
#include "parser.h"

int main(int argc, char *argv[]) {
if (argc > 2) {
Expand All @@ -16,11 +17,14 @@ int main(int argc, char *argv[]) {
char *code = read_file(argv[1]);

LexerResult tokens = lex(code);

print_token_list(tokens.valid);
print_token_list(tokens.invalid);
print_lexer_errors(tokens.errors);

// AST ast = parse(tokens.valid);
// print_ast(ast);
// free_ast(ast);

free(code);
free_lexer_result(tokens);

Expand Down
1 change: 0 additions & 1 deletion src/parser.c
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
#include "parser.h"
50 changes: 0 additions & 50 deletions src/parser.h
Original file line number Diff line number Diff line change
@@ -1,50 +0,0 @@
#ifndef INCLUDE_PARSER_H
#define INCLUDE_PARSER_H

#include <stdlib.h>

/* Kind of a node in the abstract syntax tree. */
typedef enum {
	AST_ASSIGNMENT,
	AST_EXPRESSION,
	AST_STRING,
	AST_PRINT
} ASTNodeType;

/* Binary arithmetic operators used inside expressions. */
typedef enum {
	MATH_OP_ADD,
	MATH_OP_SUB,
	MATH_OP_MUL,
	MATH_OP_DIV,
	MATH_OP_MOD,
} MathOp;

/* Dynamic list of expressions, e.g. the arguments of a PRINT statement.
 * `exprs` points to `length` contiguous elements; owner must free. */
typedef struct {
	struct ASTExpression *exprs;
	size_t length;
} ASTExpressionList;

/* One AST node: `type` selects which union member of `node` is active. */
typedef struct ASTNode {
	ASTNodeType type;
	union {
		/* Binary expression: `op` applied to `lhs` and `rhs`. */
		struct ASTExpression {
			MathOp op;
			struct ASTExpression *lhs;
			struct ASTExpression *rhs;
		} expr;
		/* Assignment of an expression to a single-letter variable. */
		struct ASTAssignment {
			char variable;
			struct ASTExpression *expr;
		} assingment; /* NOTE(review): field name is a typo for "assignment" — kept for source compatibility */
		char *string;               /* string literal payload (AST_STRING) */
		ASTExpressionList print;    /* argument list of a PRINT (AST_PRINT) */
	} node;
} ASTNode;

/* Whole program: a dynamic array of `length` top-level nodes.
 * Fixed: `nodes` was declared `ASTNode nodes;` (a single node by value),
 * which cannot hold `length` elements — it must be a pointer, matching
 * how ASTExpressionList stores its elements. */
typedef struct {
	ASTNode *nodes;
	size_t length;
} AST;

/* Returns an empty AST (no nodes, length 0). Defined in parser.c. */
extern AST new_ast(void);

#endif // INCLUDE_PARSER_H
4 changes: 3 additions & 1 deletion src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ char *read_file(char *path) {
exit(EXIT_FAILURE);
}

char *buffer = malloc(file_length);
char *buffer = malloc(file_length + 1);

if (buffer == NULL) {
printf("Error: could not allocate buffer for file content\n");
Expand All @@ -37,6 +37,8 @@ char *read_file(char *path) {
size_t amount_read = fread(buffer, 1, file_length, file);
fclose(file);

buffer[file_length] = '\0';

if (amount_read != file_length) {
printf("Error: did not manage to read the whole file.\n");
exit(EXIT_FAILURE);
Expand Down

0 comments on commit 94e05c1

Please sign in to comment.