Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Read assertion from file with offsets
Browse files Browse the repository at this point in the history
Akuli committed Jul 28, 2024
1 parent 28b7c18 commit cf413fb
Showing 3 changed files with 79 additions and 36 deletions.
11 changes: 10 additions & 1 deletion src/jou_compiler.h
Original file line number Diff line number Diff line change
@@ -91,7 +91,6 @@ struct Token {
TOKEN_OPERATOR,
TOKEN_END_OF_FILE, // Marks the end of an array of Token
} type;
Location location;
union {
int16_t short_value; // TOKEN_SHORT
int32_t int_value; // TOKEN_INT
@@ -102,6 +101,16 @@ struct Token {
char name[100]; // TOKEN_NAME and TOKEN_KEYWORD. Also TOKEN_DOUBLE & TOKEN_FLOAT (LLVM wants a string anyway)
char operator[4]; // TOKEN_OPERATOR
} data;

/*
Contains only the line number, not column. In some cases you can use
start_offset and end_offset to work around that.
*/
Location location;

// Number of bytes from start of file to start/end of this token.
long start_offset;
long end_offset;
};

// Constants can appear in AST and also compilation steps after AST.
90 changes: 56 additions & 34 deletions src/parse.c
Original file line number Diff line number Diff line change
@@ -725,42 +725,65 @@ static enum AstStatementKind determine_the_kind_of_a_statement_that_starts_with_
return AST_STMT_EXPRESSION_STATEMENT;
}

// TODO: this function is just bad...
static char *read_assertion_from_file(Location start, Location end)
static char *read_assertion_from_file(Location error_location, const Token *start, const Token *end)
{
assert(start.filename == end.filename);
FILE *f = fopen(start.filename, "rb");
assert(f);

char line[1024];
int lineno = 1;
while (lineno < start.lineno) {
fgets(line, sizeof line, f);
lineno++;
}
FILE *f = fopen(error_location.filename, "rb");
if (!f)
goto error;

List(char) result = {0};

long ostart, oend; // offsets within file to include
for (const Token *t = start; t < end; t++) {
assert(t->start_offset < t->end_offset);

if (t == start || t->location.lineno != t[-1].location.lineno) {
// First token of a new line
ostart = t->start_offset;
oend = t->end_offset;
} else {
// Include more tokens from the line of code so that this token is added too.
// We cannot include the entire line because it might contain comments.
assert(oend <= t->start_offset);
oend = t->end_offset;
}

List(char) str = {0};
while (lineno <= end.lineno) {
memset(line, 0, sizeof line);
fgets(line, sizeof line, f);
lineno++;

if (strstr(line, "#"))
*strstr(line, "#") = '\0';
trim_whitespace(line);
// Add spaces between the lines, but not after '(' or before ')'
if (line[0] != ')' && str.len >= 1 && str.ptr[str.len-1] != '(')
AppendStr(&str, " ");
AppendStr(&str, line);
if (t == end-1 || t[0].location.lineno != t[1].location.lineno) {
// Last token of a line. Read code from file.
char *line = malloc(oend - ostart + 1);
if (!line)
goto error;
if (fseek(f, ostart, SEEK_SET) < 0)
goto error;
if (result.len > 0)
Append(&result, '\n');
for (long i = ostart; i < oend; i++) {
int c = fgetc(f);
if (c == EOF || c == '\r' || c == '\n')
goto error;
Append(&result, (char)c);
}
}
}

fclose(f);
Append(&str, '\0');
/*
Join lines with spaces, but do not put spaces just after '(' or before ')'.
This makes multiline asserts nicer, so "assert (\n foo and bar\n)"
shows "foo and bar" as the assert condition.
*/
Append(&result, '\0');
char *p;
while ((p = strstr(result.ptr, "\n"))) {
if ((p > result.ptr && p[-1] == '(') || (p[1] == ')')) {
memmove(p, p+1, strlen(p)); // delete newline character at p
} else {
*p = ' '; // join lines with a space
}
}
return result.ptr;

if(!strncmp(str.ptr, "assert",6))
memmove(str.ptr, &str.ptr[6], strlen(&str.ptr[6]) + 1);
trim_whitespace(str.ptr);
return str.ptr;
error:
fail(error_location, "internal error: cannot read assertion text from file");
}

// does not eat a trailing newline
@@ -777,10 +800,9 @@ static AstStatement parse_oneline_statement(ParserState *ps)
} else if (is_keyword(ps->tokens, "assert")) {
ps->tokens++;
result.kind = AST_STMT_ASSERT;
Location start = ps->tokens->location;
const Token *condstart = ps->tokens;
result.data.assertion.condition = parse_expression(ps);
Location end = ps->tokens->location;
result.data.assertion.condition_str = read_assertion_from_file(start, end);
result.data.assertion.condition_str = read_assertion_from_file(result.location, condstart, ps->tokens);
} else if (is_keyword(ps->tokens, "pass")) {
ps->tokens++;
result.kind = AST_STMT_PASS;
14 changes: 13 additions & 1 deletion src/tokenize.c
Original file line number Diff line number Diff line change
@@ -420,10 +420,20 @@ static void handle_parentheses(struct State *st, const struct Token *t)
}
}

/*
Tells how far into the file the tokenizer currently is, as a number of
bytes counted from the start of the file.

The position reported by ftell() is adjusted by st->pushback.len
(presumably bytes that were already read from the file and then pushed
back, so they have not been consumed yet -- confirm against read_byte).
A failing ftell() is reported through fail() at the current location.
*/
static long get_offset(const struct State *st)
{
    long file_pos = ftell(st->f);

    if (file_pos < 0) {
        fail(st->location, "internal error: ftell() failed");
    }

    return file_pos - st->pushback.len;
}

static Token read_token(struct State *st)
{
while(1) {
Token t = { .location = st->location };
Token t = { .location = st->location, .start_offset = get_offset(st) };
char c = read_byte(st);

switch(c) {
@@ -482,6 +492,8 @@ static Token read_token(struct State *st)
}
break;
}

t.end_offset = get_offset(st);
return t;
}
}

0 comments on commit cf413fb

Please sign in to comment.