Skip to content

Commit

Permalink
Atomize token data.
Browse files: browse the repository at this point in the history.
This makes tokens easier to pass between the AST and lexer.
  • Loading branch information
dvander committed Sep 28, 2023
1 parent 4328449 commit 49de1c4
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 21 deletions.
35 changes: 22 additions & 13 deletions compiler/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ Lexer::SynthesizeIncludePathToken()

if (!open_c)
open_c = '"';
tok->data = ke::StringPrintf("%c%s", open_c, name);
tok->atom = cc_.atom(ke::StringPrintf("%c%s", open_c, name));
}

/* ftoi
Expand Down Expand Up @@ -469,7 +469,7 @@ void Lexer::HandleDirectives() {
}

auto tok = PushSynthesizedToken(tSYN_PRAGMA_UNUSED, col);
tok->data = ke::Join(parts, ",");
tok->atom = cc_.atom(ke::Join(parts, ","));
} else {
error(207); /* unknown #pragma */
}
Expand Down Expand Up @@ -1002,6 +1002,7 @@ void Lexer::HandleMultiLineComment() {
}

void Lexer::packedstring(full_token_t* tok, char term) {
std::string data;
while (true) {
char c = peek();
if (c == term || c == 0)
Expand All @@ -1012,19 +1013,20 @@ void Lexer::packedstring(full_token_t* tok, char term) {
}
if (IsNewline(c))
break;
packedstring_char(tok);
packedstring_char(&data);
}
tok->atom = cc_.atom(data);
}

void Lexer::packedstring_char(full_token_t* tok) {
void Lexer::packedstring_char(std::string* data) {
bool is_codepoint;
cell ch = litchar(kLitcharUtf8, &is_codepoint);
if (ch < 0)
return;
if (is_codepoint)
UnicodeCodepointToUtf8(ch, &tok->data);
UnicodeCodepointToUtf8(ch, data);
else
tok->data.push_back(static_cast<char>(ch));
data->push_back(static_cast<char>(ch));
}

/* lex(lexvalue,lexsym) Lexical Analysis
Expand Down Expand Up @@ -1325,7 +1327,6 @@ Lexer::PushSynthesizedToken(TokenKind kind, int col)
auto tok = current_token();
tok->id = kind;
tok->value = 0;
tok->data.clear();
tok->atom = nullptr;
tok->start.line = state_.tokline;
tok->start.col = col;
Expand Down Expand Up @@ -1681,7 +1682,6 @@ bool Lexer::lex_number(full_token_t* tok) {

void Lexer::LexStringLiteral(full_token_t* tok, int flags) {
tok->id = tSTRING;
tok->data.clear();
tok->atom = nullptr;
tok->value = -1; // Catch consumers expecting automatic litadd().

Expand All @@ -1693,7 +1693,11 @@ void Lexer::LexStringLiteral(full_token_t* tok, int flags) {
error(37);
} else {
advance();
packedstring_char(tok);

std::string data;
packedstring_char(&data);
tok->atom = cc_.atom(data);

/* invalid char declaration */
if (!match_char('\''))
error(27); /* invalid character constant (must be one character) */
Expand Down Expand Up @@ -2369,24 +2373,29 @@ cell Lexer::get_utf8_char() {
}

void Lexer::LexStringContinuation() {
ke::SaveAndSet<bool> stop_recursion(&in_string_continuation_, true);

if (!peek(tELLIPS))
return;

auto initial = std::move(*current_token());
assert(initial.id == tSTRING);

ke::SaveAndSet<bool> stop_recursion(&in_string_continuation_, true);

std::string data = initial.data();
while (match(tELLIPS)) {
if (match(tCHAR_LITERAL)) {
initial.data.push_back(current_token()->value);
data.push_back(current_token()->value);
continue;
}
if (!need(tSTRING)) {
lexpush();
break;
}
initial.data += current_token()->data;
data += current_token()->data();
}

*current_token() = std::move(initial);
current_token()->atom = cc_.atom(data);
}

bool Lexer::HasMacro(sp::Atom* atom) {
Expand Down
6 changes: 4 additions & 2 deletions compiler/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ struct token_pos_t {
struct full_token_t {
int id = 0;
int value = 0;
std::string data;
sp::Atom* atom = nullptr;
token_pos_t start;
token_pos_t end;
const std::string& data() const {
return atom->str();
}
};

#define MAX_TOKEN_DEPTH 4
Expand Down Expand Up @@ -366,7 +368,7 @@ class Lexer
void lex_float(full_token_t* tok, cell whole);
cell litchar(int flags, bool* is_codepoint = nullptr);
void packedstring(full_token_t* tok, char term);
void packedstring_char(full_token_t* tok);
void packedstring_char(std::string* data);

bool IsSkipping() const {
return skiplevel_ > 0 && (ifstack_[skiplevel_ - 1] & SKIPMODE) == SKIPMODE;
Expand Down
12 changes: 6 additions & 6 deletions compiler/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ Parser::Parse()
case tpTRYINCLUDE: {
if (!lexer_->need(tSYN_INCLUDE_PATH))
break;
auto name = lexer_->current_token()->data;
auto name = lexer_->current_token()->data();
auto result = lexer_->PlungeFile(name.c_str() + 1, (name[0] != '<'), TRUE);
if (!result && tok != tpTRYINCLUDE) {
report(417) << name.substr(1);
Expand Down Expand Up @@ -601,7 +601,7 @@ Parser::parse_pragma_unused()
{
auto pos = lexer_->pos();

auto data = std::move(lexer_->current_token()->data);
const auto& data = lexer_->current_token()->data();
std::vector<std::string> raw_names = ke::Split(data, ",");
std::vector<sp::Atom*> names;
for (const auto& raw_name : raw_names)
Expand Down Expand Up @@ -1061,7 +1061,7 @@ Parser::constant()
case tRATIONAL:
return new FloatExpr(cc_, pos, lexer_->current_token()->value);
case tSTRING: {
const auto& str = lexer_->current_token()->data;
const auto& str = lexer_->current_token()->data();
return new StringExpr(pos, str.c_str(), str.size());
}
case tTRUE:
Expand Down Expand Up @@ -1177,7 +1177,7 @@ Parser::struct_init()
Expr* expr = nullptr;
switch (lexer_->lex()) {
case tSTRING: {
const auto& str = lexer_->current_token()->data;
const auto& str = lexer_->current_token()->data();
expr = new StringExpr(pos, str.c_str(), str.size());
break;
}
Expand Down Expand Up @@ -1218,7 +1218,7 @@ Parser::parse_static_assert()
PoolString * text = nullptr;
if (lexer_->match(',') && lexer_->need(tSTRING)) {
auto tok = lexer_->current_token();
text = new PoolString(tok->data.c_str(), tok->data.size());
text = new PoolString(tok->data().c_str(), tok->data().size());
}

lexer_->need(')');
Expand Down Expand Up @@ -1261,7 +1261,7 @@ Parser::var_init(int vclass)

if (lexer_->match(tSTRING)) {
auto tok = lexer_->current_token();
return new StringExpr(tok->start, tok->data.c_str(), tok->data.size());
return new StringExpr(tok->start, tok->data().c_str(), tok->data().size());
}

// We'll check const or symbol-ness for non-sLOCALs in the semantic pass.
Expand Down

0 comments on commit 49de1c4

Please sign in to comment.