-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokenize.c
156 lines (139 loc) · 3.85 KB
/
tokenize.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
///////////
//Tokenizer
///////////
#include "mycc.h"
//Input program: the source text being tokenized.
//tokenize() scans it from its first character, and error_at() prints it
//and measures error positions relative to it.
char *user_input;
//Current token: the token the parser-facing helpers in this file
//(consume, consume_ident, expect, expect_number, at_eof) inspect and advance.
Token *token;
// Report a fatal error and terminate the program.
//   fmt: printf-style format string; the remaining arguments are its values.
// Writes the formatted message followed by a newline to stderr, then exits
// with status 1. This function never returns.
void error(char *fmt, ...){
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    // Every va_start has to be matched by a va_end in the same function.
    va_end(ap);
    fprintf(stderr, "\n");
    exit(1);
}
// Report a fatal error together with its location in the input, then terminate.
//   loc: position of the error; expected to point into user_input.
//   fmt: printf-style format string; the remaining arguments are its values.
// Output on stderr: the input text, then a line holding a '^' under the
// offending character followed by the formatted message.
// Exits with status 1; this function never returns.
void error_at(char *loc, char *fmt, ...){
    va_list ap;
    va_start(ap, fmt);

    int pos = loc - user_input; // offset of loc from the start of the input
    fprintf(stderr, "%s\n", user_input);
    // Pad an EMPTY string to width pos: exactly pos spaces, and none at all
    // when pos is 0. Padding " " would always emit at least one space and
    // push the caret one column too far for an error at the first character.
    fprintf(stderr, "%*s", pos, "");
    fprintf(stderr, "^ ");
    vfprintf(stderr, fmt, ap);
    // Every va_start has to be matched by a va_end in the same function.
    va_end(ap);
    fprintf(stderr, "\n");
    exit(1);
}
// Try to consume the reserved symbol op.
// If the current token is a TK_RESERVED token whose text is exactly op,
// advance to the next token and return true. Otherwise leave the current
// token where it is and return false.
bool consume(char *op){
    bool matches = token->kind == TK_RESERVED
                && strlen(op) == token->len
                && memcmp(token->str, op, token->len) == 0;
    if(matches){
        token = token->next;
        return true;
    }
    return false;
}
// Try to consume an identifier.
// If the current token is a TK_IDENT token, advance to the next token and
// return the identifier token that was just consumed. Otherwise return NULL
// without advancing.
Token *consume_ident(){
    Token *ident = NULL;
    if(token->kind == TK_IDENT){
        ident = token;
        token = token->next;
    }
    return ident;
}
// Require the reserved symbol op.
// If the current token is a TK_RESERVED token whose text is exactly op,
// advance to the next token. Otherwise report the mismatch at the current
// token's position through error_at(), which terminates the program.
void expect(char *op){
    if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len)){
        // op is a string (and may be several characters long, e.g. "=="),
        // so it is printed with %s; handing a char * to %c is undefined behavior.
        error_at(token->str, "expected '%s'", op);
    }
    token = token->next;
}
//When the current token is TK_NUM, move one step.
//When moving one step, return the integer value. Otherwise, raise error.
int expect_number(){
if(token->kind != TK_NUM){
error_at(token->str, "expected a number");
}
int val = token->val;
token = token->next;
return val;
}
// Tell whether the current token is the end-of-input marker (TK_EOF).
bool at_eof(){
    if(token->kind == TK_EOF){
        return true;
    }
    return false;
}
// Allocate a new token and append it after cur.
//   kind: kind of the new token.
//   cur:  token that will precede the new one; its next pointer is overwritten.
//   str:  start of the token's text (the pointer is stored, the text is not copied).
//   len:  length of the token's text in characters.
// Returns the new token. Fields not set here are left zeroed by calloc.
// Terminates the program with status 1 if the allocation fails.
Token *new_token(TokenKind kind, Token *cur, char *str, int len){
    Token *tok = calloc(1, sizeof *tok);
    if(tok == NULL){
        // Without this check an allocation failure would become a NULL dereference below.
        fprintf(stderr, "out of memory while allocating a token\n");
        exit(1);
    }
    tok->kind = kind;
    tok->str = str;
    tok->len = len;
    cur->next = tok;
    return tok;
}
//Print tokens for debug
void print_tokens(){
printf("# ");
Token *tmp = token;
while(tmp != NULL){
if(tmp->kind == TK_NUM){
printf("%d -> ", tmp->val);
}
else if(tmp->kind == TK_RESERVED){
printf("%c -> ", tmp->str[0]);
}
else if(tmp->kind == TK_EOF){
printf("EOF\n");
}
tmp = tmp->next;
}
}
// Tell whether the string p begins with the string q.
//   p: NUL-terminated string to examine.
//   q: NUL-terminated prefix to look for; an empty q always matches.
// Returns true when the first strlen(q) characters of p equal q.
bool startswith(char *p, char *q){
    // strncmp stops at p's terminating NUL, so it never reads past the end of
    // p when p is shorter than q (memcmp is allowed to read all n bytes).
    return strncmp(p, q, strlen(q)) == 0;
}
// Tokenize the text in user_input.
// Builds a linked list of tokens terminated by a TK_EOF token and returns its
// first element. Whitespace is skipped. Recognized tokens:
//   - two-character symbols  == != <= >=      (TK_RESERVED, len 2)
//   - one-character symbols  + - * / ( ) < > ; =   (TK_RESERVED, len 1)
//   - decimal integers                         (TK_NUM, val holds the value)
//   - a single lowercase letter a-z            (TK_IDENT, len 1)
// Any other character is reported through error_at(), which terminates the program.
Token *tokenize(){
    char *p = user_input;
    // Dummy head node on the stack: lets every real token be appended the
    // same way; only head.next is handed back to the caller.
    Token head;
    head.next = NULL;
    Token *cur = &head; // last token appended so far

    while(*p){
        // Skip whitespace.
        // <ctype.h> functions require a value representable as unsigned char
        // (or EOF); passing a negative plain char is undefined behavior.
        if(isspace((unsigned char)*p)){
            p++;
        }
        // Two-character symbols. This check has to come before the
        // one-character check, otherwise "==" would be split into "=" "=".
        else if(startswith(p, "==") || startswith(p, "!=") || startswith(p, "<=") || startswith(p, ">=")){
            cur = new_token(TK_RESERVED, cur, p, 2);
            p += 2;
        }
        // One-character symbols.
        else if(strchr("+-*/()<>;=", *p)){
            cur = new_token(TK_RESERVED, cur, p++, 1);
        }
        // Integer literal: strtol advances p past the digits, and the
        // distance it moved is the token length.
        else if(isdigit((unsigned char)*p)){
            cur = new_token(TK_NUM, cur, p, 0);
            char *q = p;
            cur->val = strtol(p, &p, 10);
            cur->len = p - q;
        }
        // Identifier: exactly one lowercase letter.
        else if('a' <= *p && *p <= 'z'){
            cur = new_token(TK_IDENT, cur, p++, 1);
        }
        // Anything else cannot be tokenized.
        else{
            error_at(p, "Unable to tokenize");
        }
    }

    new_token(TK_EOF, cur, p, 0);
    return head.next;
}