-
Notifications
You must be signed in to change notification settings - Fork 18
/
c4.c
600 lines (571 loc) · 26 KB
/
c4.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
// c4.c - C in four functions
// char, int, and pointer types
// if, while, return, and expression statements
// just enough features to allow self-compilation and a bit more
// Written by Robert Swierczek
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
char *p, *lp, // current position in source code
*data; // data/bss pointer
int *e, *le, // current position in emitted code
*id, // currently parsed identifier
*sym, // symbol table (simple list of identifiers)
tk, // current token
ival, // current token value
ty, // current expression type
loc, // local variable offset
line, // current line number
src, // print source and assembly flag
debug; // print executed instructions
// tokens and classes (operators last and in precedence order)
/*
Id = identifier 用户定义的identifier
//运算符
Assign =
Cond ?
Lor ||
Lan &&
Or |
Xor ^
And &
Eq ==
Ne !=
Lt <
Gt >
Le <=
Ge >=
Shl <<
Shr >>
Add +
Sub -
Mul *
Div /
Mod %
Inc ++
Dec --
Brak [
*/
/*
id[Class] =
Num 常量数值
Fun 函数
Sys 系统调用
Glo 全局变量
Loc 局部变量
*/
enum {
Num = 128, Fun, Sys, Glo, Loc, Id,
Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};
// opcodes
enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT };
// types
enum { CHAR, INT, PTR };
// identifier offsets (since we can't create an ident struct)
//因为作者没有实现结构体,所以[id]指向的空间被分割为Idsz大小的块(模拟结构体)
//当id指向块首时,id[0] == id[Tk] 访问的就是Tk成员的数据(一般是指针)
//Name 指向的是这个identifier的Name
//Type 为数据类型(比如返回值类型),如CHAR,INT,INT+PTR
//Class 为类型,如Num(常量数值),Fun(函数),Sys(系统调用),Glo全局变量,Loc 局部变量
enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
//词法分析
void next()
{
char *pp;
// 用循环来忽略空白字符,不过不能被词法分析器识别的字符都被认为是空白字符 比如 '@', '$'
while (tk = *p) {
++p;
if (tk == '\n') {
if (src) { //命令行指明-s参数,输出源代码和对应字节码
printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); //ADJ之前的指令均有操作数
}
}
++line;
}
else if (tk == '#') {//#作为单行注释符号,处理#include等情况
while (*p != 0 && *p != '\n') ++p;
}
else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
pp = p - 1; //因为有++p,pp回退一个字符,pp指向 [这个符号] 的首字母
while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
tk = tk * 147 + *p++;//计算哈希值,tk起始时已经等于*p,从p开始正确
tk = (tk << 6) + (p - pp); //哈希值加上[符号长度]
id = sym;
while (id[Tk]) {
if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) {
//找到同名,则tk = id[Tk] (将id看做结构体,访问其Tk成员,解释见上)
tk = id[Tk];
return;
}
id = id + Idsz;//继续循环identifier表
}
//找不到,发现新identifier,此时id已经指向新的identifier结构
id[Name] = (int)pp;
id[Hash] = tk;//哈希值
tk = id[Tk] = Id; //token 类型为identifier
return;
}
else if (tk >= '0' && tk <= '9') {// 第一位为数字,认为是数值
if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; }//第一位不为0,认为是十进制数
else if (*p == 'x' || *p == 'X') {//第一位为0,且以x开头,认为是16进制数
while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F')))
ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0);
}
else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; }//认为是八进制数
tk = Num;//token 为数值型,返回
return;
}
else if (tk == '/') {
if (*p == '/') {//两个'/'开头,单行注释
++p;
while (*p != 0 && *p != '\n') ++p;
}
else {
tk = Div; //除法
return;
}
}
else if (tk == '\'' || tk == '"') {//引号开头,认为是字符(串)
pp = data;
while (*p != 0 && *p != tk) {//直到找到匹配的引号为止;p == 0 是什么情况没看懂;
if ((ival = *p++) == '\\') {
if ((ival = *p++) == 'n') ival = '\n';// '\n' 认为是'\n' 其他直接忽略'\'转义
}
if (tk == '"') *data++ = ival;//如果是双引号,认为是字符串,向data拷贝字符
}
++p;
if (tk == '"') ival = (int)pp; else tk = Num;//双引号则ival指向data中字符串开始,单引号则认为是数字
return;
}
else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }//等于,赋值
else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }//加,自增
else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; }//减,自减
else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }//不等于
else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }// <,<=, <<
else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }//>,>= >>
else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }//逻辑或,或
else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
else if (tk == '^') { tk = Xor; return; }
else if (tk == '%') { tk = Mod; return; }
else if (tk == '*') { tk = Mul; return; }
else if (tk == '[') { tk = Brak; return; }
else if (tk == '?') { tk = Cond; return; }
else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;//不做处理
}
}
//表达式分析
//lev表示运算符,因为各个运算符token是按照优先级生序排列的,所以lev大表示优先级高
void expr(int lev)
{
int t, *d;
if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } //直接取立即数作为表达式值
else if (tk == '"') { //字符串
*++e = IMM; *++e = ival; next();
while (tk == '"') next();//连续的'"' 处理C风格多行文本 比如["abc" "def"]
data = (char *)((int)data + sizeof(int) & -sizeof(int));//字节对齐到int
ty = PTR;
}
else if (tk == Sizeof) {
next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
while (tk == Mul) { next(); ty = ty + PTR; }//多级指针,每多一级加PTR
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);//除了char是一字节,int和多级指针都是int大小
ty = INT;
}
else if (tk == Id) {//identifier
d = id; next();
if (tk == '(') {//函数
next();
t = 0;//形参个数
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }//计算实参的值,压栈(传参)
next();
if (d[Class] == Sys) *++e = d[Val]; //系统调用,如malloc,memset,d[val]为opcode
else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } //用户定义函数,d[Val]为函数入口地址
else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; }//因为用栈传参,调整栈
ty = d[Type];//函数返回值类型
}
else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }//d[Class] == Num,处理枚举(只有枚举是Class==Num)
else {//变量
//变量先取地址然后再LC/LI
if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }//取地址,d[Val]是局部变量偏移量
else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }//取地址,d[Val] 是全局变量指针
else { printf("%d: undefined variable\n", line); exit(-1); }
*++e = ((ty = d[Type]) == CHAR) ? LC : LI;
}
}
else if (tk == '(') {
next();
if (tk == Int || tk == Char) {//强制类型转换
t = (tk == Int) ? INT : CHAR; next();
while (tk == Mul) { next(); t = t + PTR; }//指针
if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
expr(Inc); //高优先级
ty = t;
}
else { //一般语法括号
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
}
}
else if (tk == Mul) {//取指针指向值
next(); expr(Inc);//高优先级
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
*++e = (ty == CHAR) ? LC : LI;
}
else if (tk == And) {//&,取地址操作
next(); expr(Inc);
if (*e == LC || *e == LI) --e;//根据上面的代码,token为变量时都是先取地址再LI/LC,所以--e就变成了取地址到a
else { printf("%d: bad address-of\n", line); exit(-1); }
ty = ty + PTR;
}
else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; }//!x相当于 x==0
else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; }//~x 相当于x ^ -1
else if (tk == Add) { next(); expr(Inc); ty = INT; }
else if (tk == Sub) {
next(); *++e = IMM;
if (tk == Num) { *++e = -ival; next(); } //数值,取负
else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }//乘-1
ty = INT;
}
else if (tk == Inc || tk == Dec) {//处理++x,--x//x--,x++在后面处理
t = tk; next(); expr(Inc);
//处理++x,--x
if (*e == LC) { *e = PSH; *++e = LC; }//地址压栈(下面SC/SI用到),再取数
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH;//将数值压栈
*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);//指针则加减一字,否则加减1
*++e = (t == Inc) ? ADD : SUB;//运算
*++e = (ty == CHAR) ? SC : SI;//存回变量
}
else { printf("%d: bad expression\n", line); exit(-1); }
//爬山法
//tk为ASCII码的都不会超过Num=128
while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
t = ty;//ty在递归过程中可能会改变,所以备份当前处理的表达式类型
if (tk == Assign) { //赋值
next();
if (*e == LC || *e == LI) *e = PSH; //左边被tk=Id中变量部分处理过了,将地址压栈
else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;//取得右值expr的值,作为a=expr的结果
}
else if (tk == Cond) {//x?a:b和if类似,除了不能没有else
next();
*++e = BZ; d = ++e;
expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
*d = (int)(e + 3); *++e = JMP; d = ++e;
expr(Cond);
*d = (int)(e + 1);
}
else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; }//短路,逻辑Or运算符左边为true则表达式为true,不用计算运算符右侧的值
else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; }//短路,逻辑And,同上
else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; }//将当前值Push,计算运算符右边值,再与当前值(在栈中)做运算;
else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; }//expr中lev指明递归函数中最结合性不得低于哪一个运算符
else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; }
else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; }
else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; }
else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; }
else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; }
else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; }
else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; }
else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; }
else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; }
else if (tk == Add) {
next(); *++e = PSH; expr(Mul);
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }//处理指针
*++e = ADD;
}
else if (tk == Sub) {
next(); *++e = PSH; expr(Mul);
if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; }//指针相减
else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; }//指针减数值
else *++e = SUB;
}
else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; }
else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; }
else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; }
else if (tk == Inc || tk == Dec) {//处理x++,x--
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? ADD : SUB;//先自增/自减
*++e = (ty == CHAR) ? SC : SI;//存到内存里
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? SUB : ADD;//再相反操作,保证后自增/自减不影响这次表达式的求值
//PS:我终于知道哪些a=1;b=a+++a++为啥等于3了
next();
}
else if (tk == Brak) {//数组下标
next(); *++e = PSH; expr(Assign);//保存数组指针, 计算下标
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } //t==PTR时是Char,Char = 0
else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
*++e = ADD;
*++e = ((ty = t - PTR) == CHAR) ? LC : LI;
}
else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
}
}
//分析函数中除了声明之外的部分,语法分析
void stmt()
{
int *a, *b;
if (tk == If) {
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; //branch if zero
b = ++e;//branch address pointer
stmt(); // 继续分析
if (tk == Else) {
*b = (int)(e + 3); // e + 3 位置是else 起始位置
*++e = JMP; // if 语句 else 之前插入 JMP 跳过Else 部分
b = ++e; // JMP目标
next();
stmt();//分析else 部分
}
*b = (int)(e + 1);//if 语句结束,无论是if BZ 跳转目标还是 else 之前的JMP的跳转目标
}
else if (tk == While) {//循环
next();
a = e + 1; // While 循环体起始地址
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; b = ++e;//b = While 语句结束后地址
stmt();//处理While 语句体
*++e = JMP; *++e = (int)a;//无条件跳转到While语句开始(包括循环条件的代码),实现循环
*b = (int)(e + 1);//BZ跳转目标(循环结束)
}
else if (tk == Return) {
next();
if (tk != ';') expr(Assign);//计算返回值
*++e = LEV;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }//What's this?
}
else if (tk == '{') {//复合语句
next();
while (tk != '}') stmt();
next();
}
else if (tk == ';') {
next();
}
else {
expr(Assign);//一般的语句认为是赋值语句/表达式
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
}
}
int main(int argc, char **argv)
{
//fd file descriptor 文件描述
//bt basetype
//poolsz 一系列池的大小
int fd, bt, ty, poolsz, *idmain;
int *pc, *sp, *bp, a, cycle; // vm registers 虚拟器的寄存器
int i, *t; // temps
--argc; ++argv;
if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; }
if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; }
if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; }
if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; }//打开文件
poolsz = 256*1024; // arbitrary size
if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; }//符号表
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }//// current position in emitted code
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }//数据段
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }//栈
memset(sym, 0, poolsz);
memset(e, 0, poolsz);
memset(data, 0, poolsz);
// 用词法分析器先把这些关键词放进符号表
p = "char else enum if int return sizeof while "
"open read close printf malloc memset memcmp exit void main";
// 把关键词加进去,id[Tk]修改为和Enum一致
i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
//把[库]里定义的符号(系统函数等) 加进去 Class 赋值为Sys
i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
// void 认为是char
next(); id[Tk] = Char; // handle void type
// 记录main函数的符号id
next(); idmain = id; // keep track of main
if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; }
p[i] = 0;//字符串结尾置0
close(fd);
// parse declarations
line = 1;
next();
while (tk) {
bt = INT; // basetype
if (tk == Int) next(); //已经有bt == INT
else if (tk == Char) { next(); bt = CHAR; }//char 变量;
else if (tk == Enum) {
next();
if (tk != '{') next(); // 似乎忽略了枚举名,例如 enum xxx{}
if (tk == '{') {
next();
i = 0; //Enum 默认从0 开始
while (tk != '}') {
if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } //不是Identifier 就出错
next();
if (tk == Assign) { // 发现赋值语句 如 enum { Num = 128 };
next();
if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; }
i = ival;
next();
}
//id 已经由 next 函数处理过
id[Class] = Num; id[Type] = INT; id[Val] = i++;
if (tk == ',') next();
}
next();
}
}
//Enum 处理完tk == ';', 略过下面
while (tk != ';' && tk != '}') {
ty = bt;//type 类型
while (tk == Mul) { next(); ty = ty + PTR; } // tk == Mul 表示已*开头,为指针类型,类型加PTR表示何种类型的指针
if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } //重复全局变量定义,解释见后
next();
id[Type] = ty; //赋值类型
if (tk == '(') { // 函数
id[Class] = Fun;//类型为函数型
id[Val] = (int)(e + 1); //函数指针? 在字节码中的偏移量/地址
next(); i = 0;
while (tk != ')') {//参数列表
ty = INT;
if (tk == Int) next();
else if (tk == Char) { next(); ty = CHAR; }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } //函数参数是局部变量
//备份符号信息,要进入函数上下文了
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = i++;//局部变量编号
next();
if (tk == ',') next();
}
next();
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i; //局部变量偏移量
next();
while (tk == Int || tk == Char) { //函数内变量声明
bt = (tk == Int) ? INT : CHAR;
next();
while (tk != ';') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }//处理指针型
if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
//备份符号信息
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = ++i; //存储变量偏移量
next();
if (tk == ',') next();
}
next();
}
*++e = ENT; *++e = i - loc;//函数局部变量数目
while (tk != '}') stmt();//语法分析?
*++e = LEV;//函数返回
id = sym; // unwind symbol table locals
while (id[Tk]) {
//恢复符号信息
if (id[Class] == Loc) {
id[Class] = id[HClass];
id[Type] = id[HType];
id[Val] = id[HVal];
}
id = id + Idsz;
}
}
else {
id[Class] = Glo;//全局变量
id[Val] = (int)data;//给全局变量在data段分配内存
data = data + sizeof(int);
}
if (tk == ',') next();
}
next();
}
if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; }
if (src) return 0;
// setup stack
sp = (int *)((int)sp + poolsz);
*--sp = EXIT; // call exit if main returns
*--sp = PSH; t = sp;
*--sp = argc;
*--sp = (int)argv;
*--sp = (int)t;
// run...
cycle = 0;
while (1) {
i = *pc++; ++cycle;
if (debug) {
printf("%d> %.4s", cycle,
&"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[i * 5]);
if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
}
if (i == LEA) a = (int)(bp + *pc++); // load local address
else if (i == IMM) a = *pc++; // load global address or immediate
else if (i == JMP) pc = (int *)*pc; // jump
else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine
else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero
else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero
else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine
else if (i == ADJ) sp = sp + *pc++; // stack adjust
else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine
else if (i == LI) a = *(int *)a; // load int
else if (i == LC) a = *(char *)a; // load char
else if (i == SI) *(int *)*sp++ = a; // store int
else if (i == SC) a = *(char *)*sp++ = a; // store char
else if (i == PSH) *--sp = a; // push
else if (i == OR) a = *sp++ | a;
else if (i == XOR) a = *sp++ ^ a;
else if (i == AND) a = *sp++ & a;
else if (i == EQ) a = *sp++ == a;
else if (i == NE) a = *sp++ != a;
else if (i == LT) a = *sp++ < a;
else if (i == GT) a = *sp++ > a;
else if (i == LE) a = *sp++ <= a;
else if (i == GE) a = *sp++ >= a;
else if (i == SHL) a = *sp++ << a;
else if (i == SHR) a = *sp++ >> a;
else if (i == ADD) a = *sp++ + a;
else if (i == SUB) a = *sp++ - a;
else if (i == MUL) a = *sp++ * a;
else if (i == DIV) a = *sp++ / a;
else if (i == MOD) a = *sp++ % a;
else if (i == OPEN) a = open((char *)sp[1], *sp);
else if (i == READ) a = read(sp[2], (char *)sp[1], *sp);
else if (i == CLOS) a = close(*sp);
else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); }
else if (i == MALC) a = (int)malloc(*sp);
else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp);
else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp);
else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; }
else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; }
}
}