-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlexer.js
139 lines (112 loc) · 4.21 KB
/
lexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// Character classes. Each is a plain string of member characters; the lexer
// tests membership with a substring lookup on these strings.
const WHITESPACE_CHARS = ' \t\u00A0' // space, tab, no-break space
const LINEEND_CHARS = '\n\r' // line feed, carriage return

// Single characters that introduce or delimit syntactic constructs.
const PRAGMA_OPERATOR = '#' // first character of a pragma token
const COMMENT_OPERATOR = '/' // first character of a comment token
const ESCAPE_OPERATOR = '\\' // a LANGSTRING_END directly after this does not close the string
const LANGKEY_ASSIGN_OPERATOR = '='
const LANGSTRING_START = '"'
const LANGSTRING_END = '"'
const LANGSTRING_END_OPERATOR = ';' // emitted as a TOKEN_LANGKEY_END token

// Token type identifiers: the possible values of the `type` field on the
// objects returned by Lexer#nextToken().
const TOKEN_PRAGMA = 'pragma'
const TOKEN_PRAGMA_VALUE = 'pragmaValue'
const TOKEN_LANGKEY = 'langkey'
const TOKEN_LANGKEY_ASSIGN = 'langkeyAssign'
const TOKEN_LANGKEY_STRING = 'langkeyString'
const TOKEN_LANGKEY_END = 'langkeyEnd'
const TOKEN_COMMENT = 'comment' // comment that starts at the beginning of a line
const TOKEN_INLINE_COMMENT = 'inlineComment' // comment preceded by other characters on its line
const TOKEN_WHITESPACE = 'whitespace'
const TOKEN_NEWLINE = 'newline'
/**
 * Build a token record.
 *
 * @param {string} type - Token type identifier (one of the TOKEN_* values).
 * @param {string} value - Text the token carries.
 * @param {number} lineno - Line on which the token starts.
 * @param {number} colno - Column at which the token starts.
 * @returns {{type: string, value: string, lineno: number, colno: number}} A fresh plain object.
 */
function token(type, value, lineno, colno) {
  return { type, value, lineno, colno }
}
/**
 * Character-driven lexer for a line-oriented language file made of
 * `#pragma value` lines, `/ comment` lines and `key = "string";` entries.
 *
 * The lexer walks the input one character at a time and keeps a zero-based
 * line/column position. Call {@link Lexer#nextToken} repeatedly until it
 * returns `null`.
 */
class Lexer {
  /**
   * @param {string} str - Complete source text to tokenize.
   */
  constructor(str) {
    this.str = str
    this.index = 0
    this.len = this.str.length
    this.lineno = 0
    this.colno = 0
  }

  /**
   * Consume and return the next token.
   *
   * Bare words are classified by the character right in front of them:
   * a word preceded by whitespace is a pragma value, a word preceded by a line
   * end (or sitting at the very start of the input) is a language key.
   *
   * @returns {{type: string, value: string, lineno: number, colno: number}|null}
   *   The next token, positioned at its first character, or `null` once the
   *   whole input has been consumed.
   * @throws {Error} When a bare word is preceded by neither whitespace nor a
   *   line end and therefore cannot be classified.
   */
  nextToken() {
    const { lineno, colno } = this
    const cur = this.current()
    if (this.isFinished()) return null

    if (cur === LANGSTRING_START) {
      return token(TOKEN_LANGKEY_STRING, this.parseString(), lineno, colno)
    }

    // Runs of whitespace / line-end characters collapse into a single token.
    const whitespace = this.extract(WHITESPACE_CHARS)
    if (whitespace) return token(TOKEN_WHITESPACE, whitespace, lineno, colno)

    const lineEnds = this.extract(LINEEND_CHARS)
    if (lineEnds) return token(TOKEN_NEWLINE, lineEnds, lineno, colno)

    if (cur === PRAGMA_OPERATOR) {
      // Stop at line ends as well as whitespace so that a pragma without a
      // value cannot swallow the following line.
      const pragma = this.extractUntil(WHITESPACE_CHARS + LINEEND_CHARS)
      return token(TOKEN_PRAGMA, pragma, lineno, colno)
    }

    if (cur === COMMENT_OPERATOR) {
      // Must be decided before extracting: previous() moves with the cursor.
      const startsLine = this.index === 0 || LINEEND_CHARS.includes(this.previous())
      const type = startsLine ? TOKEN_COMMENT : TOKEN_INLINE_COMMENT
      return token(type, this.extractUntil(LINEEND_CHARS), lineno, colno)
    }

    if (cur === LANGKEY_ASSIGN_OPERATOR) {
      this.forward()
      return token(TOKEN_LANGKEY_ASSIGN, cur, lineno, colno)
    }

    if (cur === LANGSTRING_END_OPERATOR) {
      this.forward()
      return token(TOKEN_LANGKEY_END, cur, lineno, colno)
    }

    const wordStart = this.index
    const word = this.extractUntil(WHITESPACE_CHARS + LINEEND_CHARS)
    if (word) {
      // charAt(-1) yields '' and ''-lookups always "match", so the start of
      // the input has to be handled explicitly: it counts as a line start.
      const atInputStart = wordStart === 0
      const charBefore = this.str.charAt(wordStart - 1)
      const hasWhitespaceBefore = !atInputStart && WHITESPACE_CHARS.includes(charBefore)
      const hasLineendBefore = atInputStart || LINEEND_CHARS.includes(charBefore)

      // The single delimiter character that ended the word is skipped without
      // producing a token of its own.
      this.forward()

      if (hasWhitespaceBefore) return token(TOKEN_PRAGMA_VALUE, word, lineno, colno)
      if (hasLineendBefore) return token(TOKEN_LANGKEY, word, lineno, colno)
    }

    // Nothing matched: report everything from the offending word onwards and
    // consume the rest of the input so the lexer ends up finished.
    const unparsed = this.str.slice(wordStart)
    this.extractUntil('')
    console.error(`Unparsed text (${lineno}:${colno}):\n`, unparsed)
    throw new Error('Unparsed text remained')
  }

  /**
   * @returns {boolean} `true` once the cursor is at or past the end of the input.
   */
  isFinished() {
    return this.index >= this.len
  }

  /**
   * Advance the cursor by one character and update the line/column position.
   * Only `\n` starts a new line; every other character advances the column.
   */
  forward() {
    this.index++
    if (this.previous() === '\n') {
      this.lineno++
      this.colno = 0
    } else {
      this.colno++
    }
  }

  /**
   * @returns {string} The character under the cursor, or the sentinel string
   *   `'NULL'` when the input is exhausted.
   */
  current() {
    return this.isFinished() ? 'NULL' : this.str.charAt(this.index)
  }

  /**
   * @param {number} [steps=1] - How far to look behind the cursor.
   * @returns {string} The character `steps` positions back, or `''` when that
   *   position lies outside the input.
   */
  previous(steps = 1) {
    return this.str.charAt(this.index - steps)
  }

  /**
   * @param {number} [steps=1] - How far to look ahead of the cursor.
   * @returns {string} The character `steps` positions ahead, or `''` when that
   *   position lies outside the input.
   */
  next(steps = 1) {
    return this.str.charAt(this.index + steps)
  }

  /**
   * Consume the run of characters at the cursor that ARE in `breakingChars`.
   *
   * @param {string} breakingChars - Set of characters to accept.
   * @returns {string|null} The consumed run (possibly `''`), or `null` at end of input.
   */
  extract(breakingChars) {
    return this.extractMatching(false, breakingChars)
  }

  /**
   * Consume characters up to (not including) the first one in `breakingChars`.
   *
   * @param {string} breakingChars - Set of characters that stop the scan; `''`
   *   consumes everything up to the end of the input.
   * @returns {string|null} The consumed text (possibly `''`), or `null` at end of input.
   */
  extractUntil(breakingChars) {
    return this.extractMatching(true, breakingChars)
  }

  /**
   * Shared scanner behind {@link Lexer#extract} and {@link Lexer#extractUntil}.
   *
   * @param {boolean} breakOnMatch - `true`: stop at the first character found in
   *   `breakingChars`; `false`: stop at the first character NOT found in it.
   * @param {string} breakingChars - Set of characters to test against.
   * @returns {string|null} The consumed text (possibly `''`), or `null` when
   *   the input was already exhausted on entry.
   */
  extractMatching(breakOnMatch, breakingChars) {
    if (this.isFinished()) return null

    const stopOnMember = Boolean(breakOnMatch)
    let text = ''
    while (!this.isFinished()) {
      const isMember = breakingChars.includes(this.current())
      if (isMember === stopOnMember) break
      text += this.current()
      this.forward()
    }
    return text
  }

  /**
   * Consume a quoted string whose opening quote is under the cursor.
   *
   * Escape characters are kept verbatim in the result. An unterminated string
   * runs to the end of the input without raising an error.
   *
   * NOTE(review): a closing quote is ignored whenever the character before it
   * is the escape character, so a string ending in an escaped backslash
   * (`\\"`) is not closed there - confirm whether the format allows that.
   *
   * @returns {string} The text between the quotes.
   */
  parseString() {
    this.forward() // step over the opening quote

    let str = ''
    while (!this.isFinished()) {
      const isClosingQuote = this.current() === LANGSTRING_END && this.previous() !== ESCAPE_OPERATOR
      if (isClosingQuote) break
      str += this.current()
      this.forward()
    }

    this.forward() // step over the closing quote
    return str
  }
}
// Public API of this module: the Lexer class plus every TOKEN_* type
// identifier (presumably so consumers can compare `token.type` against the
// named constants rather than repeating the string literals).
module.exports = {
Lexer,
TOKEN_PRAGMA,
TOKEN_PRAGMA_VALUE,
TOKEN_LANGKEY,
TOKEN_LANGKEY_ASSIGN,
TOKEN_LANGKEY_STRING,
TOKEN_LANGKEY_END,
TOKEN_COMMENT,
TOKEN_INLINE_COMMENT,
TOKEN_WHITESPACE,
TOKEN_NEWLINE
}