@@ -8,6 +8,14 @@ import (
8
8
"strings"
9
9
)
10
10
11
+ type scannerOptions struct {
12
+ extensions map [string ]ScannerExt
13
+ }
14
+
15
+ type ScannerOption interface {
16
+ applyScannerOptions (options * scannerOptions )
17
+ }
18
+
11
19
// Token is a lexical token of the NGINX configuration syntax.
12
20
type Token struct {
13
21
// Text is the string corresponding to the token. It could be a directive or symbol. The value is the actual token
@@ -20,6 +28,8 @@ type Token struct {
20
28
IsQuoted bool
21
29
}
22
30
31
+ func (t Token ) String () string { return fmt .Sprintf ("{%d, %s, %t}" , t .Line , t .Text , t .IsQuoted ) }
32
+
23
33
type scannerError struct {
24
34
msg string
25
35
line int
@@ -52,23 +62,33 @@ func LineNumber(err error) (int, bool) {
52
62
//
53
63
// Use NewScanner to construct a Scanner.
54
64
type Scanner struct {
55
- scanner * bufio.Scanner
56
- lineno int
57
- tokenStartLine int
58
- tokenDepth int
59
- repeateSpecialChar bool // only '}' can be repeated
60
- prev string
61
- err error
65
+ scanner * bufio.Scanner
66
+ lineno int
67
+ tokenStartLine int
68
+ tokenDepth int
69
+ repeateSpecialChar bool // only '}' can be repeated
70
+ nextTokenIsDirective bool
71
+ prev string
72
+ err error
73
+ options * scannerOptions
74
+ ext Tokenizer
62
75
}
63
76
64
77
// NewScanner returns a new Scanner to read from r.
65
- func NewScanner (r io.Reader ) * Scanner {
78
+ func NewScanner (r io.Reader , options ... ScannerOption ) * Scanner {
79
+ opts := & scannerOptions {}
80
+ for _ , opt := range options {
81
+ opt .applyScannerOptions (opts )
82
+ }
83
+
66
84
s := & Scanner {
67
- scanner : bufio .NewScanner (r ),
68
- lineno : 1 ,
69
- tokenStartLine : 1 ,
70
- tokenDepth : 0 ,
71
- repeateSpecialChar : false ,
85
+ scanner : bufio .NewScanner (r ),
86
+ lineno : 1 ,
87
+ tokenStartLine : 1 ,
88
+ tokenDepth : 0 ,
89
+ repeateSpecialChar : false ,
90
+ nextTokenIsDirective : true ,
91
+ options : opts ,
72
92
}
73
93
74
94
s .scanner .Split (bufio .ScanRunes )
@@ -92,7 +112,21 @@ func (s *Scanner) setErr(err error) {
92
112
93
113
// Scan reads the next token from source and returns it. It returns io.EOF at the end of the source. Scanner errors are
94
114
// returned when encountered.
95
- func (s * Scanner ) Scan () (Token , error ) { //nolint: funlen, gocognit, gocyclo
115
+ func (s * Scanner ) Scan () (Token , error ) { //nolint: funlen, gocognit, gocyclo, maintidx // sorry
116
+ if s .ext != nil {
117
+ t , err := s .ext .Next ()
118
+ if err != nil {
119
+ if ! errors .Is (err , ErrTokenizerDone ) {
120
+ s .setErr (err )
121
+ return Token {}, s .err
122
+ }
123
+
124
+ s .ext = nil
125
+ } else {
126
+ return t , nil
127
+ }
128
+ }
129
+
96
130
var tok strings.Builder
97
131
98
132
lexState := skipSpace
@@ -129,6 +163,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
129
163
r = nextRune
130
164
if isEOL (r ) {
131
165
s .lineno ++
166
+ s .nextTokenIsDirective = true
132
167
}
133
168
default :
134
169
readNext = true
@@ -149,6 +184,16 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
149
184
r = "\\ " + r
150
185
}
151
186
187
+ if tok .Len () > 0 {
188
+ t := tok .String ()
189
+ if s .nextTokenIsDirective {
190
+ if ext , ok := s .options .extensions [t ]; ok {
191
+ s .ext = ext .Tokenizer (& SubScanner {parent : s , tokenLine : s .tokenStartLine }, t )
192
+ return Token {Text : t , Line : s .tokenStartLine }, nil
193
+ }
194
+ }
195
+ }
196
+
152
197
switch lexState {
153
198
case skipSpace :
154
199
if ! isSpace (r ) {
@@ -166,11 +211,13 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
166
211
tok .WriteString (r )
167
212
lexState = inComment
168
213
s .tokenStartLine = s .lineno
214
+ s .nextTokenIsDirective = false
169
215
continue
170
216
}
171
217
}
172
218
173
219
if isSpace (r ) {
220
+ s .nextTokenIsDirective = false
174
221
return Token {Text : tok .String (), Line : s .tokenStartLine }, nil
175
222
}
176
223
@@ -179,6 +226,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
179
226
tok .WriteString (r )
180
227
lexState = inVar
181
228
s .repeateSpecialChar = false
229
+ s .nextTokenIsDirective = false
182
230
continue
183
231
}
184
232
@@ -223,6 +271,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
223
271
}
224
272
225
273
tok .WriteString (r )
274
+ s .nextTokenIsDirective = true
226
275
return Token {Text : tok .String (), Line : s .tokenStartLine }, nil
227
276
}
228
277
@@ -250,3 +299,51 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo
250
299
}
251
300
}
252
301
}
302
+
303
+ // ScannerExt is the interface that describes an extension for the [Scanner]. Scanner extensions enable scanning of
304
+ // configurations that contain syntaxes that do not follow the usual grammar.
305
+ type ScannerExt interface {
306
+ Tokenizer (s * SubScanner , matchedToken string ) Tokenizer
307
+ }
308
+
309
// ErrTokenizerDone is the sentinel error returned by a [Tokenizer] when
// tokenization is complete. Callers should detect it with errors.Is.
var ErrTokenizerDone = errors.New("done")
311
+
312
+ // Tokenizer is the interface that wraps the Next method.
313
+ //
314
+ // Next returns the next token scanned from the NGINX configuration or an error if the configuration cannot be
315
+ // tokenized. Return the special error, [ErrTokenizerDone] when finished tokenizing.
316
+ type Tokenizer interface {
317
+ Next () (Token , error )
318
+ }
319
+
320
+ // LexerScanner is a compatibility layer between Lexers and Scanner.
321
+ type LexerScanner struct {
322
+ lexer Lexer
323
+ scanner * SubScanner
324
+ matchedToken string
325
+ ch <- chan NgxToken
326
+ }
327
+
328
+ func (s * LexerScanner ) Tokenizer (scanner * SubScanner , matchedtoken string ) Tokenizer {
329
+ s .scanner = scanner
330
+ s .matchedToken = matchedtoken
331
+ return s
332
+ }
333
+
334
+ func (s * LexerScanner ) Next () (Token , error ) {
335
+ if s .ch == nil {
336
+ s .ch = s .lexer .Lex (s .scanner , s .matchedToken )
337
+ }
338
+
339
+ ngxTok , ok := <- s .ch
340
+ if ! ok {
341
+ return Token {}, ErrTokenizerDone
342
+ }
343
+
344
+ if ngxTok .Error != nil {
345
+ return Token {}, newScannerErrf (ngxTok .Line , ngxTok .Error .Error ())
346
+ }
347
+
348
+ return Token {Text : ngxTok .Value , Line : ngxTok .Line , IsQuoted : ngxTok .IsQuoted }, nil
349
+ }
0 commit comments