Implements Scanner type for tokenizing nginx configs #80

Draft · wants to merge 4 commits into base: main
lex.go: 60 changes (57 additions, 3 deletions)
@@ -65,6 +65,7 @@ type LexOptions struct {
// RegisterLexer is an option that can be used to add a lexer to tokenize external NGINX tokens.
type RegisterLexer interface {
applyLexOptions(options *LexOptions)
applyScannerOptions(options *scannerOptions)
}

type registerLexer struct {
@@ -82,6 +83,16 @@ func (rl registerLexer) applyLexOptions(o *LexOptions) {
}
}

func (rl registerLexer) applyScannerOptions(o *scannerOptions) {
if o.extensions == nil {
o.extensions = make(map[string]ScannerExt)
}

for _, s := range rl.stringTokens {
o.extensions[s] = &LexerScanner{lexer: rl.l}
}
}
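A single RegisterLexer now feeds both code paths: applyLexOptions wires the lexer into the channel-based Lex, while applyScannerOptions maps each trigger token to a ScannerExt for the new Scanner. As a minimal in-package sketch of how these options might be collected when a Scanner is built (newScannerOptions is hypothetical and not part of this diff):

```go
// Hypothetical helper, not in this PR as shown: gather scanner options from
// the same RegisterLexer values that the lexer path already accepts.
func newScannerOptions(lexers ...RegisterLexer) *scannerOptions {
	opts := &scannerOptions{}
	for _, l := range lexers {
		l.applyScannerOptions(opts) // fills opts.extensions, keyed by trigger token
	}
	return opts
}
```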

// LexWithLexer registers a Lexer that implements tokenization of an NGINX configuration after one of the given
// stringTokens is encountered by Lex.
func LexWithLexer(l Lexer, stringTokens ...string) RegisterLexer { //nolint:ireturn
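For context, a usage sketch of registering a custom sub-lexer and consuming tokens. It assumes the Lexer interface has the shape Lex(*SubScanner, string) <-chan NgxToken and that NgxToken exposes Value and Line fields; neither is shown in this diff, and the import path is illustrative.

```go
package main

import (
	"fmt"
	"strings"

	crossplane "github.com/nginxinc/nginx-go-crossplane" // assumed import path
)

// blockLexer is a toy Lexer that collects everything up to the next closing
// brace into one token. The Lex signature below is an assumption; the real
// Lexer interface is defined outside this diff.
type blockLexer struct{}

func (blockLexer) Lex(s *crossplane.SubScanner, matchedToken string) <-chan crossplane.NgxToken {
	ch := make(chan crossplane.NgxToken)
	go func() {
		defer close(ch)
		var body strings.Builder
		line := s.Line()
		for s.Scan() {
			if s.Text() == "}" {
				break
			}
			body.WriteString(s.Text())
		}
		// Assumes NgxToken carries Value and Line fields.
		ch <- crossplane.NgxToken{Value: body.String(), Line: line}
	}()
	return ch
}

func main() {
	conf := strings.NewReader("http { my_block { return 42; } }")
	opts := crossplane.LexOptions{
		// Lexers and LexWithOptions are used the same way in the benchmarks below.
		Lexers: []crossplane.RegisterLexer{crossplane.LexWithLexer(blockLexer{}, "my_block")},
	}
	for tok := range crossplane.LexWithOptions(conf, opts) {
		fmt.Println(tok.Line, tok.Value)
	}
}
```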
@@ -106,12 +117,38 @@ func Lex(reader io.Reader) chan NgxToken {
// SubScanner provides an interface for scanning alternative grammars within NGINX configuration data.
type SubScanner struct {
scanner *bufio.Scanner
parent *Scanner
tokenLine int
}

// Scan advances the scanner to the next token, which will be available through the Text method. It returns false
// when the scan stops, either by reaching the end of the input or on error.
func (e *SubScanner) Scan() bool {
if e.scanner != nil {
return e.lexScan()
}

if e.parent.err != nil {
return false
}

if !e.parent.scanner.Scan() {
if err := e.parent.scanner.Err(); err != nil {
e.parent.setErr(err)
}
return false
}

if t := e.parent.scanner.Text(); isEOL(t) {
e.parent.lineno++
}

return true
}
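Scan now delegates based on how the SubScanner was built: one handed out by the channel-based tokenizer carries its own bufio.Scanner and goes through lexScan, while one created by the new Scanner type carries only a parent and forwards to it. Hypothetical in-package constructors, not part of this diff, showing the two backings:

```go
// Hypothetical constructors illustrating the two SubScanner backings.
func newLexSubScanner(s *bufio.Scanner, line int) *SubScanner {
	return &SubScanner{scanner: s, tokenLine: line} // Scan/Text/Err use the bufio path
}

func newScannerSubScanner(parent *Scanner) *SubScanner {
	return &SubScanner{parent: parent} // Scan/Text/Err/Line forward to the parent Scanner
}
```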

func (e *SubScanner) lexScan() bool {
if !e.scanner.Scan() {
return false
}
Expand All @@ -122,13 +159,30 @@ func (e *SubScanner) Scan() bool {
}

// Err returns the first non-EOF error encountered by the Scanner.
func (e *SubScanner) Err() error {
if e.scanner != nil {
return e.scanner.Err()
}
return e.parent.Err()
}

// Text returns the most recent token generated by a call to Scan.
func (e *SubScanner) Text() string {
if e.scanner != nil {
return e.scanner.Text()
}
return e.parent.scanner.Text()
}

// Line returns the line number of the most recent token generated by a call to Scan.
func (e *SubScanner) Line() int {
if e.scanner != nil {
return e.tokenLine
}

return e.parent.lineno
}
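The parent *Scanner itself is defined elsewhere in this PR; from the fields and methods referenced above (scanner, lineno, err, setErr, Err), an assumed minimal shape is:

```go
// Assumed minimal shape of the Scanner that SubScanner delegates to; the real
// type lives in another file of this PR and may carry more state.
type scannerSketch struct {
	scanner *bufio.Scanner // underlying token scanner
	lineno  int            // bumped whenever an end-of-line token is read
	err     error          // first non-EOF error, reported by Err
}

func (s *scannerSketch) setErr(err error) {
	if s.err == nil {
		s.err = err
	}
}

func (s *scannerSketch) Err() error { return s.err }
```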

//nolint:gocyclo,funlen,gocognit,maintidx
func tokenize(reader io.Reader, tokenCh chan NgxToken, options LexOptions) {
lex_test.go: 90 changes (77 additions, 13 deletions)
@@ -415,6 +415,20 @@ var lexFixtures = []lexFixture{
{"}", 20},
{"}", 21},
}},
{"comments-between-args", []tokenLine{
{"http", 1},
{"{", 1},
{"#comment 1", 1},
{"log_format", 2},
{"#comment 2", 2},
{"\\#arg\\ 1", 3},
{"#comment 3", 3},
{"#arg 2", 4},
{"#comment 4", 4},
{"#comment 5", 5},
{";", 6},
{"}", 7},
}},
}
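The comments-between-args fixture points at a test config that is not part of this diff; judging from the expected token/line pairs, its contents presumably look roughly like the Go constant below (an assumption, shown only to make the fixture easier to read):

```go
// Assumed contents of the comments-between-args test config (not in this diff).
const commentsBetweenArgs = "http { #comment 1\n" +
	"    log_format #comment 2\n" +
	"        \\#arg\\ 1 #comment 3\n" +
	"        '#arg 2' #comment 4\n" +
	"        #comment 5\n" +
	"        ;\n" +
	"}\n"
```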

func TestLex(t *testing.T) {
@@ -446,22 +460,72 @@ func TestLex(t *testing.T) {
}
}

func benchmarkLex(b *testing.B, path string, options LexOptions) {
var t NgxToken

file, err := os.Open(path)
if err != nil {
b.Fatal(err)
}
defer file.Close()
b.ResetTimer()

for i := 0; i < b.N; i++ {
if _, err := file.Seek(0, 0); err != nil {
b.Fatal(err)
}

for tok := range LexWithOptions(file, options) {
t = tok
}
}

_ = t
}

func BenchmarkLex(b *testing.B) {
for _, bm := range lexFixtures {
if strings.HasPrefix(bm.name, "lua") {
continue
}

b.Run(bm.name, func(b *testing.B) {
path := getTestConfigPath(bm.name, "nginx.conf")
benchmarkLex(b, path, LexOptions{})
})
}
}

func BenchmarkLexWithLua(b *testing.B) {
for _, bm := range lexFixtures {
if !strings.HasPrefix(bm.name, "lua") {
continue
}

b.Run(bm.name, func(b *testing.B) {
path := getTestConfigPath(bm.name, "nginx.conf")
benchmarkLex(b, path, LexOptions{Lexers: []RegisterLexer{lua.RegisterLexer()}})
})
}
}
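The two benchmarks split the fixtures by name prefix so the cost of the Lua extension can be measured separately. To run them without the unit tests, the standard go test flags apply (run from the package directory):

```sh
go test -run '^$' -bench 'BenchmarkLex' -benchmem .
```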

//nolint:gochecknoglobals
var unhappyFixtures = map[string]string{
"unbalanced open brance": `http {{}`,
"unbalanced closing brace": `http {}}`,
"multiple open braces": `http {{server {}}`,
"multiple closing braces after block end": `http {server {}}}`,
"multiple semicolons": `server { listen 80;; }`,
"semicolon afer closing brace": `server { listen 80; };`,
"open brace after semicolon": `server { listen 80; {}`,
"braces with no directive": `http{}{}`,
"missing final brace": `http{`,
}

func TestLex_unhappy(t *testing.T) {
t.Parallel()

for name, c := range unhappyFixtures {
c := c
t.Run(name, func(t *testing.T) {
t.Parallel()