Implements Scanner type for tokenizing nginx configs #80

Draft · wants to merge 4 commits into base: main
lex.go: 60 changes (57 additions, 3 deletions)
@@ -65,6 +65,7 @@ type LexOptions struct {
// RegisterLexer is an option that can be used to add a lexer to tokenize external NGINX tokens.
type RegisterLexer interface {
applyLexOptions(options *LexOptions)
applyScannerOptions(options *scannerOptions)
}

type registerLexer struct {
@@ -82,6 +83,16 @@ func (rl registerLexer) applyLexOptions(o *LexOptions) {
}
}

func (rl registerLexer) applyScannerOptions(o *scannerOptions) {
if o.extensions == nil {
o.extensions = make(map[string]ScannerExt)
}

for _, s := range rl.stringTokens {
o.extensions[s] = &LexerScanner{lexer: rl.l}
}
}
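A single RegisterLexer now feeds both code paths: applyLexOptions wires the lexer into the channel-based Lex, while applyScannerOptions maps each trigger token to a ScannerExt for the new Scanner. As a minimal in-package sketch of how these options might be collected when a Scanner is built (newScannerOptions is hypothetical and not part of this diff):

```go
// Hypothetical helper, not in this PR as shown: gather scanner options from
// the same RegisterLexer values that the lexer path already accepts.
func newScannerOptions(lexers ...RegisterLexer) *scannerOptions {
	opts := &scannerOptions{}
	for _, l := range lexers {
		l.applyScannerOptions(opts) // fills opts.extensions, keyed by trigger token
	}
	return opts
}
```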

// LexWithLexer registers a Lexer that implements tokenization of an NGINX configuration after one of the given
// stringTokens is encountered by Lex.
func LexWithLexer(l Lexer, stringTokens ...string) RegisterLexer { //nolint:ireturn
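For context, a usage sketch of registering a custom sub-lexer and consuming tokens. It assumes the Lexer interface has the shape Lex(*SubScanner, string) <-chan NgxToken and that NgxToken exposes Value and Line fields; neither is shown in this diff, and the import path is illustrative.

```go
package main

import (
	"fmt"
	"strings"

	crossplane "github.com/nginxinc/nginx-go-crossplane" // assumed import path
)

// blockLexer is a toy Lexer that collects everything up to the next closing
// brace into one token. The Lex signature below is an assumption; the real
// Lexer interface is defined outside this diff.
type blockLexer struct{}

func (blockLexer) Lex(s *crossplane.SubScanner, matchedToken string) <-chan crossplane.NgxToken {
	ch := make(chan crossplane.NgxToken)
	go func() {
		defer close(ch)
		var body strings.Builder
		line := s.Line()
		for s.Scan() {
			if s.Text() == "}" {
				break
			}
			body.WriteString(s.Text())
		}
		// Assumes NgxToken carries Value and Line fields.
		ch <- crossplane.NgxToken{Value: body.String(), Line: line}
	}()
	return ch
}

func main() {
	conf := strings.NewReader("http { my_block { return 42; } }")
	opts := crossplane.LexOptions{
		// Lexers and LexWithOptions are used the same way in the benchmarks below.
		Lexers: []crossplane.RegisterLexer{crossplane.LexWithLexer(blockLexer{}, "my_block")},
	}
	for tok := range crossplane.LexWithOptions(conf, opts) {
		fmt.Println(tok.Line, tok.Value)
	}
}
```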
@@ -106,12 +117,38 @@ func Lex(reader io.Reader) chan NgxToken {
// SubScanner provides an interface for scanning alternative grammars within NGINX configuration data.
type SubScanner struct {
scanner *bufio.Scanner
parent *Scanner
tokenLine int
}

// Scan advances the scanner to the next token, which will be available through the Text method. It returns false
// when the scan stops, either by reaching the end of the input or on error.
func (e *SubScanner) Scan() bool {
if e.scanner != nil {
return e.lexScan()
}

if e.parent.err != nil {
return false
}

if !e.parent.scanner.Scan() {
if err := e.parent.scanner.Err(); err != nil {
e.parent.setErr(err)
}
return false
}

if t := e.parent.scanner.Text(); isEOL(t) {
e.parent.lineno++
}

return true
}
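Scan now delegates based on how the SubScanner was built: one handed out by the channel-based tokenizer carries its own bufio.Scanner and goes through lexScan, while one created by the new Scanner type carries only a parent and forwards to it. Hypothetical in-package constructors, not part of this diff, showing the two backings:

```go
// Hypothetical constructors illustrating the two SubScanner backings.
func newLexSubScanner(s *bufio.Scanner, line int) *SubScanner {
	return &SubScanner{scanner: s, tokenLine: line} // Scan/Text/Err use the bufio path
}

func newScannerSubScanner(parent *Scanner) *SubScanner {
	return &SubScanner{parent: parent} // Scan/Text/Err/Line forward to the parent Scanner
}
```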

func (e *SubScanner) lexScan() bool {
if !e.scanner.Scan() {
return false
}
Expand All @@ -122,13 +159,30 @@ func (e *SubScanner) Scan() bool {
}

// Err returns the first non-EOF error encountered by the Scanner.
func (e *SubScanner) Err() error {
if e.scanner != nil {
return e.scanner.Err()
}
return e.parent.Err()
}

// Text returns the most recent token generated by a call to Scan.
func (e *SubScanner) Text() string {
if e.scanner != nil {
return e.scanner.Text()
}
return e.parent.scanner.Text()
}

// Line returns the line number of the most recent token generated by a call to Scan.
func (e *SubScanner) Line() int {
if e.scanner != nil {
return e.tokenLine
}

return e.parent.lineno
}
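The parent *Scanner itself is defined elsewhere in this PR; from the fields and methods referenced above (scanner, lineno, err, setErr, Err), an assumed minimal shape is:

```go
// Assumed minimal shape of the Scanner that SubScanner delegates to; the real
// type lives in another file of this PR and may carry more state.
type scannerSketch struct {
	scanner *bufio.Scanner // underlying token scanner
	lineno  int            // bumped whenever an end-of-line token is read
	err     error          // first non-EOF error, reported by Err
}

func (s *scannerSketch) setErr(err error) {
	if s.err == nil {
		s.err = err
	}
}

func (s *scannerSketch) Err() error { return s.err }
```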

//nolint:gocyclo,funlen,gocognit,maintidx
func tokenize(reader io.Reader, tokenCh chan NgxToken, options LexOptions) {
lex_test.go: 90 changes (77 additions, 13 deletions)
@@ -415,6 +415,20 @@ var lexFixtures = []lexFixture{
{"}", 20},
{"}", 21},
}},
{"comments-between-args", []tokenLine{
{"http", 1},
{"{", 1},
{"#comment 1", 1},
{"log_format", 2},
{"#comment 2", 2},
{"\\#arg\\ 1", 3},
{"#comment 3", 3},
{"#arg 2", 4},
{"#comment 4", 4},
{"#comment 5", 5},
{";", 6},
{"}", 7},
}},
}
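The comments-between-args fixture points at a test config that is not part of this diff; judging from the expected token/line pairs, its contents presumably look roughly like the Go constant below (an assumption, shown only to make the fixture easier to read):

```go
// Assumed contents of the comments-between-args test config (not in this diff).
const commentsBetweenArgs = "http { #comment 1\n" +
	"    log_format #comment 2\n" +
	"        \\#arg\\ 1 #comment 3\n" +
	"        '#arg 2' #comment 4\n" +
	"        #comment 5\n" +
	"        ;\n" +
	"}\n"
```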

func TestLex(t *testing.T) {
@@ -446,22 +460,72 @@ func TestLex(t *testing.T) {
}
}

func benchmarkLex(b *testing.B, path string, options LexOptions) {
var t NgxToken

file, err := os.Open(path)
if err != nil {
b.Fatal(err)
}
defer file.Close()
b.ResetTimer()

for i := 0; i < b.N; i++ {
if _, err := file.Seek(0, 0); err != nil {
b.Fatal(err)
}

for tok := range LexWithOptions(file, options) {
t = tok
}
}

_ = t
}

func BenchmarkLex(b *testing.B) {
for _, bm := range lexFixtures {
if strings.HasPrefix(bm.name, "lua") {
continue
}

b.Run(bm.name, func(b *testing.B) {
path := getTestConfigPath(bm.name, "nginx.conf")
benchmarkLex(b, path, LexOptions{})
})
}
}

func BenchmarkLexWithLua(b *testing.B) {
for _, bm := range lexFixtures {
if !strings.HasPrefix(bm.name, "lua") {
continue
}

b.Run(bm.name, func(b *testing.B) {
path := getTestConfigPath(bm.name, "nginx.conf")
benchmarkLex(b, path, LexOptions{Lexers: []RegisterLexer{lua.RegisterLexer()}})
})
}
}
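The two benchmarks split the fixtures by name prefix so the cost of the Lua extension can be measured separately. To run them without the unit tests, the standard go test flags apply (run from the package directory):

```sh
go test -run '^$' -bench 'BenchmarkLex' -benchmem .
```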

//nolint:gochecknoglobals
var unhappyFixtures = map[string]string{
"unbalanced open brance": `http {{}`,
"unbalanced closing brace": `http {}}`,
"multiple open braces": `http {{server {}}`,
"multiple closing braces after block end": `http {server {}}}`,
"multiple semicolons": `server { listen 80;; }`,
"semicolon afer closing brace": `server { listen 80; };`,
"open brace after semicolon": `server { listen 80; {}`,
"braces with no directive": `http{}{}`,
"missing final brace": `http{`,
}

func TestLex_unhappy(t *testing.T) {
t.Parallel()

for name, c := range unhappyFixtures {
c := c
t.Run(name, func(t *testing.T) {
t.Parallel()