From 5575a252eb4708750c479e788449be7a12dbb227 Mon Sep 17 00:00:00 2001
From: Alec Thomas <alec@swapoff.org>
Date: Sun, 4 Sep 2022 20:41:48 +1000
Subject: [PATCH] Allow lexers to be code-generated from JSON.

- Add a CLI tool that can ingest the JSON and dump out the generated code.
- Lexers can now be JSON marshalled.
- Add a goreleaser step for the binary.

As discussed in #213
---
 .github/workflows/release.yml                 |  17 +
 .goreleaser.yml                               |  37 ++
 COPYING                                       |   2 +-
 bin/.goreleaser-1.11.2.pkg                    |   1 +
 bin/.jq-1.6.pkg                               |   1 +
 bin/goreleaser                                |   1 +
 bin/hermit.hcl                                |   3 +
 bin/jq                                        |   1 +
 .../participle/gen_lexer_cmd.go               | 108 +++-
 cmd/participle/go.mod                         |  10 +
 cmd/participle/go.sum                         |  17 +
 cmd/participle/main.go                        |  22 +
 go.mod                                        |   7 +-
 go.sum                                        |   4 +
 lexer/internal/basiclexer.go                  | 561 ++++++++++++++++++
 lexer/internal/basiclexer.json                |  32 +
 lexer/internal/codegen_gen_test.go            | 336 -----------
 lexer/internal/codegen_test.go                | 106 ----
 lexer/stateful.go                             | 113 +++-
 lexer/stateful_codegen_test.go                | 437 --------------
 lexer/stateful_test.go                        |  39 +-
 scripts/participle                            |   4 +
 scripts/regen-lexer                           |   3 +
 23 files changed, 914 insertions(+), 948 deletions(-)
 create mode 100644 .github/workflows/release.yml
 create mode 100644 .goreleaser.yml
 create mode 120000 bin/.goreleaser-1.11.2.pkg
 create mode 120000 bin/.jq-1.6.pkg
 create mode 120000 bin/goreleaser
 create mode 120000 bin/jq
 rename lexer/codegen.go => cmd/participle/gen_lexer_cmd.go (80%)
 create mode 100644 cmd/participle/go.mod
 create mode 100644 cmd/participle/go.sum
 create mode 100644 cmd/participle/main.go
 create mode 100644 lexer/internal/basiclexer.go
 create mode 100644 lexer/internal/basiclexer.json
 delete mode 100644 lexer/internal/codegen_gen_test.go
 delete mode 100644 lexer/internal/codegen_test.go
 delete mode 100644 lexer/stateful_codegen_test.go
 create mode 100755 scripts/participle
 create mode 100755 scripts/regen-lexer

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..3d06f8fa
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,17 @@
+name: Release
+on:
+  push:
+    tags:
+      - 'v*'
+jobs:
+  release:
+    name: Release
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        fetch-depth: 0
+    - run: ./bin/hermit env --raw >> $GITHUB_ENV
+    - run: goreleaser release
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.goreleaser.yml b/.goreleaser.yml
new file mode 100644
index 00000000..4018712d
--- /dev/null
+++ b/.goreleaser.yml
@@ -0,0 +1,37 @@
+project_name: participle
+release:
+  github:
+    owner: alecthomas
+    name: participle
+brews:
+  -
+    install: bin.install "participle"
+env:
+  - CGO_ENABLED=0
+builds:
+- goos:
+    - linux
+    - darwin
+    - windows
+  goarch:
+    - arm64
+    - amd64
+    - "386"
+  goarm:
+    - "6"
+  dir: ./cmd/participle
+  main: .
+  ldflags: -s -w -X main.version={{.Version}}
+  binary: participle
+archives:
+  -
+    format: tar.gz
+    name_template: '{{ .Binary }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{
+    .Arm }}{{ end }}'
+    files:
+      - COPYING
+      - README*
+snapshot:
+  name_template: SNAPSHOT-{{ .Commit }}
+checksum:
+  name_template: '{{ .ProjectName }}-{{ .Version }}-checksums.txt'
diff --git a/COPYING b/COPYING
index 92dc39f7..44fed8b5 100644
--- a/COPYING
+++ b/COPYING
@@ -1,4 +1,4 @@
-Copyright (C) 2017 Alec Thomas
+Copyright (C) 2017-2022 Alec Thomas
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
diff --git a/bin/.goreleaser-1.11.2.pkg b/bin/.goreleaser-1.11.2.pkg
new file mode 120000
index 00000000..383f4511
--- /dev/null
+++ b/bin/.goreleaser-1.11.2.pkg
@@ -0,0 +1 @@
+hermit
\ No newline at end of file
diff --git a/bin/.jq-1.6.pkg b/bin/.jq-1.6.pkg
new file mode 120000
index 00000000..383f4511
--- /dev/null
+++ b/bin/.jq-1.6.pkg
@@ -0,0 +1 @@
+hermit
\ No newline at end of file
diff --git a/bin/goreleaser b/bin/goreleaser
new file mode 120000
index 00000000..10561a7b
--- /dev/null
+++ b/bin/goreleaser
@@ -0,0 +1 @@
+.goreleaser-1.11.2.pkg
\ No newline at end of file
diff --git a/bin/hermit.hcl b/bin/hermit.hcl
index e69de29b..60844159 100644
--- a/bin/hermit.hcl
+++ b/bin/hermit.hcl
@@ -0,0 +1,3 @@
+env = {
+  "PATH": "${HERMIT_ENV}/scripts:${PATH}",
+}
diff --git a/bin/jq b/bin/jq
new file mode 120000
index 00000000..d7e067b8
--- /dev/null
+++ b/bin/jq
@@ -0,0 +1 @@
+.jq-1.6.pkg
\ No newline at end of file
diff --git a/lexer/codegen.go b/cmd/participle/gen_lexer_cmd.go
similarity index 80%
rename from lexer/codegen.go
rename to cmd/participle/gen_lexer_cmd.go
index 1acadc48..c8951cba 100644
--- a/lexer/codegen.go
+++ b/cmd/participle/gen_lexer_cmd.go
@@ -1,33 +1,81 @@
-package lexer
+package main
 
 import (
+	"encoding/json"
 	"fmt"
 	"io"
+	"os"
 	"regexp"
 	"regexp/syntax"
 	"sort"
 	"text/template"
 	"unicode/utf8"
+
+	"github.com/alecthomas/participle/v2/lexer"
 )
 
+type genLexerCmd struct {
+	Name    string `help:"Name of the lexer."`
+	Output  string `short:"o" help:"Output file."`
+	Package string `arg:"" required:"" help:"Go package for generated code."`
+	Lexer   string `arg:"" required:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer."`
+}
+
+func (c *genLexerCmd) Help() string {
+	return `
+Generates Go code implementing the given JSON representation of a lexer. The
+generated code should in general be around 10x faster and produce zero garbage
+per token.
+`
+}
+
+// Run decodes the JSON lexer rules from Lexer ("-" means stdin) and writes the
+// generated Go source to Output when it is set, or to stdout otherwise.
+func (c *genLexerCmd) Run() error {
+	r, w := os.Stdin, os.Stdout
+	var err error
+	if c.Lexer != "-" {
+		if r, err = os.Open(c.Lexer); err != nil {
+			return err
+		}
+		defer r.Close()
+	}
+	rules := lexer.Rules{}
+	if err = json.NewDecoder(r).Decode(&rules); err != nil {
+		return err
+	}
+	def, err := lexer.New(rules)
+	if err != nil {
+		return err
+	}
+	// Create the output only after the rules validated, so a bad lexer never truncates it.
+	if c.Output != "" {
+		if w, err = os.Create(c.Output); err != nil {
+			return err
+		}
+		defer w.Close()
+	}
+	return generateLexer(w, c.Package, def, c.Name)
+}
+
 var codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`)
 
 var codegenTemplate *template.Template = template.Must(template.New("lexgen").Funcs(template.FuncMap{
-	"IsPush": func(r Rule) string {
-		if p, ok := r.Action.(ActionPush); ok {
+	"IsPush": func(r lexer.Rule) string {
+		if p, ok := r.Action.(lexer.ActionPush); ok {
 			return p.State
 		}
 		return ""
 	},
-	"IsPop": func(r Rule) bool {
-		_, ok := r.Action.(ActionPop)
+	"IsPop": func(r lexer.Rule) bool {
+		_, ok := r.Action.(lexer.ActionPop)
 		return ok
 	},
-	"IsReturn": func(r Rule) bool {
-		return r == ReturnRule
+	"IsReturn": func(r lexer.Rule) bool {
+		return r == lexer.ReturnRule
 	},
 	"OrderRules": orderRules,
-	"HaveBackrefs": func(def *StatefulDefinition, state string) bool {
+	"HaveBackrefs": func(def *lexer.StatefulDefinition, state string) bool {
 		for _, rule := range def.Rules()[state] {
 			if codegenBackrefRe.MatchString(rule.Pattern) {
 				return true
@@ -51,11 +99,11 @@ import (
 
 var _ syntax.Op
 
-var Lexer lexer.Definition = definitionImpl{}
+var {{.Name}}Lexer lexer.Definition = lexer{{.Name}}DefinitionImpl{}
 
-type definitionImpl struct {}
+type lexer{{.Name}}DefinitionImpl struct {}
 
-func (definitionImpl) Symbols() map[string]lexer.TokenType {
+func (lexer{{.Name}}DefinitionImpl) Symbols() map[string]lexer.TokenType {
 	return map[string]lexer.TokenType{
 {{- range $sym, $rn := .Def.Symbols}}
       "{{$sym}}": {{$rn}},
@@ -63,23 +111,23 @@ func (definitionImpl) Symbols() map[string]lexer.TokenType {
 	}
 }
 
-func (definitionImpl) LexString(filename string, s string) (lexer.Lexer, error) {
-	return &lexerImpl{
+func (lexer{{.Name}}DefinitionImpl) LexString(filename string, s string) (lexer.Lexer, error) {
+	return &lexer{{.Name}}Impl{
 		s: s,
 		pos: lexer.Position{
 			Filename: filename,
 			Line:     1,
 			Column:   1,
 		},
-		states: []lexerState{lexerState{name: "Root"}},
+		states: []lexer{{.Name}}State{lexer{{.Name}}State{name: "Root"}},
 	}, nil
 }
 
-func (d definitionImpl) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
+func (d lexer{{.Name}}DefinitionImpl) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
 	return d.LexString(filename, string(b))
 }
 
-func (d definitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
+func (d lexer{{.Name}}DefinitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
 	s := &strings.Builder{}
 	_, err := io.Copy(s, r)
 	if err != nil {
@@ -88,19 +136,19 @@ func (d definitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
 	return d.LexString(filename, s.String())
 }
 
-type lexerState struct {
+type lexer{{.Name}}State struct {
 	name    string
 	groups  []string
 }
 
-type lexerImpl struct {
+type lexer{{.Name}}Impl struct {
 	s       string
 	p       int
 	pos     lexer.Position
-	states  []lexerState
+	states  []lexer{{.Name}}State
 }
 
-func (l *lexerImpl) Next() (lexer.Token, error) {
+func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) {
 	if l.p == len(l.s) {
 		return lexer.EOFToken(l.pos), nil
 	}
@@ -122,7 +170,7 @@ func (l *lexerImpl) Next() (lexer.Token, error) {
 		if true {
 {{- end}}
 {{- if .|IsPush}}
-			l.states = append(l.states, lexerState{name: "{{.|IsPush}}"{{if HaveBackrefs $.Def $state.Name}}, groups: l.sgroups(groups){{end}}})
+			l.states = append(l.states, lexer{{$.Name}}State{name: "{{.|IsPush}}"{{if HaveBackrefs $.Def $state.Name}}, groups: l.sgroups(groups){{end}}})
 {{- else if (or (.|IsPop) (.|IsReturn))}}
 			l.states = l.states[:len(l.states)-1]
 {{- if .|IsReturn}}
@@ -154,7 +202,7 @@ func (l *lexerImpl) Next() (lexer.Token, error) {
 	}, nil
 }
 
-func (l *lexerImpl) sgroups(match []int) []string {
+func (l *lexer{{.Name}}Impl) sgroups(match []int) []string {
 	sgroups := make([]string, len(match)/2)
 	for i := 0; i < len(match)-1; i += 2 {
 		sgroups[i/2] = l.s[l.p+match[i]:l.p+match[i+1]]
@@ -164,18 +212,14 @@ func (l *lexerImpl) sgroups(match []int) []string {
 
 `))
 
-// ExperimentalGenerateLexer generates Go code implementing the given stateful lexer.
-//
-// The generated code should in general by around 10x faster and produce zero garbage per token.
-//
-// NOTE: This is an experimental interface and subject to change.
-func ExperimentalGenerateLexer(w io.Writer, pkg string, def *StatefulDefinition) error {
+func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name string) error {
 	type ctx struct {
 		Package string
-		Def     *StatefulDefinition
+		Name    string
+		Def     *lexer.StatefulDefinition
 	}
 	rules := def.Rules()
-	err := codegenTemplate.Execute(w, ctx{pkg, def})
+	err := codegenTemplate.Execute(w, ctx{pkg, name, def})
 	if err != nil {
 		return err
 	}
@@ -201,10 +245,10 @@ func ExperimentalGenerateLexer(w io.Writer, pkg string, def *StatefulDefinition)
 
 type orderedRule struct {
 	Name  string
-	Rules []Rule
+	Rules []lexer.Rule
 }
 
-func orderRules(rules Rules) []orderedRule {
+func orderRules(rules lexer.Rules) []orderedRule {
 	orderedRules := []orderedRule{}
 	for name, rules := range rules {
 		orderedRules = append(orderedRules, orderedRule{
diff --git a/cmd/participle/go.mod b/cmd/participle/go.mod
new file mode 100644
index 00000000..be2c477e
--- /dev/null
+++ b/cmd/participle/go.mod
@@ -0,0 +1,10 @@
+module github.com/alecthomas/participle/v2/cmd/participle
+
+go 1.18
+
+require (
+	github.com/alecthomas/kong v0.6.1
+	github.com/alecthomas/participle/v2 v2.0.0-00010101000000-000000000000
+)
+
+replace github.com/alecthomas/participle/v2 => ../..
diff --git a/cmd/participle/go.sum b/cmd/participle/go.sum
new file mode 100644
index 00000000..73c82483
--- /dev/null
+++ b/cmd/participle/go.sum
@@ -0,0 +1,17 @@
+github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0=
+github.com/alecthomas/kong v0.6.1 h1:1kNhcFepkR+HmasQpbiKDLylIL8yh5B5y1zPp5bJimA=
+github.com/alecthomas/kong v0.6.1/go.mod h1:JfHWDzLmbh/puW6I3V7uWenoh56YNVONW+w8eKeUr9I=
+github.com/alecthomas/repr v0.0.0-20210801044451-80ca428c5142/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
+github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s=
+github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/cmd/participle/main.go b/cmd/participle/main.go
new file mode 100644
index 00000000..3a215317
--- /dev/null
+++ b/cmd/participle/main.go
@@ -0,0 +1,22 @@
+package main
+
+import "github.com/alecthomas/kong"
+
+var (
+	version string = "dev"
+	cli     struct {
+		Version kong.VersionFlag
+		Gen     struct {
+			Lexer genLexerCmd `cmd:""`
+		} `cmd:"" help:"Generate code to accelerate Participle."`
+	}
+)
+
+func main() {
+	kctx := kong.Parse(&cli,
+		kong.Description(`A command-line tool for Participle.`),
+		kong.Vars{"version": version},
+	)
+	err := kctx.Run()
+	kctx.FatalIfErrorf(err)
+}
diff --git a/go.mod b/go.mod
index 8a1ec6d6..22a51ed3 100644
--- a/go.mod
+++ b/go.mod
@@ -3,8 +3,11 @@ module github.com/alecthomas/participle/v2
 go 1.18
 
 require (
-	github.com/alecthomas/assert/v2 v2.0.3
+	github.com/alecthomas/assert/v2 v2.1.0
 	github.com/alecthomas/repr v0.1.0
 )
 
-require github.com/hexops/gotextdiff v1.0.3 // indirect
+require (
+	github.com/hexops/gotextdiff v1.0.3 // indirect
+	github.com/mitchellh/mapstructure v1.5.0
+)
diff --git a/go.sum b/go.sum
index f5017061..e2a72a07 100644
--- a/go.sum
+++ b/go.sum
@@ -1,6 +1,10 @@
 github.com/alecthomas/assert/v2 v2.0.3 h1:WKqJODfOiQG0nEJKFKzDIG3E29CN2/4zR9XGJzKIkbg=
 github.com/alecthomas/assert/v2 v2.0.3/go.mod h1:b/+1DI2Q6NckYi+3mXyH3wFb8qG37K/DuK80n7WefXA=
+github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0=
+github.com/alecthomas/assert/v2 v2.1.0/go.mod h1:b/+1DI2Q6NckYi+3mXyH3wFb8qG37K/DuK80n7WefXA=
 github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE=
 github.com/alecthomas/repr v0.1.0/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
+github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
diff --git a/lexer/internal/basiclexer.go b/lexer/internal/basiclexer.go
new file mode 100644
index 00000000..d4cfa46e
--- /dev/null
+++ b/lexer/internal/basiclexer.go
@@ -0,0 +1,561 @@
+// Code generated by Participle. DO NOT EDIT.
+package internal
+
+import (
+	"io"
+	"regexp/syntax"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/alecthomas/participle/v2"
+	"github.com/alecthomas/participle/v2/lexer"
+)
+
+var _ syntax.Op
+
+var GeneratedBasicLexer lexer.Definition = lexerGeneratedBasicDefinitionImpl{}
+
+type lexerGeneratedBasicDefinitionImpl struct{}
+
+func (lexerGeneratedBasicDefinitionImpl) Symbols() map[string]lexer.TokenType {
+	return map[string]lexer.TokenType{
+		"Comment":    -7,
+		"EOF":        -1,
+		"EOL":        -6,
+		"Ident":      -4,
+		"Number":     -3,
+		"Punct":      -5,
+		"String":     -2,
+		"Whitespace": -8,
+	}
+}
+
+func (lexerGeneratedBasicDefinitionImpl) LexString(filename string, s string) (lexer.Lexer, error) {
+	return &lexerGeneratedBasicImpl{
+		s: s,
+		pos: lexer.Position{
+			Filename: filename,
+			Line:     1,
+			Column:   1,
+		},
+		states: []lexerGeneratedBasicState{lexerGeneratedBasicState{name: "Root"}},
+	}, nil
+}
+
+func (d lexerGeneratedBasicDefinitionImpl) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
+	return d.LexString(filename, string(b))
+}
+
+func (d lexerGeneratedBasicDefinitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
+	s := &strings.Builder{}
+	_, err := io.Copy(s, r)
+	if err != nil {
+		return nil, err
+	}
+	return d.LexString(filename, s.String())
+}
+
+type lexerGeneratedBasicState struct {
+	name   string
+	groups []string
+}
+
+type lexerGeneratedBasicImpl struct {
+	s      string
+	p      int
+	pos    lexer.Position
+	states []lexerGeneratedBasicState
+}
+
+func (l *lexerGeneratedBasicImpl) Next() (lexer.Token, error) {
+	if l.p == len(l.s) {
+		return lexer.EOFToken(l.pos), nil
+	}
+	var (
+		state  = l.states[len(l.states)-1]
+		groups []int
+		sym    lexer.TokenType
+	)
+	switch state.name {
+	case "Root":
+		if match := matchString(l.s, l.p); match[1] != 0 {
+			sym = -2
+			groups = match[:]
+		} else if match := matchNumber(l.s, l.p); match[1] != 0 {
+			sym = -3
+			groups = match[:]
+		} else if match := matchIdent(l.s, l.p); match[1] != 0 {
+			sym = -4
+			groups = match[:]
+		} else if match := matchPunct(l.s, l.p); match[1] != 0 {
+			sym = -5
+			groups = match[:]
+		} else if match := matchEOL(l.s, l.p); match[1] != 0 {
+			sym = -6
+			groups = match[:]
+		} else if match := matchComment(l.s, l.p); match[1] != 0 {
+			sym = -7
+			groups = match[:]
+		} else if match := matchWhitespace(l.s, l.p); match[1] != 0 {
+			sym = -8
+			groups = match[:]
+		}
+	}
+	if groups == nil {
+		sample := []rune(l.s[l.p:])
+		if len(sample) > 16 {
+			sample = append(sample[:16], []rune("...")...)
+		}
+		return lexer.Token{}, participle.Errorf(l.pos, "invalid input text %q", sample)
+	}
+	pos := l.pos
+	span := l.s[groups[0]:groups[1]]
+	l.p = groups[1]
+	l.pos.Advance(span)
+	return lexer.Token{
+		Type:  sym,
+		Value: span,
+		Pos:   pos,
+	}, nil
+}
+
+func (l *lexerGeneratedBasicImpl) sgroups(match []int) []string {
+	sgroups := make([]string, len(match)/2)
+	for i := 0; i < len(match)-1; i += 2 {
+		sgroups[i/2] = l.s[l.p+match[i] : l.p+match[i+1]]
+	}
+	return sgroups
+}
+
+// "(\\"|[^"])*"
+func matchString(s string, p int) (groups [4]int) {
+	// " (Literal)
+	l0 := func(s string, p int) int {
+		if p < len(s) && s[p] == '"' {
+			return p + 1
+		}
+		return -1
+	}
+	// \\" (Literal)
+	l1 := func(s string, p int) int {
+		if p+2 < len(s) && s[p:p+2] == "\\\"" {
+			return p + 2
+		}
+		return -1
+	}
+	// [^"] (CharClass)
+	l2 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		var (
+			rn rune
+			n  int
+		)
+		if s[p] < utf8.RuneSelf {
+			rn, n = rune(s[p]), 1
+		} else {
+			rn, n = utf8.DecodeRuneInString(s[p:])
+		}
+		switch {
+		case rn >= '\x00' && rn <= '!':
+			return p + 1
+		case rn >= '#' && rn <= '\U0010ffff':
+			return p + n
+		}
+		return -1
+	}
+	// \\"|[^"] (Alternate)
+	l3 := func(s string, p int) int {
+		if np := l1(s, p); np != -1 {
+			return np
+		}
+		if np := l2(s, p); np != -1 {
+			return np
+		}
+		return -1
+	}
+	// (\\"|[^"]) (Capture)
+	l4 := func(s string, p int) int {
+		np := l3(s, p)
+		if np != -1 {
+			groups[2] = p
+			groups[3] = np
+		}
+		return np
+	}
+	// (\\"|[^"])* (Star)
+	l5 := func(s string, p int) int {
+		for len(s) > p {
+			if np := l4(s, p); np == -1 {
+				return p
+			} else {
+				p = np
+			}
+		}
+		return p
+	}
+	// "(\\"|[^"])*" (Concat)
+	l6 := func(s string, p int) int {
+		if p = l0(s, p); p == -1 {
+			return -1
+		}
+		if p = l5(s, p); p == -1 {
+			return -1
+		}
+		if p = l0(s, p); p == -1 {
+			return -1
+		}
+		return p
+	}
+	np := l6(s, p)
+	if np == -1 {
+		return
+	}
+	groups[0] = p
+	groups[1] = np
+	return
+}
+
+// [\+\-]?([0-9]*\.)?[0-9]+
+func matchNumber(s string, p int) (groups [4]int) {
+	// [\+\-] (CharClass)
+	l0 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		rn := s[p]
+		switch {
+		case rn == '+':
+			return p + 1
+		case rn == '-':
+			return p + 1
+		}
+		return -1
+	}
+	// [\+\-]? (Quest)
+	l1 := func(s string, p int) int {
+		if np := l0(s, p); np != -1 {
+			return np
+		}
+		return p
+	}
+	// [0-9] (CharClass)
+	l2 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		rn := s[p]
+		switch {
+		case rn >= '0' && rn <= '9':
+			return p + 1
+		}
+		return -1
+	}
+	// [0-9]* (Star)
+	l3 := func(s string, p int) int {
+		for len(s) > p {
+			if np := l2(s, p); np == -1 {
+				return p
+			} else {
+				p = np
+			}
+		}
+		return p
+	}
+	// \. (Literal)
+	l4 := func(s string, p int) int {
+		if p < len(s) && s[p] == '.' {
+			return p + 1
+		}
+		return -1
+	}
+	// [0-9]*\. (Concat)
+	l5 := func(s string, p int) int {
+		if p = l3(s, p); p == -1 {
+			return -1
+		}
+		if p = l4(s, p); p == -1 {
+			return -1
+		}
+		return p
+	}
+	// ([0-9]*\.) (Capture)
+	l6 := func(s string, p int) int {
+		np := l5(s, p)
+		if np != -1 {
+			groups[2] = p
+			groups[3] = np
+		}
+		return np
+	}
+	// ([0-9]*\.)? (Quest)
+	l7 := func(s string, p int) int {
+		if np := l6(s, p); np != -1 {
+			return np
+		}
+		return p
+	}
+	// [0-9]+ (Plus)
+	l8 := func(s string, p int) int {
+		if p = l2(s, p); p == -1 {
+			return -1
+		}
+		for len(s) > p {
+			if np := l2(s, p); np == -1 {
+				return p
+			} else {
+				p = np
+			}
+		}
+		return p
+	}
+	// [\+\-]?([0-9]*\.)?[0-9]+ (Concat)
+	l9 := func(s string, p int) int {
+		if p = l1(s, p); p == -1 {
+			return -1
+		}
+		if p = l7(s, p); p == -1 {
+			return -1
+		}
+		if p = l8(s, p); p == -1 {
+			return -1
+		}
+		return p
+	}
+	np := l9(s, p)
+	if np == -1 {
+		return
+	}
+	groups[0] = p
+	groups[1] = np
+	return
+}
+
+// [A-Z_a-z][0-9A-Z_a-z]*
+func matchIdent(s string, p int) (groups [2]int) {
+	// [A-Z_a-z] (CharClass)
+	l0 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		rn := s[p]
+		switch {
+		case rn >= 'A' && rn <= 'Z':
+			return p + 1
+		case rn == '_':
+			return p + 1
+		case rn >= 'a' && rn <= 'z':
+			return p + 1
+		}
+		return -1
+	}
+	// [0-9A-Z_a-z] (CharClass)
+	l1 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		rn := s[p]
+		switch {
+		case rn >= '0' && rn <= '9':
+			return p + 1
+		case rn >= 'A' && rn <= 'Z':
+			return p + 1
+		case rn == '_':
+			return p + 1
+		case rn >= 'a' && rn <= 'z':
+			return p + 1
+		}
+		return -1
+	}
+	// [0-9A-Z_a-z]* (Star)
+	l2 := func(s string, p int) int {
+		for len(s) > p {
+			if np := l1(s, p); np == -1 {
+				return p
+			} else {
+				p = np
+			}
+		}
+		return p
+	}
+	// [A-Z_a-z][0-9A-Z_a-z]* (Concat)
+	l3 := func(s string, p int) int {
+		if p = l0(s, p); p == -1 {
+			return -1
+		}
+		if p = l2(s, p); p == -1 {
+			return -1
+		}
+		return p
+	}
+	np := l3(s, p)
+	if np == -1 {
+		return
+	}
+	groups[0] = p
+	groups[1] = np
+	return
+}
+
+// [!-/:-@\[-`\{-~]+
+func matchPunct(s string, p int) (groups [2]int) {
+	// [!-/:-@\[-`\{-~] (CharClass)
+	l0 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		rn := s[p]
+		switch {
+		case rn >= '!' && rn <= '/':
+			return p + 1
+		case rn >= ':' && rn <= '@':
+			return p + 1
+		case rn >= '[' && rn <= '`':
+			return p + 1
+		case rn >= '{' && rn <= '~':
+			return p + 1
+		}
+		return -1
+	}
+	// [!-/:-@\[-`\{-~]+ (Plus)
+	l1 := func(s string, p int) int {
+		if p = l0(s, p); p == -1 {
+			return -1
+		}
+		for len(s) > p {
+			if np := l0(s, p); np == -1 {
+				return p
+			} else {
+				p = np
+			}
+		}
+		return p
+	}
+	np := l1(s, p)
+	if np == -1 {
+		return
+	}
+	groups[0] = p
+	groups[1] = np
+	return
+}
+
+// \n
+func matchEOL(s string, p int) (groups [2]int) {
+	if p < len(s) && s[p] == '\n' {
+		groups[0] = p
+		groups[1] = p + 1
+	}
+	return
+}
+
+// matchComment matches (?i:REM)[^\n]*(?i:\n) at s[p:]; groups stays zero when nothing matches.
+func matchComment(s string, p int) (groups [2]int) {
+	// (?i:REM) (Literal) - compared case-insensitively, as the (?i) flag requires.
+	l0 := func(s string, p int) int {
+		if p+3 <= len(s) && strings.EqualFold(s[p:p+3], "REM") {
+			return p + 3
+		}
+		return -1
+	}
+	// [^\n] (CharClass)
+	l1 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		var (
+			rn rune
+			n  int
+		)
+		if s[p] < utf8.RuneSelf {
+			rn, n = rune(s[p]), 1
+		} else {
+			rn, n = utf8.DecodeRuneInString(s[p:])
+		}
+		switch {
+		case rn >= '\x00' && rn <= '\t':
+			return p + 1
+		case rn >= '\v' && rn <= '\U0010ffff':
+			return p + n
+		}
+		return -1
+	}
+	// [^\n]* (Star)
+	l2 := func(s string, p int) int {
+		for len(s) > p {
+			if np := l1(s, p); np == -1 {
+				return p
+			} else {
+				p = np
+			}
+		}
+		return p
+	}
+	// (?i:\n) (Literal)
+	l3 := func(s string, p int) int {
+		if p < len(s) && s[p] == '\n' {
+			return p + 1
+		}
+		return -1
+	}
+	// (?i:REM)[^\n]*(?i:\n) (Concat)
+	l4 := func(s string, p int) int {
+		if p = l0(s, p); p == -1 {
+			return -1
+		}
+		if p = l2(s, p); p == -1 {
+			return -1
+		}
+		if p = l3(s, p); p == -1 {
+			return -1
+		}
+		return p
+	}
+	np := l4(s, p)
+	if np == -1 {
+		return
+	}
+	groups[0] = p
+	groups[1] = np
+	return
+}
+
+// [\t ]+
+func matchWhitespace(s string, p int) (groups [2]int) {
+	// [\t ] (CharClass)
+	l0 := func(s string, p int) int {
+		if len(s) <= p {
+			return -1
+		}
+		rn := s[p]
+		switch {
+		case rn == '\t':
+			return p + 1
+		case rn == ' ':
+			return p + 1
+		}
+		return -1
+	}
+	// [\t ]+ (Plus)
+	l1 := func(s string, p int) int {
+		if p = l0(s, p); p == -1 {
+			return -1
+		}
+		for len(s) > p {
+			if np := l0(s, p); np == -1 {
+				return p
+			} else {
+				p = np
+			}
+		}
+		return p
+	}
+	np := l1(s, p)
+	if np == -1 {
+		return
+	}
+	groups[0] = p
+	groups[1] = np
+	return
+}
diff --git a/lexer/internal/basiclexer.json b/lexer/internal/basiclexer.json
new file mode 100644
index 00000000..91964342
--- /dev/null
+++ b/lexer/internal/basiclexer.json
@@ -0,0 +1,32 @@
+{
+  "Root": [
+    {
+      "name": "String",
+      "pattern": "\"(\\\\\"|[^\"])*\""
+    },
+    {
+      "name": "Number",
+      "pattern": "[-+]?(\\d*\\.)?\\d+"
+    },
+    {
+      "name": "Ident",
+      "pattern": "[a-zA-Z_]\\w*"
+    },
+    {
+      "name": "Punct",
+      "pattern": "[!-/:-@[-`{-~]+"
+    },
+    {
+      "name": "EOL",
+      "pattern": "\\n"
+    },
+    {
+      "name": "Comment",
+      "pattern": "(?i)rem[^\\n]*\\n"
+    },
+    {
+      "name": "Whitespace",
+      "pattern": "[ \\t]+"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/lexer/internal/codegen_gen_test.go b/lexer/internal/codegen_gen_test.go
deleted file mode 100644
index 3aa76a68..00000000
--- a/lexer/internal/codegen_gen_test.go
+++ /dev/null
@@ -1,336 +0,0 @@
-
-// Code generated by Participle. DO NOT EDIT.
-package internal_test
-
-import (
-	"io"
-	"strings"
-	"unicode/utf8"
-	"regexp/syntax"
-
-	"github.com/alecthomas/participle/v2"
-	"github.com/alecthomas/participle/v2/lexer"
-)
-
-var _ syntax.Op
-
-var Lexer lexer.Definition = definitionImpl{}
-
-type definitionImpl struct {}
-
-func (definitionImpl) Symbols() map[string]lexer.TokenType {
-	return map[string]lexer.TokenType{
-      "Char": -11,
-      "EOF": -1,
-      "Escaped": -8,
-      "Expr": -10,
-      "ExprEnd": -6,
-      "Ident": -5,
-      "Oper": -4,
-      "String": -7,
-      "StringEnd": -9,
-      "Whitespace": -3,
-	}
-}
-
-func (definitionImpl) LexString(filename string, s string) (lexer.Lexer, error) {
-	return &lexerImpl{
-		s: s,
-		pos: lexer.Position{
-			Filename: filename,
-			Line:     1,
-			Column:   1,
-		},
-		states: []lexerState{lexerState{name: "Root"}},
-	}, nil
-}
-
-func (d definitionImpl) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
-	return d.LexString(filename, string(b))
-}
-
-func (d definitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
-	s := &strings.Builder{}
-	_, err := io.Copy(s, r)
-	if err != nil {
-		return nil, err
-	}
-	return d.LexString(filename, s.String())
-}
-
-type lexerState struct {
-	name    string
-	groups  []string
-}
-
-type lexerImpl struct {
-	s       string
-	p       int
-	pos     lexer.Position
-	states  []lexerState
-}
-
-func (l *lexerImpl) Next() (lexer.Token, error) {
-	if l.p == len(l.s) {
-		return lexer.EOFToken(l.pos), nil
-	}
-	var (
-		state = l.states[len(l.states)-1]
-		groups []int
-		sym lexer.TokenType
-	)
-	switch state.name {
-	case "Expr":if match := matchString(l.s, l.p); match[1] != 0 {
-			sym = -7
-			groups = match[:]
-			l.states = append(l.states, lexerState{name: "String"})
-		} else if match := matchWhitespace(l.s, l.p); match[1] != 0 {
-			sym = -3
-			groups = match[:]
-		} else if match := matchOper(l.s, l.p); match[1] != 0 {
-			sym = -4
-			groups = match[:]
-		} else if match := matchIdent(l.s, l.p); match[1] != 0 {
-			sym = -5
-			groups = match[:]
-		} else if match := matchExprEnd(l.s, l.p); match[1] != 0 {
-			sym = -6
-			groups = match[:]
-			l.states = l.states[:len(l.states)-1]
-		}
-	case "Root":if match := matchString(l.s, l.p); match[1] != 0 {
-			sym = -7
-			groups = match[:]
-			l.states = append(l.states, lexerState{name: "String"})
-		}
-	case "String":if match := matchEscaped(l.s, l.p); match[1] != 0 {
-			sym = -8
-			groups = match[:]
-		} else if match := matchStringEnd(l.s, l.p); match[1] != 0 {
-			sym = -9
-			groups = match[:]
-			l.states = l.states[:len(l.states)-1]
-		} else if match := matchExpr(l.s, l.p); match[1] != 0 {
-			sym = -10
-			groups = match[:]
-			l.states = append(l.states, lexerState{name: "Expr"})
-		} else if match := matchChar(l.s, l.p); match[1] != 0 {
-			sym = -11
-			groups = match[:]
-		}
-	}
-	if groups == nil {
-		sample := []rune(l.s[l.p:])
-		if len(sample) > 16 {
-			sample = append(sample[:16], []rune("...")...)
-		}
-		return lexer.Token{}, participle.Errorf(l.pos, "invalid input text %q", sample)
-	}
-	pos := l.pos
-	span := l.s[groups[0]:groups[1]]
-	l.p = groups[1]
-	l.pos.Advance(span)
-	return lexer.Token{
-		Type:  sym,
-		Value: span,
-		Pos:   pos,
-	}, nil
-}
-
-func (l *lexerImpl) sgroups(match []int) []string {
-	sgroups := make([]string, len(match)/2)
-	for i := 0; i < len(match)-1; i += 2 {
-		sgroups[i/2] = l.s[l.p+match[i]:l.p+match[i+1]]
-	}
-	return sgroups
-}
-
-
-// "
-func matchString(s string, p int) (groups [2]int) {
-if p < len(s) && s[p] == '"' {
-groups[0] = p
-groups[1] = p + 1
-}
-return
-}
-
-// [\t-\n\f-\r ]+
-func matchWhitespace(s string, p int) (groups [2]int) {
-// [\t-\n\f-\r ] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn >= '\t' && rn <= '\n': return p+1
-case rn >= '\f' && rn <= '\r': return p+1
-case rn == ' ': return p+1
-}
-return -1
-}
-// [\t-\n\f-\r ]+ (Plus)
-l1 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-for len(s) > p {
-if np := l0(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-np := l1(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// [%\*-\+\-/]
-func matchOper(s string, p int) (groups [2]int) {
-// [%\*-\+\-/] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn == '%': return p+1
-case rn >= '*' && rn <= '+': return p+1
-case rn == '-': return p+1
-case rn == '/': return p+1
-}
-return -1
-}
-np := l0(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// [0-9A-Z_a-z]+
-func matchIdent(s string, p int) (groups [2]int) {
-// [0-9A-Z_a-z] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn >= '0' && rn <= '9': return p+1
-case rn >= 'A' && rn <= 'Z': return p+1
-case rn == '_': return p+1
-case rn >= 'a' && rn <= 'z': return p+1
-}
-return -1
-}
-// [0-9A-Z_a-z]+ (Plus)
-l1 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-for len(s) > p {
-if np := l0(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-np := l1(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// \}
-func matchExprEnd(s string, p int) (groups [2]int) {
-if p < len(s) && s[p] == '}' {
-groups[0] = p
-groups[1] = p + 1
-}
-return
-}
-
-// \\(?-s:.)
-func matchEscaped(s string, p int) (groups [2]int) {
-// \\ (Literal)
-l0 := func(s string, p int) int {
-if p < len(s) && s[p] == '\\' { return p+1 }
-return -1
-}
-// (?-s:.) (AnyCharNotNL)
-l1 := func(s string, p int) int {
-var (rn rune; n int)
-if s[p] < utf8.RuneSelf {
-  rn, n = rune(s[p]), 1
-} else {
-  rn, n = utf8.DecodeRuneInString(s[p:])
-}
-if len(s) <= p+n || rn == '\n' { return -1 }
-return p+n
-}
-// \\(?-s:.) (Concat)
-l2 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-if p = l1(s, p); p == -1 { return -1 }
-return p
-}
-np := l2(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// "
-func matchStringEnd(s string, p int) (groups [2]int) {
-if p < len(s) && s[p] == '"' {
-groups[0] = p
-groups[1] = p + 1
-}
-return
-}
-
-// \$\{
-func matchExpr(s string, p int) (groups [2]int) {
-if p+2 < len(s) && s[p:p+2] == "${" {
-groups[0] = p
-groups[1] = p + 2
-}
-return
-}
-
-// [^"\$\\]+
-func matchChar(s string, p int) (groups [2]int) {
-// [^"\$\\] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-var (rn rune; n int)
-if s[p] < utf8.RuneSelf {
-  rn, n = rune(s[p]), 1
-} else {
-  rn, n = utf8.DecodeRuneInString(s[p:])
-}
-switch {
-case rn >= '\x00' && rn <= '!': return p+1
-case rn == '#': return p+1
-case rn >= '%' && rn <= '[': return p+1
-case rn >= ']' && rn <= '\U0010ffff': return p+n
-}
-return -1
-}
-// [^"\$\\]+ (Plus)
-l1 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-for len(s) > p {
-if np := l0(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-np := l1(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
diff --git a/lexer/internal/codegen_test.go b/lexer/internal/codegen_test.go
deleted file mode 100644
index 00d04e66..00000000
--- a/lexer/internal/codegen_test.go
+++ /dev/null
@@ -1,106 +0,0 @@
-package internal_test
-
-import (
-	"os"
-	"os/exec"
-	"strings"
-	"testing"
-	"time"
-
-	require "github.com/alecthomas/assert/v2"
-	"github.com/alecthomas/participle/v2/lexer"
-)
-
-var (
-	testInput      = `hello ${name} world what's the song that you're singing, come on get ${emotion}`
-	benchmarkInput = `"` + strings.Repeat(testInput, 1000) + `"`
-	exprLexer      = lexer.MustStateful(lexer.Rules{
-		"Root": {
-			{`String`, `"`, lexer.Push("String")},
-		},
-		"String": {
-			{"Escaped", `\\.`, nil},
-			{"StringEnd", `"`, lexer.Pop()},
-			{"Expr", `\${`, lexer.Push("Expr")},
-			{"Char", `[^$"\\]+`, nil},
-		},
-		"Expr": {
-			lexer.Include("Root"),
-			{`Whitespace`, `\s+`, nil},
-			{`Oper`, `[-+/*%]`, nil},
-			{"Ident", `\w+`, nil},
-			{"ExprEnd", `}`, lexer.Pop()},
-		},
-	})
-)
-
-func TestGenerate(t *testing.T) {
-	w, err := os.Create("codegen_gen_test.go~")
-	require.NoError(t, err)
-	defer w.Close()
-	defer os.Rename("codegen_gen_test.go~", "codegen_gen_test.go") // nolint
-	err = lexer.ExperimentalGenerateLexer(w, "internal_test", exprLexer)
-	require.NoError(t, err)
-	err = exec.Command("gofmt", "-w", "codegen_gen_test.go").Run()
-	require.NoError(t, err)
-	// cmd.Stdin = strings.NewReader(source)
-	// err = cmd.Run()
-	// require.NoError(t, err)
-}
-
-func TestIdentical(t *testing.T) {
-	lex, err := exprLexer.LexString("", `"`+testInput+`"`)
-	require.NoError(t, err)
-	expected, err := lexer.ConsumeAll(lex)
-	require.NoError(t, err)
-
-	lex, err = Lexer.Lex("", strings.NewReader(`"`+testInput+`"`))
-	require.NoError(t, err)
-	actual, err := lexer.ConsumeAll(lex)
-	require.NoError(t, err)
-
-	require.Equal(t, expected, actual)
-}
-
-func BenchmarkStatefulGenerated(b *testing.B) {
-	b.ReportAllocs()
-	slex := Lexer.(lexer.StringDefinition)
-	start := time.Now()
-	for i := 0; i < b.N; i++ {
-		lex, err := slex.LexString("", benchmarkInput)
-		if err != nil {
-			b.Fatal(err)
-		}
-		for {
-			t, err := lex.Next()
-			if err != nil {
-				b.Fatal(err)
-			}
-			if t.EOF() {
-				break
-			}
-		}
-	}
-	b.ReportMetric(float64(len(benchmarkInput)*b.N)*float64(time.Since(start)/time.Second)/1024/1024, "MiB/s")
-}
-
-func BenchmarkStatefulRegex(b *testing.B) {
-	b.ReportAllocs()
-	start := time.Now()
-	for i := 0; i < b.N; i++ {
-		lex, err := exprLexer.LexString("", benchmarkInput)
-		if err != nil {
-			b.Fatal(err)
-		}
-		for {
-			t, err := lex.Next()
-			if err != nil {
-				b.Fatal(err)
-			}
-			if t.EOF() {
-				break
-			}
-		}
-	}
-	b.ReportMetric(float64(len(benchmarkInput)*b.N)/float64(time.Since(start)/time.Second)/1024/1024, "MiB/s")
-}
diff --git a/lexer/stateful.go b/lexer/stateful.go
index d9e4837d..210b3f33 100644
--- a/lexer/stateful.go
+++ b/lexer/stateful.go
@@ -1,6 +1,7 @@
 package lexer
 
 import (
+	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
@@ -21,9 +22,99 @@ type Option func(d *StatefulDefinition)
 
 // A Rule matching input and possibly changing state.
 type Rule struct {
-	Name    string
-	Pattern string
-	Action  Action
+	Name    string `json:"name"`
+	Pattern string `json:"pattern"`
+	Action  Action `json:"action"`
+}
+
+var _ json.Marshaler = &Rule{}
+var _ json.Unmarshaler = &Rule{}
+
+type jsonRule struct {
+	Name    string          `json:"name,omitempty"`
+	Pattern string          `json:"pattern,omitempty"`
+	Action  json.RawMessage `json:"action,omitempty"`
+}
+
+func (r *Rule) UnmarshalJSON(data []byte) error {
+	jrule := jsonRule{}
+	err := json.Unmarshal(data, &jrule)
+	if err != nil {
+		return err
+	}
+	r.Name = jrule.Name
+	r.Pattern = jrule.Pattern
+	jaction := struct {
+		Kind string `json:"kind"`
+	}{}
+	if jrule.Action == nil {
+		return nil
+	}
+	err = json.Unmarshal(jrule.Action, &jaction)
+	if err != nil {
+		return fmt.Errorf("could not unmarshal action %q: %w", string(jrule.Action), err)
+	}
+	var action Action
+	switch jaction.Kind {
+	case "push":
+		actual := ActionPush{}
+		if err := json.Unmarshal(jrule.Action, &actual); err != nil {
+			return err
+		}
+		action = actual
+	case "pop":
+		actual := ActionPop{}
+		if err := json.Unmarshal(jrule.Action, &actual); err != nil {
+			return err
+		}
+		action = actual
+	case "include":
+		actual := include{}
+		if err := json.Unmarshal(jrule.Action, &actual); err != nil {
+			return err
+		}
+		action = actual
+	case "":
+	default:
+		return fmt.Errorf("unknown action %q", jaction.Kind)
+	}
+	r.Action = action
+	return nil
+}
+
+func (r *Rule) MarshalJSON() ([]byte, error) {
+	jrule := jsonRule{
+		Name:    r.Name,
+		Pattern: r.Pattern,
+	}
+	if r.Action != nil {
+		actionData, err := json.Marshal(r.Action)
+		if err != nil {
+			return nil, fmt.Errorf("failed to map action: %w", err)
+		}
+		jaction := map[string]interface{}{}
+		err = json.Unmarshal(actionData, &jaction)
+		if err != nil {
+			return nil, fmt.Errorf("failed to map action: %w", err)
+		}
+		switch r.Action.(type) {
+		case nil:
+		case ActionPop:
+			jaction["kind"] = "pop"
+		case ActionPush:
+			jaction["kind"] = "push"
+		case include:
+			jaction["kind"] = "include"
+		default:
+			return nil, fmt.Errorf("unsupported action %T", r.Action)
+		}
+		actionJSON, err := json.Marshal(jaction)
+		if err != nil {
+			return nil, err
+		}
+		jrule.Action = actionJSON
+	}
+	return json.Marshal(&jrule)
 }
 
 // Rules grouped by name.
@@ -92,7 +183,9 @@ var ReturnRule = Rule{"returnToParent", "", nil}
 func Return() Rule { return ReturnRule }
 
 // ActionPush pushes the current state and switches to "State" when the Rule matches.
-type ActionPush struct{ State string }
+type ActionPush struct {
+	State string `json:"state"`
+}
 
 func (p ActionPush) applyAction(lexer *StatefulLexer, groups []string) error {
 	if groups[0] == "" {
@@ -110,16 +203,18 @@ func Push(state string) Action {
 	return ActionPush{state}
 }
 
-type include struct{ state string }
+type include struct {
+	State string `json:"state"`
+}
 
 func (i include) applyAction(lexer *StatefulLexer, groups []string) error {
 	panic("should not be called")
 }
 
 func (i include) applyRules(state string, rule int, rules compiledRules) error {
-	includedRules, ok := rules[i.state]
+	includedRules, ok := rules[i.State]
 	if !ok {
-		return fmt.Errorf("invalid include state %q", i.state)
+		return fmt.Errorf("invalid include state %q", i.State)
 	}
 	clone := make([]compiledRule, len(includedRules))
 	copy(clone, includedRules)
@@ -218,6 +313,10 @@ restart:
 	return d, nil
 }
 
+func (d *StatefulDefinition) MarshalJSON() ([]byte, error) {
+	return json.Marshal(d.rules)
+}
+
 // Rules returns the user-provided Rules used to construct the lexer.
 func (d *StatefulDefinition) Rules() Rules {
 	out := Rules{}
diff --git a/lexer/stateful_codegen_test.go b/lexer/stateful_codegen_test.go
deleted file mode 100644
index e184f6e9..00000000
--- a/lexer/stateful_codegen_test.go
+++ /dev/null
@@ -1,437 +0,0 @@
-
-// Code generated by Participle. DO NOT EDIT.
-package lexer_test
-
-import (
-	"io"
-	"strings"
-	"unicode/utf8"
-
-	"github.com/alecthomas/participle/v2"
-	"github.com/alecthomas/participle/v2/lexer"
-)
-
-var Lexer lexer.Definition = definitionImpl{}
-
-type definitionImpl struct {}
-
-func (definitionImpl) Symbols() map[string]lexer.TokenType {
-	return map[string]lexer.TokenType{
-      "Comment": -7,
-      "EOF": -1,
-      "EOL": -6,
-      "Ident": -4,
-      "Number": -3,
-      "Punct": -5,
-      "String": -2,
-      "Whitespace": -8,
-	}
-}
-
-func (definitionImpl) LexString(filename string, s string) (lexer.Lexer, error) {
-	return &lexerImpl{
-		s: s,
-		pos: lexer.Position{
-			Filename: filename,
-			Line:     1,
-			Column:   1,
-		},
-		states: []lexerState{lexerState{name: "Root"}},
-	}, nil
-}
-
-func (d definitionImpl) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
-	return d.LexString(filename, string(b))
-}
-
-func (d definitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
-	s := &strings.Builder{}
-	_, err := io.Copy(s, r)
-	if err != nil {
-		return nil, err
-	}
-	return d.LexString(filename, s.String())
-}
-
-type lexerState struct {
-	name    string
-	groups  []string
-}
-
-type lexerImpl struct {
-	s       string
-	p       int
-	pos     lexer.Position
-	states  []lexerState
-}
-
-func (l *lexerImpl) Next() (lexer.Token, error) {
-	if l.p == len(l.s) {
-		return lexer.EOFToken(l.pos), nil
-	}
-	var (
-		state = l.states[len(l.states)-1]
-		groups []int
-		sym lexer.TokenType
-	)
-	switch state.name {
-	case "Root":if match := matchString(l.s, l.p); match[1] != 0 {
-			sym = -2
-			groups = match[:]
-		} else if match := matchNumber(l.s, l.p); match[1] != 0 {
-			sym = -3
-			groups = match[:]
-		} else if match := matchIdent(l.s, l.p); match[1] != 0 {
-			sym = -4
-			groups = match[:]
-		} else if match := matchPunct(l.s, l.p); match[1] != 0 {
-			sym = -5
-			groups = match[:]
-		} else if match := matchEOL(l.s, l.p); match[1] != 0 {
-			sym = -6
-			groups = match[:]
-		} else if match := matchComment(l.s, l.p); match[1] != 0 {
-			sym = -7
-			groups = match[:]
-		} else if match := matchWhitespace(l.s, l.p); match[1] != 0 {
-			sym = -8
-			groups = match[:]
-		}
-	}
-	if groups == nil {
-		sample := []rune(l.s[l.p:])
-		if len(sample) > 16 {
-			sample = append(sample[:16], []rune("...")...)
-		}
-		return lexer.Token{}, participle.Errorf(l.pos, "invalid input text %q", sample)
-	}
-	pos := l.pos
-	span := l.s[groups[0]:groups[1]]
-	l.p = groups[1]
-	l.pos.Advance(span)
-	return lexer.Token{
-		Type:  sym,
-		Value: span,
-		Pos:   pos,
-	}, nil
-}
-
-func (l *lexerImpl) sgroups(match []int) []string {
-	sgroups := make([]string, len(match)/2)
-	for i := 0; i < len(match)-1; i += 2 {
-		sgroups[i/2] = l.s[l.p+match[i]:l.p+match[i+1]]
-	}
-	return sgroups
-}
-
-
-// "(\\"|[^"])*"
-func matchString(s string, p int) (groups [4]int) {
-// " (Literal)
-l0 := func(s string, p int) int {
-if p < len(s) && s[p] == '"' { return p+1 }
-return -1
-}
-// \\" (Literal)
-l1 := func(s string, p int) int {
-if p+2 < len(s) && s[p:p+2] == "\\\"" { return p+2 }
-return -1
-}
-// [^"] (CharClass)
-l2 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-var (rn rune; n int)
-if s[p] < utf8.RuneSelf {
-  rn, n = rune(s[p]), 1
-} else {
-  rn, n = utf8.DecodeRuneInString(s[p:])
-}
-switch {
-case rn >= '\x00' && rn <= '!': return p+1
-case rn >= '#' && rn <= '\U0010ffff': return p+n
-}
-return -1
-}
-// \\"|[^"] (Alternate)
-l3 := func(s string, p int) int {
-if np := l1(s, p); np != -1 { return np }
-if np := l2(s, p); np != -1 { return np }
-return -1
-}
-// (\\"|[^"]) (Capture)
-l4 := func(s string, p int) int {
-np := l3(s, p)
-if np != -1 {
-  groups[2] = p
-  groups[3] = np
-}
-return np}
-// (\\"|[^"])* (Star)
-l5 := func(s string, p int) int {
-for len(s) > p {
-if np := l4(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-// "(\\"|[^"])*" (Concat)
-l6 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-if p = l5(s, p); p == -1 { return -1 }
-if p = l0(s, p); p == -1 { return -1 }
-return p
-}
-np := l6(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// [\+\-]?([0-9]*\.)?[0-9]+
-func matchNumber(s string, p int) (groups [4]int) {
-// [\+\-] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn == '+': return p+1
-case rn == '-': return p+1
-}
-return -1
-}
-// [\+\-]? (Quest)
-l1 := func(s string, p int) int {
-if np := l0(s, p); np != -1 { return np }
-return p
-}
-// [0-9] (CharClass)
-l2 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn >= '0' && rn <= '9': return p+1
-}
-return -1
-}
-// [0-9]* (Star)
-l3 := func(s string, p int) int {
-for len(s) > p {
-if np := l2(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-// \. (Literal)
-l4 := func(s string, p int) int {
-if p < len(s) && s[p] == '.' { return p+1 }
-return -1
-}
-// [0-9]*\. (Concat)
-l5 := func(s string, p int) int {
-if p = l3(s, p); p == -1 { return -1 }
-if p = l4(s, p); p == -1 { return -1 }
-return p
-}
-// ([0-9]*\.) (Capture)
-l6 := func(s string, p int) int {
-np := l5(s, p)
-if np != -1 {
-  groups[2] = p
-  groups[3] = np
-}
-return np}
-// ([0-9]*\.)? (Quest)
-l7 := func(s string, p int) int {
-if np := l6(s, p); np != -1 { return np }
-return p
-}
-// [0-9]+ (Plus)
-l8 := func(s string, p int) int {
-if p = l2(s, p); p == -1 { return -1 }
-for len(s) > p {
-if np := l2(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-// [\+\-]?([0-9]*\.)?[0-9]+ (Concat)
-l9 := func(s string, p int) int {
-if p = l1(s, p); p == -1 { return -1 }
-if p = l7(s, p); p == -1 { return -1 }
-if p = l8(s, p); p == -1 { return -1 }
-return p
-}
-np := l9(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// [A-Z_a-z][0-9A-Z_a-z]*
-func matchIdent(s string, p int) (groups [2]int) {
-// [A-Z_a-z] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn >= 'A' && rn <= 'Z': return p+1
-case rn == '_': return p+1
-case rn >= 'a' && rn <= 'z': return p+1
-}
-return -1
-}
-// [0-9A-Z_a-z] (CharClass)
-l1 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn >= '0' && rn <= '9': return p+1
-case rn >= 'A' && rn <= 'Z': return p+1
-case rn == '_': return p+1
-case rn >= 'a' && rn <= 'z': return p+1
-}
-return -1
-}
-// [0-9A-Z_a-z]* (Star)
-l2 := func(s string, p int) int {
-for len(s) > p {
-if np := l1(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-// [A-Z_a-z][0-9A-Z_a-z]* (Concat)
-l3 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-if p = l2(s, p); p == -1 { return -1 }
-return p
-}
-np := l3(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// [!-/:-@\[-`\{-~]+
-func matchPunct(s string, p int) (groups [2]int) {
-// [!-/:-@\[-`\{-~] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn >= '!' && rn <= '/': return p+1
-case rn >= ':' && rn <= '@': return p+1
-case rn >= '[' && rn <= '`': return p+1
-case rn >= '{' && rn <= '~': return p+1
-}
-return -1
-}
-// [!-/:-@\[-`\{-~]+ (Plus)
-l1 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-for len(s) > p {
-if np := l0(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-np := l1(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// \n
-func matchEOL(s string, p int) (groups [2]int) {
-if p < len(s) && s[p] == '\n' {
-groups[0] = p
-groups[1] = p + 1
-}
-return
-}
-
-// (?i:REM)[^\n]*(?i:\n)
-func matchComment(s string, p int) (groups [2]int) {
-// (?i:REM) (Literal)
-l0 := func(s string, p int) int {
-if p+3 < len(s) && s[p:p+3] == "REM" { return p+3 }
-return -1
-}
-// [^\n] (CharClass)
-l1 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-var (rn rune; n int)
-if s[p] < utf8.RuneSelf {
-  rn, n = rune(s[p]), 1
-} else {
-  rn, n = utf8.DecodeRuneInString(s[p:])
-}
-switch {
-case rn >= '\x00' && rn <= '\t': return p+1
-case rn >= '\v' && rn <= '\U0010ffff': return p+n
-}
-return -1
-}
-// [^\n]* (Star)
-l2 := func(s string, p int) int {
-for len(s) > p {
-if np := l1(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-// (?i:\n) (Literal)
-l3 := func(s string, p int) int {
-if p < len(s) && s[p] == '\n' { return p+1 }
-return -1
-}
-// (?i:REM)[^\n]*(?i:\n) (Concat)
-l4 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-if p = l2(s, p); p == -1 { return -1 }
-if p = l3(s, p); p == -1 { return -1 }
-return p
-}
-np := l4(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
-
-// [\t ]+
-func matchWhitespace(s string, p int) (groups [2]int) {
-// [\t ] (CharClass)
-l0 := func(s string, p int) int {
-if len(s) <= p { return -1 }
-rn := s[p]
-switch {
-case rn == '\t': return p+1
-case rn == ' ': return p+1
-}
-return -1
-}
-// [\t ]+ (Plus)
-l1 := func(s string, p int) int {
-if p = l0(s, p); p == -1 { return -1 }
-for len(s) > p {
-if np := l0(s, p); np == -1 { return p } else { p = np }
-}
-return p
-}
-np := l1(s, p)
-if np == -1 {
-  return
-}
-groups[0] = p
-groups[1] = np
-return
-}
diff --git a/lexer/stateful_test.go b/lexer/stateful_test.go
index be7b5e44..c22b5a6e 100644
--- a/lexer/stateful_test.go
+++ b/lexer/stateful_test.go
@@ -1,15 +1,15 @@
 package lexer_test
 
 import (
-	"flag"
+	"encoding/json"
 	"log"
-	"os"
 	"strings"
 	"testing"
 
 	require "github.com/alecthomas/assert/v2"
 	"github.com/alecthomas/participle/v2"
 	"github.com/alecthomas/participle/v2/lexer"
+	"github.com/alecthomas/participle/v2/lexer/internal"
 	"github.com/alecthomas/repr"
 )
 
@@ -32,6 +32,15 @@ var interpolatedRules = lexer.Rules{
 	},
 }
 
+func TestMarshalUnmarshal(t *testing.T) {
+	data, err := json.MarshalIndent(interpolatedRules, "", "  ")
+	require.NoError(t, err)
+	unmarshalledRules := lexer.Rules{}
+	err = json.Unmarshal(data, &unmarshalledRules)
+	require.NoError(t, err)
+	require.Equal(t, interpolatedRules, unmarshalledRules)
+}
+
 func TestStatefulLexer(t *testing.T) {
 	tests := []struct {
 		name   string
@@ -408,30 +417,6 @@ func BenchmarkStatefulBackrefs(b *testing.B) {
 	}
 }
 
-var generateLexer = flag.Bool("generate", false, "generate lexer")
-
-func TestGenerate(t *testing.T) {
-	if !*generateLexer {
-		return
-	}
-	def, err := lexer.New(lexer.Rules{"Root": []lexer.Rule{
-		{"String", `"(\\"|[^"])*"`, nil},
-		{"Number", `[-+]?(\d*\.)?\d+`, nil},
-		{"Ident", `[a-zA-Z_]\w*`, nil},
-		{"Punct", `[!-/:-@[-` + "`" + `{-~]+`, nil},
-		{"EOL", `\n`, nil},
-		{"Comment", `(?i)rem[^\n]*\n`, nil},
-		{"Whitespace", `[ \t]+`, nil},
-	}})
-	require.NoError(t, err)
-	w, err := os.Create("stateful_codegen_test.go~")
-	require.NoError(t, err)
-	err = lexer.ExperimentalGenerateLexer(w, "lexer_test", def)
-	require.NoError(t, err)
-	err = os.Rename("stateful_codegen_test.go~", "stateful_codegen_test.go")
-	require.NoError(t, err)
-}
-
 func basicBenchmark(b *testing.B, def lexer.Definition) {
 	b.Helper()
 	source := strings.Repeat(`
@@ -485,5 +470,5 @@ func BenchmarkStatefulBASIC(b *testing.B) {
 }
 
 func BenchmarkStatefulGeneratedBASIC(b *testing.B) {
-	basicBenchmark(b, Lexer)
+	basicBenchmark(b, internal.GeneratedBasicLexer)
 }
diff --git a/scripts/participle b/scripts/participle
new file mode 100755
index 00000000..eccdee5d
--- /dev/null
+++ b/scripts/participle
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -euo pipefail
+(cd "$(dirname $0)/../cmd/participle" && go install github.com/alecthomas/participle/v2/cmd/participle)
+exec "$(go env GOBIN)/participle" "$@"
diff --git a/scripts/regen-lexer b/scripts/regen-lexer
new file mode 100755
index 00000000..8daa6cc0
--- /dev/null
+++ b/scripts/regen-lexer
@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+participle gen lexer --name GeneratedBasic internal < lexer/internal/basiclexer.json | gofmt > lexer/internal/basiclexer.go