-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.go
106 lines (86 loc) · 2.1 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package liquid
import (
"fmt"
"regexp"
"strings"
)
// Token is a single lexical unit produced by Lexer. name holds the token
// kind (one of the t* constants) and value holds the text that was matched.
type Token struct {
	name, value string
}
// Names of the single-character punctuation tokens.
const (
	tPipe        = "pipe"
	tDot         = "dot"
	tColon       = "colon"
	tComma       = "comma"
	tOpenSquare  = "open_square"
	tCloseSquare = "close_square"
	tOpenRound   = "open_round"
	tCloseRound  = "close_round"
	tQuestion    = "question"
	tDash        = "dash"
)

// Names of the multi-character token kinds. Single- and double-quoted string
// literals share the same name, so the quote style is not visible in the
// token name.
const (
	tIdentifier          = "id"
	tSingleStringLiteral = "string"
	tDoubleStringLiteral = "string"
	tNumberLiteral       = "number"
	tDotDot              = "dotdot"
	tComparisonOperator  = "comparison"
)

// tEndOfString names the marker token that ends a token list.
const tEndOfString = "end_of_string"
// EndOfString is the marker token that Lexer appends after the last real
// token of a successfully lexed input. Its value is always empty.
var EndOfString = Token{name: tEndOfString, value: ""}
var specialTokens = map[uint8]string{
'|': tPipe,
'.': tDot,
':': tColon,
',': tComma,
'[': tOpenSquare,
']': tCloseSquare,
'(': tOpenRound,
')': tCloseRound,
'?': tQuestion,
'-': tDash,
}
// sequence pairs a token name with the regular expression that recognises
// it: name is the value stored as the Token's name when the pattern matches,
// and regex is the pattern tried against the remaining input.
type sequence struct {
name string
regex *regexp.Regexp
}
// sequenceTypes lists the multi-character token patterns in priority order:
// at each input position the first entry whose regex matches wins.
//
// Every pattern must match only at the very start of the text it is given.
// In regexp syntax `|` binds more loosely than `^`, so the comparison
// alternatives are wrapped in a non-capturing group; without it only `==`
// would be anchored and the other operators could match further along the
// input, causing the text in front of them to be skipped.
var sequenceTypes = []sequence{
	{tComparisonOperator, regexp.MustCompile(`^(?:==|!=|<>|<=?|>=?|contains)`)},
	{tSingleStringLiteral, regexp.MustCompile(`^'[^\']*'`)},
	{tDoubleStringLiteral, regexp.MustCompile(`^"[^\"]*"`)},
	{tNumberLiteral, regexp.MustCompile(`^-?\d+(\.\d+)?`)},
	{tIdentifier, regexp.MustCompile(`^[a-zA-Z_][\w-]*\??`)},
	{tDotDot, regexp.MustCompile(`^\.\.`)},
}
// whitespace matches a single whitespace character; Lexer uses it to skip
// blanks between tokens.
var whitespace = regexp.MustCompile(`\s`)
// Lexer converts a Liquid-style expression string into a flat list of tokens.
//
// Leading and trailing whitespace is trimmed and whitespace between tokens is
// skipped. At each position the patterns in sequenceTypes are tried in
// priority order; if none matches, the byte is looked up in specialTokens.
// On success the returned slice ends with EndOfString.
//
// If a character matches neither table, Lexer returns the tokens recognised
// so far together with an "Unexpected character" error naming that character.
func Lexer(s string) ([]Token, error) {
	s = strings.TrimSpace(s)
	var tokens []Token

TokenLoop:
	for i := 0; i < len(s); i++ {
		// Testing a substring avoids allocating a one-byte slice per position.
		if whitespace.MatchString(s[i : i+1]) {
			continue
		}

		rest := s[i:]
		for _, seq := range sequenceTypes {
			if match := seq.regex.FindString(rest); match != "" {
				tokens = append(tokens, Token{seq.name, match})
				// The loop's own i++ supplies the final step past the match.
				i += len(match) - 1
				continue TokenLoop
			}
		}

		if name, ok := specialTokens[s[i]]; ok {
			tokens = append(tokens, Token{name, string(s[i])})
			continue
		}

		// Decode the whole rune so a multi-byte character is reported intact
		// rather than as its first byte reinterpreted as a code point.
		// rest is non-empty here, so ReadRune cannot fail.
		bad, _, _ := strings.NewReader(rest).ReadRune()
		return tokens, fmt.Errorf("Unexpected character: %v", string(bad))
	}

	tokens = append(tokens, EndOfString)
	return tokens, nil
}