-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrx.go
98 lines (84 loc) · 2.36 KB
/
rx.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0
package wasilibs
import (
"fmt"
"strconv"
"unicode/utf8"
"github.com/corazawaf/coraza-wasilibs/internal/memoize"
"github.com/corazawaf/coraza/v3/experimental/plugins"
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/wasilibs/go-re2"
"github.com/wasilibs/go-re2/experimental"
)
type rx struct {
re *re2.Regexp
}
var _ plugintypes.Operator = (*rx)(nil)
func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
// (?sm) enables multiline mode which makes 942522-7 work, see
// - https://stackoverflow.com/a/27680233
// - https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
data := fmt.Sprintf("(?sm)%s", options.Arguments)
var re interface{}
var err error
if matchesArbitraryBytes(data) {
re, err = memoize.Do(data, func() (interface{}, error) { return experimental.CompileLatin1(data) })
} else {
re, err = memoize.Do(data, func() (interface{}, error) { return re2.Compile(data) })
}
if err != nil {
return nil, err
}
return &rx{re: re.(*re2.Regexp)}, nil
}
func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool {
if tx.Capturing() {
match := o.re.FindStringSubmatch(value)
if len(match) == 0 {
return false
}
for i, c := range match {
if i == 9 {
return true
}
tx.CaptureField(i, c)
}
return true
} else {
return o.re.MatchString(value)
}
}
// RegisterRX registers the rx operator using a WASI implementation instead of Go.
func RegisterRX() {
plugins.RegisterOperator("rx", newRX)
}
// matchesArbitraryBytes checks for control sequences for byte matches in the expression.
// If the sequences are not valid utf8, it returns true.
func matchesArbitraryBytes(expr string) bool {
decoded := make([]byte, 0, len(expr))
for i := 0; i < len(expr); i++ {
c := expr[i]
if c != '\\' {
decoded = append(decoded, c)
continue
}
if i+3 >= len(expr) {
decoded = append(decoded, expr[i:]...)
break
}
if expr[i+1] != 'x' {
decoded = append(decoded, expr[i])
continue
}
v, mb, _, err := strconv.UnquoteChar(expr[i:], 0)
if err != nil || mb {
// Wasn't a byte escape sequence, shouldn't happen in practice.
decoded = append(decoded, expr[i])
continue
}
decoded = append(decoded, byte(v))
i += 3
}
return !utf8.Valid(decoded)
}