Skip to content

Commit

Permalink
Add word_boundary_start and word_boundary_end Rule options (#100)
Browse files Browse the repository at this point in the history
* Remove unused MatchString method

* Add `word_boundary_start` and `word_boundary_end` options for rules

* Update example rule in README
  • Loading branch information
caitlinelfring authored Jul 20, 2021
1 parent 9eb558b commit 2812f30
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 66 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ rules:
note: An optional description why these terms are not inclusive. It can be optionally included in the output message.
# options:
# word_boundary: false
# word_boundary_start: false
# word_boundary_end: false
# include_note: false
```

Expand All @@ -233,6 +235,13 @@ Current options include:
- `word_boundary` (default: `false`)
- If `true`, terms will trigger findings when they are surrounded by ASCII word boundaries.
- If `false`, will trigger findings if the term if found anywhere in the line, regardless if it is within an ASCII word boundary.
- **NOTE** setting this to true will always win out over `word_boundary_start` and `word_boundary_end`
- `word_boundary_start` (default: `false`)
- If `true`, terms will trigger findings when they begin with an ASCII word boundaries.
- If `false`, will trigger findings if the term if found anywhere in the line, regardless if it begins with an ASCII word boundary.
- `word_boundary_end` (default: `false`)
- If `true`, terms will trigger findings when they end with an ASCII word boundaries.
- If `false`, will trigger findings if the term if found anywhere in the line, regardless if it ends with an ASCII word boundary.
- `include_note` (default: `not set`)
- If `true`, the rule note will be included in the output message explaining why this finding is not inclusive
- If `false`, the rule note will not be included in the output message
Expand Down
2 changes: 1 addition & 1 deletion pkg/printer/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ func TestJSON_Print(t *testing.T) {
assert.NoError(t, p.Print(buf, res))
got := buf.String()

expected := `{"Filename":"foo.txt","Results":[{"Rule":{"Name":"whitelist","Terms":["whitelist","white-list","whitelisted","white-listed"],"Alternatives":["allowlist"],"Note":"","Severity":"warning","Options":{"WordBoundary":false,"IncludeNote":null}},"Finding":"whitelist","Line":"this whitelist must change","StartPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":6},"EndPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":15},"Reason":"` + "`whitelist`" + ` may be insensitive, use ` + "`allowlist`" + ` instead"}]}` + "\n"
expected := `{"Filename":"foo.txt","Results":[{"Rule":{"Name":"whitelist","Terms":["whitelist","white-list","whitelisted","white-listed"],"Alternatives":["allowlist"],"Note":"","Severity":"warning","Options":{"WordBoundary":false,"WordBoundaryStart":false,"WordBoundaryEnd":false,"IncludeNote":null}},"Finding":"whitelist","Line":"this whitelist must change","StartPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":6},"EndPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":15},"Reason":"` + "`whitelist`" + ` may be insensitive, use ` + "`allowlist`" + ` instead"}]}` + "\n"
assert.Equal(t, expected, got)
}
2 changes: 1 addition & 1 deletion pkg/result/pathresult_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func TestMatchPathRulesBoundary(t *testing.T) {
t.Run(test.path, func(t *testing.T) {
defaultRules := rule.DefaultRules
for i := range defaultRules {
defaultRules[i].Options.WordBoundary = true
defaultRules[i].SetOptions(rule.Options{WordBoundary: true})
}
pr := MatchPathRules(defaultRules, test.path)
assert.Len(t, pr, test.matches)
Expand Down
6 changes: 4 additions & 2 deletions pkg/rule/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package rule

// Options are options that can be configured and applied on a per-rule basis
type Options struct {
WordBoundary bool `yaml:"word_boundary"`
IncludeNote *bool `yaml:"include_note"`
WordBoundary bool `yaml:"word_boundary"`
WordBoundaryStart bool `yaml:"word_boundary_start"`
WordBoundaryEnd bool `yaml:"word_boundary_end"`
IncludeNote *bool `yaml:"include_note"`
}
66 changes: 39 additions & 27 deletions pkg/rule/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (

var ignoreRuleRegex = regexp.MustCompile(`wokeignore:rule=(\S+)`)

const wordBoundary = `\b`

// Rule is a linter rule
type Rule struct {
Name string `yaml:"name"`
Expand All @@ -20,15 +22,7 @@ type Rule struct {
Severity Severity `yaml:"severity"`
Options Options `yaml:"options"`

reWordBoundary *regexp.Regexp
re *regexp.Regexp
}

func (r *Rule) findAllStringSubmatchIndex(text string) [][]int {
if r.Options.WordBoundary {
return r.reWordBoundary.FindAllStringSubmatchIndex(text, -1)
}
return r.re.FindAllStringSubmatchIndex(text, -1)
re *regexp.Regexp
}

// FindMatchIndexes returns the start and end indexes for all rule findings for the text supplied.
Expand All @@ -40,7 +34,7 @@ func (r *Rule) FindMatchIndexes(text string) [][]int {
r.SetRegexp()

// Remove inline ignores from text to avoid matching against other rules
matches := r.findAllStringSubmatchIndex(removeInlineIgnore(text))
matches := r.re.FindAllStringSubmatchIndex(removeInlineIgnore(text), -1)
if matches == nil {
return [][]int(nil)
}
Expand Down Expand Up @@ -72,30 +66,48 @@ func (r *Rule) FindMatchIndexes(text string) [][]int {
return idx
}

// MatchString reports whether the string s
// contains any match of the regular expression re.
func (r *Rule) MatchString(s string, wordBoundary bool) bool {
if r.Disabled() {
return false
// SetRegexp populates the regex for matching this rule.
// This is meant to be idempotent, so calling it multiple times won't update the regex
func (r *Rule) SetRegexp() {
if r.re != nil {
return
}
r.setRegex()
}

r.SetRegexp()
// SetOptions sets new Options for the Rule and updates the regex.
func (r *Rule) SetOptions(o Options) {
r.Options = o
r.setRegex()
}

func (r *Rule) setRegex() {
group := strings.Join(escape(r.Terms), "|")
r.re = regexp.MustCompile(fmt.Sprintf(r.regexString(), group))
}

if wordBoundary {
return r.reWordBoundary.MatchString(s)
func (r *Rule) regexString() string {
regex := func(start, end string) string {
s := strings.Builder{}
s.WriteString(start)
s.WriteString("(%s)")
s.WriteString(end)
return s.String()
}

return r.re.MatchString(s)
}
if r.Options.WordBoundary {
return regex(wordBoundary, wordBoundary)
}

// SetRegexp populates the regex for matching this rule
func (r *Rule) SetRegexp() {
if r.re != nil && r.reWordBoundary != nil {
return
start := ""
end := ""
if r.Options.WordBoundaryStart {
start = wordBoundary
}
group := strings.Join(escape(r.Terms), "|")
r.reWordBoundary = regexp.MustCompile(fmt.Sprintf(`(?i)\b(%s)\b`, group))
r.re = regexp.MustCompile(fmt.Sprintf(`(?i)(%s)`, group))
if r.Options.WordBoundaryEnd {
end = wordBoundary
}
return regex(start, end)
}

// Reason returns a human-readable reason for the rule finding
Expand Down
99 changes: 64 additions & 35 deletions pkg/rule/rule_test.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
package rule

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
)

func TestRule_FindMatchIndexes(t *testing.T) {
func testRuleWithOptions(o Options) Rule {
r := testRule()
r.SetOptions(o)
return r
}

func testRule() Rule {
return Rule{
Name: "rule1",
Terms: []string{"rule1", "rule-1"},
Alternatives: []string{"alt-rule1", "alt-rule-1"},
Severity: SevWarn,
}
}

func TestRule_FindMatchIndexes(t *testing.T) {
tests := []struct {
text string
expected [][]int
Expand All @@ -20,12 +33,13 @@ func TestRule_FindMatchIndexes(t *testing.T) {
{"this string has finding with word boundary rule1rule-1", [][]int{{43, 48}, {48, 54}}, [][]int(nil)},
}
for _, test := range tests {
r := testRule()
got := r.FindMatchIndexes(test.text)
assert.Equal(t, test.expected, got)
}

r.Options.WordBoundary = true
for _, test := range tests {
r := testRuleWithOptions(Options{WordBoundary: true})
got := r.FindMatchIndexes(test.text)
assert.Equal(t, test.expectedWb, got)
}
Expand Down Expand Up @@ -53,15 +67,6 @@ func TestRule_ReasonWithNote(t *testing.T) {
assert.Equal(t, "`rule-1` may be insensitive, use `alt-rule1`, `alt-rule-1` instead (rule note here)", r.ReasonWithNote("rule-1"))
}

func testRule() Rule {
return Rule{
Name: "rule1",
Terms: []string{"rule1", "rule-1"},
Alternatives: []string{"alt-rule1", "alt-rule-1"},
Severity: SevWarn,
}
}

func TestRule_CanIgnoreLine(t *testing.T) {
r := testRule()

Expand Down Expand Up @@ -89,27 +94,6 @@ func TestRule_CanIgnoreLine(t *testing.T) {
}
}

func TestRule_MatchString(t *testing.T) {
r := testRule()
tests := []struct {
s string
wb bool
assertion assert.BoolAssertionFunc
}{
{s: "this has rule1 in the middle with word boundaries", wb: true, assertion: assert.True},
{s: "this has rule1 in the middle", wb: false, assertion: assert.True},
{s: "rule1shouldn't match with word boundaries", wb: true, assertion: assert.False},
{s: "rule1should match without word boundaries", wb: false, assertion: assert.True},
{s: "thisrule1should match without word boundaries", wb: false, assertion: assert.True},
}
for _, tt := range tests {
t.Run(tt.s, func(t *testing.T) {
fmt.Println(r.MatchString(tt.s, tt.wb), tt.s)
tt.assertion(t, r.MatchString(tt.s, tt.wb))
})
}
}

func TestRule_EmptyTerms(t *testing.T) {
r := Rule{
Name: "rule1",
Expand All @@ -126,8 +110,53 @@ func TestRule_EmptyTerms(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.s, func(t *testing.T) {
fmt.Println(r.MatchString(tt.s, tt.wb), tt.s)
tt.assertion(t, r.MatchString(tt.s, tt.wb))
r.SetOptions(Options{WordBoundary: tt.wb})
tt.assertion(t, len(r.FindMatchIndexes(tt.s)) > 0)
})
}
}

func TestRule_regexString(t *testing.T) {
tests := []struct {
desc string
rule Rule
expected string
}{
{
desc: "default",
rule: testRule(),
expected: `(%s)`,
},
{
desc: "word boundary",
rule: testRuleWithOptions(Options{WordBoundary: true}),
expected: `\b(%s)\b`,
},
{
desc: "word boundary start",
rule: testRuleWithOptions(Options{WordBoundaryStart: true}),
expected: `\b(%s)`,
},
{
desc: "word boundary end",
rule: testRuleWithOptions(Options{WordBoundaryEnd: true}),
expected: `(%s)\b`,
},
{
desc: "word boundary start and end",
rule: testRuleWithOptions(Options{WordBoundaryStart: true, WordBoundaryEnd: true}),
expected: `\b(%s)\b`,
},
{
// To show that enabling WordBoundary will win over other options
desc: "word boundary and word boundary start/end false",
rule: testRuleWithOptions(Options{WordBoundary: true, WordBoundaryStart: false, WordBoundaryEnd: false}),
expected: `\b(%s)\b`,
},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
assert.Equal(t, tt.expected, tt.rule.regexString())
})
}
}
Expand Down

0 comments on commit 2812f30

Please sign in to comment.