From 2812f3010ba7e9022f2d88295301eaaf59671d47 Mon Sep 17 00:00:00 2001 From: Caitlin Elfring Date: Mon, 19 Jul 2021 21:46:27 -0400 Subject: [PATCH] Add `word_boundary_start` and `word_boundary_end` Rule options (#100) * Remove unused MatchString method * Add `word_boundary_start` and `word_boundary_end` options for rules * Update example rule in README --- README.md | 9 ++++ pkg/printer/json_test.go | 2 +- pkg/result/pathresult_test.go | 2 +- pkg/rule/options.go | 6 ++- pkg/rule/rule.go | 66 +++++++++++++---------- pkg/rule/rule_test.go | 99 ++++++++++++++++++++++------------- 6 files changed, 118 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index ab9aa51f..89267ca6 100644 --- a/README.md +++ b/README.md @@ -217,6 +217,8 @@ rules: note: An optional description why these terms are not inclusive. It can be optionally included in the output message. # options: # word_boundary: false + # word_boundary_start: false + # word_boundary_end: false # include_note: false ``` @@ -233,6 +235,13 @@ Current options include: - `word_boundary` (default: `false`) - If `true`, terms will trigger findings when they are surrounded by ASCII word boundaries. - If `false`, will trigger findings if the term if found anywhere in the line, regardless if it is within an ASCII word boundary. + - **NOTE** setting this to true will always win out over `word_boundary_start` and `word_boundary_end` +- `word_boundary_start` (default: `false`) + - If `true`, terms will trigger findings when they begin with an ASCII word boundaries. + - If `false`, will trigger findings if the term if found anywhere in the line, regardless if it begins with an ASCII word boundary. +- `word_boundary_end` (default: `false`) + - If `true`, terms will trigger findings when they end with an ASCII word boundaries. + - If `false`, will trigger findings if the term if found anywhere in the line, regardless if it ends with an ASCII word boundary. - `include_note` (default: `not set`) - If `true`, the rule note will be included in the output message explaining why this finding is not inclusive - If `false`, the rule note will not be included in the output message diff --git a/pkg/printer/json_test.go b/pkg/printer/json_test.go index 74419573..4bc4eb7a 100644 --- a/pkg/printer/json_test.go +++ b/pkg/printer/json_test.go @@ -14,6 +14,6 @@ func TestJSON_Print(t *testing.T) { assert.NoError(t, p.Print(buf, res)) got := buf.String() - expected := `{"Filename":"foo.txt","Results":[{"Rule":{"Name":"whitelist","Terms":["whitelist","white-list","whitelisted","white-listed"],"Alternatives":["allowlist"],"Note":"","Severity":"warning","Options":{"WordBoundary":false,"IncludeNote":null}},"Finding":"whitelist","Line":"this whitelist must change","StartPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":6},"EndPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":15},"Reason":"` + "`whitelist`" + ` may be insensitive, use ` + "`allowlist`" + ` instead"}]}` + "\n" + expected := `{"Filename":"foo.txt","Results":[{"Rule":{"Name":"whitelist","Terms":["whitelist","white-list","whitelisted","white-listed"],"Alternatives":["allowlist"],"Note":"","Severity":"warning","Options":{"WordBoundary":false,"WordBoundaryStart":false,"WordBoundaryEnd":false,"IncludeNote":null}},"Finding":"whitelist","Line":"this whitelist must change","StartPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":6},"EndPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":15},"Reason":"` + "`whitelist`" + ` may be insensitive, use ` + "`allowlist`" + ` instead"}]}` + "\n" assert.Equal(t, expected, got) } diff --git a/pkg/result/pathresult_test.go b/pkg/result/pathresult_test.go index d4d26657..1a2d02d9 100644 --- a/pkg/result/pathresult_test.go +++ b/pkg/result/pathresult_test.go @@ -49,7 +49,7 @@ func TestMatchPathRulesBoundary(t *testing.T) { t.Run(test.path, func(t *testing.T) { defaultRules := rule.DefaultRules for i := range defaultRules { - defaultRules[i].Options.WordBoundary = true + defaultRules[i].SetOptions(rule.Options{WordBoundary: true}) } pr := MatchPathRules(defaultRules, test.path) assert.Len(t, pr, test.matches) diff --git a/pkg/rule/options.go b/pkg/rule/options.go index f4e06b2d..c394f07b 100644 --- a/pkg/rule/options.go +++ b/pkg/rule/options.go @@ -2,6 +2,8 @@ package rule // Options are options that can be configured and applied on a per-rule basis type Options struct { - WordBoundary bool `yaml:"word_boundary"` - IncludeNote *bool `yaml:"include_note"` + WordBoundary bool `yaml:"word_boundary"` + WordBoundaryStart bool `yaml:"word_boundary_start"` + WordBoundaryEnd bool `yaml:"word_boundary_end"` + IncludeNote *bool `yaml:"include_note"` } diff --git a/pkg/rule/rule.go b/pkg/rule/rule.go index 4783a4a8..ec4f1914 100644 --- a/pkg/rule/rule.go +++ b/pkg/rule/rule.go @@ -11,6 +11,8 @@ import ( var ignoreRuleRegex = regexp.MustCompile(`wokeignore:rule=(\S+)`) +const wordBoundary = `\b` + // Rule is a linter rule type Rule struct { Name string `yaml:"name"` @@ -20,15 +22,7 @@ type Rule struct { Severity Severity `yaml:"severity"` Options Options `yaml:"options"` - reWordBoundary *regexp.Regexp - re *regexp.Regexp -} - -func (r *Rule) findAllStringSubmatchIndex(text string) [][]int { - if r.Options.WordBoundary { - return r.reWordBoundary.FindAllStringSubmatchIndex(text, -1) - } - return r.re.FindAllStringSubmatchIndex(text, -1) + re *regexp.Regexp } // FindMatchIndexes returns the start and end indexes for all rule findings for the text supplied. @@ -40,7 +34,7 @@ func (r *Rule) FindMatchIndexes(text string) [][]int { r.SetRegexp() // Remove inline ignores from text to avoid matching against other rules - matches := r.findAllStringSubmatchIndex(removeInlineIgnore(text)) + matches := r.re.FindAllStringSubmatchIndex(removeInlineIgnore(text), -1) if matches == nil { return [][]int(nil) } @@ -72,30 +66,48 @@ func (r *Rule) FindMatchIndexes(text string) [][]int { return idx } -// MatchString reports whether the string s -// contains any match of the regular expression re. -func (r *Rule) MatchString(s string, wordBoundary bool) bool { - if r.Disabled() { - return false +// SetRegexp populates the regex for matching this rule. +// This is meant to be idempotent, so calling it multiple times won't update the regex +func (r *Rule) SetRegexp() { + if r.re != nil { + return } + r.setRegex() +} - r.SetRegexp() +// SetOptions sets new Options for the Rule and updates the regex. +func (r *Rule) SetOptions(o Options) { + r.Options = o + r.setRegex() +} + +func (r *Rule) setRegex() { + group := strings.Join(escape(r.Terms), "|") + r.re = regexp.MustCompile(fmt.Sprintf(r.regexString(), group)) +} - if wordBoundary { - return r.reWordBoundary.MatchString(s) +func (r *Rule) regexString() string { + regex := func(start, end string) string { + s := strings.Builder{} + s.WriteString(start) + s.WriteString("(%s)") + s.WriteString(end) + return s.String() } - return r.re.MatchString(s) -} + if r.Options.WordBoundary { + return regex(wordBoundary, wordBoundary) + } -// SetRegexp populates the regex for matching this rule -func (r *Rule) SetRegexp() { - if r.re != nil && r.reWordBoundary != nil { - return + start := "" + end := "" + if r.Options.WordBoundaryStart { + start = wordBoundary } - group := strings.Join(escape(r.Terms), "|") - r.reWordBoundary = regexp.MustCompile(fmt.Sprintf(`(?i)\b(%s)\b`, group)) - r.re = regexp.MustCompile(fmt.Sprintf(`(?i)(%s)`, group)) + if r.Options.WordBoundaryEnd { + end = wordBoundary + } + return regex(start, end) } // Reason returns a human-readable reason for the rule finding diff --git a/pkg/rule/rule_test.go b/pkg/rule/rule_test.go index 029735a1..39910651 100644 --- a/pkg/rule/rule_test.go +++ b/pkg/rule/rule_test.go @@ -1,14 +1,27 @@ package rule import ( - "fmt" "testing" "github.com/stretchr/testify/assert" ) -func TestRule_FindMatchIndexes(t *testing.T) { +func testRuleWithOptions(o Options) Rule { r := testRule() + r.SetOptions(o) + return r +} + +func testRule() Rule { + return Rule{ + Name: "rule1", + Terms: []string{"rule1", "rule-1"}, + Alternatives: []string{"alt-rule1", "alt-rule-1"}, + Severity: SevWarn, + } +} + +func TestRule_FindMatchIndexes(t *testing.T) { tests := []struct { text string expected [][]int @@ -20,12 +33,13 @@ func TestRule_FindMatchIndexes(t *testing.T) { {"this string has finding with word boundary rule1rule-1", [][]int{{43, 48}, {48, 54}}, [][]int(nil)}, } for _, test := range tests { + r := testRule() got := r.FindMatchIndexes(test.text) assert.Equal(t, test.expected, got) } - r.Options.WordBoundary = true for _, test := range tests { + r := testRuleWithOptions(Options{WordBoundary: true}) got := r.FindMatchIndexes(test.text) assert.Equal(t, test.expectedWb, got) } @@ -53,15 +67,6 @@ func TestRule_ReasonWithNote(t *testing.T) { assert.Equal(t, "`rule-1` may be insensitive, use `alt-rule1`, `alt-rule-1` instead (rule note here)", r.ReasonWithNote("rule-1")) } -func testRule() Rule { - return Rule{ - Name: "rule1", - Terms: []string{"rule1", "rule-1"}, - Alternatives: []string{"alt-rule1", "alt-rule-1"}, - Severity: SevWarn, - } -} - func TestRule_CanIgnoreLine(t *testing.T) { r := testRule() @@ -89,27 +94,6 @@ func TestRule_CanIgnoreLine(t *testing.T) { } } -func TestRule_MatchString(t *testing.T) { - r := testRule() - tests := []struct { - s string - wb bool - assertion assert.BoolAssertionFunc - }{ - {s: "this has rule1 in the middle with word boundaries", wb: true, assertion: assert.True}, - {s: "this has rule1 in the middle", wb: false, assertion: assert.True}, - {s: "rule1shouldn't match with word boundaries", wb: true, assertion: assert.False}, - {s: "rule1should match without word boundaries", wb: false, assertion: assert.True}, - {s: "thisrule1should match without word boundaries", wb: false, assertion: assert.True}, - } - for _, tt := range tests { - t.Run(tt.s, func(t *testing.T) { - fmt.Println(r.MatchString(tt.s, tt.wb), tt.s) - tt.assertion(t, r.MatchString(tt.s, tt.wb)) - }) - } -} - func TestRule_EmptyTerms(t *testing.T) { r := Rule{ Name: "rule1", @@ -126,8 +110,53 @@ func TestRule_EmptyTerms(t *testing.T) { } for _, tt := range tests { t.Run(tt.s, func(t *testing.T) { - fmt.Println(r.MatchString(tt.s, tt.wb), tt.s) - tt.assertion(t, r.MatchString(tt.s, tt.wb)) + r.SetOptions(Options{WordBoundary: tt.wb}) + tt.assertion(t, len(r.FindMatchIndexes(tt.s)) > 0) + }) + } +} + +func TestRule_regexString(t *testing.T) { + tests := []struct { + desc string + rule Rule + expected string + }{ + { + desc: "default", + rule: testRule(), + expected: `(%s)`, + }, + { + desc: "word boundary", + rule: testRuleWithOptions(Options{WordBoundary: true}), + expected: `\b(%s)\b`, + }, + { + desc: "word boundary start", + rule: testRuleWithOptions(Options{WordBoundaryStart: true}), + expected: `\b(%s)`, + }, + { + desc: "word boundary end", + rule: testRuleWithOptions(Options{WordBoundaryEnd: true}), + expected: `(%s)\b`, + }, + { + desc: "word boundary start and end", + rule: testRuleWithOptions(Options{WordBoundaryStart: true, WordBoundaryEnd: true}), + expected: `\b(%s)\b`, + }, + { + // To show that enabling WordBoundary will win over other options + desc: "word boundary and word boundary start/end false", + rule: testRuleWithOptions(Options{WordBoundary: true, WordBoundaryStart: false, WordBoundaryEnd: false}), + expected: `\b(%s)\b`, + }, + } + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + assert.Equal(t, tt.expected, tt.rule.regexString()) }) } }