From 2812f3010ba7e9022f2d88295301eaaf59671d47 Mon Sep 17 00:00:00 2001
From: Caitlin Elfring <celfring@gmail.com>
Date: Mon, 19 Jul 2021 21:46:27 -0400
Subject: [PATCH] Add `word_boundary_start` and `word_boundary_end` Rule
 options (#100)

* Remove unused MatchString method

* Add `word_boundary_start` and `word_boundary_end` options for rules

* Update example rule in README
---
 README.md                     |  9 ++++
 pkg/printer/json_test.go      |  2 +-
 pkg/result/pathresult_test.go |  2 +-
 pkg/rule/options.go           |  6 ++-
 pkg/rule/rule.go              | 66 +++++++++++++----------
 pkg/rule/rule_test.go         | 99 ++++++++++++++++++++++-------------
 6 files changed, 118 insertions(+), 66 deletions(-)

diff --git a/README.md b/README.md
index ab9aa51f..89267ca6 100644
--- a/README.md
+++ b/README.md
@@ -217,6 +217,8 @@ rules:
     note: An optional description why these terms are not inclusive. It can be optionally included in the output message.
     # options:
     #   word_boundary: false
+    #   word_boundary_start: false
+    #   word_boundary_end: false
     #   include_note: false
 ```
 
@@ -233,6 +235,13 @@ Current options include:
 - `word_boundary` (default: `false`)
   - If `true`, terms will trigger findings when they are surrounded by ASCII word boundaries.
   - If `false`, will trigger findings if the term if found anywhere in the line, regardless if it is within an ASCII word boundary.
+  - **NOTE** setting this to true will always win out over `word_boundary_start` and `word_boundary_end`
+- `word_boundary_start` (default: `false`)
+  - If `true`, terms will trigger findings when they begin with an ASCII word boundaries.
+  - If `false`, will trigger findings if the term if found anywhere in the line, regardless if it begins with an ASCII word boundary.
+- `word_boundary_end` (default: `false`)
+  - If `true`, terms will trigger findings when they end with an ASCII word boundaries.
+  - If `false`, will trigger findings if the term if found anywhere in the line, regardless if it ends with an ASCII word boundary.
 - `include_note` (default: `not set`)
   - If `true`, the rule note will be included in the output message explaining why this finding is not inclusive
   - If `false`, the rule note will not be included in the output message
diff --git a/pkg/printer/json_test.go b/pkg/printer/json_test.go
index 74419573..4bc4eb7a 100644
--- a/pkg/printer/json_test.go
+++ b/pkg/printer/json_test.go
@@ -14,6 +14,6 @@ func TestJSON_Print(t *testing.T) {
 	assert.NoError(t, p.Print(buf, res))
 	got := buf.String()
 
-	expected := `{"Filename":"foo.txt","Results":[{"Rule":{"Name":"whitelist","Terms":["whitelist","white-list","whitelisted","white-listed"],"Alternatives":["allowlist"],"Note":"","Severity":"warning","Options":{"WordBoundary":false,"IncludeNote":null}},"Finding":"whitelist","Line":"this whitelist must change","StartPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":6},"EndPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":15},"Reason":"` + "`whitelist`" + ` may be insensitive, use ` + "`allowlist`" + ` instead"}]}` + "\n"
+	expected := `{"Filename":"foo.txt","Results":[{"Rule":{"Name":"whitelist","Terms":["whitelist","white-list","whitelisted","white-listed"],"Alternatives":["allowlist"],"Note":"","Severity":"warning","Options":{"WordBoundary":false,"WordBoundaryStart":false,"WordBoundaryEnd":false,"IncludeNote":null}},"Finding":"whitelist","Line":"this whitelist must change","StartPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":6},"EndPosition":{"Filename":"foo.txt","Offset":0,"Line":1,"Column":15},"Reason":"` + "`whitelist`" + ` may be insensitive, use ` + "`allowlist`" + ` instead"}]}` + "\n"
 	assert.Equal(t, expected, got)
 }
diff --git a/pkg/result/pathresult_test.go b/pkg/result/pathresult_test.go
index d4d26657..1a2d02d9 100644
--- a/pkg/result/pathresult_test.go
+++ b/pkg/result/pathresult_test.go
@@ -49,7 +49,7 @@ func TestMatchPathRulesBoundary(t *testing.T) {
 		t.Run(test.path, func(t *testing.T) {
 			defaultRules := rule.DefaultRules
 			for i := range defaultRules {
-				defaultRules[i].Options.WordBoundary = true
+				defaultRules[i].SetOptions(rule.Options{WordBoundary: true})
 			}
 			pr := MatchPathRules(defaultRules, test.path)
 			assert.Len(t, pr, test.matches)
diff --git a/pkg/rule/options.go b/pkg/rule/options.go
index f4e06b2d..c394f07b 100644
--- a/pkg/rule/options.go
+++ b/pkg/rule/options.go
@@ -2,6 +2,8 @@ package rule
 
 // Options are options that can be configured and applied on a per-rule basis
 type Options struct {
-	WordBoundary bool  `yaml:"word_boundary"`
-	IncludeNote  *bool `yaml:"include_note"`
+	WordBoundary      bool  `yaml:"word_boundary"`
+	WordBoundaryStart bool  `yaml:"word_boundary_start"`
+	WordBoundaryEnd   bool  `yaml:"word_boundary_end"`
+	IncludeNote       *bool `yaml:"include_note"`
 }
diff --git a/pkg/rule/rule.go b/pkg/rule/rule.go
index 4783a4a8..ec4f1914 100644
--- a/pkg/rule/rule.go
+++ b/pkg/rule/rule.go
@@ -11,6 +11,8 @@ import (
 
 var ignoreRuleRegex = regexp.MustCompile(`wokeignore:rule=(\S+)`)
 
+const wordBoundary = `\b`
+
 // Rule is a linter rule
 type Rule struct {
 	Name         string   `yaml:"name"`
@@ -20,15 +22,7 @@ type Rule struct {
 	Severity     Severity `yaml:"severity"`
 	Options      Options  `yaml:"options"`
 
-	reWordBoundary *regexp.Regexp
-	re             *regexp.Regexp
-}
-
-func (r *Rule) findAllStringSubmatchIndex(text string) [][]int {
-	if r.Options.WordBoundary {
-		return r.reWordBoundary.FindAllStringSubmatchIndex(text, -1)
-	}
-	return r.re.FindAllStringSubmatchIndex(text, -1)
+	re *regexp.Regexp
 }
 
 // FindMatchIndexes returns the start and end indexes for all rule findings for the text supplied.
@@ -40,7 +34,7 @@ func (r *Rule) FindMatchIndexes(text string) [][]int {
 	r.SetRegexp()
 
 	// Remove inline ignores from text to avoid matching against other rules
-	matches := r.findAllStringSubmatchIndex(removeInlineIgnore(text))
+	matches := r.re.FindAllStringSubmatchIndex(removeInlineIgnore(text), -1)
 	if matches == nil {
 		return [][]int(nil)
 	}
@@ -72,30 +66,48 @@ func (r *Rule) FindMatchIndexes(text string) [][]int {
 	return idx
 }
 
-// MatchString reports whether the string s
-// contains any match of the regular expression re.
-func (r *Rule) MatchString(s string, wordBoundary bool) bool {
-	if r.Disabled() {
-		return false
+// SetRegexp populates the regex for matching this rule.
+// This is meant to be idempotent, so calling it multiple times won't update the regex
+func (r *Rule) SetRegexp() {
+	if r.re != nil {
+		return
 	}
+	r.setRegex()
+}
 
-	r.SetRegexp()
+// SetOptions sets new Options for the Rule and updates the regex.
+func (r *Rule) SetOptions(o Options) {
+	r.Options = o
+	r.setRegex()
+}
+
+func (r *Rule) setRegex() {
+	group := strings.Join(escape(r.Terms), "|")
+	r.re = regexp.MustCompile(fmt.Sprintf(r.regexString(), group))
+}
 
-	if wordBoundary {
-		return r.reWordBoundary.MatchString(s)
+func (r *Rule) regexString() string {
+	regex := func(start, end string) string {
+		s := strings.Builder{}
+		s.WriteString(start)
+		s.WriteString("(%s)")
+		s.WriteString(end)
+		return s.String()
 	}
 
-	return r.re.MatchString(s)
-}
+	if r.Options.WordBoundary {
+		return regex(wordBoundary, wordBoundary)
+	}
 
-// SetRegexp populates the regex for matching this rule
-func (r *Rule) SetRegexp() {
-	if r.re != nil && r.reWordBoundary != nil {
-		return
+	start := ""
+	end := ""
+	if r.Options.WordBoundaryStart {
+		start = wordBoundary
 	}
-	group := strings.Join(escape(r.Terms), "|")
-	r.reWordBoundary = regexp.MustCompile(fmt.Sprintf(`(?i)\b(%s)\b`, group))
-	r.re = regexp.MustCompile(fmt.Sprintf(`(?i)(%s)`, group))
+	if r.Options.WordBoundaryEnd {
+		end = wordBoundary
+	}
+	return regex(start, end)
 }
 
 // Reason returns a human-readable reason for the rule finding
diff --git a/pkg/rule/rule_test.go b/pkg/rule/rule_test.go
index 029735a1..39910651 100644
--- a/pkg/rule/rule_test.go
+++ b/pkg/rule/rule_test.go
@@ -1,14 +1,27 @@
 package rule
 
 import (
-	"fmt"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
 )
 
-func TestRule_FindMatchIndexes(t *testing.T) {
+func testRuleWithOptions(o Options) Rule {
 	r := testRule()
+	r.SetOptions(o)
+	return r
+}
+
+func testRule() Rule {
+	return Rule{
+		Name:         "rule1",
+		Terms:        []string{"rule1", "rule-1"},
+		Alternatives: []string{"alt-rule1", "alt-rule-1"},
+		Severity:     SevWarn,
+	}
+}
+
+func TestRule_FindMatchIndexes(t *testing.T) {
 	tests := []struct {
 		text       string
 		expected   [][]int
@@ -20,12 +33,13 @@ func TestRule_FindMatchIndexes(t *testing.T) {
 		{"this string has finding with word boundary rule1rule-1", [][]int{{43, 48}, {48, 54}}, [][]int(nil)},
 	}
 	for _, test := range tests {
+		r := testRule()
 		got := r.FindMatchIndexes(test.text)
 		assert.Equal(t, test.expected, got)
 	}
 
-	r.Options.WordBoundary = true
 	for _, test := range tests {
+		r := testRuleWithOptions(Options{WordBoundary: true})
 		got := r.FindMatchIndexes(test.text)
 		assert.Equal(t, test.expectedWb, got)
 	}
@@ -53,15 +67,6 @@ func TestRule_ReasonWithNote(t *testing.T) {
 	assert.Equal(t, "`rule-1` may be insensitive, use `alt-rule1`, `alt-rule-1` instead (rule note here)", r.ReasonWithNote("rule-1"))
 }
 
-func testRule() Rule {
-	return Rule{
-		Name:         "rule1",
-		Terms:        []string{"rule1", "rule-1"},
-		Alternatives: []string{"alt-rule1", "alt-rule-1"},
-		Severity:     SevWarn,
-	}
-}
-
 func TestRule_CanIgnoreLine(t *testing.T) {
 	r := testRule()
 
@@ -89,27 +94,6 @@ func TestRule_CanIgnoreLine(t *testing.T) {
 	}
 }
 
-func TestRule_MatchString(t *testing.T) {
-	r := testRule()
-	tests := []struct {
-		s         string
-		wb        bool
-		assertion assert.BoolAssertionFunc
-	}{
-		{s: "this has rule1 in the middle with word boundaries", wb: true, assertion: assert.True},
-		{s: "this has rule1 in the middle", wb: false, assertion: assert.True},
-		{s: "rule1shouldn't match with word boundaries", wb: true, assertion: assert.False},
-		{s: "rule1should match without word boundaries", wb: false, assertion: assert.True},
-		{s: "thisrule1should match without word boundaries", wb: false, assertion: assert.True},
-	}
-	for _, tt := range tests {
-		t.Run(tt.s, func(t *testing.T) {
-			fmt.Println(r.MatchString(tt.s, tt.wb), tt.s)
-			tt.assertion(t, r.MatchString(tt.s, tt.wb))
-		})
-	}
-}
-
 func TestRule_EmptyTerms(t *testing.T) {
 	r := Rule{
 		Name:         "rule1",
@@ -126,8 +110,53 @@ func TestRule_EmptyTerms(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.s, func(t *testing.T) {
-			fmt.Println(r.MatchString(tt.s, tt.wb), tt.s)
-			tt.assertion(t, r.MatchString(tt.s, tt.wb))
+			r.SetOptions(Options{WordBoundary: tt.wb})
+			tt.assertion(t, len(r.FindMatchIndexes(tt.s)) > 0)
+		})
+	}
+}
+
+func TestRule_regexString(t *testing.T) {
+	tests := []struct {
+		desc     string
+		rule     Rule
+		expected string
+	}{
+		{
+			desc:     "default",
+			rule:     testRule(),
+			expected: `(%s)`,
+		},
+		{
+			desc:     "word boundary",
+			rule:     testRuleWithOptions(Options{WordBoundary: true}),
+			expected: `\b(%s)\b`,
+		},
+		{
+			desc:     "word boundary start",
+			rule:     testRuleWithOptions(Options{WordBoundaryStart: true}),
+			expected: `\b(%s)`,
+		},
+		{
+			desc:     "word boundary end",
+			rule:     testRuleWithOptions(Options{WordBoundaryEnd: true}),
+			expected: `(%s)\b`,
+		},
+		{
+			desc:     "word boundary start and end",
+			rule:     testRuleWithOptions(Options{WordBoundaryStart: true, WordBoundaryEnd: true}),
+			expected: `\b(%s)\b`,
+		},
+		{
+			// To show that enabling WordBoundary will win over other options
+			desc:     "word boundary and word boundary start/end false",
+			rule:     testRuleWithOptions(Options{WordBoundary: true, WordBoundaryStart: false, WordBoundaryEnd: false}),
+			expected: `\b(%s)\b`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			assert.Equal(t, tt.expected, tt.rule.regexString())
 		})
 	}
 }