From 29c592a7a5b280b914294070108cc52ac2785dfa Mon Sep 17 00:00:00 2001 From: Caitlin Elfring Date: Fri, 4 Sep 2020 13:13:51 -0400 Subject: [PATCH] Use fastwalk instead of filepath.Walk --- go.mod | 1 + go.sum | 3 ++ pkg/parser/parser.go | 35 ++++++----------------- pkg/util/string.go | 10 +++++++ pkg/util/string_test.go | 20 +++++++++++++ pkg/walker/walker.go | 28 +++++++++++++++++++ pkg/walker/walker_test.go | 59 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 129 insertions(+), 27 deletions(-) create mode 100644 pkg/walker/walker.go create mode 100644 pkg/walker/walker_test.go diff --git a/go.mod b/go.mod index bb931868..bc8db986 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.14 require ( github.com/fatih/color v1.9.0 + github.com/get-woke/fastwalk v1.0.0 github.com/get-woke/go-gitignore v1.1.1 github.com/rs/zerolog v1.19.0 github.com/spf13/cobra v1.0.0 diff --git a/go.sum b/go.sum index 73b5ea24..93599e63 100644 --- a/go.sum +++ b/go.sum @@ -22,6 +22,8 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8 github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/get-woke/fastwalk v1.0.0 h1:Ti8z9nf231TUhLzBuJQV/rh/b5qiuyPunYun4FpEshQ= +github.com/get-woke/fastwalk v1.0.0/go.mod h1:WTDsePalrkZGMzdzn5YabupFqFmU/MjxGM2CSSc9sOo= github.com/get-woke/go-gitignore v1.1.1 h1:iPdzLrxru1PsdVnm4qqncVMH+n4ePWEUIwXMDamfQqw= github.com/get-woke/go-gitignore v1.1.1/go.mod h1:qbLKpA7cfqI6TqvUDGqE4hsYke8nuiKvmepVBFBwVh4= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -140,6 +142,7 @@ golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190828213141-aed303cbaa74 h1:4cFkmztxtMslUX2SctSl+blCyXfpzhGOy9LhKAqSMA4= golang.org/x/tools v0.0.0-20190828213141-aed303cbaa74/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go index 7f423f92..2516861f 100644 --- a/pkg/parser/parser.go +++ b/pkg/parser/parser.go @@ -1,19 +1,16 @@ package parser import ( - "errors" - "fmt" "os" - "path/filepath" "sort" "strconv" "sync" - "time" "github.com/get-woke/woke/pkg/ignore" "github.com/get-woke/woke/pkg/result" "github.com/get-woke/woke/pkg/rule" "github.com/get-woke/woke/pkg/util" + "github.com/get-woke/woke/pkg/walker" "github.com/rs/zerolog/log" ) @@ -78,7 +75,7 @@ func (p *Parser) processViolations(paths []string) (fr []result.FileResults) { for r := range rchan { if r.err != nil { - log.Error().Err(r.err).Msg("filepath.Walk error") + log.Error().Err(r.err).Msg("violations error") } sort.Sort(r.res) @@ -109,7 +106,7 @@ func (p *Parser) processFiles(files <-chan string, rchan chan _result, done chan func (p *Parser) processViolationInPath(path string, done chan bool, rchan chan _result) { var wg sync.WaitGroup - files, errc := p.walkDir(path, done) + files := p.walkDir(path, done) // run parallel, but bounded numWorkerStr := util.GetEnvDefault("WORKER_POOL_COUNT", "0") @@ -131,24 +128,14 @@ func (p *Parser) processViolationInPath(path string, done chan bool, rchan chan p.processFiles(files, rchan, done, &wg) } wg.Wait() - - if err := <-errc; err != nil { - rchan <- _result{result.FileResults{}, err} - } } -func (p *Parser) walkDir(dirname string, done chan bool) (<-chan string, <-chan error) { +func (p *Parser) walkDir(dirname string, done chan bool) <-chan string { paths := make(chan string) - errc := make(chan error, 1) go func() { defer close(paths) - - errc <- filepath.Walk(dirname, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - + _ = walker.Walk(dirname, func(path string, _ os.FileMode) error { if p.Ignorer != nil && p.Ignorer.Match(path) { return nil } @@ -157,18 +144,12 @@ func (p *Parser) walkDir(dirname string, done chan bool) (<-chan string, <-chan return nil } - select { - case paths <- path: - return nil - case <-done: - return errors.New("walk canceled") - case <-time.After(time.Second * 30): - return fmt.Errorf("walk timeout: %s", dirname) - } + paths <- path + return nil }) }() - return paths, errc + return paths } // pathsIncludeStdin returns true if any element of the slice is stdin diff --git a/pkg/util/string.go b/pkg/util/string.go index 81df1b9a..9482f87c 100644 --- a/pkg/util/string.go +++ b/pkg/util/string.go @@ -7,3 +7,13 @@ import "fmt" func MarkdownCodify(s string) string { return fmt.Sprintf("`%s`", s) } + +// InSlice returns true if a string is found in the slice +func InSlice(s string, slice []string) bool { + for _, el := range slice { + if el == s { + return true + } + } + return false +} diff --git a/pkg/util/string_test.go b/pkg/util/string_test.go index 68f57a41..6124b04e 100644 --- a/pkg/util/string_test.go +++ b/pkg/util/string_test.go @@ -1,6 +1,7 @@ package util import ( + "fmt" "testing" "github.com/stretchr/testify/assert" @@ -9,3 +10,22 @@ import ( func TestMarkdownCodify(t *testing.T) { assert.Equal(t, "`mystring`", MarkdownCodify("mystring")) } + +func TestInSlice(t *testing.T) { + tests := []struct { + s string + sl []string + assertion assert.BoolAssertionFunc + }{ + {"foo", []string{"foo", "bar"}, assert.True}, + {"bar", []string{"foo", "baz"}, assert.False}, + {"", []string{"", "baz"}, assert.True}, + {"", []string{"foo", "baz"}, assert.False}, + {"baz", []string{}, assert.False}, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("%s-%d", tt.s, i), func(t *testing.T) { + tt.assertion(t, InSlice(tt.s, tt.sl)) + }) + } +} diff --git a/pkg/walker/walker.go b/pkg/walker/walker.go new file mode 100644 index 00000000..3d23232a --- /dev/null +++ b/pkg/walker/walker.go @@ -0,0 +1,28 @@ +package walker + +import ( + "os" + "path/filepath" + + "github.com/get-woke/fastwalk" +) + +// Walk is a helper function that will automatically skip the `.git` directory. +// fastwalk is a fork of code that is a better, faster version of filepath.Walk. +// tl;dr since filepath.Walk get a complete FileInfo for every file, +// it's inherently slow. See https://github.com/golang/go/issues/16399 +func Walk(root string, walkFn func(path string, typ os.FileMode) error) error { + return fastwalk.Walk(root, func(path string, typ os.FileMode) error { + path = filepath.Clean(path) + + if typ.IsDir() && isDotGit(path) { + return filepath.SkipDir + } + + return walkFn(path, typ) + }) +} + +func isDotGit(path string) bool { + return filepath.Base(path) == ".git" +} diff --git a/pkg/walker/walker_test.go b/pkg/walker/walker_test.go new file mode 100644 index 00000000..1ac3b884 --- /dev/null +++ b/pkg/walker/walker_test.go @@ -0,0 +1,59 @@ +package walker + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWalker_Walk(t *testing.T) { + dir, err := ioutil.TempDir(os.TempDir(), "") + assert.NoError(t, err) + assert.DirExists(t, dir) + + for i := 0; i < 10; i++ { + var newDir string + if i%2 == 0 { + newDir = filepath.Join(dir, strconv.Itoa(i)) + } else { + newDir = filepath.Join(dir, ".git") + } + assert.NoError(t, os.MkdirAll(newDir, 0777)) + + filename := filepath.Join(newDir, ".foo") + file, err := os.Create(filename) + assert.NoError(t, err) + assert.NoError(t, file.Close()) + } + + defer os.RemoveAll(dir) + err = Walk(dir, func(p string, typ os.FileMode) error { + assert.False(t, isDotGit(p), "path should not be returned in walk: %s", p) + return nil + }) + assert.NoError(t, err) + +} + +func TestInSlice(t *testing.T) { + tests := []struct { + path string + assertion assert.BoolAssertionFunc + }{ + {".git", assert.True}, + {".github", assert.False}, + {"/foo/bar/.git", assert.True}, + {"/foo/.git/bar", assert.False}, + {"/foo/.github", assert.False}, + {"foo/.git", assert.True}, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + tt.assertion(t, isDotGit(tt.path)) + }) + } +}