Skip to content

Commit

Permalink
Use fastwalk instead of filepath.Walk
Browse files Browse the repository at this point in the history
  • Loading branch information
caitlinelfring committed Sep 4, 2020
1 parent d929348 commit 29c592a
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 27 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.14

require (
github.com/fatih/color v1.9.0
github.com/get-woke/fastwalk v1.0.0
github.com/get-woke/go-gitignore v1.1.1
github.com/rs/zerolog v1.19.0
github.com/spf13/cobra v1.0.0
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8
github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/get-woke/fastwalk v1.0.0 h1:Ti8z9nf231TUhLzBuJQV/rh/b5qiuyPunYun4FpEshQ=
github.com/get-woke/fastwalk v1.0.0/go.mod h1:WTDsePalrkZGMzdzn5YabupFqFmU/MjxGM2CSSc9sOo=
github.com/get-woke/go-gitignore v1.1.1 h1:iPdzLrxru1PsdVnm4qqncVMH+n4ePWEUIwXMDamfQqw=
github.com/get-woke/go-gitignore v1.1.1/go.mod h1:qbLKpA7cfqI6TqvUDGqE4hsYke8nuiKvmepVBFBwVh4=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
Expand Down Expand Up @@ -140,6 +142,7 @@ golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxb
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190828213141-aed303cbaa74 h1:4cFkmztxtMslUX2SctSl+blCyXfpzhGOy9LhKAqSMA4=
golang.org/x/tools v0.0.0-20190828213141-aed303cbaa74/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
Expand Down
35 changes: 8 additions & 27 deletions pkg/parser/parser.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
package parser

import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"sync"
"time"

"github.com/get-woke/woke/pkg/ignore"
"github.com/get-woke/woke/pkg/result"
"github.com/get-woke/woke/pkg/rule"
"github.com/get-woke/woke/pkg/util"
"github.com/get-woke/woke/pkg/walker"
"github.com/rs/zerolog/log"
)

Expand Down Expand Up @@ -78,7 +75,7 @@ func (p *Parser) processViolations(paths []string) (fr []result.FileResults) {

for r := range rchan {
if r.err != nil {
log.Error().Err(r.err).Msg("filepath.Walk error")
log.Error().Err(r.err).Msg("violations error")
}

sort.Sort(r.res)
Expand Down Expand Up @@ -109,7 +106,7 @@ func (p *Parser) processFiles(files <-chan string, rchan chan _result, done chan
func (p *Parser) processViolationInPath(path string, done chan bool, rchan chan _result) {
var wg sync.WaitGroup

files, errc := p.walkDir(path, done)
files := p.walkDir(path, done)

// run parallel, but bounded
numWorkerStr := util.GetEnvDefault("WORKER_POOL_COUNT", "0")
Expand All @@ -131,24 +128,14 @@ func (p *Parser) processViolationInPath(path string, done chan bool, rchan chan
p.processFiles(files, rchan, done, &wg)
}
wg.Wait()

if err := <-errc; err != nil {
rchan <- _result{result.FileResults{}, err}
}
}

func (p *Parser) walkDir(dirname string, done chan bool) (<-chan string, <-chan error) {
func (p *Parser) walkDir(dirname string, done chan bool) <-chan string {
paths := make(chan string)
errc := make(chan error, 1)

go func() {
defer close(paths)

errc <- filepath.Walk(dirname, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

_ = walker.Walk(dirname, func(path string, _ os.FileMode) error {
if p.Ignorer != nil && p.Ignorer.Match(path) {
return nil
}
Expand All @@ -157,18 +144,12 @@ func (p *Parser) walkDir(dirname string, done chan bool) (<-chan string, <-chan
return nil
}

select {
case paths <- path:
return nil
case <-done:
return errors.New("walk canceled")
case <-time.After(time.Second * 30):
return fmt.Errorf("walk timeout: %s", dirname)
}
paths <- path
return nil
})
}()

return paths, errc
return paths
}

// pathsIncludeStdin returns true if any element of the slice is stdin
Expand Down
10 changes: 10 additions & 0 deletions pkg/util/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,13 @@ import "fmt"
func MarkdownCodify(s string) string {
return fmt.Sprintf("`%s`", s)
}

// InSlice returns true if a string is found in the slice
func InSlice(s string, slice []string) bool {
for _, el := range slice {
if el == s {
return true
}
}
return false
}
20 changes: 20 additions & 0 deletions pkg/util/string_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package util

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -9,3 +10,22 @@ import (
func TestMarkdownCodify(t *testing.T) {
assert.Equal(t, "`mystring`", MarkdownCodify("mystring"))
}

func TestInSlice(t *testing.T) {
tests := []struct {
s string
sl []string
assertion assert.BoolAssertionFunc
}{
{"foo", []string{"foo", "bar"}, assert.True},
{"bar", []string{"foo", "baz"}, assert.False},
{"", []string{"", "baz"}, assert.True},
{"", []string{"foo", "baz"}, assert.False},
{"baz", []string{}, assert.False},
}
for i, tt := range tests {
t.Run(fmt.Sprintf("%s-%d", tt.s, i), func(t *testing.T) {
tt.assertion(t, InSlice(tt.s, tt.sl))
})
}
}
28 changes: 28 additions & 0 deletions pkg/walker/walker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package walker

import (
"os"
"path/filepath"

"github.com/get-woke/fastwalk"
)

// Walk is a helper function that will automatically skip the `.git` directory.
// fastwalk is a fork of code that is a better, faster version of filepath.Walk.
// tl;dr since filepath.Walk get a complete FileInfo for every file,
// it's inherently slow. See https://github.com/golang/go/issues/16399
func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
return fastwalk.Walk(root, func(path string, typ os.FileMode) error {
path = filepath.Clean(path)

if typ.IsDir() && isDotGit(path) {
return filepath.SkipDir
}

return walkFn(path, typ)
})
}

func isDotGit(path string) bool {
return filepath.Base(path) == ".git"
}
59 changes: 59 additions & 0 deletions pkg/walker/walker_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package walker

import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/stretchr/testify/assert"
)

func TestWalker_Walk(t *testing.T) {
dir, err := ioutil.TempDir(os.TempDir(), "")
assert.NoError(t, err)
assert.DirExists(t, dir)

for i := 0; i < 10; i++ {
var newDir string
if i%2 == 0 {
newDir = filepath.Join(dir, strconv.Itoa(i))
} else {
newDir = filepath.Join(dir, ".git")
}
assert.NoError(t, os.MkdirAll(newDir, 0777))

filename := filepath.Join(newDir, ".foo")
file, err := os.Create(filename)
assert.NoError(t, err)
assert.NoError(t, file.Close())
}

defer os.RemoveAll(dir)
err = Walk(dir, func(p string, typ os.FileMode) error {
assert.False(t, isDotGit(p), "path should not be returned in walk: %s", p)
return nil
})
assert.NoError(t, err)

}

func TestInSlice(t *testing.T) {
tests := []struct {
path string
assertion assert.BoolAssertionFunc
}{
{".git", assert.True},
{".github", assert.False},
{"/foo/bar/.git", assert.True},
{"/foo/.git/bar", assert.False},
{"/foo/.github", assert.False},
{"foo/.git", assert.True},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
tt.assertion(t, isDotGit(tt.path))
})
}
}

0 comments on commit 29c592a

Please sign in to comment.