Skip to content

Commit

Permalink
Merge pull request #3484 from ActiveState/DX-3032
Browse files Browse the repository at this point in the history
Add hashGlobs query to state service
  • Loading branch information
MDrakos authored Sep 13, 2024
2 parents 11c75f8 + e28c63b commit 46bf5c6
Show file tree
Hide file tree
Showing 302 changed files with 39,979 additions and 7,818 deletions.
75 changes: 75 additions & 0 deletions cmd/state-svc/internal/hash/file_hasher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package hash

import (
"encoding/base64"
"fmt"
"io"
"os"
"sort"
"time"

"github.com/ActiveState/cli/internal/errs"
"github.com/ActiveState/cli/internal/rtutils"
"github.com/cespare/xxhash"
"github.com/patrickmn/go-cache"
)

// fileCache is the small key/value cache contract that FileHasher relies on.
// Keeping it as an interface lets tests plug in an instrumented cache.
type fileCache interface {
// Get returns the value stored under key and whether the key was present.
Get(key string) (interface{}, bool)
// Set stores value under key using the given expiration.
Set(key string, value interface{}, expiration time.Duration)
}

// FileHasher computes a combined hash over a set of files. Individual file
// hashes are memoized in cache, keyed by file name and modification time.
type FileHasher struct {
// cache holds previously computed per-file hashes.
cache fileCache
}

// NewFileHasher builds a FileHasher backed by an in-memory go-cache instance.
// Both arguments to cache.New are set to 24 hours.
func NewFileHasher() *FileHasher {
	const day = 24 * time.Hour

	fh := new(FileHasher)
	fh.cache = cache.New(day, day)
	return fh
}

// HashFiles returns one base64-encoded xxhash digest that represents the
// contents of all the given files.
//
// Paths are processed in sorted order, so the result does not depend on the
// order in which they are supplied. The caller's slice is not modified.
// Each file's own hash is cached under its name and modification time, so a
// file is only read again when its modification time changes.
//
// An error is returned when a file cannot be opened, stat'ed, read or closed.
func (fh *FileHasher) HashFiles(files []string) (string, error) {
	// Sort a copy: sorting in place would reorder the caller's slice.
	sorted := append([]string(nil), files...)
	sort.Strings(sorted)

	hasher := xxhash.New()
	for _, f := range sorted {
		hash, err := fh.hashFile(f)
		if err != nil {
			return "", err
		}

		// Incorporate the individual file hash into the overall hash. The file
		// hash is already a hex string; %x encodes it once more. That is kept
		// unchanged so previously computed results remain stable.
		fmt.Fprintf(hasher, "%x", hash)
	}

	return base64.StdEncoding.EncodeToString(hasher.Sum(nil)), nil
}

// hashFile returns the hex-encoded xxhash of a single file, using the cached
// value when one exists for the file's current modification time.
func (fh *FileHasher) hashFile(path string) (_ string, rerr error) {
	file, err := os.Open(path)
	if err != nil {
		// file is nil on failure, so the path has to come from the argument.
		return "", errs.Wrap(err, "Could not open file: %s", path)
	}
	// Deferred per file rather than per HashFiles call, so each handle is
	// released as soon as that file has been processed.
	defer rtutils.Closer(file.Close, &rerr)

	fileInfo, err := file.Stat()
	if err != nil {
		return "", errs.Wrap(err, "Could not stat file: %s", path)
	}

	key := cacheKey(path, fileInfo.ModTime())
	if cached, found := fh.cache.Get(key); found {
		hash, ok := cached.(string)
		if !ok {
			return "", errs.New("Could not convert cache value to string")
		}
		return hash, nil
	}

	fileHasher := xxhash.New()
	if _, err := io.Copy(fileHasher, file); err != nil {
		return "", errs.Wrap(err, "Could not hash file: %s", path)
	}

	hash := fmt.Sprintf("%x", fileHasher.Sum(nil))
	// Only freshly computed hashes need to be stored.
	fh.cache.Set(key, hash, cache.NoExpiration)

	return hash, nil
}

// cacheKey builds the cache lookup key for a file: its name followed by a
// hyphen and its modification time in Unix nanoseconds.
func cacheKey(file string, modTime time.Time) string {
	// UnixNano does not depend on the time's location, so no zone conversion
	// is needed to get a stable value.
	stamp := modTime.UnixNano()
	return fmt.Sprintf("%s-%d", file, stamp)
}
228 changes: 228 additions & 0 deletions cmd/state-svc/internal/hash/file_hasher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
package hash

import (
"os"
"testing"
"time"

"github.com/patrickmn/go-cache"
"github.com/stretchr/testify/assert"
)

// testCache wraps a go-cache instance and records which looked-up keys were
// found (hits) and which were not (misses), so tests can assert on how
// FileHasher uses its cache.
type testCache struct {
// cache is the underlying store that actually holds the values.
cache *cache.Cache
// hits lists the keys of successful lookups, in call order.
hits []string
// misses lists the keys of unsuccessful lookups, in call order.
misses []string
}

// Get looks key up in the wrapped cache and returns the result unchanged,
// appending the key to hits when it was found and to misses otherwise.
func (tc *testCache) Get(key string) (interface{}, bool) {
	value, found := tc.cache.Get(key)

	// Pick the list matching the lookup outcome, then record the key in it.
	bucket := &tc.misses
	if found {
		bucket = &tc.hits
	}
	*bucket = append(*bucket, key)

	return value, found
}

// Set stores value under key in the wrapped cache.
//
// The expiration is forwarded as given so this test double applies the same
// expiry the code under test asked for, rather than silently replacing it
// with the wrapped cache's default.
func (tc *testCache) Set(key string, value interface{}, expiration time.Duration) {
	tc.cache.Set(key, value, expiration)
}

// TestFileHasher_HashFiles verifies that hashing the same files twice with a
// default FileHasher yields the same result.
func TestFileHasher_HashFiles(t *testing.T) {
	paths := []string{
		createTempFile(t, "file1"),
		createTempFile(t, "file2"),
	}
	fh := NewFileHasher()

	first, err := fh.HashFiles(paths)
	assert.NoError(t, err)

	second, err := fh.HashFiles(paths)
	assert.NoError(t, err)

	assert.Equal(t, first, second)
}

// TestFileHasher_CacheHit verifies that a second hash of unchanged files is
// served from the cache: two misses on the first pass, two hits on the second.
func TestFileHasher_CacheHit(t *testing.T) {
	paths := []string{
		createTempFile(t, "file1"),
		createTempFile(t, "file2"),
	}

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	first, err := fh.HashFiles(paths)
	assert.NoError(t, err)

	second, err := fh.HashFiles(paths)
	assert.NoError(t, err)

	assert.Equal(t, first, second)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 2)
}

// TestFileHasher_CacheMiss verifies that changing a file's modification time
// invalidates its cache entry while the overall hash stays the same, since the
// content is untouched. Expected lookups: two misses on the first pass, then
// one miss (file1) and one hit (file2) on the second.
func TestFileHasher_CacheMiss(t *testing.T) {
	file1 := createTempFile(t, "file1")
	file2 := createTempFile(t, "file2")

	tc := &testCache{
		cache: cache.New(cache.NoExpiration, cache.NoExpiration),
	}

	hasher := &FileHasher{
		cache: tc,
	}

	hash1, err := hasher.HashFiles([]string{file1, file2})
	assert.NoError(t, err)

	// Move the mod time far from the creation time. Using "now" could yield
	// the same timestamp on file systems with coarse mod time resolution,
	// which would turn the expected cache miss into a hit.
	changed := time.Now().Add(time.Hour)
	if err := os.Chtimes(file1, changed, changed); err != nil {
		t.Fatal(err)
	}

	file, err := os.Open(file1)
	if err != nil {
		t.Fatal(err)
	}
	err = file.Sync()
	assert.NoError(t, err)
	// Release the handle; it is not needed beyond the sync.
	assert.NoError(t, file.Close())

	hash2, err := hasher.HashFiles([]string{file1, file2})
	assert.NoError(t, err)

	assert.Equal(t, hash1, hash2)
	assert.Len(t, tc.hits, 1)
	assert.Len(t, tc.misses, 3)
}

// TestFileHasher_ContentAgnostic verifies that two distinct files with the
// same content are cached separately: hashing them twice gives equal results
// with two misses followed by two hits.
func TestFileHasher_ContentAgnostic(t *testing.T) {
	// Same content, but different names and modification times.
	first := createTempFile(t, "file1")
	time.Sleep(1 * time.Millisecond) // keeps the mod times apart
	second := createTempFile(t, "file1")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	hashOnce := func() string {
		sum, err := fh.HashFiles([]string{first, second})
		assert.NoError(t, err)
		return sum
	}

	initial := hashOnce()
	repeated := hashOnce()

	assert.Equal(t, initial, repeated)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 2)
}

// TestFileHasher_NotEqualFileAdded verifies that adding a file to the set
// changes the overall hash, while the files already seen are cache hits.
func TestFileHasher_NotEqualFileAdded(t *testing.T) {
	file1 := createTempFile(t, "file1")
	file2 := createTempFile(t, "file2")
	file3 := createTempFile(t, "file3")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	hashOf := func(paths ...string) string {
		sum, err := fh.HashFiles(paths)
		assert.NoError(t, err)
		return sum
	}

	withTwo := hashOf(file1, file2)
	withThree := hashOf(file1, file2, file3)

	assert.NotEqual(t, withTwo, withThree)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 3)
}

// TestFileHasher_NotEqualFileRemoved verifies that dropping a file from the
// set changes the overall hash, while the remaining files are cache hits.
func TestFileHasher_NotEqualFileRemoved(t *testing.T) {
	file1 := createTempFile(t, "file1")
	file2 := createTempFile(t, "file2")
	file3 := createTempFile(t, "file3")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	hashOf := func(paths ...string) string {
		sum, err := fh.HashFiles(paths)
		assert.NoError(t, err)
		return sum
	}

	withThree := hashOf(file1, file2, file3)
	withTwo := hashOf(file1, file2)

	assert.NotEqual(t, withThree, withTwo)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 3)
}

// TestFileHasher_NotEqualContentChanged verifies that rewriting a file's
// content changes the overall hash and causes a cache miss for that file only.
func TestFileHasher_NotEqualContentChanged(t *testing.T) {
	file1 := createTempFile(t, "file1")
	file2 := createTempFile(t, "file2")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	hashBoth := func() string {
		sum, err := fh.HashFiles([]string{file1, file2})
		assert.NoError(t, err)
		return sum
	}

	before := hashBoth()
	unchanged := hashBoth()
	assert.Equal(t, before, unchanged)

	// Rewrite file1 after a short pause so its mod time differs and the cache
	// entry is not reused. Without the pause, the speed of the test combined
	// with the file system's mod time resolution could leave the mod time
	// unchanged.
	time.Sleep(10 * time.Millisecond)
	if err := os.WriteFile(file1, []byte("file1_changed"), 0644); err != nil {
		t.Fatal(err)
	}

	after := hashBoth()

	assert.NotEqual(t, before, after)
	assert.Len(t, recorder.hits, 3)
	assert.Len(t, recorder.misses, 3)
}

// createTempFile writes content to a new, uniquely named temporary file and
// returns its path. The file is removed again when the test (or subtest) that
// owns t finishes, so test runs do not leave files behind. Any failure to
// create, write or close the file aborts the test.
func createTempFile(t *testing.T, content string) string {
	t.Helper() // report failures at the caller's line

	tmpfile, err := os.CreateTemp("", "testfile")
	if err != nil {
		t.Fatal(err)
	}

	name := tmpfile.Name()
	// Best-effort removal: a failure to delete should not fail the test.
	t.Cleanup(func() { _ = os.Remove(name) })

	if _, err := tmpfile.WriteString(content); err != nil {
		// Release the handle; the write error is the one worth reporting.
		_ = tmpfile.Close()
		t.Fatal(err)
	}
	if err := tmpfile.Close(); err != nil {
		t.Fatal(err)
	}

	return name
}
21 changes: 21 additions & 0 deletions cmd/state-svc/internal/resolver/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"context"
"encoding/json"
"os"
"path/filepath"
"runtime/debug"
"sort"
"strconv"
"time"

"github.com/ActiveState/cli/cmd/state-svc/internal/hash"
"github.com/ActiveState/cli/cmd/state-svc/internal/messages"
"github.com/ActiveState/cli/cmd/state-svc/internal/rtwatcher"
genserver "github.com/ActiveState/cli/cmd/state-svc/internal/server/generated"
Expand Down Expand Up @@ -36,6 +38,7 @@ type Resolver struct {
updatePoller *poller.Poller
authPoller *poller.Poller
projectIDCache *projectcache.ID
fileHasher *hash.FileHasher
an *sync.Client
anForClient *sync.Client // Use separate client for events sent through service so we don't contaminate one with the other
rtwatch *rtwatcher.Watcher
Expand Down Expand Up @@ -81,6 +84,7 @@ func New(cfg *config.Instance, an *sync.Client, auth *authentication.Auth) (*Res
pollUpdate,
pollAuth,
projectcache.NewID(),
hash.NewFileHasher(),
an,
anForClient,
rtwatcher.New(cfg, anForClient),
Expand Down Expand Up @@ -263,6 +267,8 @@ func (r *Resolver) GetProcessesInUse(ctx context.Context, execDir string) ([]*gr
}

func (r *Resolver) GetJwt(ctx context.Context) (*graph.Jwt, error) {
defer func() { handlePanics(recover(), debug.Stack()) }()

if err := r.auth.MaybeRenew(); err != nil {
return nil, errs.Wrap(err, "Could not renew auth token")
}
Expand Down Expand Up @@ -296,6 +302,21 @@ func (r *Resolver) GetJwt(ctx context.Context) (*graph.Jwt, error) {
return jwt, nil
}

// HashGlobs expands every glob pattern with filepath.Glob and returns the
// hash that the resolver's file hasher computes over all matched paths.
// A pattern that filepath.Glob rejects results in an error. Panics are
// recovered and passed to handlePanics.
func (r *Resolver) HashGlobs(ctx context.Context, globs []string) (string, error) {
	defer func() { handlePanics(recover(), debug.Stack()) }()

	var matched []string
	for i := range globs {
		pattern := globs[i]

		found, err := filepath.Glob(pattern)
		if err != nil {
			return "", errs.Wrap(err, "Could not match glob: %s", pattern)
		}

		matched = append(matched, found...)
	}

	hash, err := r.fileHasher.HashFiles(matched)
	return hash, err
}

func handlePanics(recovered interface{}, stack []byte) {
if recovered != nil {
multilog.Error("Panic: %v", recovered)
Expand Down
Loading

0 comments on commit 46bf5c6

Please sign in to comment.