From c86fd4d47f3ea7f790e103dcd9938086d324e0fa Mon Sep 17 00:00:00 2001 From: Sander Stokseth Skjulsvik Date: Sun, 26 May 2024 20:08:05 +0200 Subject: [PATCH] feat: comparedirs multithread (#19) Added compare dir with support for multithread. And refactored to mostly implement a strategy pattern * fixed test status badge * Selecting run method with passing runfunc around. This is a start of a strategy pattern * Implemented strategy pattern for dupes (I think) * Implemented strategy pattern for compareDirs * Moved func selection to cli.go * runComparison now actually uses compFunc method, and we are adding paths to comparator struct * Selecting the correct compare dirs functions for testing * Added some error handling for cli input * moved IsFile to files, and added func AddBarDirSize --------- Co-authored-by: sander skjulsvik --- MAKEFILE | 2 + README.md | 3 +- dupes/lib/common/{main.go => files.go} | 15 --- .../common/{main_test.go => files_test.go} | 0 dupes/lib/common/run.go | 24 +++++ dupes/lib/producerConsumer/main.go | 28 +++--- dupes/lib/producerConsumer/main_test.go | 9 +- dupes/lib/singleThread/main.go | 31 +----- dupes/lib/test/run.go | 5 +- dupes/main.go | 56 +++++++++-- dupesCompareDirs/lib/cli.go | 61 +++++++++++- dupesCompareDirs/lib/lib.go | 95 ++++++++++++------- dupesCompareDirs/lib/lib_test.go | 60 ++++++------ dupesCompareDirs/main.go | 60 +++++++----- libs/files/lib.go | 20 ++-- libs/progressbar/progress.go | 32 +++++++ 16 files changed, 331 insertions(+), 170 deletions(-) rename dupes/lib/common/{main.go => files.go} (61%) rename dupes/lib/common/{main_test.go => files_test.go} (100%) create mode 100644 dupes/lib/common/run.go diff --git a/MAKEFILE b/MAKEFILE index c3de4b5..646ac39 100644 --- a/MAKEFILE +++ b/MAKEFILE @@ -4,6 +4,8 @@ actions: win-build: go build -o bin\\ .\\... +win-test: + go test .\\... 
win-deps: choco install golangci-lint act-cli diff --git a/README.md b/README.md index 9fc83fb..98a7cbe 100644 --- a/README.md +++ b/README.md @@ -3,5 +3,4 @@ ## Statuses - -[!Test Status](https://github.com/sander-skjulsvik/tools/actions/workflows/test.yml/badge.svg) +![Test Status](https://github.com/sander-skjulsvik/tools/actions/workflows/test.yml/badge.svg) diff --git a/dupes/lib/common/main.go b/dupes/lib/common/files.go similarity index 61% rename from dupes/lib/common/main.go rename to dupes/lib/common/files.go index 368a8cf..8702333 100644 --- a/dupes/lib/common/main.go +++ b/dupes/lib/common/files.go @@ -8,16 +8,8 @@ import ( "fmt" "io" "os" - - "github.com/sander-skjulsvik/tools/libs/progressbar" ) -// Run is the main function to run for consumers of this lib. -// First arg is the path to the folder, -type Run func(string) *Dupes - -type RunWithProgressBar func(string, *progressbar.ProgressBar) *Dupes - type File struct { Path string Hash string @@ -27,13 +19,6 @@ func HashString(b []byte) string { return hex.EncodeToString(b) } -func IsFile(f os.FileInfo) bool { - if f == nil { - panic(fmt.Errorf("file info is nil")) - } - return f.Mode().IsRegular() -} - func HashFile(path string) (string, error) { f, err := os.Open(path) if err != nil { diff --git a/dupes/lib/common/main_test.go b/dupes/lib/common/files_test.go similarity index 100% rename from dupes/lib/common/main_test.go rename to dupes/lib/common/files_test.go diff --git a/dupes/lib/common/run.go b/dupes/lib/common/run.go new file mode 100644 index 0000000..32f68fe --- /dev/null +++ b/dupes/lib/common/run.go @@ -0,0 +1,24 @@ +package common + +import "github.com/sander-skjulsvik/tools/libs/progressbar" + +// Run is the main function to run for consumers of this lib. 
+// First arg is the path to the folder, +type Run func(string, progressbar.ProgressBar) *Dupes + +type Runner struct { + RunFunc Run + ProgressBar progressbar.ProgressBar + OutputJson bool +} + +func NewRunner(runFunc Run, bar progressbar.ProgressBar) *Runner { + return &Runner{ + RunFunc: runFunc, + ProgressBar: bar, + } +} + +func (r *Runner) Run(path string) *Dupes { + return r.RunFunc(path, r.ProgressBar) +} diff --git a/dupes/lib/producerConsumer/main.go b/dupes/lib/producerConsumer/main.go index 02a6933..27cb171 100644 --- a/dupes/lib/producerConsumer/main.go +++ b/dupes/lib/producerConsumer/main.go @@ -7,9 +7,10 @@ import ( "sync" "github.com/sander-skjulsvik/tools/dupes/lib/common" + "github.com/sander-skjulsvik/tools/libs/progressbar" ) -// Works like a generator, yelding all regular files +// Works like a generator, yielding all regular files func getFiles(root string, filePaths chan<- string) { filepath.Walk(root, func(path string, info fs.FileInfo, err error) error { if err != nil { @@ -37,17 +38,16 @@ func appendFileTreadSafe(dupes *common.Dupes, path string, lock *sync.Mutex) { // dupes.ProgressBar.Add1() } -func ProcessFiles(filePaths <-chan string) *common.Dupes { +func ProcessFiles(filePaths <-chan string, bar progressbar.ProgressBar) *common.Dupes { dupes := common.NewDupes() wg := sync.WaitGroup{} dupesWl := sync.Mutex{} - // if chans.IsClosed(filePaths) { - // log.Fatalln("Chan closed before managed to access it 1") - // } + for filePath := range filePaths { wg.Add(1) go func(fp string) { appendFileTreadSafe(&dupes, fp, &dupesWl) + bar.AddFileSize(filePath) wg.Done() }(filePath) } @@ -55,7 +55,7 @@ func ProcessFiles(filePaths <-chan string) *common.Dupes { return &dupes } -func ProcessFilesNCunsumers(filePaths <-chan string, numberOfConsumers int, doneWg *sync.WaitGroup) *common.Dupes { +func ProcessFilesNConsumers(filePaths <-chan string, numberOfConsumers int, bar progressbar.ProgressBar) *common.Dupes { dupes := common.NewDupes() wg := 
sync.WaitGroup{} dupesWl := sync.Mutex{} @@ -64,20 +64,26 @@ func ProcessFilesNCunsumers(filePaths <-chan string, numberOfConsumers int, done go func() { for filePath := range filePaths { appendFileTreadSafe(&dupes, filePath, &dupesWl) + bar.AddFileSize(filePath) } wg.Done() }() } wg.Wait() - doneWg.Done() return &dupes } -func Run(path string) *common.Dupes { +func Run(path string, bar progressbar.ProgressBar) *common.Dupes { filePaths := make(chan string) go getFiles(path, filePaths) - // sleep 10 seconds - dupes := ProcessFiles(filePaths) - // storer(files) + dupes := ProcessFiles(filePaths, bar) return dupes } + +func GetRunNThreads(n int) common.Run { + return func(path string, bar progressbar.ProgressBar) *common.Dupes { + filePaths := make(chan string) + go getFiles(path, filePaths) + return ProcessFilesNConsumers(filePaths, n, bar) + } +} diff --git a/dupes/lib/producerConsumer/main_test.go b/dupes/lib/producerConsumer/main_test.go index f37eeaa..3e21528 100644 --- a/dupes/lib/producerConsumer/main_test.go +++ b/dupes/lib/producerConsumer/main_test.go @@ -11,6 +11,7 @@ import ( "github.com/sander-skjulsvik/tools/dupes/lib/common" "github.com/sander-skjulsvik/tools/dupes/lib/test" + "github.com/sander-skjulsvik/tools/libs/progressbar" "gotest.tools/assert" ) @@ -275,7 +276,7 @@ func TestProcessFiles(t *testing.T) { var d *common.Dupes wg.Add(1) go func() { - d = ProcessFiles(filePaths) + d = ProcessFiles(filePaths, progressbar.ProgressBarMoc{}) wg.Done() }() filePaths <- filepath.Clean(path) @@ -331,7 +332,7 @@ func TestProcessFiles(t *testing.T) { var d *common.Dupes wg.Add(1) go func() { - d = ProcessFiles(filePaths) + d = ProcessFiles(filePaths, progressbar.ProgressBarMoc{}) wg.Done() }() wgAdd := sync.WaitGroup{} @@ -392,7 +393,7 @@ func TestProcessFilesNConsumers(t *testing.T) { doneWg := sync.WaitGroup{} doneWg.Add(1) go func() { - d = ProcessFilesNCunsumers(filePaths, 3, &doneWg) + d = ProcessFilesNConsumers(filePaths, 3, progressbar.ProgressBarMoc{}) 
wg.Done() }() filePaths <- filepath.Clean(path) @@ -450,7 +451,7 @@ func TestProcessFilesNConsumers(t *testing.T) { doneWg := sync.WaitGroup{} doneWg.Add(1) go func() { - d = ProcessFilesNCunsumers(filePaths, 3, &doneWg) + d = ProcessFilesNConsumers(filePaths, 3, progressbar.ProgressBarMoc{}) wg.Done() }() wgAdd := sync.WaitGroup{} diff --git a/dupes/lib/singleThread/main.go b/dupes/lib/singleThread/main.go index b076286..bd0e827 100644 --- a/dupes/lib/singleThread/main.go +++ b/dupes/lib/singleThread/main.go @@ -6,42 +6,17 @@ import ( "path/filepath" "github.com/sander-skjulsvik/tools/dupes/lib/common" + "github.com/sander-skjulsvik/tools/libs/files" "github.com/sander-skjulsvik/tools/libs/progressbar" ) -func Run(src string) *common.Dupes { +func Run(src string, bar progressbar.ProgressBar) *common.Dupes { dupes := &common.Dupes{ D: map[string]*common.Dupe{}, - // ProgressBar: common.NewSchollzProgressbar(), } err := filepath.Walk(src, func(path string, info fs.FileInfo, err error) error { - isFile := common.IsFile(info) - if !isFile { - return nil - } - - dupes, err = dupes.Append(path) - if err != nil { - return nil - } - - return nil - }) - if err != nil { - log.Fatalf("Failed to walk src: %s, with err: %s", src, err.Error()) - } - return dupes -} - -func RunWithProgressBar(src string, bar progressbar.ProgressBar) *common.Dupes { - dupes := &common.Dupes{ - D: map[string]*common.Dupe{}, - // ProgressBar: common.NewSchollzProgressbar(), - } - - err := filepath.Walk(src, func(path string, info fs.FileInfo, err error) error { - isFile := common.IsFile(info) + isFile := files.IsFile(info) if !isFile { return nil } diff --git a/dupes/lib/test/run.go b/dupes/lib/test/run.go index 54d714a..0102b62 100644 --- a/dupes/lib/test/run.go +++ b/dupes/lib/test/run.go @@ -9,6 +9,7 @@ import ( "github.com/sander-skjulsvik/tools/dupes/lib/common" "github.com/sander-skjulsvik/tools/libs/collections" + "github.com/sander-skjulsvik/tools/libs/progressbar" ) func TestRun(path 
string, run common.Run, t *testing.T) { @@ -17,7 +18,7 @@ func TestRun(path string, run common.Run, t *testing.T) { // Setup the expected dupes SetupExpectedDupes(path) // Run the run function to find the dupes - calculatedDupes := run(path) + calculatedDupes := run(path, progressbar.ProgressBarMoc{}) // Check if the expected dupes are found CheckExpectedDupes(GetExpectedDupes(path), *calculatedDupes, t) } @@ -65,7 +66,7 @@ func TestRunManyFiles(path string, run common.Run, t *testing.T) { GenerateNestedStructure(baseDir, numLevels, numFoldersPerLevel, numFilesPerFolder, content) fmt.Println("Nested folder structure generated successfully.") - run(baseDir) + run(baseDir, progressbar.ProgressBarMoc{}) fmt.Printf("Done running! \n") } diff --git a/dupes/main.go b/dupes/main.go index 8c39799..fac9df5 100644 --- a/dupes/main.go +++ b/dupes/main.go @@ -9,6 +9,7 @@ import ( "github.com/sander-skjulsvik/tools/dupes/lib/common" producerConsumer "github.com/sander-skjulsvik/tools/dupes/lib/producerConsumer" singleThread "github.com/sander-skjulsvik/tools/dupes/lib/singleThread" + "github.com/sander-skjulsvik/tools/libs/progressbar" ) func main() { @@ -16,11 +17,17 @@ func main() { method string path string presentOnlyDupes bool + useProgressBar bool + presentJson bool + nThreads int ) - flag.StringVar(&method, "method", "single", "Method (single or producerConsumer)") + flag.StringVar(&method, "method", "single", "Method (single, producerConsumer or nThreads)") + flag.IntVar(&nThreads, "nThreads", 0, "Number of threads to use, ignored unless nThreads method is chosen") flag.StringVar(&path, "path", ".", "File path") flag.BoolVar(&presentOnlyDupes, "onlyDupes", true, "Only present dupes") + flag.BoolVar(&presentJson, "json", false, "present json") + flag.BoolVar(&useProgressBar, "progressBar", false, "Present a progress bar?") // Parse the command-line arguments flag.Parse() @@ -39,16 +46,45 @@ func main() { fmt.Printf("Path: %s\n", path) fmt.Printf("PresentOnlyDupes: %t\n", 
presentOnlyDupes) - Run(path, method, presentOnlyDupes) -} - -func Run(path, method string, presentOnlyDupes bool) { - var dupes *common.Dupes + var runFunc common.Run switch { case method == "single": - dupes = singleThread.Run(path) - case method == "producerconsumer": - dupes = producerConsumer.Run(path) + runFunc = singleThread.Run + case method == "producerConsumer": + runFunc = producerConsumer.Run + case method == "nThreads": + runFunc = producerConsumer.GetRunNThreads(nThreads) + } + + var bar progressbar.ProgressBar + switch useProgressBar { + case true: + bar = progressbar.UiProgressBar{} + } + + dupes := NewRunner(runFunc, bar).Run(path) + switch presentOnlyDupes { + case true: + dupes.GetOnlyDupes().Present(presentJson) + case false: + dupes.Present(presentJson) + } + +} + +type Runner struct { + RunFunc common.Run + ProgressBar progressbar.ProgressBar + OutputJson bool +} + +func NewRunner(runFunc common.Run, bar progressbar.ProgressBar) *Runner { + return &Runner{ + RunFunc: runFunc, + ProgressBar: bar, } - dupes.Present(presentOnlyDupes) +} + +func (r *Runner) Run(path string) *common.Dupes { + return r.RunFunc(path, r.ProgressBar) } diff --git a/dupesCompareDirs/lib/cli.go b/dupesCompareDirs/lib/cli.go index 00b7e10..0db78ec 100644 --- a/dupesCompareDirs/lib/cli.go +++ b/dupesCompareDirs/lib/cli.go @@ -5,11 +5,17 @@ import ( "fmt" "os" + "github.com/sander-skjulsvik/tools/dupes/lib/common" + producerconsumer "github.com/sander-skjulsvik/tools/dupes/lib/producerConsumer" + "github.com/sander-skjulsvik/tools/dupes/lib/singleThread" "github.com/sander-skjulsvik/tools/libs/progressbar" ) func RunComparison(comparisonFunc ComparisonFunc) { outputJson := flag.Bool("json", false, "If set to true Output as json") + withProgressBar := flag.Bool("withProgressBar", true, "If set to true display progress bar") + runnerMode := flag.String("runMode", "singleThread", "possible run modes: singleThread, producerConsumer and nThreads") + nThreads := 
flag.Int("nThreads", 0, "number of threads to use, only used witt runMode nThreads") dir1 := flag.String("dir1", "", "Path to 1st dir") dir2 := flag.String("dir2", "", "Path to 2nd dir") flag.Parse() @@ -21,8 +27,15 @@ func RunComparison(comparisonFunc ComparisonFunc) { } // Progress bar - pbs := progressbar.NewUiPCollection() - dupes := comparisonFunc(pbs, *dir1, *dir2) + pbCollection := SelectProgressBarCollection(*withProgressBar) + + // Runner + runFunc := SelectRunnerFunction(*runnerMode, *nThreads) + + comparator := NewComparator( + []string{*dir1, *dir2}, runFunc, comparisonFunc, pbCollection, + ) + dupes := comparator.Run() if *outputJson { fmt.Println(string(dupes.GetJSON())) @@ -30,3 +43,47 @@ func RunComparison(comparisonFunc ComparisonFunc) { dupes.Present(false) } } + +func SelectProgressBarCollection(b bool) progressbar.ProgressBarCollection { + var pbCollection progressbar.ProgressBarCollection + switch b { + case true: + pbCollection = progressbar.NewUiPCollection() + case false: + pbCollection = progressbar.ProgressBarCollectionMoc{} + } + return pbCollection +} + +// nThreads var ignored for all but nThreads method +func SelectRunnerFunction(s string, nThreads int) common.Run { + var runFunc common.Run + switch s { + case "singleThread": + runFunc = singleThread.Run + case "producerConsumer": + runFunc = producerconsumer.Run + case "nThreads": + runFunc = producerconsumer.GetRunNThreads(nThreads) + } + return runFunc +} + +func SelectComparatorFunc(s string) ComparisonFunc { + var comparatorFunc ComparisonFunc + switch s { + // Show dupes that is present in both directories + case "onlyInBoth": + comparatorFunc = OnlyInAll + // Show dupes that is only present in first + case "onlyInFirst": + comparatorFunc = OnlyInFirst + + case "all": + comparatorFunc = All + default: + panic(fmt.Errorf("unknown mode: %s, supported modes: OnlyInboth, onlyInFirst, all ", s)) + } + return comparatorFunc + +} diff --git a/dupesCompareDirs/lib/lib.go 
b/dupesCompareDirs/lib/lib.go index b1d3bbe..4674fad 100644 --- a/dupesCompareDirs/lib/lib.go +++ b/dupesCompareDirs/lib/lib.go @@ -6,65 +6,90 @@ import ( "sync" "github.com/sander-skjulsvik/tools/dupes/lib/common" - "github.com/sander-skjulsvik/tools/dupes/lib/singleThread" - "github.com/sander-skjulsvik/tools/libs/files" "github.com/sander-skjulsvik/tools/libs/progressbar" ) -type ComparisonFunc func(progressBars progressbar.ProgressBarCollection, paths ...string) *common.Dupes +type ComparisonFunc func([]*common.Dupes) *common.Dupes -// OnlyInboth returns dupes that is present in all directories -func OnlyInAll(progressBars progressbar.ProgressBarCollection, paths ...string) *common.Dupes { - ds := runDupes(progressBars, paths...) - first := ds[0] - - for _, d := range ds { - first = first.OnlyInBoth(d) +// OnlyInAll returns dupes that is present in all directories +func OnlyInAll(inDupes []*common.Dupes) *common.Dupes { + if len(inDupes) == 0 { + panic(fmt.Errorf("onlyInAll was given empty inDupes")) + } + outDupes := common.NewDupes() + outDupes.AppendDupes(inDupes[0]) + for _, d := range inDupes[1:] { + outDupes = *outDupes.OnlyInBoth(d) } - return first + return &outDupes } // OnlyInFirst returns dupes that is only present in first directory -func OnlyInFirst(progressBarCollection progressbar.ProgressBarCollection, paths ...string) *common.Dupes { - ds := runDupes(progressBarCollection, paths...) 
- first := ds[0] - for _, d := range ds[1:] { - first = first.OnlyInSelf(d) +func OnlyInFirst(inDupes []*common.Dupes) *common.Dupes { + if len(inDupes) == 0 { + panic(fmt.Errorf("onlyInFirst was given empty inDupes")) + } + outDupes := common.NewDupes() + outDupes.AppendDupes(inDupes[0]) + for _, d := range inDupes[1:] { + outDupes = *outDupes.OnlyInSelf(d) } - return first + return &outDupes } // All returns all dupes in all directories -func All(progressBarCollection progressbar.ProgressBarCollection, paths ...string) *common.Dupes { - dupes := common.NewDupes() - for _, dupe := range runDupes(progressBarCollection, paths...) { - dupes.AppendDupes(dupe) +func All(inDupes []*common.Dupes) *common.Dupes { + if len(inDupes) == 0 { + panic(fmt.Errorf("all was given empty inDupes")) + } + outDupes := common.NewDupes() + for _, dupe := range inDupes { + outDupes.AppendDupes(dupe) + } + return &outDupes +} + +type Comparator struct { + DupesRunners []*common.Runner + CompFunc ComparisonFunc + ProgressBarCollection progressbar.ProgressBarCollection + paths []string +} + +func NewComparator(paths []string, runFunc common.Run, compFunc ComparisonFunc, barCollection progressbar.ProgressBarCollection) *Comparator { + runners := []*common.Runner{} + for _, path := range paths { + runners = append(runners, common.NewRunner( + runFunc, + barCollection.AddDirectorySizeBar(path), + )) + } + + return &Comparator{ + DupesRunners: runners, + CompFunc: compFunc, + ProgressBarCollection: barCollection, + paths: paths, } - return &dupes } -func runDupes(progressBarCollection progressbar.ProgressBarCollection, paths ...string) []*common.Dupes { +func (compr *Comparator) Run() *common.Dupes { wg := sync.WaitGroup{} - wg.Add(len(paths)) - dupesCollection := make([]*common.Dupes, len(paths)) + wg.Add(len(compr.paths)) + dupesCollection := make([]*common.Dupes, len(compr.paths)) - progressBarCollection.Start() + compr.ProgressBarCollection.Start() - for ind, path := range paths { + for 
ind, path := range compr.paths { go func() { defer wg.Done() log.Printf("Running dupes on: %s", path) - n, err := files.GetNumbeSizeOfDirMb(path) - if err != nil { - panic(fmt.Errorf("unable to get size of directory: %w", err)) - } - bar := progressBarCollection.AddBar(path, n) - dupesCollection[ind] = singleThread.RunWithProgressBar(path, bar) + dupesCollection[ind] = compr.DupesRunners[ind].Run(path) }() } wg.Wait() - progressBarCollection.Stop() + compr.ProgressBarCollection.Stop() - return dupesCollection + return compr.CompFunc(dupesCollection) } diff --git a/dupesCompareDirs/lib/lib_test.go b/dupesCompareDirs/lib/lib_test.go index 7e75740..9314985 100644 --- a/dupesCompareDirs/lib/lib_test.go +++ b/dupesCompareDirs/lib/lib_test.go @@ -7,6 +7,7 @@ import ( "testing" set "github.com/deckarep/golang-set/v2" + "github.com/sander-skjulsvik/tools/dupes/lib/common" "github.com/sander-skjulsvik/tools/dupes/lib/singleThread" "github.com/sander-skjulsvik/tools/dupes/lib/test" comparedirs "github.com/sander-skjulsvik/tools/dupesCompareDirs/lib" @@ -174,31 +175,39 @@ func setupD2(prefix string) test.Folder { return folder } -func setup(rootPath string) (progressbar.ProgressBarCollectionMoc, test.Folder, test.Folder) { +func setup(rootPath string) (test.Folder, test.Folder) { p1 := filepath.Join(rootPath, "d1") p2 := filepath.Join(rootPath, "d2") d1 := setupD1(p1) d2 := setupD2(p2) - pbs := progressbar.NewMocProgressBarCollection() - return pbs, d1, d2 + return d1, d2 } func cleanUp(rootPath string) { os.RemoveAll(rootPath) } +func runComparison(rootPath string, compFunc comparedirs.ComparisonFunc) *common.Dupes { + comparator := comparedirs.NewComparator( + []string{ + filepath.Join(rootPath, "d1"), + filepath.Join(rootPath, "d2"), + filepath.Join(rootPath, "d2"), + }, + singleThread.Run, + compFunc, + progressbar.ProgressBarCollectionMoc{}, + ) + return comparator.Run() +} + // OnlyInAll returns dupes that is present in All directories func TestOnlyInAll(t *testing.T) { 
rootPath := "test_only_in_all" - pbCollection, d1, d2 := setup(rootPath) + d1, d2 := setup(rootPath) defer cleanUp(rootPath) + calcDupes := runComparison(rootPath, comparedirs.OnlyInAll) - calcDupes := comparedirs.OnlyInAll( - pbCollection, - filepath.Join(rootPath, "d1"), - filepath.Join(rootPath, "d2"), - filepath.Join(rootPath, "d2"), - ) if len(calcDupes.D) != 2 { t.Errorf("Expected 2 dupes, got %d", len(calcDupes.D)) } @@ -209,13 +218,13 @@ func TestOnlyInAll(t *testing.T) { calcHashes.Add(hash) } - d1Dupes := singleThread.Run(filepath.Join(rootPath, "d1")) + d1Dupes := singleThread.Run(filepath.Join(rootPath, "d1"), progressbar.ProgressBarMoc{}) d1Hashes := set.NewSet([]string{}...) for hash := range d1Dupes.D { d1Hashes.Add(hash) } - d2Dupes := singleThread.Run(filepath.Join(rootPath, "d2")) + d2Dupes := singleThread.Run(filepath.Join(rootPath, "d2"), progressbar.ProgressBarMoc{}) d2Hashes := set.NewSet([]string{}...) for hash := range d2Dupes.D { d2Hashes.Add(hash) @@ -259,16 +268,11 @@ func TestOnlyInAll(t *testing.T) { // OnlyInFirst returns dupes that is only present in first directory func TestOnlyInFirst(t *testing.T) { rootPath := "test_only_in_first" - pbCollection, d1, d2 := setup(rootPath) + d1, d2 := setup(rootPath) d1FullPath := filepath.Join(rootPath, "d1") d2FullPath := filepath.Join(rootPath, "d2") defer cleanUp(rootPath) - - calcDupes := comparedirs.OnlyInFirst( - pbCollection, - d1FullPath, - d2FullPath, - ) + calcDupes := runComparison(rootPath, comparedirs.OnlyInFirst) if len(calcDupes.D) != 1 { t.Errorf("Expected 1 dupes, got %d", len(calcDupes.D)) @@ -276,8 +280,8 @@ func TestOnlyInFirst(t *testing.T) { // Hashes calcHashes := set.NewThreadUnsafeSetFromMapKeys(calcDupes.D) - d1Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d1FullPath).D) - d2Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d2FullPath).D) + d1Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d1FullPath, progressbar.ProgressBarMoc{}).D) 
+ d2Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d2FullPath, progressbar.ProgressBarMoc{}).D) expectedHashes := d1Hashes.Intersect(d1Hashes.Difference(d2Hashes)) if !calcHashes.Equal(expectedHashes) { @@ -306,26 +310,20 @@ func TestOnlyInFirst(t *testing.T) { // All returns all dupes in both directories func TestAll(t *testing.T) { rootPath := "test_ony_in_both" - pbCollection, d1, d2 := setup(rootPath) + d1, d2 := setup(rootPath) d1FullPath := filepath.Join(rootPath, "d1") d2FullPath := filepath.Join(rootPath, "d2") defer cleanUp(rootPath) - calcDupes := comparedirs.All( - pbCollection, - d1FullPath, - d2FullPath, - // Running d2 again to check for duplicated entries in path - d2FullPath, - ) + calcDupes := runComparison(rootPath, comparedirs.All) if len(calcDupes.D) != 5 { t.Errorf("Expected 2 dupes, got %d", len(calcDupes.D)) } // Hashes calcHashes := set.NewThreadUnsafeSetFromMapKeys(calcDupes.D) - d1Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d1FullPath).D) - d2Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d2FullPath).D) + d1Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d1FullPath, progressbar.ProgressBarMoc{}).D) + d2Hashes := set.NewThreadUnsafeSetFromMapKeys(singleThread.Run(d2FullPath, progressbar.ProgressBarMoc{}).D) expectedHashes := d1Hashes.Union(d2Hashes) if !calcHashes.Equal(expectedHashes) { t.Errorf("Expected %v, got %v", expectedHashes, calcHashes) diff --git a/dupesCompareDirs/main.go b/dupesCompareDirs/main.go index 0077908..a9d2235 100644 --- a/dupesCompareDirs/main.go +++ b/dupesCompareDirs/main.go @@ -5,43 +5,55 @@ import ( "fmt" "log" - "github.com/sander-skjulsvik/tools/dupes/lib/common" comparedirs "github.com/sander-skjulsvik/tools/dupesCompareDirs/lib" - "github.com/sander-skjulsvik/tools/libs/progressbar" + dupescomparedirs "github.com/sander-skjulsvik/tools/dupesCompareDirs/lib" ) func main() { // Define command-line flags - mode := flag.String("mode", "all", "Mode to run 
in, modes: OnlyInboth, onlyInFirst, all") + compMode := flag.String("mode", "", "Mode to run in, modes: onlyInboth, onlyInFirst, all") + runnerMode := flag.String("runMode", "singleThread", "possible run modes: singleThread, producerConsumer and nThreads") + nThreads := flag.Int("nThreads", 0, "number of threads to use, only used witt runMode nThreads") outputJson := flag.Bool("json", false, "If set to true Output as json") + withProgressBar := flag.Bool("withProgressBar", true, "If set to true display progress bar") dir1 := flag.String("dir1", "", "Path to 1st dir") dir2 := flag.String("dir2", "", "Path to 2nd dir") flag.Parse() + errString := "" + if *dir1 == "" || *dir2 == "" { + errString = "Please provide `-dir1 ` and `-dir2 `\n" + errString + } + if *compMode == "" { + errString = "Please provide `-mode ` flag\n" + errString + } + if *compMode == "nThreads" && *nThreads == 0 { + errString = "If `-mode nThreads` please provide `-nThreads \n" + errString + } + if errString != "" { + panic(fmt.Errorf("failed to start, error with cli flags\n%s", errString)) + } log.Printf("Comparing directories: %s and %s\n", *dir1, *dir2) // Progress bar - pbs := progressbar.NewUiPCollection() - - var newD *common.Dupes - switch *mode { - // Show dupes that is present in both directories - case "OnlyInboth": - newD = comparedirs.OnlyInAll(pbs, *dir1, *dir2) - // Show dupes that is only present in first - case "onlyInFirst": - newD = comparedirs.OnlyInFirst(pbs, *dir1, *dir2) - log.Println("Only in first") - log.Printf("Number of dupes: %d\n", len(newD.D)) - case "all": - newD = comparedirs.All(pbs, *dir1, *dir2) - default: - panic(fmt.Errorf("unknown mode: %s, supported modes: OnlyInboth, onlyInFirst, all ", *mode)) - } + pbCollection := dupescomparedirs.SelectProgressBarCollection(*withProgressBar) + + // Comparison mode + comparatorFunc := comparedirs.SelectComparatorFunc(*compMode) + + // Runner + runFunc := dupescomparedirs.SelectRunnerFunction(*runnerMode, *nThreads) + + 
comparator := comparedirs.NewComparator( + []string{*dir1, *dir2}, runFunc, comparatorFunc, pbCollection, + ) + + dupes := comparator.Run() - if *outputJson { - fmt.Println(string(newD.GetJSON())) - } else { - newD.Present(false) + switch *outputJson { + case true: + fmt.Printf(string(dupes.GetJSON())) + case false: + dupes.Present(false) } } diff --git a/libs/files/lib.go b/libs/files/lib.go index ab99cfa..e9a8aa1 100644 --- a/libs/files/lib.go +++ b/libs/files/lib.go @@ -3,9 +3,9 @@ package files import ( "fmt" "io/fs" + "log" + "os" "path/filepath" - - "github.com/sander-skjulsvik/tools/dupes/lib/common" ) func GetNumberOfFiles(path string) (int, error) { @@ -13,7 +13,7 @@ func GetNumberOfFiles(path string) (int, error) { err := filepath.Walk( path, func(path string, info fs.FileInfo, err error) error { - isFile := common.IsFile(info) + isFile := IsFile(info) if !isFile { return nil } @@ -27,15 +27,23 @@ func GetNumberOfFiles(path string) (int, error) { return n, nil } -func GetNumbeSizeOfDirMb(path string) (int, error) { +func IsFile(f os.FileInfo) bool { + if f == nil { + panic(fmt.Errorf("file info is nil")) + } + return f.Mode().IsRegular() +} + +func GetSizeOfDirMb(path string) (int, error) { var size int64 = 0 err := filepath.Walk( path, func(path string, info fs.FileInfo, err error) error { if info == nil { - panic(fmt.Errorf("GetNumbeSizeOfDirMb: fileinfo is nil for: %s", path)) + log.Printf("File info is nil for %s\n", path) + return nil } - isFile := common.IsFile(info) + isFile := IsFile(info) if !isFile { return nil } diff --git a/libs/progressbar/progress.go b/libs/progressbar/progress.go index be9f04f..90ddf98 100644 --- a/libs/progressbar/progress.go +++ b/libs/progressbar/progress.go @@ -2,9 +2,12 @@ package progressbar import ( "fmt" + "log" + "os" "time" uiprogress "github.com/gosuri/uiprogress" + "github.com/sander-skjulsvik/tools/libs/files" ) // /////////////////////////////////// @@ -13,6 +16,8 @@ import ( type ProgressBar interface { Add(x 
int) + // AddFileSize(path) + AddFileSize(string) Add1() } @@ -21,6 +26,8 @@ type ProgressBarCollection interface { Stop() // header, size AddBar(string, int) ProgressBar + // path + AddDirectorySizeBar(string) ProgressBar } // /////////////////////////////////// @@ -48,6 +55,10 @@ func (pbs ProgressBarCollectionMoc) AddBar(name string, total int) ProgressBar { return ProgressBarMoc{} } +func (pbs ProgressBarCollectionMoc) AddDirectorySizeBar(path string) ProgressBar { + return ProgressBarMoc{} +} + func (pbs ProgressBarCollectionMoc) Start() { } @@ -60,6 +71,9 @@ func (pb ProgressBarMoc) Add(x int) { func (pb ProgressBarMoc) Add1() { } +func (pb ProgressBarMoc) AddFileSize(string) { +} + // /////////////////////////////////// // UiProgressBar implementation // /////////////////////////////////// @@ -97,6 +111,15 @@ func (uiP UiPCollection) AddBar(name string, total int) ProgressBar { return newBar } +func (uiP UiPCollection) AddDirectorySizeBar(path string) ProgressBar { + log.Printf("Getting size of dir for bar: %s", path) + dirSize, err := files.GetSizeOfDirMb(path) + if err != nil { + panic(fmt.Errorf("unable to determine directory size: %w", err)) + } + return uiP.AddBar(path, dirSize) +} + func (uiP UiPCollection) Start() { uiP.progress.Start() } @@ -114,3 +137,12 @@ func (uiP UiProgressBar) Add(x int) { func (uiP UiProgressBar) Add1() { uiP.bar.Incr() } + +func (uiP UiProgressBar) AddFileSize(path string) { + // Get the fileinfo + fileInfo, err := os.Stat(path) + if err != nil { + panic(fmt.Errorf("addFileSize failed for: %s", path)) + } + uiP.Add(int(fileInfo.Size())) +}