-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement duplicate handling strategies (#99)
- Removes KEEP_ONE_PER_INCREMENT constant as this duplication strategy does not make sense - Implements duplication strategies comparing files as the configured hash algorithm dictates - Adds error to the backup if the hash changed between parse and backup - Restore pipeline logs warning when restoring an archived file with any errors saved to the backup - Adds new test cases - Updates documentation Resolves #96 {minor} Signed-off-by: Esta Nagy <[email protected]>
- Loading branch information
Showing
16 changed files
with
699 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
22 changes: 22 additions & 0 deletions
22
...src/main/java/com/github/nagyesta/filebarj/core/backup/worker/BackupScopePartitioner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package com.github.nagyesta.filebarj.core.backup.worker; | ||
|
||
import com.github.nagyesta.filebarj.core.model.FileMetadata; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.util.Collection; | ||
import java.util.List; | ||
|
||
/** | ||
* Partitions the backup scope into smaller batches. | ||
*/ | ||
public interface BackupScopePartitioner { | ||
|
||
/** | ||
* Partitions the backup scope into smaller batches. | ||
* | ||
* @param scope the backup scope | ||
* @return the partitioned scope | ||
*/ | ||
@NotNull | ||
List<List<List<FileMetadata>>> partitionBackupScope(@NotNull Collection<FileMetadata> scope); | ||
} |
73 changes: 73 additions & 0 deletions
73
...n/java/com/github/nagyesta/filebarj/core/backup/worker/DefaultBackupScopePartitioner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package com.github.nagyesta.filebarj.core.backup.worker; | ||
|
||
import com.github.nagyesta.filebarj.core.config.enums.DuplicateHandlingStrategy; | ||
import com.github.nagyesta.filebarj.core.config.enums.HashAlgorithm; | ||
import com.github.nagyesta.filebarj.core.model.FileMetadata; | ||
import lombok.NonNull; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.List; | ||
import java.util.function.Function; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* Base implementation of {@link BackupScopePartitioner}. | ||
*/ | ||
public class DefaultBackupScopePartitioner implements BackupScopePartitioner { | ||
|
||
private final int batchSize; | ||
private final Function<FileMetadata, String> groupingFunction; | ||
|
||
/** | ||
* Creates a new instance with the specified batch size. | ||
* | ||
* @param batchSize the batch size | ||
* @param duplicateHandlingStrategy the duplicate handling strategy | ||
* @param hashAlgorithm the hash algorithm the backup is using | ||
*/ | ||
public DefaultBackupScopePartitioner( | ||
final int batchSize, | ||
@NonNull final DuplicateHandlingStrategy duplicateHandlingStrategy, | ||
@NonNull final HashAlgorithm hashAlgorithm) { | ||
this.batchSize = batchSize; | ||
this.groupingFunction = duplicateHandlingStrategy.fileGroupingFunctionForHash(hashAlgorithm); | ||
} | ||
|
||
@Override | ||
@NotNull | ||
public List<List<List<FileMetadata>>> partitionBackupScope(@NonNull final Collection<FileMetadata> scope) { | ||
final var groupedScope = filterAndGroup(scope); | ||
return partition(groupedScope); | ||
} | ||
|
||
@NotNull | ||
private Collection<List<FileMetadata>> filterAndGroup(@NotNull final Collection<FileMetadata> scope) { | ||
return scope.stream() | ||
.filter(metadata -> metadata.getStatus().isStoreContent()) | ||
.filter(metadata -> metadata.getFileType().isContentSource()) | ||
.collect(Collectors.groupingBy(groupingFunction)) | ||
.values(); | ||
} | ||
|
||
@NotNull | ||
private List<List<List<FileMetadata>>> partition(@NotNull final Collection<List<FileMetadata>> groupedScope) { | ||
final List<List<List<FileMetadata>>> partitionedScope = new ArrayList<>(); | ||
var batch = new ArrayList<List<FileMetadata>>(); | ||
var size = 0; | ||
for (final var group : groupedScope) { | ||
batch.add(group); | ||
size += group.size(); | ||
if (size >= batchSize) { | ||
partitionedScope.add(batch); | ||
batch = new ArrayList<>(); | ||
size = 0; | ||
} | ||
} | ||
if (!batch.isEmpty()) { | ||
partitionedScope.add(batch); | ||
} | ||
return partitionedScope; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.