Add more robust restore options. Fixed usage of -I option on backups and an edge case that would corrupt the final manifest's volume ordering. Also added verification that provided snapshots exist before processing backups and restores.
someone1 committed Aug 11, 2017
1 parent ccd7c54 commit 4284890
Showing 9 changed files with 294 additions and 50 deletions.
15 changes: 12 additions & 3 deletions README.md
@@ -46,7 +46,7 @@ The compiled binary should be in your $GOPATH/bin directory.

## Usage

### "Smart" Options:
### "Smart" Backup Options:

Use the `--full` option to auto select the most recent snapshot on the target volume to do a full backup of:

@@ -60,6 +60,16 @@ Use the `--fullIfOlderThan` option to auto select the most recent snapshot on th

$ ./zfsbackup send --encryptTo [email protected] --signFrom [email protected] --publicKeyRingPath pubring.gpg.asc --secretKeyRingPath secring.gpg.asc --fullIfOlderThan 720h Tank/Dataset gs://backup-bucket-target,s3://another-backup-target

### "Smart" Restore Options:
Add the `--auto`` option to automatically restore to the snapshot if one is given, or detect the latest snapshot for the filesystem/volume given and restore to that. It will figure out which snapshots are missing from the local_volume and select them all to restore to get to the desired snapshot. Note: snapshot comparisons work using the name of the snapshot, if you restored a snapshot to a different name, this application won't think it is available and it will break the restore process.

Auto-detect latest snapshot:
$ ./zfsbackup receive --encryptTo [email protected] --signFrom [email protected] --publicKeyRingPath pubring.gpg.asc --secretKeyRingPath secring.gpg.asc --auto -d Tank/Dataset gs://backup-bucket-target Tank

Auto restore to snapshot provided:
$ ./zfsbackup receive --encryptTo [email protected] --signFrom [email protected] --publicKeyRingPath pubring.gpg.asc --secretKeyRingPath secring.gpg.asc --auto -d Tank/Dataset@snapshot-20170201 gs://backup-bucket-target Tank


### Manual Options:

Full backup example:
@@ -168,11 +178,10 @@ Global Flags:
## TODOs:
* Make PGP cipher configurable.
* Finish the verify command
* Build out more robust restore options (e.g. cascading, parent verification, etc.)
* Refactor
* Test Coverage
* Add more backends (e.g. Azure, BackBlaze, etc.)
* Add delete feature
* Appease linters
* Validate requested snapshots exist
* Track intermediary snaps as part of backup jobs
* Parity archives?
2 changes: 1 addition & 1 deletion backup/backup.go
@@ -308,7 +308,6 @@ func Backup(pctx context.Context, jobInfo *helpers.JobInfo) error {
return nil
}
if !vol.IsManifest {
maniwg.Done()
helpers.AppLogger.Debugf("Volume %s has finished the entire pipeline.", vol.ObjectName)
helpers.AppLogger.Debugf("Adding %s to the manifest volume list.", vol.ObjectName)
jobInfo.Volumes = append(jobInfo.Volumes, vol)
@@ -320,6 +319,7 @@ func Backup(pctx context.Context, jobInfo *helpers.JobInfo) error {
if err = manifestVol.DeleteVolume(); err != nil {
helpers.AppLogger.Warningf("Error deleting temporary manifest file - %v", err)
}
maniwg.Done()
} else {
// Manifest has been processed, we're done!
return nil
93 changes: 74 additions & 19 deletions backup/list.go
@@ -22,6 +22,7 @@ package backup

import (
"context"
"crypto/md5"
"encoding/json"
"fmt"
"path/filepath"
@@ -62,27 +63,11 @@ func List(pctx context.Context, jobInfo *helpers.JobInfo) error {
return serr
}

// Read in Manifests and display
decodedManifests := make([]*helpers.JobInfo, 0, len(safeManifests))
for _, manifest := range safeManifests {
manifestPath := filepath.Join(localCachePath, manifest)
decodedManifest, oerr := readManifest(ctx, manifestPath, jobInfo)
if oerr != nil {
helpers.AppLogger.Errorf("Could not read manifest %s due to error - %v", manifestPath, oerr)
return oerr
}
decodedManifests = append(decodedManifests, decodedManifest)
decodedManifests, derr := readAndSortManifests(ctx, localCachePath, safeManifests, jobInfo)
if derr != nil {
return derr
}

sort.SliceStable(decodedManifests, func(i, j int) bool {
cmp := strings.Compare(decodedManifests[i].VolumeName, decodedManifests[j].VolumeName)
if cmp == 0 {
return decodedManifests[i].BaseSnapshot.CreationTime.Before(decodedManifests[j].BaseSnapshot.CreationTime)
}
return cmp < 0

})

var output []string
output = append(output, fmt.Sprintf("Found %d backup sets:\n", len(decodedManifests)))
for _, manifest := range decodedManifests {
@@ -109,6 +94,76 @@ func List(pctx context.Context, jobInfo *helpers.JobInfo) error {
return nil
}

func readAndSortManifests(ctx context.Context, localCachePath string, manifests []string, jobInfo *helpers.JobInfo) ([]*helpers.JobInfo, error) {
// Read in Manifests and display
decodedManifests := make([]*helpers.JobInfo, 0, len(manifests))
for _, manifest := range manifests {
manifestPath := filepath.Join(localCachePath, manifest)
decodedManifest, oerr := readManifest(ctx, manifestPath, jobInfo)
if oerr != nil {
helpers.AppLogger.Errorf("Could not read manifest %s due to error - %v", manifestPath, oerr)
return nil, oerr
}
decodedManifests = append(decodedManifests, decodedManifest)
}

sort.SliceStable(decodedManifests, func(i, j int) bool {
cmp := strings.Compare(decodedManifests[i].VolumeName, decodedManifests[j].VolumeName)
if cmp == 0 {
return decodedManifests[i].BaseSnapshot.CreationTime.Before(decodedManifests[j].BaseSnapshot.CreationTime)
}
return cmp < 0

})

return decodedManifests, nil
}

// linkManifests will group manifests by Volume and link parents to their children
func linkManifests(manifests []*helpers.JobInfo) map[string][]*helpers.JobInfo {
if manifests == nil {
return nil
}
manifestTree := make(map[string][]*helpers.JobInfo)
manifestsByID := make(map[string]*helpers.JobInfo)
for idx := range manifests {
if _, ok := manifestTree[manifests[idx].VolumeName]; !ok {
manifestTree[manifests[idx].VolumeName] = make([]*helpers.JobInfo, 0, 10)
}

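// A backup job's identity is the MD5 of its volume name concatenated with its base snapshot's name and creation time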
manifestID := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%s%s%v", manifests[idx].VolumeName, manifests[idx].BaseSnapshot.Name, manifests[idx].BaseSnapshot.CreationTime))))

manifestTree[manifests[idx].VolumeName] = append(manifestTree[manifests[idx].VolumeName], manifests[idx])

// Case 1: Full Backups, nothing to link
if manifests[idx].IncrementalSnapshot.Name == "" {
// We will always assume full backups are ideal when selecting a parent
manifestsByID[manifestID] = manifests[idx]
} else if _, ok := manifestsByID[manifestID]; !ok {
// Case 2: Incremental Backup - only make it the designated parent if we haven't got one already
manifestsByID[manifestID] = manifests[idx]
}
}

// Link up parents
for _, snapList := range manifestTree {
for _, val := range snapList {
if val.IncrementalSnapshot.Name == "" {
// Full backup, no parent
continue
}
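// Compute the parent's ID from the incremental (origin) snapshot, mirroring the base-snapshot hash built above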
manifestID := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%s%s%v", val.VolumeName, val.IncrementalSnapshot.Name, val.IncrementalSnapshot.CreationTime))))
if psnap, ok := manifestsByID[manifestID]; ok {
val.ParentSnap = psnap
} else {
helpers.AppLogger.Warningf("Could not find matching parent for %v", val)
}
}

}
return manifestTree
}

func readManifest(ctx context.Context, manifestPath string, j *helpers.JobInfo) (*helpers.JobInfo, error) {
decodedManifest := new(helpers.JobInfo)
manifestVol, err := helpers.ExtractLocal(ctx, j, manifestPath)
164 changes: 160 additions & 4 deletions backup/restore.go
@@ -23,6 +23,7 @@ package backup
import (
"context"
"crypto/md5"
"errors"
"fmt"
"io"
"os"
@@ -43,6 +44,128 @@ type downloadSequence struct {
c chan<- *helpers.VolumeInfo
}

// AutoRestore will compute which snapshots need to be restored to get to the snapshot provided,
// or to the latest snapshot of the volume provided
func AutoRestore(pctx context.Context, jobInfo *helpers.JobInfo) error {
ctx, cancel := context.WithCancel(pctx)
defer cancel()

// Prepare the backend client
target := jobInfo.Destinations[0]
backend, berr := prepareBackend(ctx, jobInfo, target, nil)
if berr != nil {
helpers.AppLogger.Errorf("Could not initialize backend for target %s due to error - %v.", target, berr)
return berr
}
defer backend.Close()

// Get the local cache dir
localCachePath, cerr := getCacheDir(jobInfo.Destinations[0])
if cerr != nil {
helpers.AppLogger.Errorf("Could not get cache dir for target %s due to error - %v.", target, cerr)
return cerr
}

// Sync the local cache
safeManifests, _, serr := syncCache(ctx, jobInfo, localCachePath, backend)
if serr != nil {
helpers.AppLogger.Errorf("Could not sync cache dir for target %s due to error - %v.", target, serr)
return serr
}

decodedManifests, derr := readAndSortManifests(ctx, localCachePath, safeManifests, jobInfo)
if derr != nil {
return derr
}
manifestTree := linkManifests(decodedManifests)
var ok bool
var volumeSnaps []*helpers.JobInfo
if volumeSnaps, ok = manifestTree[jobInfo.VolumeName]; !ok {
helpers.AppLogger.Errorf("Could not find any snapshots for volume %s, none found on target.", jobInfo.VolumeName)
return errors.New("could not determine any snapshots for provided volume")
}

// Restore to the latest snapshot available for the volume provided if no snapshot was provided
if jobInfo.BaseSnapshot.Name == "" {
helpers.AppLogger.Infof("Trying to determine latest snapshot for volume %s.", jobInfo.BaseSnapshot.Name)
job := volumeSnaps[len(volumeSnaps)-1]
jobInfo.BaseSnapshot = job.BaseSnapshot
helpers.AppLogger.Infof("Restoring to snapshot %s.", job.BaseSnapshot.Name)
}

// Find the matching backup job for the snapshot we want to restore to
var jobToRestore *helpers.JobInfo
for _, job := range volumeSnaps {
if strings.Compare(job.BaseSnapshot.Name, jobInfo.BaseSnapshot.Name) == 0 {
jobToRestore = job
break
}
}
if jobToRestore == nil {
helpers.AppLogger.Errorf("Could not find the snapshot %v for volume %s on backend.", jobInfo.BaseSnapshot.Name, jobInfo.VolumeName)
return errors.New("could not find snapshot provided")
}

// We have the snapshot we'd like to restore to, let's figure out what's already available locally and restore as required
jobsToRestore := make([]*helpers.JobInfo, 0, 10)
helpers.AppLogger.Infof("Calculating how to restore to %s.", jobInfo.BaseSnapshot.Name)
volume := jobInfo.LocalVolume
parts := strings.Split(jobInfo.VolumeName, "/")
if jobInfo.FullPath {
parts[0] = volume
volume = strings.Join(parts, "/")
}

if jobInfo.LastPath {
volume = fmt.Sprintf("%s/%s", volume, parts[len(parts)-1])
}

snapshots, err := helpers.GetSnapshots(ctx, volume)
if err != nil {
// TODO: There are some error cases that are ok to ignore!
snapshots = []helpers.SnapshotInfo{}
}

for {
// See if the snapshots we want to restore already exist
if ok := validateSnapShotExistsFromSnaps(&jobToRestore.BaseSnapshot, snapshots); ok {
break
}
helpers.AppLogger.Infof("Adding backup job for %s to the restore list.", jobToRestore.BaseSnapshot.Name)
jobsToRestore = append(jobsToRestore, jobToRestore)
if jobToRestore.IncrementalSnapshot.Name == "" {
// This is a full backup, no need to go further back
break
}
if jobToRestore.ParentSnap == nil {
helpers.AppLogger.Errorf("Want to restore parent snap %s but it is not found in the backend, aborting.", jobToRestore.IncrementalSnapshot.Name)
return errors.New("could not find parent snapshot")
}
jobToRestore = jobToRestore.ParentSnap
}

helpers.AppLogger.Infof("Need to restore %d snapshots.", len(jobsToRestore))

// We have a list of snapshots we need to restore, start at the end and work our way down
for i := len(jobsToRestore) - 1; i >= 0; i-- {
jobInfo.BaseSnapshot = jobsToRestore[i].BaseSnapshot
jobInfo.IncrementalSnapshot = jobsToRestore[i].IncrementalSnapshot
jobInfo.Volumes = jobsToRestore[i].Volumes
jobInfo.Compressor = jobsToRestore[i].Compressor
jobInfo.Separator = jobsToRestore[i].Separator
helpers.AppLogger.Infof("Restoring snapshot %s (%d/%d)", jobInfo.BaseSnapshot.Name, len(jobsToRestore)-i, len(jobsToRestore))
if err := Receive(ctx, jobInfo); err != nil {
helpers.AppLogger.Errorf("Failed to restore snapshot.")
return err
}
}

helpers.AppLogger.Noticef("Done.")

return nil
}

// Receive will download and restore the backup job described to the Volume target provided.
func Receive(pctx context.Context, jobInfo *helpers.JobInfo) error {
ctx, cancel := context.WithCancel(pctx)
defer cancel()
@@ -64,6 +187,37 @@ func Receive(pctx context.Context, jobInfo *helpers.JobInfo) error {
return cerr
}

// See if the snapshots we want to restore already exist
volume := jobInfo.LocalVolume
parts := strings.Split(jobInfo.VolumeName, "/")
if jobInfo.FullPath {
parts[0] = volume
volume = strings.Join(parts, "/")
}

if jobInfo.LastPath {
volume = fmt.Sprintf("%s/%s", volume, parts[len(parts)-1])
}

if ok, verr := validateSnapShotExists(ctx, &jobInfo.BaseSnapshot, volume); verr != nil {
helpers.AppLogger.Errorf("Cannot validate if selected base snapshot exists due to error - %v", verr)
return verr
} else if ok {
helpers.AppLogger.Infof("Selected base snapshot already exists, nothing to do!")
return nil
}

// Check that we have the parent snapshot this job wants to restore from
if jobInfo.IncrementalSnapshot.Name != "" {
if ok, verr := validateSnapShotExists(ctx, &jobInfo.IncrementalSnapshot, volume); verr != nil {
helpers.AppLogger.Errorf("Cannot validate if selected incremental snapshot exists due to error - %v", verr)
return verr
} else if !ok {
helpers.AppLogger.Errorf("Selected incremental snapshot does not exist!")
return fmt.Errorf("selected incremental snapshot does not exist")
}
}

// Compute the Manifest File
tempManifest, err := helpers.CreateManifestVolume(ctx, jobInfo)
if err != nil {
@@ -100,8 +254,8 @@ func Receive(pctx context.Context, jobInfo *helpers.JobInfo) error {

// Get list of Objects
toDownload := make([]string, len(manifest.Volumes))
for idx, vol := range manifest.Volumes {
toDownload[idx] = vol.ObjectName
for idx := range manifest.Volumes {
toDownload[idx] = manifest.Volumes[idx].ObjectName
}

// PreDownload step
@@ -126,10 +280,10 @@ func Receive(pctx context.Context, jobInfo *helpers.JobInfo) error {
defer close(bufferChannel)

// Queue up files to download
for idx, vol := range manifest.Volumes {
for idx := range manifest.Volumes {
c := make(chan *helpers.VolumeInfo, 1)
orderedChannels[idx] = c
downloadChannel <- downloadSequence{vol, c}
downloadChannel <- downloadSequence{manifest.Volumes[idx], c}
}
close(downloadChannel)

@@ -163,6 +317,8 @@ func Receive(pctx context.Context, jobInfo *helpers.JobInfo) error {
return processSequence(ctx, sequence, backend, usePipe)
}

helpers.AppLogger.Debugf("Downloading volume %s.", sequence.volume.ObjectName)

if err := backoff.Retry(operation, retryconf); err != nil {
helpers.AppLogger.Errorf("Failed to download volume %s due to error: %v, aborting...", sequence.volume.ObjectName, err)
return err