-
Notifications
You must be signed in to change notification settings - Fork 38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Backup requests for replicated storage #7
Open
ericox
wants to merge
25
commits into
westerndigitalcorporation:master
Choose a base branch
from
ericox:backup-requests-replicated
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
04bf5c2
request id
a4a6d00
plumbing for reqID in client
ericox 0349476
fix client compile bugs
ericox f1413ea
add cutSlice helper and plumb otherHosts through rpc
ericox d46fb6e
disable backups
ericox 4bbeae9
configurable client backup reads
ericox c6096ed
add read check
ericox 974bc72
rm print
ericox 288ae14
cleanup backup read logic
ericox 3243231
merge two read funcs
ericox 86b5d4c
review feedback
ericox a07e2f7
comments
ericox bf10f9d
move bad ts report logic
ericox 00de983
cleanup orderCh
ericox 4d78d65
cleanup and remove error reporting for now
ericox 4c8ca12
rm reporting
ericox 3adde22
backup error logic
ericox dfa1952
pull logic from fallback loop into helper
ericox 2af38ac
review feedback
ericox ab39a54
review feedback
ericox 84540ed
move select into helper
ericox 571ed3c
refactor again
ericox 7ae0645
backup test scheme + refactor
ericox edfc8db
adding tests
ericox fce1cca
cleanup and tests
ericox File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package blb | ||
|
||
import ( | ||
"time" | ||
) | ||
|
||
// BackupReadBehavior lets clients decide when to enable or disable backup reads. The delay | ||
// field determines how long before the backup request is sent. | ||
type BackupReadBehavior struct { | ||
// Enabled is a flag to enable the backup read feature. | ||
Enabled bool | ||
|
||
// Delay is the time duration between successive backup reads. | ||
Delay time.Duration | ||
|
||
// MaxNumBackups is the maximum number of requests sent in addition | ||
// to the primary read. | ||
MaxNumBackups int | ||
} | ||
|
||
type backupReadState struct { | ||
BackupReadBehavior | ||
|
||
// backupDelay is the delay before sending backup reads. | ||
delayFunc func(time.Duration) <-chan time.Time | ||
} | ||
|
||
func makeBackupReadState(behavior BackupReadBehavior) backupReadState { | ||
if behavior.Enabled { | ||
if behavior.MaxNumBackups <= 0 { | ||
// Give it a default value of 1. | ||
behavior.MaxNumBackups = 1 | ||
} | ||
} else { | ||
// MaxNumBackups isn't used in the read path if this is disabled, clear this. | ||
behavior.MaxNumBackups = 0 | ||
} | ||
return backupReadState{ | ||
BackupReadBehavior: behavior, | ||
delayFunc: time.After, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,6 +81,9 @@ type Options struct { | |
|
||
// How the client decides whether to attempt client-side RS reconstruction. | ||
ReconstructBehavior ReconstructBehavior | ||
|
||
// How the client decides to send backup reads if the primary read is delayed. | ||
BackupReadBehavior BackupReadBehavior | ||
} | ||
|
||
// Client exposes a simple interface to Blb users for requesting services and | ||
|
@@ -115,6 +118,8 @@ type Client struct { | |
|
||
// Reconstruct behavior. | ||
reconstructState reconstructState | ||
// Backup read behavior. | ||
backupReadState backupReadState | ||
|
||
// Metrics we collect. | ||
metricOpen prometheus.Observer | ||
|
@@ -165,6 +170,7 @@ func newBaseClient(options *Options) *Client { | |
cluster: options.Cluster, | ||
retrier: retrier, | ||
reconstructState: makeReconstructState(options.ReconstructBehavior), | ||
backupReadState: makeBackupReadState(options.BackupReadBehavior), | ||
metricOpen: clientOpLatenciesSet.WithLabelValues("open", options.Instance), | ||
metricCreate: clientOpLatenciesSet.WithLabelValues("create", options.Instance), | ||
metricReadDurations: clientOpLatenciesSet.WithLabelValues("read", options.Instance), | ||
|
@@ -888,7 +894,8 @@ func (cli *Client) writeOneTract( | |
sem.Acquire() | ||
defer sem.Release() | ||
|
||
*result = cli.tractservers.Write(ctx, host, tract.Tract, tract.Version, thisB, thisOffset) | ||
reqID := core.GenRequestID() | ||
*result = cli.tractservers.Write(ctx, host, reqID, tract.Tract, tract.Version, thisB, thisOffset) | ||
|
||
log.V(1).Infof("write %s to %s: %s", tract.Tract, host, *result) | ||
} | ||
|
@@ -909,7 +916,8 @@ func (cli *Client) createOneTract( | |
sem.Acquire() | ||
defer sem.Release() | ||
|
||
*result = cli.tractservers.Create(ctx, host, tsid, tract.Tract, thisB, thisOffset) | ||
reqID := core.GenRequestID() | ||
*result = cli.tractservers.Create(ctx, host, reqID, tsid, tract.Tract, thisB, thisOffset) | ||
|
||
log.V(1).Infof("create %s to %s: %s", tract.Tract, host, *result) | ||
} | ||
|
@@ -1103,6 +1111,13 @@ func (cli *Client) readOneTract( | |
} | ||
} | ||
|
||
type tractResultRepl struct { | ||
tractResult | ||
|
||
badReadHost string // tracks what host has a failed read | ||
thisB []byte | ||
} | ||
|
||
func (cli *Client) readOneTractReplicated( | ||
ctx context.Context, | ||
curAddr string, | ||
|
@@ -1111,43 +1126,123 @@ func (cli *Client) readOneTractReplicated( | |
thisB []byte, | ||
thisOffset int64) { | ||
|
||
// TODO(eric): remove this var when we redo bad ts version reporting | ||
var badVersionHost string | ||
|
||
reqID := core.GenRequestID() | ||
order := rand.Perm(len(tract.Hosts)) | ||
badReadHosts := make(map[string]bool) | ||
|
||
err := core.ErrAllocHost // default error if none present | ||
for _, n := range order { | ||
host := tract.Hosts[n] | ||
if host == "" { | ||
log.V(1).Infof("read %s from tsid %d: no host", tract.Tract, tract.TSIDs[n]) | ||
continue | ||
} | ||
|
||
var read int | ||
read, err = cli.tractservers.ReadInto(ctx, host, tract.Tract, tract.Version, thisB, thisOffset) | ||
readFunc := func(host string, n int, thisBLoc []byte) tractResult { | ||
// TODO(eric): fill in otherHosts when ts-ts cancellation is done. | ||
read, err := cli.tractservers.ReadInto(ctx, host, nil, reqID, tract.Tract, tract.Version, thisBLoc, thisOffset) | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// TODO(eric): redo this check when we fix error reporting. | ||
if err == core.ErrVersionMismatch { | ||
badVersionHost = host | ||
} | ||
if err != core.NoError && err != core.ErrEOF { | ||
log.V(1).Infof("read %s from tractserver at address %s: %s", tract.Tract, host, err) | ||
// If we failed to read from a TS, report that to the curator. Defer so we can examine | ||
// result.err at the end, to see if we succeeded on another or not. | ||
defer func(host string, err core.Error) { | ||
couldRecover := result.err == core.NoError || result.err == core.ErrEOF | ||
go cli.curators.ReportBadTS(context.Background(), curAddr, tract.Tract, host, "read", err, couldRecover) | ||
}(host, err) | ||
continue // try another host | ||
// TODO(eric): redo bad TS reporting mechanism. | ||
return tractResult{0, 0, err, badVersionHost} | ||
} | ||
log.V(1).Infof("read %s from tractserver at address %s: %s", tract.Tract, host, err) | ||
// In case of short read, i.e., read < len(thisB), we should | ||
// pad the result with 0's. This doesn't need to be done | ||
// explicitly if 'thisB' given to us starts to be empty. | ||
// However, just in case it's not, we will ensure that by | ||
// overwritting untouched bytes with 0's. | ||
for i := read; i < len(thisB); i++ { | ||
thisB[i] = 0 | ||
for i := read; i < len(thisBLoc); i++ { | ||
thisBLoc[i] = 0 | ||
} | ||
return tractResult{len(thisBLoc), read, err, badVersionHost} | ||
} | ||
|
||
if cli.backupReadState.BackupReadBehavior.Enabled { | ||
ch := make(chan tractResultRepl) | ||
|
||
ctx, cancel := context.WithCancel(ctx) | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
defer cancel() | ||
|
||
maxNumBackups := cli.backupReadState.BackupReadBehavior.MaxNumBackups | ||
delay := cli.backupReadState.BackupReadBehavior.Delay | ||
|
||
// Backup request logic involves the following: | ||
// 1) Send the first read to the first host (item 0 available from the orderCh) in a goroutine. | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// 2) Start another goroutine to issue backup requests. Each subsequent request is spawned after the | ||
// configured backup time.After delay time expires. | ||
// 3) Block until read tractResults are available from the goroutines processing the reads. In the result | ||
// collection phase, we mark which hosts reutrned RPC or other errors so we don't re-send reads in the fallback | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// phase. | ||
readFuncBackup := func(wg *sync.WaitGroup, n int, thisBLoc []byte) { | ||
defer wg.Done() | ||
host := tract.Hosts[order[n]] | ||
if host == "" { | ||
log.V(1).Infof("read %s from tsid %d: no host", tract.Tract, tract.TSIDs[n]) | ||
ch <- tractResultRepl{ | ||
tractResult: tractResult{0, 0, err, badVersionHost}, | ||
thisB: thisBLoc, | ||
} | ||
return | ||
} | ||
|
||
result := readFunc(host, n, thisBLoc) | ||
|
||
ch <- tractResultRepl{ | ||
tractResult: result, | ||
badReadHost: host, | ||
thisB: thisBLoc, | ||
} | ||
} | ||
|
||
var wg sync.WaitGroup | ||
nb := min(maxNumBackups+1, len(order)) | ||
wg.Add(nb) | ||
go func() { | ||
for n := 0; n < nb; n++ { | ||
select { | ||
case <-cli.backupReadState.delayFunc(time.Duration(n) * delay): | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
thisBLoc := make([]byte, len(thisB)) | ||
go readFuncBackup(&wg, n, thisBLoc) | ||
case <-ctx.Done(): | ||
log.V(1).Infof("backup read %s-%d to tractserver cancelled", tract.Tract, n) | ||
break | ||
} | ||
} | ||
}() | ||
go func() { | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
wg.Wait() | ||
close(ch) | ||
}() | ||
|
||
// Take the first result or mark bad hosts from being read during the | ||
// fallback phase below. | ||
for res := range ch { | ||
if res.err == core.NoError { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you probably want this to be |
||
*result = res.tractResult | ||
copy(thisB, res.thisB) | ||
return | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
badReadHosts[res.badReadHost] = true | ||
} | ||
} | ||
|
||
// Sequentially read from tract server replicas. This phase will run if backups are disabled from config state, or | ||
// the backup read phase fails and we use this as a fallback. | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for n := range order { | ||
host := tract.Hosts[n] | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if host == "" { | ||
log.V(1).Infof("read %s from tsid %d: no host", tract.Tract, tract.TSIDs[n]) | ||
continue | ||
} | ||
if badReadHosts[host] { | ||
dnr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
log.V(1).Infof("read %s from tsid %d: skipping host due to failed in backup phase", tract.Tract, tract.TSIDs[n]) | ||
continue | ||
} | ||
res := readFunc(host, n, thisB) | ||
if res.err != core.NoError && res.err != core.ErrEOF { | ||
continue | ||
} | ||
*result = tractResult{len(thisB), read, err, badVersionHost} | ||
*result = res | ||
return | ||
} | ||
|
||
|
@@ -1166,16 +1261,12 @@ func (cli *Client) readOneTractRS( | |
rsTract := tract.RS.Chunk.ToTractID() | ||
length := min(len(thisB), int(tract.RS.Length)) | ||
offset := int64(tract.RS.Offset) + thisOffset | ||
read, err := cli.tractservers.ReadInto(ctx, tract.RS.Host, rsTract, core.RSChunkVersion, thisB[:length], offset) | ||
reqID := core.GenRequestID() | ||
read, err := cli.tractservers.ReadInto(ctx, tract.RS.Host, nil, reqID, rsTract, core.RSChunkVersion, thisB[:length], offset) | ||
|
||
if err != core.NoError && err != core.ErrEOF { | ||
log.V(1).Infof("rs read %s from tractserver at address %s: %s", tract.Tract, tract.RS.Host, err) | ||
// If we failed to read from a TS, report that to the curator. Defer so | ||
// we can examine result.err at the end, to see if we recovered or not. | ||
defer func(err core.Error) { | ||
couldRecover := result.err == core.NoError || result.err == core.ErrEOF | ||
go cli.curators.ReportBadTS(context.Background(), curAddr, rsTract, tract.RS.Host, "read", err, couldRecover) | ||
}(err) | ||
// TODO(eric): redo bad TS reporting mechanism. | ||
if !cli.shouldReconstruct(tract) { | ||
*result = tractResult{len(thisB), 0, err, ""} | ||
return | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unused?