stateful deployments: use TaskGroupVolumeClaim table to associate volume requests with volume IDs (#24993)

We introduce an alternative solution to the one presented in #24960, one based on the state store rather than on previous-next allocation tracking in the reconciler. This new solution reduces the cognitive complexity of the scheduler code at the cost of slightly more boilerplate, and it opens up new possibilities in the future, e.g., allowing users to explicitly "un-stick" volumes while workloads are still running.

The new logic works as follows:

- A TaskGroupVolumeClaim record in the state store uniquely identifies a claimed volume by namespace, job ID, task group name, and volume ID.
- upsertAllocsImpl() checks whether an allocation requests sticky volumes and consults the state; if there is no claim, it creates one.
- In the scheduler, SetVolumes() sets the namespace, job, and task group, and hasVolumes() consults the state, returning true if there is a matching claim or if there is no previous claim.
- DeleteJobTxn() removes the claim from the state when the job is deleted.
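For reference, here is a rough sketch of the claim record as the state-store code in this commit uses it. The real structs.TaskGroupHostVolumeClaim lives in nomad/structs and also tracks the claiming allocation ID plus Raft create/modify indexes, so treat this as illustrative only:

// Illustrative sketch only: field names follow their use in the
// state-store code below; the actual struct carries additional
// bookkeeping (claiming allocation ID, CreateIndex, ModifyIndex).
type TaskGroupHostVolumeClaim struct {
	Namespace     string // job namespace
	JobID         string // job that owns the task group
	TaskGroupName string // task group requesting the sticky volume
	VolumeID      string // host volume the group is pinned to
}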
1 parent 3493551, commit 611452e.
Showing 16 changed files with 610 additions and 130 deletions.
@@ -0,0 +1,138 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package state

import (
	"fmt"

	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/structs"
)

// UpsertTaskGroupHostVolumeClaim is used to upsert claims into the state store.
// This method is only used in unit tests.
func (s *StateStore) UpsertTaskGroupHostVolumeClaim(msgType structs.MessageType, index uint64, claim *structs.TaskGroupHostVolumeClaim) error {
	// Grab a write transaction.
	txn := s.db.WriteTxnMsgT(msgType, index)
	defer txn.Abort()
	if err := s.upsertTaskGroupHostVolumeClaimImpl(index, claim, txn); err != nil {
		return err
	}

	return txn.Commit()
}

// upsertTaskGroupHostVolumeClaimImpl is used to insert a task group volume claim into
// the state store.
func (s *StateStore) upsertTaskGroupHostVolumeClaimImpl(
	index uint64, claim *structs.TaskGroupHostVolumeClaim, txn *txn) error {

	existingRaw, err := txn.First(TableTaskGroupHostVolumeClaim, indexID, claim.Namespace, claim.JobID, claim.TaskGroupName, claim.VolumeID)
	if err != nil {
		return fmt.Errorf("Task group volume association lookup failed: %v", err)
	}

	var existing *structs.TaskGroupHostVolumeClaim
	if existingRaw != nil {
		existing = existingRaw.(*structs.TaskGroupHostVolumeClaim)
	}

	if existing != nil {
		// do allocation ID and volume ID match?
		if existing.ClaimedByAlloc(claim) {
			return nil
		}

		claim.CreateIndex = existing.CreateIndex
		claim.ModifyIndex = index
	} else {
		claim.CreateIndex = index
		claim.ModifyIndex = index
	}

	// Insert the claim into the table.
	if err := txn.Insert(TableTaskGroupHostVolumeClaim, claim); err != nil {
		return fmt.Errorf("Task group volume claim insert failed: %v", err)
	}

	// Perform the index table update to mark the new insert.
	if err := txn.Insert(tableIndex, &IndexEntry{TableTaskGroupHostVolumeClaim, index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	return nil
}

// GetTaskGroupHostVolumeClaim returns a volume claim that matches the namespace,
// job id and task group name (there can be only one)
func (s *StateStore) GetTaskGroupHostVolumeClaim(ws memdb.WatchSet, namespace, jobID, taskGroupName, volumeID string) (*structs.TaskGroupHostVolumeClaim, error) {
	txn := s.db.ReadTxn()

	watchCh, existing, err := txn.FirstWatch(TableTaskGroupHostVolumeClaim, indexID, namespace, jobID, taskGroupName, volumeID)
	if err != nil {
		return nil, fmt.Errorf("Task group volume claim lookup failed: %v", err)
	}
	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.TaskGroupHostVolumeClaim), nil
	}

	return nil, nil
}

// GetTaskGroupHostVolumeClaims returns all volume claims
func (s *StateStore) GetTaskGroupHostVolumeClaims(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get(TableTaskGroupHostVolumeClaim, indexID)
	if err != nil {
		return nil, fmt.Errorf("Task group volume claim lookup failed: %v", err)
	}
	ws.Add(iter.WatchCh())

	return iter, nil
}

// GetTaskGroupHostVolumeClaimsForTaskGroup returns all volume claims for a given
// task group
func (s *StateStore) GetTaskGroupHostVolumeClaimsForTaskGroup(ws memdb.WatchSet, ns, jobID, tg string) (memdb.ResultIterator, error) {
	txn := s.db.ReadTxn()

	iter, err := txn.Get(TableTaskGroupHostVolumeClaim, indexID)
	if err != nil {
		return nil, fmt.Errorf("Task group volume claim lookup failed: %v", err)
	}
	ws.Add(iter.WatchCh())

	// Filter out by ns, jobID and tg
	filter := memdb.NewFilterIterator(iter, func(raw interface{}) bool {
		claim, ok := raw.(*structs.TaskGroupHostVolumeClaim)
		if !ok {
			return true
		}
		return claim.Namespace != ns || claim.JobID != jobID || claim.TaskGroupName != tg
	})

	return filter, nil
}

// deleteTaskGroupHostVolumeClaim deletes all claims for a given namespace and job ID
func (s *StateStore) deleteTaskGroupHostVolumeClaim(index uint64, txn *txn, namespace, jobID string) error {
	iter, err := txn.Get(TableTaskGroupHostVolumeClaim, indexID)
	if err != nil {
		return fmt.Errorf("Task group volume claim lookup failed: %v", err)
	}

	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		claim := raw.(*structs.TaskGroupHostVolumeClaim)
		if claim.JobID == jobID && claim.Namespace == namespace {
			if err := txn.Delete(TableTaskGroupHostVolumeClaim, claim); err != nil {
				return fmt.Errorf("Task group volume claim deletion failed: %v", err)
			}
		}
	}

	return nil
}
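To see how the new state-store API fits together, here is a hypothetical test-style sketch (not part of this commit) that upserts a claim, reads it back, and iterates the claims held by a task group. The testStateStore helper, the must assertion package, and structs.MsgTypeTestSetup follow existing Nomad test conventions and are assumptions here:

package state

import (
	"testing"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/shoenig/test/must"
)

// TestTaskGroupHostVolumeClaims_sketch is an illustrative example only;
// testStateStore and MsgTypeTestSetup are assumed from existing Nomad
// test conventions.
func TestTaskGroupHostVolumeClaims_sketch(t *testing.T) {
	s := testStateStore(t)

	claim := &structs.TaskGroupHostVolumeClaim{
		Namespace:     structs.DefaultNamespace,
		JobID:         "example-job",
		TaskGroupName: "web",
		VolumeID:      "vol-0001",
	}

	// Write the claim at an arbitrary Raft index.
	must.NoError(t, s.UpsertTaskGroupHostVolumeClaim(structs.MsgTypeTestSetup, 1000, claim))

	// Point lookup by the full compound key. A nil watch set works for
	// one-off reads because memdb.WatchSet.Add ignores a nil receiver.
	got, err := s.GetTaskGroupHostVolumeClaim(nil, claim.Namespace, claim.JobID, claim.TaskGroupName, claim.VolumeID)
	must.NoError(t, err)
	must.NotNil(t, got)

	// Iterate every claim held by this task group; the filter iterator
	// yields only matching claims.
	iter, err := s.GetTaskGroupHostVolumeClaimsForTaskGroup(nil, claim.Namespace, claim.JobID, claim.TaskGroupName)
	must.NoError(t, err)
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		c := raw.(*structs.TaskGroupHostVolumeClaim)
		must.Eq(t, claim.VolumeID, c.VolumeID)
	}
}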