topology-aware: relax NUMA node topology checks.
Relax the hardware topology checks to allow multiple dies to share
a NUMA node. On such hardware, omit die pools from the pool tree.

Signed-off-by: Krisztian Litkey <[email protected]>
klihub authored and askervin committed Jun 20, 2024
1 parent 1e6050d commit 38ece6d
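For context, a minimal standalone sketch of the relaxed die check. The helper name omitDiesForSocket and the plain int IDs are hypothetical stand-ins for the policy's p.sys, idset, and cpuset machinery; this is not part of the commit.

package main

import "fmt"

// omitDiesForSocket is a hypothetical stand-in for the die part of
// checkHWTopology: given a die ID -> NUMA node IDs mapping for one
// socket, it reports whether any NUMA node is claimed by two dies.
// The commit turns this case from a hard error into "omit die pools".
func omitDiesForSocket(dieNodes map[int][]int) bool {
	owner := map[int]int{} // NUMA node ID -> first die that claimed it
	for dieID, numaIDs := range dieNodes {
		for _, n := range numaIDs {
			if prev, ok := owner[n]; ok && prev != dieID {
				return true // dies share a NUMA node: skip die pools
			}
			owner[n] = dieID
		}
	}
	return false // every NUMA node belongs to exactly one die
}

func main() {
	// two dies sharing NUMA node #0 -> die pools omitted
	fmt.Println(omitDiesForSocket(map[int][]int{0: {0}, 1: {0}})) // true
	// dies with disjoint NUMA nodes -> die pools kept
	fmt.Println(omitDiesForSocket(map[int][]int{0: {0}, 1: {1}})) // false
}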
Showing 1 changed file with 23 additions and 22 deletions.
cmd/plugins/topology-aware/policy/pools.go
@@ -28,7 +28,8 @@ import (
 
 // buildPoolsByTopology builds a hierarchical tree of pools based on HW topology.
 func (p *policy) buildPoolsByTopology() error {
-	if err := p.checkHWTopology(); err != nil {
+	omitDies, err := p.checkHWTopology()
+	if err != nil {
 		return err
 	}
 
@@ -88,20 +89,22 @@ func (p *policy) buildPoolsByTopology() error {
 
 	// create dies for every socket, but only if we have more than one die in the socket
 	numaDies := map[idset.ID]Node{} // created die Nodes per NUMA node id
-	for socketID, socket := range sockets {
-		dieIDs := p.sys.Package(socketID).DieIDs()
-		if len(dieIDs) < 2 {
-			log.Debug(" - omitted pool %q (die count: %d)", socket.Name()+"/die #0",
-				len(dieIDs))
-			continue
-		}
-		for _, dieID := range dieIDs {
-			die := p.NewDieNode(dieID, socket)
-			p.nodes[die.Name()] = die
-			for _, numaNodeID := range p.sys.Package(socketID).DieNodeIDs(dieID) {
-				numaDies[numaNodeID] = die
+	if !omitDies {
+		for socketID, socket := range sockets {
+			dieIDs := p.sys.Package(socketID).DieIDs()
+			if len(dieIDs) < 2 {
+				log.Debug(" - omitted pool %q (die count: %d)", socket.Name()+"/die #0",
+					len(dieIDs))
+				continue
 			}
+			for _, dieID := range dieIDs {
+				die := p.NewDieNode(dieID, socket)
+				p.nodes[die.Name()] = die
+				for _, numaNodeID := range p.sys.Package(socketID).DieNodeIDs(dieID) {
+					numaDies[numaNodeID] = die
+				}
+				log.Debug(" + created pool %q", die.Parent().Name()+"/"+die.Name())
+			}
-			log.Debug(" + created pool %q", die.Parent().Name()+"/"+die.Name())
 		}
 	}
 
@@ -288,7 +291,7 @@ func (p *policy) assignNUMANodes(surrogates map[idset.ID]Node, xmem, dram map[id
 }
 
 // checkHWTopology verifies our otherwise implicit assumptions about the HW.
-func (p *policy) checkHWTopology() error {
+func (p *policy) checkHWTopology() (bool, error) {
 	// NUMA nodes (memory controllers) should not be shared by multiple sockets.
 	socketNodes := map[idset.ID]cpuset.CPUSet{}
 	for _, socketID := range p.sys.PackageIDs() {
@@ -303,7 +306,7 @@ func (p *policy) checkHWTopology() error {
 			if shared := nodes1.Intersection(nodes2); !shared.IsEmpty() {
 				log.Error("can't handle HW topology: sockets #%v, #%v share NUMA node(s) #%s",
 					id1, id2, shared.String())
-				return policyError("unhandled HW topology: sockets #%v, #%v share NUMA node(s) #%s",
+				return false, policyError("unhandled HW topology: sockets #%v, #%v share NUMA node(s) #%s",
 					id1, id2, shared.String())
 			}
 		}
@@ -320,12 +323,10 @@ }
 				}
 				nodes2 := idset.NewIDSet(pkg.DieNodeIDs(id2)...)
 				if shared := system.CPUSetFromIDSet(nodes1).Intersection(system.CPUSetFromIDSet(nodes2)); !shared.IsEmpty() {
-					log.Error("can't handle HW topology: "+
+					log.Error("will ignore dies: "+
 						"socket #%v, dies #%v,%v share NUMA node(s) #%s",
 						socketID, id1, id2, shared.String())
-					return policyError("unhandled HW topology: "+
-						"socket #%v, dies #%v,#%v share NUMA node(s) #%s",
-						socketID, id1, id2, shared.String())
+					return true, nil
 				}
 			}
 		}
@@ -339,13 +340,13 @@ func (p *policy) checkHWTopology() error {
 			if d1 != d2 {
 				log.Error("asymmetric NUMA distance (#%d, #%d): %d != %d",
 					from, to, d1, d2)
-				return policyError("asymmetric NUMA distance (#%d, #%d): %d != %d",
+				return false, policyError("asymmetric NUMA distance (#%d, #%d): %d != %d",
 					from, to, d1, d2)
 			}
 		}
 	}
 
-	return nil
+	return false, nil
 }
 
 // Pick a pool and allocate resource from it to the container.
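The asymmetric-distance case in the last hunk remains a hard error after this commit. A standalone sketch of that invariant, with a hypothetical checkSymmetric helper standing in for the policy's system-topology plumbing (assumes a square distance matrix; not part of the commit):

package main

import "fmt"

// checkSymmetric mirrors the asymmetric-distance guard at the end of
// checkHWTopology: NUMA distances must satisfy dist[from][to] ==
// dist[to][from]; any violation still fails the topology check.
func checkSymmetric(dist [][]int) error {
	for from := range dist {
		for to := range dist[from] {
			if d1, d2 := dist[from][to], dist[to][from]; d1 != d2 {
				return fmt.Errorf("asymmetric NUMA distance (#%d, #%d): %d != %d",
					from, to, d1, d2)
			}
		}
	}
	return nil
}

func main() {
	fmt.Println(checkSymmetric([][]int{{10, 21}, {21, 10}})) // <nil>
	fmt.Println(checkSymmetric([][]int{{10, 21}, {31, 10}})) // error
}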
