From 38ece6dbc26f982e639ab31da133f0538b72436d Mon Sep 17 00:00:00 2001
From: Krisztian Litkey
Date: Thu, 20 Jun 2024 09:33:04 +0300
Subject: [PATCH] topology-aware: relax NUMA node topology checks.

Relax hardware topology checks to allow multiple dies to share a
NUMA node. On such hardware, omit die pools from the tree.

Signed-off-by: Krisztian Litkey
---
 cmd/plugins/topology-aware/policy/pools.go | 45 +++++++++++----------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/cmd/plugins/topology-aware/policy/pools.go b/cmd/plugins/topology-aware/policy/pools.go
index 8fa1a6b1c..4fea186c5 100644
--- a/cmd/plugins/topology-aware/policy/pools.go
+++ b/cmd/plugins/topology-aware/policy/pools.go
@@ -28,7 +28,8 @@ import (
 
 // buildPoolsByTopology builds a hierarchical tree of pools based on HW topology.
 func (p *policy) buildPoolsByTopology() error {
-	if err := p.checkHWTopology(); err != nil {
+	omitDies, err := p.checkHWTopology()
+	if err != nil {
 		return err
 	}
 
@@ -88,20 +89,22 @@
 
 	// create dies for every socket, but only if we have more than one die in the socket
 	numaDies := map[idset.ID]Node{} // created die Nodes per NUMA node id
-	for socketID, socket := range sockets {
-		dieIDs := p.sys.Package(socketID).DieIDs()
-		if len(dieIDs) < 2 {
-			log.Debug("  - omitted pool %q (die count: %d)", socket.Name()+"/die #0",
-				len(dieIDs))
-			continue
-		}
-		for _, dieID := range dieIDs {
-			die := p.NewDieNode(dieID, socket)
-			p.nodes[die.Name()] = die
-			for _, numaNodeID := range p.sys.Package(socketID).DieNodeIDs(dieID) {
-				numaDies[numaNodeID] = die
+	if !omitDies {
+		for socketID, socket := range sockets {
+			dieIDs := p.sys.Package(socketID).DieIDs()
+			if len(dieIDs) < 2 {
+				log.Debug("  - omitted pool %q (die count: %d)", socket.Name()+"/die #0",
+					len(dieIDs))
+				continue
+			}
+			for _, dieID := range dieIDs {
+				die := p.NewDieNode(dieID, socket)
+				p.nodes[die.Name()] = die
+				for _, numaNodeID := range p.sys.Package(socketID).DieNodeIDs(dieID) {
+					numaDies[numaNodeID] = die
+				}
+				log.Debug("  + created pool %q", die.Parent().Name()+"/"+die.Name())
 			}
-			log.Debug("  + created pool %q", die.Parent().Name()+"/"+die.Name())
 		}
 	}
 
@@ -288,7 +291,7 @@ func (p *policy) assignNUMANodes(surrogates map[idset.ID]Node, xmem, dram map[id
 }
 
 // checkHWTopology verifies our otherwise implicit assumptions about the HW.
-func (p *policy) checkHWTopology() error {
+func (p *policy) checkHWTopology() (bool, error) {
 	// NUMA nodes (memory controllers) should not be shared by multiple sockets.
 	socketNodes := map[idset.ID]cpuset.CPUSet{}
 	for _, socketID := range p.sys.PackageIDs() {
@@ -303,7 +306,7 @@
 			if shared := nodes1.Intersection(nodes2); !shared.IsEmpty() {
 				log.Error("can't handle HW topology: sockets #%v, #%v share NUMA node(s) #%s",
 					id1, id2, shared.String())
-				return policyError("unhandled HW topology: sockets #%v, #%v share NUMA node(s) #%s",
+				return false, policyError("unhandled HW topology: sockets #%v, #%v share NUMA node(s) #%s",
 					id1, id2, shared.String())
 			}
 		}
@@ -320,12 +323,10 @@
 				}
 				nodes2 := idset.NewIDSet(pkg.DieNodeIDs(id2)...)
 				if shared := system.CPUSetFromIDSet(nodes1).Intersection(system.CPUSetFromIDSet(nodes2)); !shared.IsEmpty() {
-					log.Error("can't handle HW topology: "+
+					log.Error("will ignore dies: "+
 						"socket #%v, dies #%v,%v share NUMA node(s) #%s",
 						socketID, id1, id2, shared.String())
-					return policyError("unhandled HW topology: "+
-						"socket #%v, dies #%v,#%v share NUMA node(s) #%s",
-						socketID, id1, id2, shared.String())
+					return true, nil
 				}
 			}
 		}
@@ -339,13 +340,13 @@ func (p *policy) checkHWTopology() error {
 			if d1 != d2 {
 				log.Error("asymmetric NUMA distance (#%d, #%d): %d != %d",
 					from, to, d1, d2)
-				return policyError("asymmetric NUMA distance (#%d, #%d): %d != %d",
+				return false, policyError("asymmetric NUMA distance (#%d, #%d): %d != %d",
 					from, to, d1, d2)
 			}
 		}
 	}
 
-	return nil
+	return false, nil
 }
 
 // Pick a pool and allocate resource from it to the container.
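Note on the change, with a sketch: checkHWTopology() now reports dies sharing a NUMA node via an extra boolean instead of failing, and buildPoolsByTopology() uses that flag to skip die pools. The standalone Go program below illustrates just that decision; the dieNUMANodes type, the diesShareNUMANode helper, and the sample layouts are hypothetical stand-ins for the plugin's real topology queries (pkg.DieIDs(), pkg.DieNodeIDs()), not its API.

package main

import "fmt"

// dieNUMANodes maps a die ID to the NUMA node IDs attached to that die.
// Both the type and the sample data below are made up for illustration;
// the patched policy derives the same information from its system
// abstraction via pkg.DieIDs() and pkg.DieNodeIDs().
type dieNUMANodes map[int][]int

// diesShareNUMANode reports whether some NUMA node is attached to more
// than one die, i.e. the kind of topology the relaxed check now tolerates
// instead of rejecting.
func diesShareNUMANode(dies dieNUMANodes) bool {
	owner := map[int]int{} // NUMA node ID -> die ID that first claimed it
	for dieID, nodeIDs := range dies {
		for _, nodeID := range nodeIDs {
			if prev, ok := owner[nodeID]; ok && prev != dieID {
				return true
			}
			owner[nodeID] = dieID
		}
	}
	return false
}

func main() {
	layouts := map[string]dieNUMANodes{
		"per-die NUMA": {0: {0}, 1: {1}}, // each die has its own NUMA node
		"shared NUMA":  {0: {0}, 1: {0}}, // both dies sit behind NUMA node 0
	}
	for name, layout := range layouts {
		// Mirror the patched control flow: note the condition and omit
		// die pools instead of failing the whole topology check.
		if omitDies := diesShareNUMANode(layout); omitDies {
			fmt.Printf("%s: dies share a NUMA node, omitting die pools\n", name)
		} else {
			fmt.Printf("%s: creating die pools\n", name)
		}
	}
}

For the "shared NUMA" layout the sketch takes the same path the patched policy takes on such hardware: it logs the condition and proceeds without die-level pools rather than rejecting the topology.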