Skip to content

Commit 0ef29ce

Browse files
committed
Add memory policy support
Implement support for Linux memory policy as defined in the OCI runtime spec. PR: opencontainers/runtime-spec#1282. Signed-off-by: Antti Kervinen <[email protected]>
1 parent 77ead42 commit 0ef29ce

File tree

11 files changed

+298
-6
lines changed

11 files changed

+298
-6
lines changed

features.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ var featuresCommand = cli.Command{
5959
Enabled: &t,
6060
Schemata: &t,
6161
},
62+
MemoryPolicy: &features.MemoryPolicy{
63+
Modes: specconv.KnownMemoryPolicyModes(),
64+
Flags: specconv.KnownMemoryPolicyFlags(),
65+
},
6266
MountExtensions: &features.MountExtensions{
6367
IDMap: &features.IDMap{
6468
Enabled: &t,

internal/linux/linux.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package linux
22

33
import (
44
"os"
5+
"unsafe"
56

67
"golang.org/x/sys/unix"
78
)
@@ -72,3 +73,15 @@ func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
7273
})
7374
return os.NewSyscallError("sendmsg", err)
7475
}
76+
77+
// SetMempolicy wraps set_mempolicy.
78+
func SetMempolicy(mode uint, mask *unix.CPUSet) error {
79+
err := retryOnEINTR(func() error {
80+
_, _, errno := unix.Syscall(unix.SYS_SET_MEMPOLICY, uintptr(mode), uintptr(unsafe.Pointer(mask)), unsafe.Sizeof(*mask)*8)
81+
if errno != 0 {
82+
return errno
83+
}
84+
return nil
85+
})
86+
return os.NewSyscallError("set_mempolicy", err)
87+
}

libcontainer/configs/config.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ type Config struct {
214214
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
215215
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
216216

217+
// MemoryPolicy specifies NUMA memory policy for the container.
218+
MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"`
219+
217220
// RootlessEUID is set when the runc was launched with non-zero EUID.
218221
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
219222
// When RootlessEUID is set, runc creates a new userns for the container.
@@ -305,7 +308,8 @@ type CPUAffinity struct {
305308
Initial, Final *unix.CPUSet
306309
}
307310

308-
func toCPUSet(str string) (*unix.CPUSet, error) {
311+
// ToCPUSet parses a string in list format into a unix.CPUSet, e.g. "0-3,5,7-9".
312+
func ToCPUSet(str string) (*unix.CPUSet, error) {
309313
if str == "" {
310314
return nil, nil
311315
}
@@ -356,7 +360,7 @@ func toCPUSet(str string) (*unix.CPUSet, error) {
356360
}
357361
}
358362
if s.Count() == 0 {
359-
return nil, fmt.Errorf("no CPUs found in %q", str)
363+
return nil, fmt.Errorf("no members found in set %q", str)
360364
}
361365

362366
return s, nil
@@ -367,11 +371,11 @@ func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
367371
if sa == nil {
368372
return nil, nil
369373
}
370-
initial, err := toCPUSet(sa.Initial)
374+
initial, err := ToCPUSet(sa.Initial)
371375
if err != nil {
372376
return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
373377
}
374-
final, err := toCPUSet(sa.Final)
378+
final, err := ToCPUSet(sa.Final)
375379
if err != nil {
376380
return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
377381
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package configs
2+
3+
import "golang.org/x/sys/unix"
4+
5+
// Memory policy modes and mode flags. The identifiers deliberately keep
// their ALL_CAPS spelling, hence the linter suppression below.
//
//nolint:revive // ignore ALL_CAPS errors from revive and 2nd staticcheck
const (
	// Memory policy modes, numbered consecutively starting at zero.
	MPOL_DEFAULT             = 0
	MPOL_PREFERRED           = 1
	MPOL_BIND                = 2
	MPOL_INTERLEAVE          = 3
	MPOL_LOCAL               = 4
	MPOL_PREFERRED_MANY      = 5
	MPOL_WEIGHTED_INTERLEAVE = 6

	// Memory policy mode flags; each one occupies a single distinct bit.
	MPOL_F_STATIC_NODES   = 1 << 15
	MPOL_F_RELATIVE_NODES = 1 << 14
	MPOL_F_NUMA_BALANCING = 1 << 13
)
19+
20+
// LinuxMemoryPolicy contains memory policy configuration.
21+
type LinuxMemoryPolicy struct {
22+
// Mode specifies memory policy mode without mode flags. See
23+
// set_mempolicy() documentation for details.
24+
Mode uint
25+
// Flags contains mode flags.
26+
Flags []uint
27+
// Nodes contains NUMA nodes to which the mode applies.
28+
Nodes *unix.CPUSet
29+
}

libcontainer/configs/tocpuset_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ func TestToCPUSet(t *testing.T) {
5858

5959
for _, tc := range testCases {
6060
t.Run(tc.in, func(t *testing.T) {
61-
out, err := toCPUSet(tc.in)
62-
t.Logf("toCPUSet(%q) = %v (error: %v)", tc.in, out, err)
61+
out, err := ToCPUSet(tc.in)
62+
t.Logf("ToCPUSet(%q) = %v (error: %v)", tc.in, out, err)
6363
// Check the error.
6464
if tc.isErr {
6565
if err == nil {

libcontainer/configs/validate/validator.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ func Validate(config *configs.Config) error {
3333
mountsStrict,
3434
scheduler,
3535
ioPriority,
36+
memoryPolicy,
3637
}
3738
for _, c := range checks {
3839
if err := c(config); err != nil {
@@ -482,3 +483,26 @@ func ioPriority(config *configs.Config) error {
482483

483484
return nil
484485
}
486+
487+
func memoryPolicy(config *configs.Config) error {
488+
mpol := config.MemoryPolicy
489+
if mpol == nil {
490+
return nil
491+
}
492+
switch mpol.Mode {
493+
case configs.MPOL_DEFAULT, configs.MPOL_LOCAL:
494+
if mpol.Nodes != nil && mpol.Nodes.Count() != 0 {
495+
return fmt.Errorf("memory policy mode requires 0 nodes but got %d", mpol.Nodes.Count())
496+
}
497+
case configs.MPOL_BIND, configs.MPOL_INTERLEAVE,
498+
configs.MPOL_PREFERRED_MANY, configs.MPOL_WEIGHTED_INTERLEAVE:
499+
if mpol.Nodes == nil || mpol.Nodes.Count() == 0 {
500+
return fmt.Errorf("memory policy mode requires at least one node but got 0")
501+
}
502+
case configs.MPOL_PREFERRED:
503+
// Zero or more nodes are allowed by the kernel.
504+
default:
505+
return fmt.Errorf("invalid memory policy mode: %d", mpol.Mode)
506+
}
507+
return nil
508+
}

libcontainer/init_linux.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,18 @@ func setupIOPriority(config *initConfig) error {
659659
return nil
660660
}
661661

662+
func setupMemoryPolicy(config *configs.Config) error {
663+
mpol := config.MemoryPolicy
664+
if mpol == nil {
665+
return nil
666+
}
667+
modeWithFlags := mpol.Mode
668+
for _, flag := range mpol.Flags {
669+
modeWithFlags |= flag
670+
}
671+
return linux.SetMempolicy(modeWithFlags, config.MemoryPolicy.Nodes)
672+
}
673+
662674
func setupPersonality(config *configs.Config) error {
663675
return system.SetLinuxPersonality(config.Personality.Domain)
664676
}

libcontainer/setns_init_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ func (l *linuxSetnsInit) Init() error {
8080
if err := setupIOPriority(l.config); err != nil {
8181
return err
8282
}
83+
84+
if err := setupMemoryPolicy(l.config.Config); err != nil {
85+
return err
86+
}
87+
8388
// Tell our parent that we're ready to exec. This must be done before the
8489
// Seccomp rules have been applied, because we need to be able to read and
8590
// write to a socket.

libcontainer/specconv/spec_linux.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ var (
4141
flag int
4242
}
4343
complexFlags map[string]func(*configs.Mount)
44+
mpolModeMap map[specs.MemoryPolicyModeType]uint
45+
mpolModeFMap map[specs.MemoryPolicyFlagType]uint
4446
)
4547

4648
func initMaps() {
@@ -148,6 +150,22 @@ func initMaps() {
148150
m.IDMapping.Recursive = true
149151
},
150152
}
153+
154+
mpolModeMap = map[specs.MemoryPolicyModeType]uint{
155+
specs.MpolDefault: configs.MPOL_DEFAULT,
156+
specs.MpolPreferred: configs.MPOL_PREFERRED,
157+
specs.MpolBind: configs.MPOL_BIND,
158+
specs.MpolInterleave: configs.MPOL_INTERLEAVE,
159+
specs.MpolLocal: configs.MPOL_LOCAL,
160+
specs.MpolPreferredMany: configs.MPOL_PREFERRED_MANY,
161+
specs.MpolWeightedInterleave: configs.MPOL_WEIGHTED_INTERLEAVE,
162+
}
163+
164+
mpolModeFMap = map[specs.MemoryPolicyFlagType]uint{
165+
specs.MpolFStaticNodes: configs.MPOL_F_STATIC_NODES,
166+
specs.MpolFRelativeNodes: configs.MPOL_F_RELATIVE_NODES,
167+
specs.MpolFNumaBalancing: configs.MPOL_F_NUMA_BALANCING,
168+
}
151169
})
152170
}
153171

@@ -184,6 +202,30 @@ func KnownMountOptions() []string {
184202
return res
185203
}
186204

205+
// KnownMemoryPolicyModes returns the list of the known memory policy modes.
206+
// Used by `runc features`.
207+
func KnownMemoryPolicyModes() []string {
208+
initMaps()
209+
var res []string
210+
for k := range mpolModeMap {
211+
res = append(res, string(k))
212+
}
213+
sort.Strings(res)
214+
return res
215+
}
216+
217+
// KnownMemoryPolicyFlags returns the list of the known memory policy mode flags.
218+
// Used by `runc features`.
219+
func KnownMemoryPolicyFlags() []string {
220+
initMaps()
221+
var res []string
222+
for k := range mpolModeFMap {
223+
res = append(res, string(k))
224+
}
225+
sort.Strings(res)
226+
return res
227+
}
228+
187229
// AllowedDevices is the set of devices which are automatically included for
188230
// all containers.
189231
//
@@ -468,6 +510,28 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
468510
MemBwSchema: spec.Linux.IntelRdt.MemBwSchema,
469511
}
470512
}
513+
if spec.Linux.MemoryPolicy != nil {
514+
var ok bool
515+
var err error
516+
specMp := spec.Linux.MemoryPolicy
517+
confMp := &configs.LinuxMemoryPolicy{}
518+
confMp.Mode, ok = mpolModeMap[specMp.Mode]
519+
if !ok {
520+
return nil, fmt.Errorf("invalid memory policy mode %q", specMp.Mode)
521+
}
522+
confMp.Nodes, err = configs.ToCPUSet(specMp.Nodes)
523+
if err != nil {
524+
return nil, fmt.Errorf("invalid memory policy nodes %q: %w", specMp.Nodes, err)
525+
}
526+
for _, specFlag := range specMp.Flags {
527+
confFlag, ok := mpolModeFMap[specFlag]
528+
if !ok {
529+
return nil, fmt.Errorf("invalid memory policy flag %q", specFlag)
530+
}
531+
confMp.Flags = append(confMp.Flags, confFlag)
532+
}
533+
config.MemoryPolicy = confMp
534+
}
471535
if spec.Linux.Personality != nil {
472536
if len(spec.Linux.Personality.Flags) > 0 {
473537
logrus.Warnf("ignoring unsupported personality flags: %+v because personality flag has not supported at this time", spec.Linux.Personality.Flags)

libcontainer/standard_init_linux.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ func (l *linuxStandardInit) Init() error {
164164
return err
165165
}
166166

167+
if err := setupMemoryPolicy(l.config.Config); err != nil {
168+
return err
169+
}
170+
167171
// Tell our parent that we're ready to exec. This must be done before the
168172
// Seccomp rules have been applied, because we need to be able to read and
169173
// write to a socket.

0 commit comments

Comments
 (0)