Skip to content

Commit

Permalink
Merge pull request #52 from thediveo/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
thediveo authored Jul 17, 2024
2 parents 11758be + 84351f4 commit 0f93a6a
Show file tree
Hide file tree
Showing 21 changed files with 739 additions and 45 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![GitHub](https://img.shields.io/github/license/thediveo/lxkns)](https://img.shields.io/github/license/thediveo/lxkns)

![build and test](https://github.com/thediveo/lxkns/workflows/build%20and%20test/badge.svg?branch=master)
![Coverage](https://img.shields.io/badge/Coverage-82.0%25-brightgreen)
![Coverage](https://img.shields.io/badge/Coverage-81.1%25-brightgreen)
![goroutines](https://img.shields.io/badge/go%20routines-not%20leaking-success)
![file descriptors](https://img.shields.io/badge/file%20descriptors-not%20leaking-success)
[![Go Report Card](https://goreportcard.com/badge/github.com/thediveo/lxkns)](https://goreportcard.com/report/github.com/thediveo/lxkns)
Expand Down
1 change: 1 addition & 0 deletions api/types/discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ var _ = Describe("discovery result JSON", func() {
"with-freezer": true,
"with-mounts": true,
"with-socket-processes": false,
"with-affinity-scheduling": false,
"labels": {},
"scanned-namespace-types": [
"time",
Expand Down
7 changes: 6 additions & 1 deletion cmd/lxkns/endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ func GetNamespacesHandler(cizer containerizer.Containerizer) http.HandlerFunc {
discover.WithFullDiscovery(),
discover.WithContainerizer(cizer),
discover.WithPIDMapper(), // recommended when using WithContainerizer.
discover.WithAffinityAndScheduling(),
)
// Note bene: set header before writing the header with the status code;
// actually makes sense, innit?
Expand All @@ -50,7 +51,11 @@ func GetNamespacesHandler(cizer containerizer.Containerizer) http.HandlerFunc {
// GetProcessesHandler returns the process table (including tasks) with
// namespace references, as JSON.
func GetProcessesHandler(w http.ResponseWriter, req *http.Request) {
disco := discover.Namespaces(discover.FromProcs(), discover.FromTasks())
disco := discover.Namespaces(
discover.FromProcs(),
discover.FromTasks(),
discover.WithAffinityAndScheduling(),
)

w.Header().Set("Content-Type", "application/json")

Expand Down
6 changes: 5 additions & 1 deletion discover/discover.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,13 @@ func Namespaces(options ...DiscoveryOption) *Result {
result.PIDMap = NewPIDMap(result)
}

// Optionally discover alive containers and relate the.
// Optionally discover alive containers and relate the containers to
// processes and vice versa.
discoverContainers(result)

// Pick up leader process CPU affinity and scheduling setup.
discoverAffinityScheduling(result)

// As a C oldie it gives me the shivers to return a pointer to what might
// look like an "auto" local struct ;)
return result
Expand Down
37 changes: 37 additions & 0 deletions discover/discovery_affinity_sched.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2024 Harald Albrecht.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux

package discover

import "github.com/thediveo/lxkns/model"

// discoverAffinityScheduling fills in the CPU affinity and scheduling
// information of the leader processes of all namespaces in the passed
// discovery result. It does nothing unless the DiscoverAffinityScheduling
// discovery option has been enabled.
func discoverAffinityScheduling(result *Result) {
	if result.Options.DiscoverAffinityScheduling {
		for kind := model.MountNS; kind < model.NamespaceTypesCount; kind++ {
			for _, namespace := range result.Namespaces[kind] {
				for _, proc := range namespace.Leaders() {
					// Only query leaders that don't carry affinity
					// information yet (presumably because the same process
					// leads several namespaces and was already handled).
					if proc.Affinity == nil {
						// Best effort: the outcome of the retrieval is
						// deliberately discarded, so a leader that cannot be
						// queried does not abort the discovery.
						_ = proc.RetrieveAffinityScheduling()
					}
				}
			}
		}
	}
}
30 changes: 20 additions & 10 deletions discover/discovery_opt.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,17 @@ type DiscoverOpts struct {
// If zero, defaults to discovering all namespaces.
NamespaceTypes species.NamespaceType `json:"-"`

ScanProcs bool `json:"from-procs"` // Scan processes for attached namespaces.
ScanTasks bool `json:"from-tasks"` // Scan all tasks for attached namespaces.
ScanFds bool `json:"from-fds"` // Scan open file descriptors for namespaces.
ScanBindmounts bool `json:"from-bindmounts"` // Scan bind-mounts for namespaces.
DiscoverHierarchy bool `json:"with-hierarchy"` // Discover the hierarchy of PID and user namespaces.
DiscoverOwnership bool `json:"with-ownership"` // Discover the ownership of non-user namespaces.
DiscoverFreezerState bool `json:"with-freezer"` // Discover the cgroup freezer state of processes.
DiscoverMounts bool `json:"with-mounts"` // Discover mount point hierarchy with mount paths and visibility.
DiscoverSocketProcesses bool `json:"with-socket-processes"` // Discover the processes related to specific socket inode numbers.
Labels map[string]string `json:"labels"` // Pass options (in form of labels) to decorators
ScanProcs bool `json:"from-procs"` // Scan processes for attached namespaces.
ScanTasks bool `json:"from-tasks"` // Scan all tasks for attached namespaces.
ScanFds bool `json:"from-fds"` // Scan open file descriptors for namespaces.
ScanBindmounts bool `json:"from-bindmounts"` // Scan bind-mounts for namespaces.
DiscoverHierarchy bool `json:"with-hierarchy"` // Discover the hierarchy of PID and user namespaces.
DiscoverOwnership bool `json:"with-ownership"` // Discover the ownership of non-user namespaces.
DiscoverFreezerState bool `json:"with-freezer"` // Discover the cgroup freezer state of processes.
DiscoverMounts bool `json:"with-mounts"` // Discover mount point hierarchy with mount paths and visibility.
DiscoverSocketProcesses bool `json:"with-socket-processes"` // Discover the processes related to specific socket inode numbers.
DiscoverAffinityScheduling bool `json:"with-affinity-scheduling"` // Discover CPU affinity and scheduling of leader processes.
Labels map[string]string `json:"labels"` // Pass options (in form of labels) to decorators

Containerizer containerizer.Containerizer `json:"-"` // Discover containers using containerizer.

Expand Down Expand Up @@ -88,6 +89,7 @@ func WithFullDiscovery() DiscoveryOption {
o.ScanTasks = true
o.DiscoverMounts = true
o.withPIDmap = true
o.DiscoverAffinityScheduling = true
}
}

Expand Down Expand Up @@ -131,6 +133,14 @@ func NotFromTasks() DiscoveryOption {
return func(o *DiscoverOpts) { o.ScanTasks = false }
}

// WithAffinityAndScheduling opts in to discovering the CPU affinity and
// scheduling of leader processes.
func WithAffinityAndScheduling() DiscoveryOption {
	return func(opts *DiscoverOpts) {
		opts.DiscoverAffinityScheduling = true
	}
}

// WithoutAffinityAndScheduling opts out of discovering the CPU affinity and
// scheduling of leader processes.
func WithoutAffinityAndScheduling() DiscoveryOption {
	return func(opts *DiscoverOpts) {
		opts.DiscoverAffinityScheduling = false
	}
}

// FromFds opts to find namespaces from the open file descriptors of processes.
func FromFds() DiscoveryOption {
return func(o *DiscoverOpts) { o.ScanFds = true }
Expand Down
199 changes: 199 additions & 0 deletions model/cpulist.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright 2024 Harald Albrecht.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.

package model

import (
"sync/atomic"
"unsafe"

"golang.org/x/sys/unix"
)

// CPUList is a list of CPU [from...to] ranges, where each range is a pair of
// CPU numbers with both the "from" and the "to" CPU being part of the range.
// CPU numbers start from zero.
type CPUList [][2]uint

// CPUSet is a CPU bit string, such as used for CPU affinity masks. See also
// [sched_getaffinity(2)].
//
// The bit string is stored as a sequence of 64 bit words: the least
// significant bit of the first word represents CPU 0, and each following word
// covers the next 64 CPU numbers.
//
// [sched_getaffinity(2)]: https://man7.org/linux/man-pages/man2/sched_getaffinity.2.html
type CPUSet []uint64

// setsize is the dynamically determined size of CPUSets on this system (size
// in uint64 words). This is usually smaller than the fixed-sized [unix.CPUSet]
// that Go's [unix.SchedGetaffinity] uses. It is kept in an atomic so that it
// can be read and updated from multiple goroutines.
var setsize atomic.Uint64

// wordbytesize is the size in bytes of a single CPUSet word; it is used to
// convert a set size in words into the byte length passed to the kernel.
var wordbytesize = uint64(unsafe.Sizeof(CPUSet{0}[0]))

// init seeds the cached CPUSet size with a single word; NewAffinityCPUList
// grows the cached size whenever it finds a single word to be too small.
func init() {
	setsize.Store(1)
}

// NewAffinityCPUList queries the CPU affinity mask of the process with the
// passed PID and returns it as a CPUList (list of CPU ranges). If the affinity
// mask cannot be retrieved, it returns a nil list together with the error
// reported by the kernel. If pid is zero, then the affinity CPU list of the
// calling thread is returned (make sure to have the OS-level thread locked to
// the calling goroutine in this case).
//
// Notes:
//   - we don't use [unix.SchedGetaffinity] as this is tied to the fixed size
//     [unix.CPUSet] type; instead, we dynamically figure out the size needed
//     and cache the size internally.
//   - retrieving the affinity CPU mask and then speed-running it to
//     generate the range list is roughly two orders of magnitude faster than
//     fetching “/proc/$PID/status” and looking for the “Cpus_allowed_list”,
//     because generating the broad status procfs file is expensive.
func NewAffinityCPUList(pid PIDType) (CPUList, error) {
	cachedWords := setsize.Load()
	words := cachedWords

	// Query the affinity mask, starting with the cached set size and doubling
	// the buffer for as long as the kernel rejects it as too small (EINVAL).
	var mask CPUSet
	for {
		mask = make([]uint64, words)
		// see also:
		// https://man7.org/linux/man-pages/man2/sched_setaffinity.2.html; we
		// use RawSyscall here instead of Syscall as we know that
		// SYS_SCHED_GETAFFINITY does not block, following Go's stdlib
		// implementation.
		_, _, errno := unix.RawSyscall(unix.SYS_SCHED_GETAFFINITY,
			uintptr(pid), uintptr(words*wordbytesize), uintptr(unsafe.Pointer(&mask[0])))
		if errno == 0 {
			break
		}
		if errno != unix.EINVAL {
			return nil, errno
		}
		words *= 2
	}

	// Publish the set size that worked. If swapping fails because another
	// goroutine changed the cached size in the meantime, then retry with the
	// freshly loaded size, unless that size is already larger than ours: in
	// this case the cached size must not be shrunk and we leave it alone.
	for cachedWords <= words && !setsize.CompareAndSwap(cachedWords, words) {
		cachedWords = setsize.Load()
	}

	return mask.NewCPUList(), nil
}

// NewCPUList converts the bitmap CPUSet into a list of CPU ranges, where each
// range covers a maximal run of consecutive set bits; such runs may span
// multiple CPUSet words. The returned list is empty (but never nil) if no bit
// is set at all.
//
// The conversion does not use any division or modulo operations; it gets by
// with additions and single bit position shifts only. CPUSet words (uint64's)
// consisting of only 0s or only 1s are handled as a whole, without looking at
// their individual bits.
func (s CPUSet) NewCPUList() CPUList {
	const allOnes = ^uint64(0)

	ranges := CPUList{}
	var (
		wordbase  uint // CPU number of the least significant bit of the current word.
		rangefrom uint // first CPU of the currently open range; valid only if open.
		open      bool // true while inside a run of set bits.
	)
	for _, word := range s {
		switch word {
		case 0:
			// A completely unset word ends an open range at the last CPU of
			// the preceding word.
			if open {
				ranges = append(ranges, [2]uint{rangefrom, wordbase - 1})
				open = false
			}
		case allOnes:
			// A completely set word either continues an open range or opens a
			// new one at the first CPU of this word.
			if !open {
				rangefrom = wordbase
				open = true
			}
		default:
			// A mixed word: walk its bits from LSB to MSB and open or close
			// a range wherever the bit value changes; the mask becomes zero
			// after it has been shifted beyond the MSB.
			cpu := wordbase
			for mask := uint64(1); mask != 0; mask <<= 1 {
				if isset := word&mask != 0; isset != open {
					if isset {
						rangefrom = cpu
					} else {
						ranges = append(ranges, [2]uint{rangefrom, cpu - 1})
					}
					open = isset
				}
				cpu++
			}
		}
		wordbase += 64
	}
	// A range still open at the end of the set extends up to and including
	// the last CPU of the last word.
	if open {
		ranges = append(ranges, [2]uint{rangefrom, wordbase - 1})
	}
	return ranges
}
Loading

0 comments on commit 0f93a6a

Please sign in to comment.