Skip to content

Commit

Permalink
ROX-2423 Fix collector crash due to missing proc data (#285)
Browse files Browse the repository at this point in the history
- Change sysdig submodule branch to ignore errors encountered while reading a procfs directory for a given process. The error is logged and procfs scraping continues.
- Add integration test with mock `/proc` to verify the fix
- Change integration tests to store the container logs from each integration test
  • Loading branch information
robbycochran authored Mar 10, 2020
1 parent 6d03672 commit c31d8e2
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 20 deletions.
16 changes: 5 additions & 11 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,7 @@ jobs:
export VM_CONFIG="circle_local_<< parameters.vm-config >>"
export COLLECTOR_REPO="stackrox/collector<<# parameters.use-rhel>>-rhel<</ parameters.use-rhel>>"
export COLLECTOR_IMAGE="${COLLECTOR_REPO}:${COLLECTOR_TAG}-base"
make -C "${SOURCE_ROOT}" integration-tests integration-tests-report
make -C "${SOURCE_ROOT}" integration-tests-missing-proc-scrape integration-tests integration-tests-report
[[ -z "$CIRCLE_BRANCH" ]] || gsutil cp ~/workspace/go/src/github.com/stackrox/collector/integration-tests/integration-test-report.xml "gs://stackrox-ci-results/circleci/collector/${CIRCLE_BRANCH}/$(date +%Y-%m-%d)-${CIRCLE_BUILD_NUM}/"
- store_test_results:
Expand All @@ -911,11 +911,8 @@ jobs:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/integration-test-report.xml
destination: "integration-test-report.xml"
- store_artifacts:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/collector.logs
destination: "collector.logs"
- store_artifacts:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/grpc-server.logs
destination: "grpc-server.logs"
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/container-logs
destination: "container-logs"
- store_artifacts:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/perf.json
destination: "kernel_module-circle_local_<< parameters.vm-config >>-perf.json"
Expand Down Expand Up @@ -993,11 +990,8 @@ jobs:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/integration-test-report.xml
destination: "<< parameters.collection_method >>-<< parameters.vm_type >>-<< parameters.image_family >>-integration-test-report.xml"
- store_artifacts:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/collector.logs
destination: "<< parameters.collection_method >>-<< parameters.vm_type >>-<< parameters.image_family >>-collector.logs"
- store_artifacts:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/grpc-server.logs
destination: "<< parameters.collection_method >>-<< parameters.vm_type >>-<< parameters.image_family >>-grpc-server.logs"
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/container-logs
destination: "<< parameters.collection_method >>-<< parameters.vm_type >>-<< parameters.image_family >>-container-logs"
- store_artifacts:
path: ~/workspace/go/src/github.com/stackrox/collector/integration-tests/perf.json
destination: "<< parameters.collection_method >>-<< parameters.vm_type >>-<< parameters.image_family >>-perf.json"
Expand Down
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ integration-tests-baseline:
integration-tests-process-network:
make -C integration-tests process-network

.PHONY: integration-tests-missing-proc-scrape
integration-tests-missing-proc-scrape:
make -C integration-tests missing-proc-scrape

.PHONY: integration-tests-process-network-rhel
integration-tests-process-network-rhel:
COLLECTOR_REPO="stackrox/collector-rhel" \
Expand All @@ -117,6 +121,11 @@ integration-tests-baseline-rhel:
COLLECTOR_REPO="stackrox/collector-rhel" \
make -C integration-tests baseline

.PHONY: integration-tests-missing-proc-scrape-rhel
integration-tests-missing-proc-scrape-rhel:
COLLECTOR_REPO="stackrox/collector-rhel" \
make -C integration-tests missing-proc-scrape

.PHONY: integration-tests-report
integration-tests-report:
make -C integration-tests report
Expand Down
8 changes: 8 additions & 0 deletions integration-tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ baseline: docker-clean
go test -timeout 30m -count=1 -v \
-run TestBenchmarkBaseline 2>&1 | tee -a integration-test.log

.PHONY: missing-proc-scrape
missing-proc-scrape: docker-clean
./scripts/create-fake-proc-dir.sh
go version
set -o pipefail ; \
go test -timeout 30m -count=1 -v \
-run TestMissingProcScrape 2>&1 | tee -a integration-test.log

.PHONY: report
report:
go get -u github.com/jstemmer/go-junit-report
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/bootstrap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func (s *BootstrapTestSuite) TestBootstrapScript() {

for name, tc := range tests {
s.T().Run(name, func(t *testing.T) {
collector := NewCollectorManager(s.executor)
collector := NewCollectorManager(s.executor, s.T().Name())
collector.DisableGrpcServer = true
collector.BootstrapOnly = true

Expand Down
17 changes: 12 additions & 5 deletions integration-tests/collector_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package integrationtests
import (
"fmt"
"io/ioutil"
"os"
"os/user"
"path/filepath"
"strings"

"github.com/boltdb/bolt"
Expand All @@ -19,9 +21,10 @@ type collectorManager struct {
GRPCServerImage string
DisableGrpcServer bool
BootstrapOnly bool
TestName string
}

func NewCollectorManager(e Executor) *collectorManager {
func NewCollectorManager(e Executor, name string) *collectorManager {
collectionMethod := ReadEnvVarWithDefault("COLLECTION_METHOD", "kernel_module")
if strings.Contains(collectionMethod, "module") {
collectionMethod = "kernel_module"
Expand Down Expand Up @@ -53,6 +56,7 @@ func NewCollectorManager(e Executor) *collectorManager {
GRPCServerImage: "stackrox/grpc-server:3.0.38.x-89-ga1bf2bc906",
Env: env,
Mounts: mounts,
TestName: name,
}
}

Expand Down Expand Up @@ -87,11 +91,11 @@ func (c *collectorManager) TearDown() error {
if _, err := c.executor.CopyFromHost(c.DBPath, c.DBPath); err != nil {
return err
}
c.captureLogs("grpc-server", "grpc-server.logs")
c.captureLogs("grpc-server")
c.killContainer("grpc-server")
}

c.captureLogs("collector", "collector.logs")
c.captureLogs("collector")
c.killContainer("collector")
return nil
}
Expand Down Expand Up @@ -148,12 +152,15 @@ func (c *collectorManager) launchCollector() error {
return err
}

func (c *collectorManager) captureLogs(containerName, logFile string) (string, error) {
func (c *collectorManager) captureLogs(containerName string) (string, error) {
logs, err := c.executor.Exec("docker", "logs", containerName)
if err != nil {
fmt.Printf("docker logs error (%v) for container %s: %s\n", err, containerName, logFile)
fmt.Printf("docker logs error (%v) for container %s\n", err, containerName)
return "", err
}
logDirectory := filepath.Join(".", "container-logs")
os.MkdirAll(logDirectory, os.ModePerm)
logFile := filepath.Join(logDirectory, c.TestName+"-"+containerName+".log")
err = ioutil.WriteFile(logFile, []byte(logs), 0644)
if err != nil {
return "", err
Expand Down
9 changes: 9 additions & 0 deletions integration-tests/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ var (
type Executor interface {
CopyFromHost(src string, dst string) (string, error)
PullImage(image string) error
IsContainerRunning(image string) (bool, error)
Exec(args ...string) (string, error)
ExecRetry(args ...string) (string, error)
}
Expand Down Expand Up @@ -163,6 +164,14 @@ func (e *executor) PullImage(image string) error {
return err
}

func (e *executor) IsContainerRunning(containerID string) (bool, error) {
result, err := e.Exec("docker", "inspect", containerID, "--format='{{.State.Running}}'")
if err != nil {
return false, nil
}
return result == "'true'", nil
}

func (e *localCommandBuilder) ExecCommand(execArgs ...string) *exec.Cmd {
return exec.Command(execArgs[0], execArgs[1:]...)
}
Expand Down
46 changes: 44 additions & 2 deletions integration-tests/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ func TestBenchmark(t *testing.T) {
suite.Run(t, new(BenchmarkCollectorTestSuite))
}

// TestMissingProcScrape only works with local fake proc directory
func TestMissingProcScrape(t *testing.T) {
if ReadEnvVarWithDefault("REMOTE_HOST_TYPE", "local") == "local" {
suite.Run(t, new(MissingProcScrapeTestSuite))
}
}

type IntegrationTestSuiteBase struct {
suite.Suite
db *bolt.DB
Expand Down Expand Up @@ -81,10 +88,14 @@ type BenchmarkBaselineTestSuite struct {
IntegrationTestSuiteBase
}

type MissingProcScrapeTestSuite struct {
IntegrationTestSuiteBase
}

func (s *BenchmarkCollectorTestSuite) SetupSuite() {
s.executor = NewExecutor()
s.StartContainerStats()
s.collector = NewCollectorManager(s.executor)
s.collector = NewCollectorManager(s.executor, s.T().Name())
s.metrics = map[string]float64{}

err := s.collector.Setup()
Expand Down Expand Up @@ -138,7 +149,7 @@ func (s *ProcessNetworkTestSuite) SetupSuite() {
s.metrics = map[string]float64{}
s.executor = NewExecutor()
s.StartContainerStats()
s.collector = NewCollectorManager(s.executor)
s.collector = NewCollectorManager(s.executor, s.T().Name())

err := s.collector.Setup()
require.NoError(s.T(), err)
Expand Down Expand Up @@ -261,6 +272,37 @@ func (s *ProcessNetworkTestSuite) TestNetworkFlows() {
fmt.Printf("ClientDetails from test: %s %s\n", s.clientContainer, s.clientIP)
}

func (s *MissingProcScrapeTestSuite) SetupSuite() {
_, err := os.Stat("/tmp/fake-proc")
assert.False(s.T(), os.IsNotExist(err), "Missing fake proc directory")

s.executor = NewExecutor()
s.collector = NewCollectorManager(s.executor, s.T().Name())

// Mount the fake proc directory created by 'create-fake-proc.sh'
s.collector.Mounts["/host/proc:ro"] = "/tmp/fake-proc"

err = s.collector.Setup()
require.NoError(s.T(), err)

err = s.collector.Launch()
require.NoError(s.T(), err)

time.Sleep(10 * time.Second)
}

func (s *MissingProcScrapeTestSuite) TestCollectorRunning() {
collectorRunning, err := s.executor.IsContainerRunning("collector")
require.NoError(s.T(), err)
assert.True(s.T(), collectorRunning, "Collector isn't running")
}

func (s *MissingProcScrapeTestSuite) TearDownSuite() {
err := s.collector.TearDown()
require.NoError(s.T(), err)
s.cleanupContainer([]string{"collector"})
}

func (s *IntegrationTestSuiteBase) launchContainer(args ...string) (string, error) {
cmd := []string{"docker", "run", "-d", "--name"}
cmd = append(cmd, args...)
Expand Down
17 changes: 17 additions & 0 deletions integration-tests/scripts/create-fake-proc-dir.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Creates an incomplete proc entry for a fake process

fake_pid="8989"
fake_proc_name="short"
fake_proc_dir="/tmp/fake-proc"
fake_proc="${fake_proc_dir}/${fake_pid}"

rm -rf "${fake_proc_dir}"

mkdir -p "${fake_proc}"

ln -s "/bin/sh" "${fake_proc}/exe"

echo "Name: ${fake_proc_name}" > "${fake_proc}/status"
echo "/bin/${fake_proc_name}" > "${fake_proc}/cmdline"
env > "${fake_proc}/environ"
2 changes: 1 addition & 1 deletion sysdig/src
Submodule src updated from 4c21d1 to 4748b4

0 comments on commit c31d8e2

Please sign in to comment.