From fbe1127b6bf9de3c4542a6c5146962f98dc721b3 Mon Sep 17 00:00:00 2001 From: Marco Braga Date: Tue, 17 Dec 2024 18:21:51 -0300 Subject: [PATCH] OCPBUGS-46369: cleaning patterns while downloading report file Introduce a predefined cleaner to remove sensitive data from files collected by opct/sonobuoy that may eventually expose data from non-standard formats (secrets). --- .github/workflows/e2e.yaml | 39 ++++++++- internal/cleaner/cleaner.go | 162 ++++++++++++++++++++++++++++++++++++ pkg/cmd/adm/cleaner.go | 79 ++++++++++++++++++ pkg/cmd/adm/root.go | 1 + pkg/retrieve/retrieve.go | 9 +- 5 files changed, 288 insertions(+), 2 deletions(-) create mode 100644 internal/cleaner/cleaner.go create mode 100644 pkg/cmd/adm/cleaner.go diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 5ef68d86..5e0ea431 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -177,4 +177,41 @@ jobs: ${OPCT} adm baseline get --name 4.16_None_latest --dump echo -e "\n\t#>> Retrieve a baseline result by release and platform" - ${OPCT} adm baseline get --release 4.15 --platform None \ No newline at end of file + ${OPCT} adm baseline get --release 4.15 --platform None + + e2e-cmd_adm-cleaner: + name: "e2e-cmd_adm-cleaner" + runs-on: ubuntu-latest + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: opct-linux-amd64 + path: /tmp/build/ + + - name: Preparing testdata + env: + ARTIFACT: fake-cleaner.tar.gz + CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64 + LOCAL_TEST_DATA: /tmp/artifact.tar.gz + run: | + TEST_URL=${TEST_DATA_URL}/${ARTIFACT} + echo "> Downloading sample artifact: ${TEST_URL}" + wget -qO ${LOCAL_TEST_DATA} "${TEST_URL}" + + echo "> Setting exec permissions to OPCT:" + chmod u+x ${CUSTOM_BUILD_PATH} + + - name: "e2e adm cleaner: opct adm cleaner" + env: + CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64 + LOCAL_TEST_DATA: /tmp/artifact.tar.gz + run: | + ${CUSTOM_BUILD_PATH} adm cleaner \ + --input 
${LOCAL_TEST_DATA} \ + --output /tmp/redacted.tar.gz + mkdir data + tar xfz /tmp/redacted.tar.gz -C data + # Prevent yaml-ci issues + MCC=machineconfiguration.openshift.io_v1_controllerconfigs.json + jq .items[].spec.internalRegistryPullSecret data/resources/cluster/${MCC} \ No newline at end of file diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go new file mode 100644 index 00000000..84d8f154 --- /dev/null +++ b/internal/cleaner/cleaner.go @@ -0,0 +1,162 @@ +package cleaner + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "fmt" + "io" + "strings" + + jsonpatch "github.com/evanphx/json-patch" + log "github.com/sirupsen/logrus" +) + +var patches = map[string]string{ + "resources/cluster/machineconfiguration.openshift.io_v1_controllerconfigs.json": `[ + { + "op": "replace", + "path": "/items/0/spec/internalRegistryPullSecret", + "value": "REDACTED" + } + ]`, +} + +// ScanPatchTarGzipReaderFor scan for patches the artifact stream, returning +// the cleaned artifact. +func ScanPatchTarGzipReaderFor(r io.Reader) (resp io.Reader, size int, err error) { + log.Debug("Scanning the artifact for patches...") + size = 0 + + // Create a gzip reader + gzipReader, err := gzip.NewReader(r) + if err != nil { + return nil, size, fmt.Errorf("unable to open gzip file: %w", err) + } + defer gzipReader.Close() + + // Create a tar reader + tarReader := tar.NewReader(gzipReader) + + // Create a buffer to store the updated tar.gz content + var buf bytes.Buffer + gzipWriter := gzip.NewWriter(&buf) + tarWriter := tar.NewWriter(gzipWriter) + + // Find and process the desired file + var desiredFile []byte + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, size, fmt.Errorf("unable to process file in archive: %w", err) + } + + // Processing pre-defined patches, including recursively archives inside base. 
+ if _, ok := patches[header.Name]; ok { + // Once the pre-defined/hardcoded patch matches with file stream, apply + // the path according to the extension. Currently only JSON patches + // are supported. + log.Debugf("Patch pattern matched for: %s", header.Name) + if strings.HasSuffix(header.Name, ".json") { + var patchedFile []byte + desiredFile, err = io.ReadAll(tarReader) + if err != nil { + log.Errorf("unable to read file in archive: %v", err) + return nil, size, fmt.Errorf("unable to read file in archive: %w", err) + } + + // Apply JSON patch to the file + patchedFile, err = applyJSONPatch(header.Name, desiredFile) + if err != nil { + log.Errorf("unable to apply patch to file %s: %v", header.Name, err) + return nil, size, fmt.Errorf("unable to apply patch to file %s: %w", header.Name, err) + } + + // Update the file size in the header + header.Size = int64(len(patchedFile)) + log.Debugf("Patched %d bytes", header.Size) + + // Write the updated file to stream. + if err := tarWriter.WriteHeader(header); err != nil { + log.Errorf("unable to write file header to new archive: %v", err) + return nil, size, fmt.Errorf("unable to write file header to new archive: %w", err) + } + if _, err := tarWriter.Write(patchedFile); err != nil { + log.Errorf("unable to write file data to new archive: %v", err) + return nil, size, fmt.Errorf("unable to write file data to new archive: %w", err) + } + } else { + log.Debugf("unknown extension, skipping patch for file %s", header.Name) + } + + } else if strings.HasSuffix(header.Name, ".tar.gz") { + // recursively scan for .tar.gz files rewriting it back to the original archive. + // by default sonobuoy writes a base archive, and the result(s) will be inside of + // the base. So it is required to recursively scan archives to find required, hardcoded, + // patches. 
+ log.Debugf("Scanning tarball archive: %s", header.Name) + resp, size, err = ScanPatchTarGzipReaderFor(tarReader) + if err != nil { + return nil, size, fmt.Errorf("unable to apply patch to file %s: %w", header.Name, err) + } + + // Update the file size in the header + header.Size = int64(size) + + // Write the updated header and file content + buf := new(bytes.Buffer) + _, err := io.Copy(buf, resp) + if err != nil { + return nil, size, err + } + + // write archive back to stream. + if err := tarWriter.WriteHeader(header); err != nil { + log.Errorf("unable to write file header to new archive: %v", err) + return nil, size, fmt.Errorf("unable to write file header to new archive: %w", err) + } + if _, err := tarWriter.Write(buf.Bytes()); err != nil { + log.Errorf("unable to write file data to new archive: %v", err) + return nil, size, fmt.Errorf("unable to write file data to new archive: %w", err) + } + } else { + // Copy other files as-is + if err := tarWriter.WriteHeader(header); err != nil { + return nil, size, fmt.Errorf("error streaming file header to new archive: %w", err) + } + if _, err := io.Copy(tarWriter, tarReader); err != nil { + return nil, size, fmt.Errorf("error streaming file data to new archive: %w", err) + } + } + } + + // Close the writers + if err := tarWriter.Close(); err != nil { + return nil, size, fmt.Errorf("closing tarball: %w", err) + } + if err := gzipWriter.Close(); err != nil { + return nil, size, fmt.Errorf("closing gzip: %w", err) + } + + // Return the updated tar.gz content as an io.Reader + size = len(buf.Bytes()) + return bytes.NewReader(buf.Bytes()), size, nil +} + +// applyJSONPatch apply hard coded patches to stream, returning the cleaned file. 
+func applyJSONPatch(filepath string, data []byte) ([]byte, error) { + patch, err := jsonpatch.DecodePatch([]byte(patches[filepath])) + if err != nil { + return nil, fmt.Errorf("decoding patch: %w", err) + } + + modified, err := patch.Apply(data) + if err != nil { + return nil, fmt.Errorf("applying patch: %w", err) + } + + return modified, nil +} diff --git a/pkg/cmd/adm/cleaner.go b/pkg/cmd/adm/cleaner.go new file mode 100644 index 00000000..6775425b --- /dev/null +++ b/pkg/cmd/adm/cleaner.go @@ -0,0 +1,79 @@ +package adm + +import ( + "bufio" + "io" + "os" + + "github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner" + log "github.com/sirupsen/logrus" + "github.com/spf13/cobra" +) + +type cleanerArguments struct { + input string + output string +} + +var cleanerArgs cleanerArguments +var cleanerCmd = &cobra.Command{ + Use: "cleaner", + Example: "opct adm cleaner --input ./results.tar.gz --output ./results-cleaned.tar.gz", + Short: "Utility to apply pre-defined patches to existing result archive.", + Run: cleanerRun, +} + +func init() { + cleanerCmd.Flags().StringVar(&cleanerArgs.input, "input", "", "Input archive file. Example: ./opct-xyz.tar.gz") + cleanerCmd.Flags().StringVar(&cleanerArgs.output, "output", "", "Output archive file. 
Example: ./opct-cleaned.tar.gz") +} + +func cleanerRun(cmd *cobra.Command, args []string) { + + if cleanerArgs.input == "" { + log.Error("missing argumet --input ") + os.Exit(1) + } + + if cleanerArgs.output == "" { + log.Error("missing argumet --output ") + os.Exit(1) + } + + log.Infof("Starting artifact cleaner for %s", cleanerArgs.input) + + fin, err := os.Open(cleanerArgs.input) + if err != nil { + panic(err) + } + + // close fi on exit and check for its returned error + defer func() { + if err := fin.Close(); err != nil { + panic(err) + } + }() + + r := bufio.NewReader(fin) + + // scanning for sensitive data + cleaned, _, err := cleaner.ScanPatchTarGzipReaderFor(r) + if err != nil { + panic(err) + } + + // Create a new file + file, err := os.Create(cleanerArgs.output) + if err != nil { + panic(err) + } + defer file.Close() + + // Write the cleaned data to the file + _, err = io.Copy(file, cleaned) + if err != nil { + panic(err) + } + + log.Infof("Data successfully written to %s", cleanerArgs.output) +} diff --git a/pkg/cmd/adm/root.go b/pkg/cmd/adm/root.go index 537049b4..22da4f58 100644 --- a/pkg/cmd/adm/root.go +++ b/pkg/cmd/adm/root.go @@ -24,6 +24,7 @@ func init() { admCmd.AddCommand(parseEtcdLogsCmd) admCmd.AddCommand(baseline.NewCmdBaseline()) admCmd.AddCommand(setupNodeCmd) + admCmd.AddCommand(cleanerCmd) } func NewCmdAdm() *cobra.Command { diff --git a/pkg/retrieve/retrieve.go b/pkg/retrieve/retrieve.go index 58ffcfd7..00861052 100644 --- a/pkg/retrieve/retrieve.go +++ b/pkg/retrieve/retrieve.go @@ -15,6 +15,7 @@ import ( config2 "github.com/vmware-tanzu/sonobuoy/pkg/config" "golang.org/x/sync/errgroup" + "github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner" "github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg" "github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/status" ) @@ -114,8 +115,14 @@ func writeResultsToDirectory(outputDir string, r io.Reader, ec <-chan error) ([] var results 
[]string eg.Go(func() error { return <-ec }) eg.Go(func() error { + // scanning for sensitive data + scannedReader, _, err := cleaner.ScanPatchTarGzipReaderFor(r) + if err != nil { + return fmt.Errorf("error scanning results: %w", err) + } + // This untars the request itself, which is tar'd as just part of the API request, not the sonobuoy logic. - filesCreated, err := sonobuoyclient.UntarAll(r, outputDir, "") + filesCreated, err := sonobuoyclient.UntarAll(scannedReader, outputDir, "") if err != nil { return err }