OCPBUGS-46369: cleaning patterns while downloading report file
Introduce a predefined cleaner to remove sensitive data from files
collected by opct/sonobuoy that could otherwise expose data stored in
non-standard formats (secrets).
mtulio committed Dec 28, 2024
1 parent b2dccd7 commit fbe1127
Showing 5 changed files with 288 additions and 2 deletions.
39 changes: 38 additions & 1 deletion .github/workflows/e2e.yaml
@@ -177,4 +177,41 @@ jobs:
${OPCT} adm baseline get --name 4.16_None_latest --dump
echo -e "\n\t#>> Retrieve a baseline result by release and platform"
${OPCT} adm baseline get --release 4.15 --platform None
${OPCT} adm baseline get --release 4.15 --platform None
e2e-cmd_adm-cleaner:
name: "e2e-cmd_adm-cleaner"
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
name: opct-linux-amd64
path: /tmp/build/

- name: Preparing testdata
env:
ARTIFACT: fake-cleaner.tar.gz
CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64
LOCAL_TEST_DATA: /tmp/artifact.tar.gz
run: |
TEST_URL=${TEST_DATA_URL}/${ARTIFACT}
echo "> Downloading sample artifact: ${TEST_URL}"
wget -qO ${LOCAL_TEST_DATA} "${TEST_URL}"
echo "> Setting exec permissions to OPCT:"
chmod u+x ${CUSTOM_BUILD_PATH}
- name: "e2e parse metrics: opct adm cleaner"
env:
CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64
LOCAL_TEST_DATA: /tmp/artifact.tar.gz
run: |
${CUSTOM_BUILD_PATH} adm cleaner \
--input ${LOCAL_TEST_DATA} \
--output /tmp/redacted.tar.gz
mkdir data
tar xfz /tmp/redacted.tar.gz -C data
# Keep the long resource filename in a variable to prevent yaml-ci (line-length) issues
MCC=machineconfiguration.openshift.io_v1_controllerconfigs.json
jq .items[].spec.internalRegistryPullSecret data/resources/cluster/${MCC}
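
The jq command above inspects the field the cleaner is expected to redact in the extracted controllerconfigs resource. For context, a minimal standalone sketch of how the same kind of JSON Patch "replace" operation behaves with github.com/evanphx/json-patch; the sample document and values below are illustrative, not taken from this commit:

package main

import (
	"fmt"

	jsonpatch "github.com/evanphx/json-patch"
)

func main() {
	// Tiny stand-in for the controllerconfigs list; only the patched path matters here.
	doc := []byte(`{"items":[{"spec":{"internalRegistryPullSecret":"top-secret"}}]}`)

	// Same shape of patch the cleaner hard-codes for controllerconfigs.json.
	patch, err := jsonpatch.DecodePatch([]byte(`[
		{"op": "replace", "path": "/items/0/spec/internalRegistryPullSecret", "value": "REDACTED"}
	]`))
	if err != nil {
		panic(err)
	}

	redacted, err := patch.Apply(doc)
	if err != nil {
		panic(err)
	}
	// The secret value is now "REDACTED"; everything else is unchanged.
	fmt.Println(string(redacted))
}
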
162 changes: 162 additions & 0 deletions internal/cleaner/cleaner.go
@@ -0,0 +1,162 @@
package cleaner

import (
"archive/tar"
"bytes"
"compress/gzip"
"fmt"
"io"
"strings"

jsonpatch "github.com/evanphx/json-patch"
log "github.com/sirupsen/logrus"
)

var patches = map[string]string{
"resources/cluster/machineconfiguration.openshift.io_v1_controllerconfigs.json": `[
{
"op": "replace",
"path": "/items/0/spec/internalRegistryPullSecret",
"value": "REDACTED"
}
]`,
}

// ScanPatchTarGzipReaderFor scans the artifact stream for predefined patches,
// returning the cleaned artifact.
func ScanPatchTarGzipReaderFor(r io.Reader) (resp io.Reader, size int, err error) {
log.Debug("Scanning the artifact for patches...")
size = 0

// Create a gzip reader
gzipReader, err := gzip.NewReader(r)
if err != nil {
return nil, size, fmt.Errorf("unable to open gzip file: %w", err)
}
defer gzipReader.Close()

// Create a tar reader
tarReader := tar.NewReader(gzipReader)

// Create a buffer to store the updated tar.gz content
var buf bytes.Buffer
gzipWriter := gzip.NewWriter(&buf)
tarWriter := tar.NewWriter(gzipWriter)

// Find and process the desired file
var desiredFile []byte
for {
header, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, size, fmt.Errorf("unable to process file in archive: %w", err)
}

// Process pre-defined patches, including archives nested recursively inside the base.
if _, ok := patches[header.Name]; ok {
// Once a pre-defined/hardcoded patch matches the file in the stream, apply
// the patch according to the file extension. Currently only JSON patches
// are supported.
log.Debugf("Patch pattern matched for: %s", header.Name)
if strings.HasSuffix(header.Name, ".json") {
var patchedFile []byte
desiredFile, err = io.ReadAll(tarReader)
if err != nil {
log.Errorf("unable to read file in archive: %v", err)
return nil, size, fmt.Errorf("unable to read file in archive: %w", err)
}

// Apply JSON patch to the file
patchedFile, err = applyJSONPatch(header.Name, desiredFile)
if err != nil {
log.Errorf("unable to apply patch to file %s: %v", header.Name, err)
return nil, size, fmt.Errorf("unable to apply patch to file %s: %w", header.Name, err)
}

// Update the file size in the header
header.Size = int64(len(patchedFile))
log.Debugf("Patched %d bytes", header.Size)

// Write the updated file to stream.
if err := tarWriter.WriteHeader(header); err != nil {
log.Errorf("unable to write file header to new archive: %v", err)
return nil, size, fmt.Errorf("unable to write file header to new archive: %w", err)
}
if _, err := tarWriter.Write(patchedFile); err != nil {
log.Errorf("unable to write file data to new archive: %v", err)
return nil, size, fmt.Errorf("unable to write file data to new archive: %w", err)
}
} else {
log.Debugf("unknown extension, skipping patch for file %s", header.Name)
}

} else if strings.HasSuffix(header.Name, ".tar.gz") {
// Recursively scan nested .tar.gz files, rewriting them back into the enclosing archive.
// By default sonobuoy writes a base archive and the result archive(s) live inside it,
// so nested archives must be scanned recursively to apply the required, hardcoded,
// patches.
log.Debugf("Scanning tarball archive: %s", header.Name)
resp, size, err = ScanPatchTarGzipReaderFor(tarReader)
if err != nil {
return nil, size, fmt.Errorf("unable to apply patch to file %s: %w", header.Name, err)
}

// Update the file size in the header
header.Size = int64(size)

// Write the updated header and file content
buf := new(bytes.Buffer)
_, err := io.Copy(buf, resp)
if err != nil {
return nil, size, err
}

// write archive back to stream.
if err := tarWriter.WriteHeader(header); err != nil {
log.Errorf("unable to write file header to new archive: %v", err)
return nil, size, fmt.Errorf("unable to write file header to new archive: %w", err)
}
if _, err := tarWriter.Write(buf.Bytes()); err != nil {
log.Errorf("unable to write file data to new archive: %v", err)
return nil, size, fmt.Errorf("unable to write file data to new archive: %w", err)
}
} else {
// Copy other files as-is
if err := tarWriter.WriteHeader(header); err != nil {
return nil, size, fmt.Errorf("error streaming file header to new archive: %w", err)
}
if _, err := io.Copy(tarWriter, tarReader); err != nil {
return nil, size, fmt.Errorf("error streaming file data to new archive: %w", err)
}
}
}

// Close the writers
if err := tarWriter.Close(); err != nil {
return nil, size, fmt.Errorf("closing tarball: %w", err)
}
if err := gzipWriter.Close(); err != nil {
return nil, size, fmt.Errorf("closing gzip: %w", err)
}

// Return the updated tar.gz content as an io.Reader
size = buf.Len()
return bytes.NewReader(buf.Bytes()), size, nil
}

// applyJSONPatch applies the hard-coded patch registered for filepath to data, returning the cleaned file.
func applyJSONPatch(filepath string, data []byte) ([]byte, error) {
patch, err := jsonpatch.DecodePatch([]byte(patches[filepath]))
if err != nil {
return nil, fmt.Errorf("decoding patch: %w", err)
}

modified, err := patch.Apply(data)
if err != nil {
return nil, fmt.Errorf("applying patch: %w", err)
}

return modified, nil
}
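
To make the behavior above concrete, here is a minimal sketch of a round-trip test for ScanPatchTarGzipReaderFor that could live in the same package; the test name and archive content are illustrative, not part of this commit:

package cleaner

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"io"
	"strings"
	"testing"
)

func TestScanPatchRedactsPullSecret(t *testing.T) {
	// Build a small tar.gz containing only the file the cleaner patches.
	name := "resources/cluster/machineconfiguration.openshift.io_v1_controllerconfigs.json"
	body := []byte(`{"items":[{"spec":{"internalRegistryPullSecret":"top-secret"}}]}`)

	var in bytes.Buffer
	gz := gzip.NewWriter(&in)
	tw := tar.NewWriter(gz)
	if err := tw.WriteHeader(&tar.Header{Name: name, Mode: 0o644, Size: int64(len(body))}); err != nil {
		t.Fatal(err)
	}
	if _, err := tw.Write(body); err != nil {
		t.Fatal(err)
	}
	if err := tw.Close(); err != nil {
		t.Fatal(err)
	}
	if err := gz.Close(); err != nil {
		t.Fatal(err)
	}

	// Run the cleaner over the archive stream.
	cleaned, _, err := ScanPatchTarGzipReaderFor(&in)
	if err != nil {
		t.Fatal(err)
	}

	// Read the patched file back and verify the secret was replaced.
	gzr, err := gzip.NewReader(cleaned)
	if err != nil {
		t.Fatal(err)
	}
	tr := tar.NewReader(gzr)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
		if hdr.Name != name {
			continue
		}
		data, err := io.ReadAll(tr)
		if err != nil {
			t.Fatal(err)
		}
		if !strings.Contains(string(data), `"REDACTED"`) || strings.Contains(string(data), "top-secret") {
			t.Fatalf("expected pull secret to be redacted, got: %s", data)
		}
	}
}
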
79 changes: 79 additions & 0 deletions pkg/cmd/adm/cleaner.go
@@ -0,0 +1,79 @@
package adm

import (
"bufio"
"io"
"os"

"github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)

type cleanerArguments struct {
input string
output string
}

var cleanerArgs cleanerArguments
var cleanerCmd = &cobra.Command{
Use: "cleaner",
Example: "opct adm cleaner --input ./results.tar.gz --output ./results-cleaned.tar.gz",
Short: "Utility to apply pre-defined patches to existing result archive.",
Run: cleanerRun,
}

func init() {
cleanerCmd.Flags().StringVar(&cleanerArgs.input, "input", "", "Input archive file. Example: ./opct-xyz.tar.gz")
cleanerCmd.Flags().StringVar(&cleanerArgs.output, "output", "", "Output archive file. Example: ./opct-cleaned.tar.gz")
}

func cleanerRun(cmd *cobra.Command, args []string) {

if cleanerArgs.input == "" {
log.Error("missing argumet --input <archive.tar.gz>")
os.Exit(1)
}

if cleanerArgs.output == "" {
log.Error("missing argumet --output <new-archive.tar.gz>")
os.Exit(1)
}

log.Infof("Starting artifact cleaner for %s", cleanerArgs.input)

fin, err := os.Open(cleanerArgs.input)
if err != nil {
panic(err)
}

// close fin on exit and check for its returned error
defer func() {
if err := fin.Close(); err != nil {
panic(err)
}
}()

r := bufio.NewReader(fin)

// scanning for sensitive data
cleaned, _, err := cleaner.ScanPatchTarGzipReaderFor(r)
if err != nil {
panic(err)
}

// Create a new file
file, err := os.Create(cleanerArgs.output)
if err != nil {
panic(err)
}
defer file.Close()

// Write the cleaned data to the file
_, err = io.Copy(file, cleaned)
if err != nil {
panic(err)
}

log.Infof("Data successfully written to %s", cleanerArgs.output)
}
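
As a usage note, the manual flag checks at the top of cleanerRun could also be delegated to cobra itself; a minimal sketch of that alternative style, shown only as an assumption about an equivalent approach and not what this commit does:

func init() {
	cleanerCmd.Flags().StringVar(&cleanerArgs.input, "input", "", "Input archive file. Example: ./opct-xyz.tar.gz")
	cleanerCmd.Flags().StringVar(&cleanerArgs.output, "output", "", "Output archive file. Example: ./opct-cleaned.tar.gz")

	// cobra rejects the command with a usage error when a required flag is
	// missing, which would make the manual checks in cleanerRun unnecessary.
	for _, name := range []string{"input", "output"} {
		if err := cleanerCmd.MarkFlagRequired(name); err != nil {
			panic(err)
		}
	}
}
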
1 change: 1 addition & 0 deletions pkg/cmd/adm/root.go
@@ -24,6 +24,7 @@ func init() {
admCmd.AddCommand(parseEtcdLogsCmd)
admCmd.AddCommand(baseline.NewCmdBaseline())
admCmd.AddCommand(setupNodeCmd)
admCmd.AddCommand(cleanerCmd)
}

func NewCmdAdm() *cobra.Command {
9 changes: 8 additions & 1 deletion pkg/retrieve/retrieve.go
@@ -15,6 +15,7 @@ import (
config2 "github.com/vmware-tanzu/sonobuoy/pkg/config"
"golang.org/x/sync/errgroup"

"github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner"
"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg"
"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/status"
)
@@ -114,8 +115,14 @@ func writeResultsToDirectory(outputDir string, r io.Reader, ec <-chan error) ([]
var results []string
eg.Go(func() error { return <-ec })
eg.Go(func() error {
// scanning for sensitive data
scannedReader, _, err := cleaner.ScanPatchTarGzipReaderFor(r)
if err != nil {
return fmt.Errorf("error scanning results: %w", err)
}

// This untars the request itself, which is tar'd as just part of the API request, not the sonobuoy logic.
filesCreated, err := sonobuoyclient.UntarAll(r, outputDir, "")
filesCreated, err := sonobuoyclient.UntarAll(scannedReader, outputDir, "")
if err != nil {
return err
}
