diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 08edf5b..d708f68 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -12,6 +12,7 @@ permissions:
 env:
   GO_VERSION: 1.22
   GOLANGCI_LINT_VERSION: v1.59
+  TEST_DATA_URL: "https://d23912a6309zf7.cloudfront.net/ci"
 
 jobs:
   e2e-cmd_report:
@@ -26,18 +27,13 @@ jobs:
 
       - name: Running report
         env:
-          BUCKET: openshift-provider-certification
-          REGION: us-west-2
-          OPCT_MODE: v0.4.0/default
-          EXEC_MODE: default
           ARTIFACT: 4.15.0-20240228-HighlyAvailable-vsphere-None.tar.gz
           OPCT: /tmp/build/opct-linux-amd64
         run: |
-          URI=${OPCT_MODE}/${ARTIFACT}
-          URL=https://${BUCKET}.s3.${REGION}.amazonaws.com/${URI}
+          TEST_URL=${TEST_DATA_URL}/${ARTIFACT}
 
-          echo "> Downloading sample artifact: ${URL}"
-          wget -qO /tmp/result.tar.gz "${URL}"
+          echo "> Downloading sample artifact: ${TEST_URL}"
+          wget -qO /tmp/result.tar.gz "${TEST_URL}"
 
           echo "> Setting run permissions to OPCT:"
           chmod u+x ${OPCT}
@@ -64,16 +60,14 @@ jobs:
 
       - name: Preparing testdata
         env:
-          BUCKET: openshift-provider-certification
-          REGION: us-west-2
-          VERSION: "testdata/must-gather-etcd-logs.tar.gz"
+          ARTIFACT: "must-gather-etcd-logs.tar.gz"
           CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64
           LOCAL_TEST_DATA: /tmp/must-gather.tar.gz
           LOCAL_TEST_DATA_DIR: /tmp/must-gather
         run: |
-          URL=https://${BUCKET}.s3.${REGION}.amazonaws.com
-          echo "> Downloading sample artifact: ${URL}/${VERSION}"
-          wget -qO ${LOCAL_TEST_DATA} "${URL}/${VERSION}"
+          TEST_URL=${TEST_DATA_URL}/${ARTIFACT}
+          echo "> Downloading sample artifact: ${TEST_URL}"
+          wget -qO ${LOCAL_TEST_DATA} "${TEST_URL}"
 
           echo "> Setting run permissions to OPCT:"
           chmod u+x ${CUSTOM_BUILD_PATH}
@@ -130,17 +124,15 @@ jobs:
 
       - name: Preparing testdata
         env:
-          BUCKET: openshift-provider-certification
-          REGION: us-west-2
-          PREFIX: testdata/ci-external-aws-ccm_
+          PREFIX: ci-external-aws-ccm_
           VERSION: 1757495441294888960-artifacts_must-gather-metrics.tar.xz
           CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64
           LOCAL_TEST_DATA: /tmp/opct-metrics.tar.xz
         run: |
-          DATA_VERSION=${PREFIX}${VERSION}
-          URL=https://${BUCKET}.s3.${REGION}.amazonaws.com
-          echo "> Downloading sample artifact: ${URL}/${DATA_VERSION}"
-          wget -qO ${LOCAL_TEST_DATA} "${URL}/${DATA_VERSION}"
+          ARTIFACT=${PREFIX}${VERSION}
+          TEST_URL=${TEST_DATA_URL}/${ARTIFACT}
+          echo "> Downloading sample artifact: ${TEST_URL}"
+          wget -qO ${LOCAL_TEST_DATA} "${TEST_URL}"
 
           echo "> Setting exec permissions to OPCT:"
           chmod u+x ${CUSTOM_BUILD_PATH}
@@ -186,3 +178,40 @@ jobs:
 
           echo -e "\n\t#>> Retrieve a baseline result by release and platform"
           ${OPCT} adm baseline get --release 4.15 --platform None
+
+  e2e-cmd_adm-cleaner:
+    name: "e2e-cmd_adm-cleaner"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: opct-linux-amd64
+          path: /tmp/build/
+
+      - name: Preparing testdata
+        env:
+          ARTIFACT: fake-cleaner.tar.gz
+          CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64
+          LOCAL_TEST_DATA: /tmp/artifact.tar.gz
+        run: |
+          TEST_URL=${TEST_DATA_URL}/${ARTIFACT}
+          echo "> Downloading sample artifact: ${TEST_URL}"
+          wget -qO ${LOCAL_TEST_DATA} "${TEST_URL}"
+
+          echo "> Setting exec permissions to OPCT:"
+          chmod u+x ${CUSTOM_BUILD_PATH}
+
+      - name: "e2e cleaner: opct adm cleaner"
+        env:
+          CUSTOM_BUILD_PATH: /tmp/build/opct-linux-amd64
+          LOCAL_TEST_DATA: /tmp/artifact.tar.gz
+        run: |
+          ${CUSTOM_BUILD_PATH} adm cleaner \
+            --input ${LOCAL_TEST_DATA} \
+            --output /tmp/redacted.tar.gz
+          mkdir data
+          tar xfz /tmp/redacted.tar.gz -C data
+          # Prevent yaml-ci issues
+          MCC=machineconfiguration.openshift.io_v1_controllerconfigs.json
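+          # The cleaner must have replaced this field with "REDACTED"; print it for verification.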
+          jq .items[].spec.internalRegistryPullSecret data/resources/cluster/${MCC}
diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go
new file mode 100644
index 0000000..84d8f15
--- /dev/null
+++ b/internal/cleaner/cleaner.go
@@ -0,0 +1,162 @@
+package cleaner
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"fmt"
+	"io"
+	"strings"
+
+	jsonpatch "github.com/evanphx/json-patch"
+	log "github.com/sirupsen/logrus"
+)
+
+var patches = map[string]string{
+	"resources/cluster/machineconfiguration.openshift.io_v1_controllerconfigs.json": `[
+		{
+			"op": "replace",
+			"path": "/items/0/spec/internalRegistryPullSecret",
+			"value": "REDACTED"
+		}
+	]`,
+}
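+
+// Each entry above is a standard JSON Patch (RFC 6902) document, as implemented
+// by github.com/evanphx/json-patch, keyed by the file path inside the result
+// archive. To redact another field, map its archive path to a patch document,
+// for example (illustrative path and pointer, not shipped by default):
+//
+//	"resources/cluster/example.json": `[{"op": "replace", "path": "/items/0/spec/token", "value": "REDACTED"}]`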
+
+// ScanPatchTarGzipReaderFor scans the artifact stream for pre-defined patches,
+// returning the cleaned artifact.
+func ScanPatchTarGzipReaderFor(r io.Reader) (resp io.Reader, size int, err error) {
+	log.Debug("Scanning the artifact for patches...")
+	size = 0
+
+	// Create a gzip reader
+	gzipReader, err := gzip.NewReader(r)
+	if err != nil {
+		return nil, size, fmt.Errorf("unable to open gzip file: %w", err)
+	}
+	defer gzipReader.Close()
+
+	// Create a tar reader
+	tarReader := tar.NewReader(gzipReader)
+
+	// Create a buffer to store the updated tar.gz content
+	var buf bytes.Buffer
+	gzipWriter := gzip.NewWriter(&buf)
+	tarWriter := tar.NewWriter(gzipWriter)
+
+	// Find and process the desired file
+	var desiredFile []byte
+	for {
+		header, err := tarReader.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, size, fmt.Errorf("unable to process file in archive: %w", err)
+		}
+
+		// Process pre-defined patches, recursing into archives nested inside the base.
+		if _, ok := patches[header.Name]; ok {
+			// Once a pre-defined/hardcoded patch matches the file in the stream,
+			// apply the patch according to the file extension. Currently only
+			// JSON patches are supported.
+			log.Debugf("Patch pattern matched for: %s", header.Name)
+			if strings.HasSuffix(header.Name, ".json") {
+				var patchedFile []byte
+				desiredFile, err = io.ReadAll(tarReader)
+				if err != nil {
+					log.Errorf("unable to read file in archive: %v", err)
+					return nil, size, fmt.Errorf("unable to read file in archive: %w", err)
+				}
+
+				// Apply JSON patch to the file
+				patchedFile, err = applyJSONPatch(header.Name, desiredFile)
+				if err != nil {
+					log.Errorf("unable to apply patch to file %s: %v", header.Name, err)
+					return nil, size, fmt.Errorf("unable to apply patch to file %s: %w", header.Name, err)
+				}
+
+				// Update the file size in the header
+				header.Size = int64(len(patchedFile))
+				log.Debugf("Patched %d bytes", header.Size)
+
+				// Write the updated file to stream.
+				if err := tarWriter.WriteHeader(header); err != nil {
+					log.Errorf("unable to write file header to new archive: %v", err)
+					return nil, size, fmt.Errorf("unable to write file header to new archive: %w", err)
+				}
+				if _, err := tarWriter.Write(patchedFile); err != nil {
+					log.Errorf("unable to write file data to new archive: %v", err)
+					return nil, size, fmt.Errorf("unable to write file data to new archive: %w", err)
+				}
+			} else {
+				log.Debugf("unknown extension, skipping patch for file %s", header.Name)
+				// No patch was applied; copy the file unmodified so it is not
+				// dropped from the cleaned archive.
+				if err := tarWriter.WriteHeader(header); err != nil {
+					return nil, size, fmt.Errorf("error streaming file header to new archive: %w", err)
+				}
+				if _, err := io.Copy(tarWriter, tarReader); err != nil {
+					return nil, size, fmt.Errorf("error streaming file data to new archive: %w", err)
+				}
+			}
+
+		} else if strings.HasSuffix(header.Name, ".tar.gz") {
+			// Recursively scan .tar.gz files, rewriting them back to the original
+			// archive. By default sonobuoy writes a base archive, and the result(s)
+			// will be inside of the base. So it is required to recursively scan
+			// archives to find the required, hardcoded patches.
+			log.Debugf("Scanning tarball archive: %s", header.Name)
+			resp, size, err = ScanPatchTarGzipReaderFor(tarReader)
+			if err != nil {
+				return nil, size, fmt.Errorf("unable to apply patch to file %s: %w", header.Name, err)
+			}
+
+			// Update the file size in the header
+			header.Size = int64(size)
+
+			// Write the updated header and file content
+			buf := new(bytes.Buffer)
+			_, err := io.Copy(buf, resp)
+			if err != nil {
+				return nil, size, err
+			}
+
+			// Write the nested archive back to the stream.
+			if err := tarWriter.WriteHeader(header); err != nil {
+				log.Errorf("unable to write file header to new archive: %v", err)
+				return nil, size, fmt.Errorf("unable to write file header to new archive: %w", err)
+			}
+			if _, err := tarWriter.Write(buf.Bytes()); err != nil {
+				log.Errorf("unable to write file data to new archive: %v", err)
+				return nil, size, fmt.Errorf("unable to write file data to new archive: %w", err)
+			}
+		} else {
+			// Copy other files as-is
+			if err := tarWriter.WriteHeader(header); err != nil {
+				return nil, size, fmt.Errorf("error streaming file header to new archive: %w", err)
+			}
+			if _, err := io.Copy(tarWriter, tarReader); err != nil {
+				return nil, size, fmt.Errorf("error streaming file data to new archive: %w", err)
+			}
+		}
+	}
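+
+	// NOTE: the cleaned archive is assembled in the in-memory buffer above, so
+	// peak memory usage grows with the size of the input artifact.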
+
+	// Close the writers
+	if err := tarWriter.Close(); err != nil {
+		return nil, size, fmt.Errorf("closing tarball: %w", err)
+	}
+	if err := gzipWriter.Close(); err != nil {
+		return nil, size, fmt.Errorf("closing gzip: %w", err)
+	}
+
+	// Return the updated tar.gz content as an io.Reader
+	size = buf.Len()
+	return bytes.NewReader(buf.Bytes()), size, nil
+}
+
+// applyJSONPatch applies the hardcoded patch registered for filepath to data,
+// returning the patched file.
+func applyJSONPatch(filepath string, data []byte) ([]byte, error) {
+	patch, err := jsonpatch.DecodePatch([]byte(patches[filepath]))
+	if err != nil {
+		return nil, fmt.Errorf("decoding patch: %w", err)
+	}
+
+	modified, err := patch.Apply(data)
+	if err != nil {
+		return nil, fmt.Errorf("applying patch: %w", err)
+	}
+
+	return modified, nil
+}
diff --git a/pkg/cmd/adm/cleaner.go b/pkg/cmd/adm/cleaner.go
new file mode 100644
index 0000000..6775425
--- /dev/null
+++ b/pkg/cmd/adm/cleaner.go
@@ -0,0 +1,79 @@
+package adm
+
+import (
+	"bufio"
+	"io"
+	"os"
+
+	"github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner"
+	log "github.com/sirupsen/logrus"
+	"github.com/spf13/cobra"
+)
+
+type cleanerArguments struct {
+	input  string
+	output string
+}
+
+var cleanerArgs cleanerArguments
+var cleanerCmd = &cobra.Command{
+	Use:     "cleaner",
+	Example: "opct adm cleaner --input ./results.tar.gz --output ./results-cleaned.tar.gz",
+	Short:   "Utility to apply pre-defined patches to an existing result archive.",
+	Run:     cleanerRun,
+}
+
+func init() {
+	cleanerCmd.Flags().StringVar(&cleanerArgs.input, "input", "", "Input archive file. Example: ./opct-xyz.tar.gz")
+	cleanerCmd.Flags().StringVar(&cleanerArgs.output, "output", "", "Output archive file. Example: ./opct-cleaned.tar.gz")
+}
+
+func cleanerRun(cmd *cobra.Command, args []string) {
+	if cleanerArgs.input == "" {
+		log.Error("missing argument --input")
+		os.Exit(1)
+	}
+
+	if cleanerArgs.output == "" {
+		log.Error("missing argument --output")
+		os.Exit(1)
+	}
+
+	log.Infof("Starting artifact cleaner for %s", cleanerArgs.input)
+
+	fin, err := os.Open(cleanerArgs.input)
+	if err != nil {
+		panic(err)
+	}
+
+	// close fin on exit and check for its returned error
+	defer func() {
+		if err := fin.Close(); err != nil {
+			panic(err)
+		}
+	}()
+
+	r := bufio.NewReader(fin)
+
+	// scanning for sensitive data
+	cleaned, _, err := cleaner.ScanPatchTarGzipReaderFor(r)
+	if err != nil {
+		panic(err)
+	}
+
+	// Create a new file
+	file, err := os.Create(cleanerArgs.output)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+
+	// Write the cleaned data to the file
+	_, err = io.Copy(file, cleaned)
+	if err != nil {
+		panic(err)
+	}
+
+	log.Infof("Data successfully written to %s", cleanerArgs.output)
+}
diff --git a/pkg/cmd/adm/root.go b/pkg/cmd/adm/root.go
index 537049b..22da4f5 100644
--- a/pkg/cmd/adm/root.go
+++ b/pkg/cmd/adm/root.go
@@ -24,6 +24,7 @@ func init() {
 	admCmd.AddCommand(parseEtcdLogsCmd)
 	admCmd.AddCommand(baseline.NewCmdBaseline())
 	admCmd.AddCommand(setupNodeCmd)
+	admCmd.AddCommand(cleanerCmd)
 }
 
 func NewCmdAdm() *cobra.Command {
diff --git a/pkg/retrieve/retrieve.go b/pkg/retrieve/retrieve.go
index 58ffcfd..0086105 100644
--- a/pkg/retrieve/retrieve.go
+++ b/pkg/retrieve/retrieve.go
@@ -15,6 +15,7 @@ import (
 	config2 "github.com/vmware-tanzu/sonobuoy/pkg/config"
 	"golang.org/x/sync/errgroup"
 
+	"github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner"
 	"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg"
 	"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/status"
 )
@@ -114,8 +115,14 @@ func writeResultsToDirectory(outputDir string, r io.Reader, ec <-chan error) ([]
 	var results []string
 	eg.Go(func() error { return <-ec })
 	eg.Go(func() error {
+		// scanning for sensitive data
+		scannedReader, _, err := cleaner.ScanPatchTarGzipReaderFor(r)
+		if err != nil {
+			return fmt.Errorf("error scanning results: %w", err)
+		}
+
 		// This untars the request itself, which is tar'd as just part of the API request, not the sonobuoy logic.
-		filesCreated, err := sonobuoyclient.UntarAll(r, outputDir, "")
+		filesCreated, err := sonobuoyclient.UntarAll(scannedReader, outputDir, "")
 		if err != nil {
 			return err
 		}