Skip to content

Commit

Permalink
OCPBUGS-46369: cleaning patterns while downloading report file
Browse files Browse the repository at this point in the history
  • Loading branch information
mtulio committed Dec 17, 2024
1 parent c571466 commit 8c94a55
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 1 deletion.
135 changes: 135 additions & 0 deletions internal/cleaner/cleaner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package cleaner

import (
"archive/tar"
"bytes"
"compress/gzip"
"fmt"
"io"

jsonpatch "github.com/evanphx/json-patch"
log "github.com/sirupsen/logrus"
)

// patches maps a file path inside the result archive to the JSON Patch
// document (RFC 6902, as a string) that must be applied to that file before
// the archive is shared. Currently the only entry redacts the
// internalRegistryPullSecret field from the ControllerConfig resource dump.
var patches = map[string]string{
"resources/cluster/machineconfiguration.openshift.io_v1_controllerconfigs.json": `[
{
"op": "replace",
"path": "/items/0/spec/internalRegistryPullSecret",
"value": "<redacted>"
}
]`,
}

// ScanPatchTarGzipReaderFor reads a gzip-compressed tar archive from r,
// applies the JSON patch registered in the patches map to every matching
// file, and returns a reader over the rewritten archive. Files without a
// registered patch are streamed through unchanged.
//
// The whole rewritten archive is buffered in memory before the returned
// reader is created, so this should only be used for archives that
// comfortably fit in memory.
func ScanPatchTarGzipReaderFor(r io.Reader) (resp io.Reader, err error) {
	log.Info("Scanning tar.gz")

	// Decompress the incoming stream.
	gzipReader, err := gzip.NewReader(r)
	if err != nil {
		return nil, fmt.Errorf("unable to open gzip file: %w", err)
	}
	defer gzipReader.Close()

	tarReader := tar.NewReader(gzipReader)

	// Buffer receiving the rewritten tar.gz content.
	var buf bytes.Buffer
	gzipWriter := gzip.NewWriter(&buf)
	tarWriter := tar.NewWriter(gzipWriter)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("unable to process file in archive: %w", err)
		}

		if _, ok := patches[header.Name]; ok {
			// Patched file: read it fully, apply the JSON patch, and
			// rewrite the header so its size matches the new payload.
			original, err := io.ReadAll(tarReader)
			if err != nil {
				return nil, fmt.Errorf("unable to read file in archive: %w", err)
			}

			patchedFile, err := applyJSONPatch(header.Name, original)
			if err != nil {
				return nil, fmt.Errorf("unable to apply patch to file %s: %w", header.Name, err)
			}

			// The patched payload may differ in length from the original.
			header.Size = int64(len(patchedFile))

			if err := tarWriter.WriteHeader(header); err != nil {
				return nil, fmt.Errorf("unable to write file header to new archive: %w", err)
			}
			if _, err := tarWriter.Write(patchedFile); err != nil {
				return nil, fmt.Errorf("unable to write file data to new archive: %w", err)
			}
		} else {
			// Copy other files as-is.
			if err := tarWriter.WriteHeader(header); err != nil {
				return nil, fmt.Errorf("error streaming file header to new archive: %w", err)
			}
			if _, err := io.Copy(tarWriter, tarReader); err != nil {
				return nil, fmt.Errorf("error streaming file data to new archive: %w", err)
			}
		}
	}

	// Close the writers so all buffered data is flushed into buf.
	if err := tarWriter.Close(); err != nil {
		return nil, fmt.Errorf("closing tarball: %w", err)
	}
	if err := gzipWriter.Close(); err != nil {
		return nil, fmt.Errorf("closing gzip: %w", err)
	}

	// Return the updated tar.gz content as an io.Reader.
	return bytes.NewReader(buf.Bytes()), nil
}

// func applyJSONPatch(filepath string, original []byte) ([]byte, error) {
// var data interface{}
// if err := json.Unmarshal(original, &data); err != nil {
// return nil, fmt.Errorf("unmarshalling json: %w", err)
// }

// fmt.Println(data)
// // JMESPath query to find and update the specific item
// // query := `items[?metadata.name=='config-controller'].spec.secretKey = 'redacted'`
// res, err := jmespath.Search(patches[filepath], data)
// if err != nil {
// return nil, fmt.Errorf("applying patch (%v): %w", res, err)
// }

// js, err := json.Marshal(data)
// if err != nil {
// return nil, fmt.Errorf("marshalling json: %w", err)
// }

// return js, nil
// }

// applyJSONPatch looks up the patch registered for filepath in the patches
// map, decodes it, and applies it to data, returning the modified document.
func applyJSONPatch(filepath string, data []byte) ([]byte, error) {
	decoded, err := jsonpatch.DecodePatch([]byte(patches[filepath]))
	if err != nil {
		return nil, fmt.Errorf("decoding patch: %w", err)
	}

	patched, err := decoded.Apply(data)
	if err != nil {
		return nil, fmt.Errorf("applying patch: %w", err)
	}
	return patched, nil
}
79 changes: 79 additions & 0 deletions pkg/cmd/adm/cleaner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package adm

import (
"bufio"
"io"
"os"

"github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)

// cleanerArguments holds the command-line flag values for the cleaner command.
type cleanerArguments struct {
// input is the path of the archive to be cleaned.
input string
// output is the path where the cleaned archive is written.
output string
}

// cleanerArgs receives the flag values bound in init.
var cleanerArgs cleanerArguments

// cleanerCmd is the "opct adm cleaner" subcommand: it reads a result archive,
// applies the internal cleaning rules, and writes the cleaned archive.
var cleanerCmd = &cobra.Command{
Use: "cleaner",
Example: "opct adm cleaner --input ./results.tar.gz --output ./results-cleaned.tar.gz",
Short: "Apply internal rules to clean result files for undesired sharable data.",
Run: cleanerRun,
}

// init registers the cleaner command's flags. The help texts describe the
// result archive handled by this command (previously they were copy-pasted
// from the metrics parser and referenced the wrong file types).
func init() {
	cleanerCmd.Flags().StringVar(&cleanerArgs.input, "input", "", "Input result archive file. Example: ./results.tar.gz")
	cleanerCmd.Flags().StringVar(&cleanerArgs.output, "output", "", "Output file path for the cleaned archive. Example: ./results-cleaned.tar.gz")
}

// cleanerRun implements the cleaner command: it opens the input archive,
// runs the sensitive-data scanner/patcher over it, and writes the cleaned
// archive to the output path. Errors are logged and terminate the process
// with a non-zero exit code (no panics, consistent with the flag checks).
func cleanerRun(cmd *cobra.Command, args []string) {
	if cleanerArgs.input == "" {
		log.Error("missing argument --input <result archive file.tar.gz>")
		os.Exit(1)
	}

	if cleanerArgs.output == "" {
		log.Error("missing argument --output <target file to save the cleaned archive>")
		os.Exit(1)
	}

	log.Infof("Starting cleaner for archive %s", cleanerArgs.input)

	fin, err := os.Open(cleanerArgs.input)
	if err != nil {
		log.Errorf("unable to open input file %s: %v", cleanerArgs.input, err)
		os.Exit(1)
	}
	defer fin.Close()

	// Scan the archive for sensitive data and apply the cleaning patches.
	cleaned, err := cleaner.ScanPatchTarGzipReaderFor(bufio.NewReader(fin))
	if err != nil {
		log.Errorf("error scanning archive %s: %v", cleanerArgs.input, err)
		os.Exit(1)
	}

	file, err := os.Create(cleanerArgs.output)
	if err != nil {
		log.Errorf("unable to create output file %s: %v", cleanerArgs.output, err)
		os.Exit(1)
	}

	// Write the cleaned data to the file.
	if _, err := io.Copy(file, cleaned); err != nil {
		file.Close()
		log.Errorf("unable to write cleaned data to %s: %v", cleanerArgs.output, err)
		os.Exit(1)
	}

	// Check Close explicitly: a failed close can mean the cleaned data was
	// not fully flushed to disk.
	if err := file.Close(); err != nil {
		log.Errorf("unable to close output file %s: %v", cleanerArgs.output, err)
		os.Exit(1)
	}

	log.Infof("Data successfully written to %s", cleanerArgs.output)
}
1 change: 1 addition & 0 deletions pkg/cmd/adm/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ func init() {
admCmd.AddCommand(parseEtcdLogsCmd)
admCmd.AddCommand(baseline.NewCmdBaseline())
admCmd.AddCommand(setupNodeCmd)
admCmd.AddCommand(cleanerCmd)
}

func NewCmdAdm() *cobra.Command {
Expand Down
11 changes: 10 additions & 1 deletion pkg/retrieve/retrieve.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
config2 "github.com/vmware-tanzu/sonobuoy/pkg/config"
"golang.org/x/sync/errgroup"

"github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/cleaner"
"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg"
"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/status"
)
Expand Down Expand Up @@ -89,6 +90,8 @@ func retrieveResults(sclient sonobuoyclient.Interface, destinationDirectory stri
return errors.Wrap(err, "error retrieving results from sonobuoy")
}

// TODO: hook results, extract sensitive data

// Download results into target directory
results, err := writeResultsToDirectory(destinationDirectory, reader, ec)
if err != nil {
Expand All @@ -114,8 +117,14 @@ func writeResultsToDirectory(outputDir string, r io.Reader, ec <-chan error) ([]
var results []string
eg.Go(func() error { return <-ec })
eg.Go(func() error {
// scanning for sensitive data
scannedReader, err := cleaner.ScanPatchTarGzipReaderFor(r)
if err != nil {
return fmt.Errorf("error scanning results: %w", err)
}

// This untars the request itself, which is tar'd as just part of the API request, not the sonobuoy logic.
filesCreated, err := sonobuoyclient.UntarAll(r, outputDir, "")
filesCreated, err := sonobuoyclient.UntarAll(scannedReader, outputDir, "")
if err != nil {
return err
}
Expand Down

0 comments on commit 8c94a55

Please sign in to comment.