Skip to content

Commit

Permalink
convert the pdf consumer into the new SDK
Browse files Browse the repository at this point in the history
  • Loading branch information
dlicheva committed Jan 21, 2025
1 parent 39807c9 commit 48b96c5
Show file tree
Hide file tree
Showing 9 changed files with 959 additions and 1 deletion.
12 changes: 11 additions & 1 deletion new-components/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
services:
reporter:
json-logger-reporter:
build:
context: .
dockerfile: Dockerfile
Expand All @@ -12,6 +12,16 @@ services:
depends_on:
enricher:
condition: service_completed_successfully
pdf-reporter:
build:
context: reporters/pdf
dockerfile: Dockerfile
platform: linux/amd64
env_file:
- reporters/pdf/.env
depends_on:
enricher:
condition: service_completed_successfully
enricher:
build:
context: .
Expand Down
10 changes: 10 additions & 0 deletions new-components/reporters/pdf/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# This is for local setup only.
SMITHY_INSTANCE_ID=8d719c1c-c569-4078-87b3-4951bd4012ee
SMITHY_LOG_LEVEL=debug
AWS_ACCESS_KEY_ID=''
AWS_SECRET_ACCESS_KEY=''
BUCKET_NAME=''
BUCKET_REGION=''
SKIP_S3_UPLOAD=true
SMITHY_STORE_TYPE=postgresql
SMITHY_REMOTE_STORE_POSTGRES_DSN="postgresql://smithy:smithy1234@findings-db:5432/findings-db?sslmode=disable&connect_timeout=10"
20 changes: 20 additions & 0 deletions new-components/reporters/pdf/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Stage 1: build the reporter binary and the Playwright CLI.
FROM golang:1.23.3 AS builder
COPY . /workdir
WORKDIR /workdir
# Install the Playwright CLI. The final stage copies /go from this stage and
# runs go/bin/playwright from there to install Chromium.
# NOTE(review): the version suffix on the next line does not look like a valid
# module version (expected something of the form playwright@vX.Y.Z); it appears
# to have been mangled - restore the pinned version before building.
RUN go install github.com/playwright-community/playwright-go/cmd/[email protected]
# Build the reporter binary from cmd/main.go into /bin/reporter.
RUN GOOS=linux GOARCH=amd64 go build -o /bin/reporter cmd/main.go

# Stage 2: final runtime image
FROM ubuntu:22.04

# The reporter binary, plus /go which contains the Playwright CLI built in the
# builder stage.
COPY --from=builder /bin/reporter /
COPY --from=builder /go/ /go/

# Update, install and clean up in a single layer: the package index can never
# be served stale from the build cache, and removing /var/lib/apt/lists
# actually shrinks the image (a separate `RUN rm` layer cannot reclaim space
# already committed by earlier layers).
RUN apt-get update \
    && apt-get install -y ca-certificates tzdata \
    && /go/bin/playwright install chromium --with-deps \
    && rm -rf /var/lib/apt/lists/*

CMD ["/reporter"]
41 changes: 41 additions & 0 deletions new-components/reporters/pdf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# PDF

This component implements a [reporter](https://github.com/smithy-security/smithy/blob/main/sdk/component/component.go)
that prints vulnerability findings into a PDF and uploads it to an AWS
S3 bucket.

## Environment variables

The component uses environment variables for configuration.

It requires the component
environment variables defined
[here](https://github.com/smithy-security/smithy/blob/main/sdk/README.md#component)
as well as the following:

* `AWS_ACCESS_KEY_ID` - **string, required when uploading**
  * Your S3 access key ID for a user that has write access to the bucket
* `AWS_SECRET_ACCESS_KEY` - **string, required when uploading**
  * Your S3 secret access key for a user that has write access to the bucket
* `BUCKET_NAME` - **string, required when uploading**
  * Your S3 bucket name, e.g. "test-bucket"
* `BUCKET_REGION` - **string, required when uploading**
  * Your S3 bucket region, e.g. "us-west-1"
* `SKIP_S3_UPLOAD` - **bool, optional, defaults to `true`**
  * Set to `false` to upload the generated PDF to the S3 bucket

On AWS, you will need a new IAM user with programmatic access and
write permissions for your S3 bucket.

## How to run

Execute:

```shell
docker-compose up --build --force-recreate --remove-orphans
```

Then shutdown with:

```shell
docker-compose down --rmi all
```
40 changes: 40 additions & 0 deletions new-components/reporters/pdf/cmd/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package main

import (
"context"
"log"
"time"

"github.com/go-errors/errors"

"github.com/smithy-security/smithy/new-components/reporters/pdf/internal/reporter"
"github.com/smithy-security/smithy/sdk/component"
)

// main runs the PDF reporter with an overall one-minute deadline and
// terminates the process with a fatal log entry if the run fails.
func main() {
	runCtx, stop := context.WithTimeout(context.Background(), time.Minute)
	defer stop()

	err := Main(runCtx)
	if err != nil {
		log.Fatalf("unexpected error: %v", err)
	}
}

// Main builds the reporter configuration and runs the PDF reporter through
// the SDK's reporter runner.
//
// The component name option ("pdf") is appended after any caller-supplied
// runner options. Configuration and run failures are returned wrapped.
func Main(ctx context.Context, opts ...component.RunnerOption) error {
	cfg, confErr := reporter.NewConf(nil)
	if confErr != nil {
		return errors.Errorf("could not create new configuration: %w", confErr)
	}

	runnerOpts := append(opts, component.RunnerWithComponentName("pdf"))
	pdfReporter := reporter.NewReporter(cfg)

	runErr := component.RunReporter(ctx, pdfReporter, runnerOpts...)
	if runErr != nil {
		return errors.Errorf("could not run reporter: %w", runErr)
	}

	return nil
}
11 changes: 11 additions & 0 deletions new-components/reporters/pdf/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
services:
reporter:
build:
context: .
dockerfile: Dockerfile
args:
- COMPONENT_PATH=reporters/pdf
- COMPONENT_BINARY_SOURCE_PATH=cmd/main.go
platform: linux/amd64
env_file:
- .env
204 changes: 204 additions & 0 deletions new-components/reporters/pdf/internal/reporter/reporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package reporter

import (
"context"
_ "embed"
"fmt"
"html/template"
"log/slog"
"os"
"path/filepath"
"time"

"github.com/go-errors/errors"
"github.com/smithy-security/pkg/env"

playwright "github.com/smithy-security/smithy/pkg/playwright"
s3client "github.com/smithy-security/smithy/pkg/s3"
"github.com/smithy-security/smithy/sdk/component"
vf "github.com/smithy-security/smithy/sdk/component/vulnerability-finding"
)

// NewReporter builds a PdfReporter that uses the supplied configuration.
func NewReporter(conf *Conf) *PdfReporter {
	r := new(PdfReporter)
	r.conf = conf
	return r
}

// PdfReporter renders vulnerability findings into a PDF and, unless the
// configuration says to skip it, uploads the result to S3.
type PdfReporter struct {
	// conf carries the bucket, region and skip-upload settings.
	conf *Conf
}

// Conf is the PDF reporter's configuration.
type Conf struct {
	// Bucket is the name of the S3 bucket the PDF is uploaded to.
	Bucket string
	// Region is the region used to create the S3 client.
	Region string
	// SkipS3Upload disables the upload step when true.
	SkipS3Upload bool
}

// NewConf assembles the reporter configuration from environment variables.
//
// When envLoader is non-nil it is handed to the env package as the loader;
// otherwise no loader option is passed. The variables read are:
//   - SKIP_S3_UPLOAD (default true)
//   - BUCKET_NAME (default "")
//   - BUCKET_REGION (default "")
//
// An error is returned as soon as one of the lookups fails.
func NewConf(envLoader env.Loader) (*Conf, error) {
	var baseOpts []env.ParseOption
	if envLoader != nil {
		baseOpts = append(baseOpts, env.WithLoader(envLoader))
	}
	// Every lookup below uses the same option set.
	lookupOpts := append(baseOpts, env.WithDefaultOnError(false))

	skip, skipErr := env.GetOrDefault("SKIP_S3_UPLOAD", true, lookupOpts...)
	if skipErr != nil {
		return nil, errors.Errorf("could not get env variable for SKIP_S3_UPLOAD: %w", skipErr)
	}

	bucketName, bucketErr := env.GetOrDefault("BUCKET_NAME", "", lookupOpts...)
	if bucketErr != nil {
		return nil, errors.Errorf("could not get env variable for BUCKET_NAME: %w", bucketErr)
	}

	bucketRegion, regionErr := env.GetOrDefault("BUCKET_REGION", "", lookupOpts...)
	if regionErr != nil {
		return nil, errors.Errorf("could not get env variable for BUCKET_REGION: %w", regionErr)
	}

	cfg := &Conf{
		Bucket:       bucketName,
		Region:       bucketRegion,
		SkipS3Upload: skip,
	}
	return cfg, nil
}

// Report renders the given findings into a PDF and, unless the configuration
// asks to skip it, uploads the PDF to S3.
//
// It returns an error when the PDF cannot be built or when the upload fails.
func (p PdfReporter) Report(ctx context.Context, findings []*vf.VulnerabilityFinding) error {
	ctxLogger := component.LoggerFromContext(ctx)

	pdfPath, pdfContent, buildErr := p.getPdf(findings)
	if buildErr != nil {
		return fmt.Errorf("could not build pdf: %w", buildErr)
	}
	ctxLogger.Info("built the PDF")

	// Nothing more to do when the upload is disabled.
	if p.conf.SkipS3Upload {
		return nil
	}
	return p.uploadToS3(pdfPath, pdfContent)
}

// getPdf starts a Playwright client, renders the findings into a PDF via
// buildPdf and stops the client again before returning.
//
// It returns the path of the generated PDF together with its contents. An
// error is returned when Playwright cannot be started or when the PDF cannot
// be built; a failure to stop Playwright is only logged.
func (p PdfReporter) getPdf(findings []*vf.VulnerabilityFinding) (string, []byte, error) {
	pw, err := playwright.NewClient()
	if err != nil {
		// Without a client there is nothing to render with (and nothing to
		// stop), so fail here instead of carrying on with an unusable client.
		return "", nil, fmt.Errorf("could not launch playwright: %w", err)
	}

	defer func() {
		if err := pw.Stop(); err != nil {
			slog.Error("could not stop Playwright", slog.String("err", err.Error()))
		}
	}()

	slog.Info("reading PDF")
	resultFilename, pdfBytes, err := p.buildPdf(findings, pw)
	if err != nil {
		return "", nil, fmt.Errorf("could not build pdf: %w", err)
	}
	slog.Info("result filename", slog.String("filename", resultFilename))

	return resultFilename, pdfBytes, nil
}

// templateFile holds the contents of template.html, embedded at build time.
// buildPdf parses it as the HTML template for the report.
//
//go:embed template.html
var templateFile string

// buildPdf renders data through the embedded HTML template into an
// intermediate report.html in the current working directory, asks the
// Playwright wrapper for a PDF of that page (passing report.pdf in the same
// directory as the target path) and removes the intermediate HTML file.
//
// It returns the PDF path and the PDF contents. An error is returned when the
// template cannot be parsed or executed, when the intermediate file cannot be
// created or closed, or when the PDF generation fails.
func (p PdfReporter) buildPdf(data any, pw playwright.Wrapper) (string, []byte, error) {
	// process the default template into a html result
	tmpl, err := template.New("template.html").Funcs(template.FuncMap{
		"formatTime": FormatTime,
	}).Parse(templateFile)
	if err != nil {
		return "", nil, fmt.Errorf("could not parse files: %w", err)
	}

	currentPath, err := os.Getwd()
	if err != nil {
		return "", nil, fmt.Errorf("could not get current working directory: %w", err)
	}

	reportHTMLPath := filepath.Join(currentPath, "report.html")
	// O_TRUNC rather than O_APPEND: a stale report.html left behind by an
	// earlier run must be overwritten, not extended with a second document.
	f, err := os.OpenFile(reportHTMLPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) //#nosec: G304
	if err != nil {
		return "", nil, fmt.Errorf("could not open report.html: %w", err)
	}
	// From here on the intermediate HTML exists on disk; delete it on every
	// exit path, including failures.
	defer func() {
		if err := os.Remove(reportHTMLPath); err != nil {
			slog.Error("could not delete report.html", slog.String("err", err.Error()))
		}
	}()

	// Close the file right after writing, before the browser reads it, so the
	// handle is never leaked and a failed close is not silently ignored.
	execErr := tmpl.Execute(f, data)
	closeErr := f.Close()
	if execErr != nil {
		return "", nil, fmt.Errorf("could not apply data to template: %w", execErr)
	}
	if closeErr != nil {
		return "", nil, fmt.Errorf("could not close report.html: %w", closeErr)
	}

	// TODO: add instance id to name of file
	reportPDFPath := filepath.Join(currentPath, "report.pdf")
	// NOTE(review): for an absolute Unix path this yields "file:////..." (four
	// slashes); left unchanged here - confirm the browser resolves it as intended.
	reportPage := fmt.Sprintf("file:///%s", reportHTMLPath)
	pdfBytes, err := pw.GetPDFOfPage(reportPage, reportPDFPath)
	if err != nil {
		return "", nil, fmt.Errorf("could not generate pdf from page %s, err: %w", reportPage, err)
	}

	return reportPDFPath, pdfBytes, nil
}

// FormatTime is a template helper for the PDF. It interprets the value as a
// Unix timestamp in seconds and renders it as "YYYY-MM-DD HH:MM:SS" in the
// process's local time zone. A nil timestamp yields the empty string.
func FormatTime(timestamp *int64) string {
	if timestamp != nil {
		return time.Unix(*timestamp, 0).Format(time.DateTime)
	}
	return ""
}

// uploadToS3 uploads the generated PDF to the configured S3 bucket.
//
// resultFilename is handed to the S3 client as the file name and pdfBytes as
// the file contents. An error is returned when the bucket or region is not
// configured, when the S3 client cannot be created, or when the upload fails.
func (p PdfReporter) uploadToS3(resultFilename string, pdfBytes []byte) error {
	// Fail fast on missing configuration instead of logging and attempting an
	// upload that cannot succeed.
	if p.conf.Bucket == "" {
		return fmt.Errorf("bucket is empty, you need to provide a bucket name")
	}
	if p.conf.Region == "" {
		return fmt.Errorf("region is empty, you need to provide a region name")
	}

	client, err := s3client.NewClient(p.conf.Region)
	if err != nil {
		// The client must not be used after a failed construction.
		return fmt.Errorf("could not create s3 client: %w", err)
	}

	slog.Info(
		"uploading pdf to s3",
		slog.String("filename", resultFilename),
		slog.String("bucket", p.conf.Bucket),
		slog.String("region", p.conf.Region),
	)
	return client.UpsertFile(resultFilename, p.conf.Bucket, "", pdfBytes)
}
Loading

0 comments on commit 48b96c5

Please sign in to comment.