Merge pull request #78 from nextmv-io/merschformann/reenable-auto-ben…

…chmarks Re-enables auto benchmarking / acceptance testing
nextmv-io · Dec 19, 2024 · 82afdb3 · 82afdb3
2 parents f0dc5de + 632b196
commit 82afdb3
Show file tree

Hide file tree

Showing 5 changed files with 309 additions and 0 deletions.
diff --git a/.github/workflows/auto-benchmark.yml b/.github/workflows/auto-benchmark.yml
@@ -0,0 +1,37 @@
+name: auto benchmark
+on: [push]
+
+# Tool versions are quoted so YAML keeps them as strings; an unquoted value
+# such as 3.10 would be read as the number 3.1.
+env:
+  GO_VERSION: "1.23"
+  PYTHON_VERSION: "3.12"
+
+jobs:
+  auto-benchmark:
+    runs-on: ubuntu-latest
+    steps:
+      - name: git clone
+        uses: actions/checkout@v4
+
+      - name: set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+
+      - name: set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r .nextmv/benchmark.requirements.txt
+
+      - name: run acceptance test
+        env:
+          BENCHMARK_ACCOUNT_ID: ${{ vars.BENCHMARK_ACCOUNT_ID }}
+          BENCHMARK_API_KEY_PROD: ${{ secrets.BENCHMARK_API_KEY_PROD }}
+          SLACK_URL_DEV_SCIENCE: ${{ secrets.SLACK_URL_DEV_SCIENCE }}
+        run: |
+          # GITHUB_REF_NAME is the short ref name provided by the runner. Unlike
+          # splitting GITHUB_REF on "/", it keeps branch names that contain
+          # slashes (e.g. "user/feature") intact.
+          export BRANCH_NAME="${GITHUB_REF_NAME}"
+          python .nextmv/benchmark.py
diff --git a/.nextmv/benchmark.py b/.nextmv/benchmark.py
@@ -0,0 +1,219 @@
+# Description:
+# This script does the following:
+# - Makes sure the working directory is clean.
+# - Pushes a new version of the app (uses the git sha as version; a timestamp is appended if that version already exists).
+# - Updates the candidate instance to use the new version.
+# - Runs an acceptance test between the candidate and baseline instances.
+# - Waits for the test to complete.
+# - Posts the result to Slack (only on the develop branch and if a webhook is configured).
+# - Upgrades the baseline instance to the new version (only on the develop branch).
+
+import os
+import subprocess
+from datetime import datetime, timezone
+
+import requests
+from nextmv import cloud
+
+# ID of the application used for `cloud.Application` in main().
+APP_ID = "nextroute-bench"
+# API key for the cloud client. Required: a missing variable raises KeyError on import.
+API_KEY = os.environ["BENCHMARK_API_KEY_PROD"]
+# Optional Slack webhook URL; no notification is posted when it is unset.
+SLACK_WEBHOOK = os.getenv("SLACK_URL_DEV_SCIENCE", None)
+# Optional account ID; only used to build the link to the acceptance test.
+ACCOUNT_ID = os.getenv("BENCHMARK_ACCOUNT_ID", None)
+# Optional branch name; Slack posting and baseline upgrade only happen for "develop".
+BRANCH_NAME = os.getenv("BRANCH_NAME", None)
+
+
+# Metrics passed to the acceptance test: a single direct comparison on
+# `result.value`, using the shifted geometric mean as statistic, the
+# less-than-or-equal operator and a relative tolerance of 0.05.
+# NOTE(review): presumably this lets the candidate be up to 5% worse than the
+# baseline before the test fails - confirm against the nextmv documentation.
+METRICS = [
+    cloud.Metric(
+        field="result.value",
+        metric_type=cloud.MetricType.direct_comparison,
+        params=cloud.MetricParams(
+            tolerance=cloud.MetricTolerance(
+                value=0.05,
+                type=cloud.ToleranceType.relative,
+            ),
+            operator=cloud.Comparison.less_than_or_equal_to,
+        ),
+        statistic=cloud.StatisticType.shifted_geometric_mean,
+    )
+]
+
+
+def ensure_clean_working_directory():
+    """
+    Ensure the working directory is clean by throwing an exception if it is not.
+
+    Runs `git diff --quiet` and, if that succeeds, `git diff --cached --quiet`
+    in the current working directory. Any non-zero exit status of either
+    command is treated as "not clean".
+
+    Raises:
+        Exception: If one of the git commands returns a non-zero status.
+    """
+    # The second command is only executed when the first one returned 0.
+    if os.system("git diff --quiet") != 0 or os.system("git diff --cached --quiet") != 0:
+        raise Exception("Working directory is not clean")
+
+
+def get_id(app: cloud.Application) -> tuple[str, str]:
+    """
+    Get the ID for the new version (and just the tag).
+
+    The tag is the first 8 characters of the current git commit sha and the
+    version ID is `auto-<tag>`. If looking up that version on the app succeeds,
+    a UTC timestamp of the form `20241219T101500Z` is appended to both values.
+
+    Args:
+        app: Application on which the version lookup is performed.
+
+    Returns:
+        A `(version_id, tag)` tuple.
+    """
+    # Create ID based on git sha.
+    tag = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()[0:8]
+    version_id = f"auto-{tag}"
+    # If the version already exists, we append a timestamp to the ID.
+    exists = False
+    try:
+        app.version(version_id)
+        exists = True
+    except Exception:
+        # Any failure of the lookup is taken to mean "version does not exist".
+        # NOTE(review): this also hides unrelated errors (e.g. network or auth).
+        pass
+    if exists:
+        # ISO timestamp in UTC, seconds precision, with ":" and "-" stripped.
+        ts = (
+            datetime.now(timezone.utc)
+            .replace(microsecond=0)
+            .isoformat()
+            .replace("+00:00", "Z")
+            .replace(":", "")
+            .replace("-", "")
+        )
+        version_id = f"{version_id}-{ts}"
+        tag = f"{tag}-{ts}"
+    # When the version did not exist, the plain sha-based values are returned.
+    return version_id, tag
+
+
+def push_new_version(app: cloud.Application, tag: str) -> None:
+    """
+    Push a new version of the app and update the candidate instance to use it.
+
+    Args:
+        app: Application to push to.
+        tag: Used as the ID of the new version and in its name and description.
+            Note that main() passes the full version ID here, not the short tag.
+    """
+    # Push the app from the current working directory (main() changes into it first).
+    app.push(app_dir=".")
+    app.new_version(
+        id=tag,
+        name=f"Auto version {tag}",
+        description=f"Automatically generated version {tag}",
+    )
+    # Fetch the existing instance only to carry its current name over.
+    instance = app.instance("candidate")
+    app.update_instance(
+        id="candidate",
+        version_id=tag,
+        name=instance.name,  # Name is required, but we don't want to change it
+    )
+
+
+def upgrade_baseline(app: cloud.Application, version_id: str) -> None:
+    """
+    Upgrade the baseline instance to use the new version.
+
+    Args:
+        app: Application owning the "baseline" instance.
+        version_id: ID of the version the baseline instance should point to.
+    """
+    # Fetch the existing instance only to carry its current name over.
+    instance = app.instance("baseline")
+    app.update_instance(
+        id="baseline",
+        version_id=version_id,
+        name=instance.name,  # Name is required, but we don't want to change it
+    )
+
+
+def run_acceptance_test(
+    app: cloud.Application,
+    id: str,
+    tag: str,
+) -> cloud.AcceptanceTest:
+    """
+    Run an acceptance test between the candidate and baseline instances.
+
+    The test uses the module-level METRICS and the input set
+    "nextroute-bench-v20".
+
+    Args:
+        app: Application on which the acceptance test is created.
+        id: ID given to the acceptance test.
+        tag: Used in the name and description of the acceptance test.
+
+    Returns:
+        The value returned by `app.new_acceptance_test_with_result`.
+    """
+    # NOTE(review): presumably this call polls until the test is finished or the
+    # polling limits below are exceeded - confirm against the nextmv SDK docs.
+    result = app.new_acceptance_test_with_result(
+        candidate_instance_id="candidate",
+        baseline_instance_id="baseline",
+        id=id,
+        metrics=METRICS,
+        name=f"Auto-test {tag}",
+        description=f"Automated test for {tag}",
+        input_set_id="nextroute-bench-v20",
+        polling_options=cloud.PollingOptions(
+            max_duration=600,  # 10 minutes
+            max_tries=1000,  # basically forever - we'll stop by duration
+        ),
+    )
+    return result
+
+
+def create_test_url(result_id: str) -> str:
+    """
+    Create a URL to the acceptance test result.
+    """
+    if ACCOUNT_ID:
+        return f"https://cloud.nextmv.io/acc/{ACCOUNT_ID}/app/nextroute-bench/experiment/acceptance/{result_id}"
+    return "unavailable"
+
+
+def write_to_summary(content):
+    """
+    Appends content to the GitHub Actions step summary (if available).
+
+    Does nothing when the GITHUB_STEP_SUMMARY environment variable is unset or
+    empty.
+
+    Args:
+        content: Text to append; a trailing newline is added.
+    """
+    summary_file = os.getenv("GITHUB_STEP_SUMMARY")
+    if not summary_file:
+        return
+
+    # Write content to the summary file (append mode keeps earlier entries)
+    with open(summary_file, "a") as f:
+        f.write(content + "\n")
+
+
+def main():
+    """
+    Main function that runs the benchmark.
+    """
+    # Change to the directory of the app (sibling directory of this script)
+    os.chdir(os.path.join(os.path.dirname(__file__), "..", "cmd"))
+
+    print("Making sure the working directory is clean...")
+    ensure_clean_working_directory()
+
+    client = cloud.Client(api_key=API_KEY)
+    app = cloud.Application(client=client, id=APP_ID)
+
+    id, tag = get_id(app)  # id is used as version and acceptance test ID
+
+    print(f"Pushing new version with ID: {id}")
+    push_new_version(app, id)
+
+    write_to_summary("# Acceptance Test Report")
+    write_to_summary("")
+    write_to_summary(f"ID: {id}")
+    url = create_test_url(id)
+    write_to_summary(f"Link: [link]({url})")
+    print(f"::notice::Acceptance test URL: {url}", flush=True)
+
+    print(f"Running acceptance test with ID: {id}")
+    print("Waiting for it to complete...")
+    result = run_acceptance_test(app, id, tag)
+    passed = "unknown"
+    if result and result.results:
+        passed = "passed" if result.results.passed else "failed"
+    print(f"Acceptance test completed with status: {passed}")
+
+    if SLACK_WEBHOOK and BRANCH_NAME == "develop":
+        print("Posting to Slack...")
+        response = requests.post(
+            SLACK_WEBHOOK,
+            json={
+                "text": f"nextroute acceptance test {result and result.id} completed with status: {passed}"
+                + f" (<{create_test_url(result and result.id)}|View results>)",
+            },
+        )
+
+        if response.status_code != 200:
+            print(f"Failed to send notification to Slack: {response.text}")
+        else:
+            print("Notification sent to Slack")
+
+    write_to_summary("")
+    write_to_summary(f"Result: {passed}")
+    if result and result.results:
+        if result.results.error:
+            write_to_summary(f"Error: {result.results.error}")
+        else:
+            write_to_summary("Metrics:")
+            write_to_summary("")
+            for metric in result.results.metric_results:
+                write_to_summary(f"- {metric.metric.field}: {metric.passed}")
+
+    if BRANCH_NAME == "develop":
+        print("Upgrading baseline instance to use the new version...")
+        upgrade_baseline(app, id)
+
+    print("Done")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.nextmv/benchmark.requirements.txt b/.nextmv/benchmark.requirements.txt
@@ -0,0 +1,2 @@
+nextmv>=v0.14.2
+requests>=2.32.3
diff --git a/cmd/app.yaml b/cmd/app.yaml
@@ -0,0 +1,10 @@
+# This manifest holds the information the app needs to run on the Nextmv Cloud.
+type: go
+runtime: ghcr.io/nextmv-io/runtime/default:latest
+# Build step: compiles the Go package in this directory into a binary named
+# `main`, cross-compiled for linux/arm64 via GOOS/GOARCH.
+build:
+  command: go build -o main .
+  environment:
+    GOOS: linux
+    GOARCH: arm64
+# Only the `main` binary produced by the build command is listed here.
+# NOTE(review): presumably these are the files uploaded with the app - confirm.
+files:
+  - main
diff --git a/cmd/main.go b/cmd/main.go
@@ -5,10 +5,12 @@ package main
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"log"
 	"os"
 	"strings"
+	"time"
 
 	"github.com/nextmv-io/nextroute"
 	"github.com/nextmv-io/nextroute/check"
@@ -39,31 +41,70 @@ type options struct {
 	Check  check.Options                  `json:"check,omitempty"`
 }
 
+// customOptions holds the options that can be embedded in the input itself.
+// MaxDuration is optional (nil when absent); when set, applyCustomOptions
+// interprets it as a number of seconds and uses it as the solve duration.
+type customOptions struct {
+	MaxDuration *float64 `json:"max_duration,omitempty"`
+}
+
+// applyCustomOptions applies the extended custom options from the input to the
+// actual options.
+//
+// customOpts is re-encoded as JSON and decoded into customOptions, so any
+// JSON-marshalable value is accepted. When max_duration is present it is
+// interpreted as seconds (fractions allowed) and replaces opts.Solve.Duration.
+// On failure the options are returned unchanged together with an error that
+// wraps the underlying JSON error.
+func applyCustomOptions(opts options, customOpts any) (options, error) {
+	raw, err := json.Marshal(customOpts)
+	if err != nil {
+		return opts, fmt.Errorf("encoding custom options: %w", err)
+	}
+	var custom customOptions
+	if err := json.Unmarshal(raw, &custom); err != nil {
+		return opts, fmt.Errorf("decoding custom options: %w", err)
+	}
+	if custom.MaxDuration != nil {
+		opts.Solve.Duration = time.Duration(*custom.MaxDuration * float64(time.Second))
+	}
+	return opts, nil
+}
+
 func solver(
 	ctx context.Context,
 	input schema.Input,
 	options options,
 ) (runSchema.Output, error) {
+	// Apply input embedded options, if any. This is used internally for
+	// benchmarking and testing.
+	if input.Options != nil {
+		opts, err := applyCustomOptions(options, input.Options)
+		if err != nil {
+			return runSchema.Output{}, err
+		}
+		options = opts
+	}
+
+	// Create the model from the input and options.
 	model, err := factory.NewModel(input, options.Model)
 	if err != nil {
 		return runSchema.Output{}, err
 	}
 
+	// Create the solver from the model.
 	solver, err := nextroute.NewParallelSolver(model)
 	if err != nil {
 		return runSchema.Output{}, err
 	}
 
+	// Solve the model.
 	solutions, err := solver.Solve(ctx, options.Solve)
 	if err != nil {
 		return runSchema.Output{}, err
 	}
 
+	// Get the last solution.
+	// This call is blocking until the solver terminates. Alternatively,
+	// solutions can be ranged over (see All() method).
 	last, err := solutions.Last()
 	if err != nil {
 		return runSchema.Output{}, err
 	}
 
+	// Process the solution for output.
 	output, err := check.Format(
 		ctx,
 		options,