openshift · neisw · Nov 1, 2025 · Nov 1, 2025 · Nov 1, 2025 · Nov 2, 2025
diff --git a/pkg/test/ginkgo/cmd_runsuite.go b/pkg/test/ginkgo/cmd_runsuite.go
@@ -3,6 +3,7 @@ package ginkgo
 import (
 	"bytes"
 	"context"
+	_ "embed"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -55,6 +56,96 @@ const (
 	postUpgradeEvent = "PostUpgrade"
 )
 
+// Embed long_tests.json at compile time
+//
+//go:embed long_tests.json
+var longTestsJSON []byte
+
+// Embed long_never_fail_tests.json at compile time
+//
+//go:embed long_never_fail_tests.json
+var longNeverFailTestsJSON []byte
+
+// LongTestInfo represents duration information for a single test
+type LongTestInfo struct {
+	Name            string  `json:"name"`
+	DurationSeconds float64 `json:"duration_seconds"`
+	GroupID         string  `json:"group_id"`
+}
+
+// LongTestGroup represents a group of tests with the same group_id
+type LongTestGroup struct {
+	GroupID string         `json:"group_id"`
+	Tests   []LongTestInfo `json:"tests"`
+}
+
+// LongTestsData holds all long test groups loaded from long_tests.json
+var longTestsData []LongTestGroup
+
+// neverFailTestNames is a set of test names that never fail and should be skipped
+var neverFailTestNames map[string]bool
+
+func init() {
+	// Load long_tests.json
+	if err := json.Unmarshal(longTestsJSON, &longTestsData); err != nil {
+		logrus.WithError(err).Warn("Failed to load long_tests.json, test duration data will not be available")
+	} else {
+		totalTests := 0
+		for _, group := range longTestsData {
+			totalTests += len(group.Tests)
+		}
+		logrus.Infof("Loaded %d long test groups with %d total tests from long_tests.json", len(longTestsData), totalTests)
+	}
+
+	// Load long_never_fail_tests.json
+	var neverFailTestsData []LongTestGroup
+	if err := json.Unmarshal(longNeverFailTestsJSON, &neverFailTestsData); err != nil {
+		logrus.WithError(err).Warn("Failed to load long_never_fail_tests.json, never-fail test data will not be available")
+	} else {
+		neverFailTestNames = make(map[string]bool)
+		totalNeverFail := 0
+		for _, group := range neverFailTestsData {
+			for _, test := range group.Tests {
+				neverFailTestNames[test.Name] = true
+				totalNeverFail++
+			}
+		}
+		logrus.Infof("Loaded %d tests from long_never_fail_tests.json that will be skipped", totalNeverFail)
+	}
+}
+
+// GetTestDuration returns the expected duration in seconds for a test by name, or 0 if not found
+func GetTestDuration(testName string) int {
+	for _, group := range longTestsData {
+		for _, test := range group.Tests {
+			if test.Name == testName {
+				return int(test.DurationSeconds)
+			}
+		}
+	}
+	return 0
+}
+
+// GetTestGroup returns the group ID for a test by name, or empty string if not found
+func GetTestGroup(testName string) string {
+	for _, group := range longTestsData {
+		for _, test := range group.Tests {
+			if test.Name == testName {
+				return test.GroupID
+			}
+		}
+	}
+	return ""
+}
+
+// IsNeverFailTest returns true if the test is in the never-fail list and should be skipped
+func IsNeverFailTest(testName string) bool {
+	if neverFailTestNames == nil {
+		return false
+	}
+	return neverFailTestNames[testName]
+}
+
 // GinkgoRunSuiteOptions is used to run a suite of tests by invoking each test
 // as a call to a child worker (the run-tests command).
 type GinkgoRunSuiteOptions struct {
@@ -434,6 +525,45 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 		return err
 	}
 
+	// Extract long-running tests into a single group and skip never-fail tests
+	// All long tests will run first, sorted by duration (longest first)
+	var longTests []*testCase
+	var remainingTests []*testCase
+	var skippedNeverFailTests []*testCase
+
+	for _, test := range primaryTests {
+		// Skip tests that never fail
+		if IsNeverFailTest(test.name) {
+			skippedNeverFailTests = append(skippedNeverFailTests, test)
+			continue
+		}
+
+		group := GetTestGroup(test.name)
+		if group != "" {
+			// This is a known long-running test
+			longTests = append(longTests, test)
+		} else {
+			// Not in long_tests.json, add to remaining
+			remainingTests = append(remainingTests, test)
+		}
+	}
+
+	if len(skippedNeverFailTests) > 0 {
+		logrus.Infof("Skipping %d tests that never fail", len(skippedNeverFailTests))
+	}
+
+	// Sort all long tests by duration (longest first)
+	sort.Slice(longTests, func(i, j int) bool {
+		durationI := GetTestDuration(longTests[i].name)
+		durationJ := GetTestDuration(longTests[j].name)
+		return durationI > durationJ // Descending order (longest first)
+	})
+
+	logrus.Infof("Found %d long-running tests (will run first), %d remaining tests", len(longTests), len(remainingTests))
+
+	// Now split the remaining tests (non-long tests) into groups
+	primaryTests = remainingTests
+
 	kubeTests, openshiftTests := splitTests(primaryTests, func(t *testCase) bool {
 		return strings.Contains(t.name, "[Suite:k8s]")
 	})
@@ -462,6 +592,7 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 		return strings.Contains(t.name, "[sig-cli] oc adm must-gather")
 	})
 
+	logrus.Infof("Found %d long tests", len(longTests))
 	logrus.Infof("Found %d openshift tests", len(openshiftTests))
 	logrus.Infof("Found %d kube tests", len(kubeTests))
 	logrus.Infof("Found %d storage tests", len(storageTests))
@@ -471,9 +602,10 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 	logrus.Infof("Found %d builds tests", len(buildsTests))
 	logrus.Infof("Found %d must-gather tests", len(mustGatherTests))
 
-	// If user specifies a count, duplicate the kube and openshift tests that many times.
+	// If user specifies a count, duplicate the tests that many times.
 	expectedTestCount := len(early) + len(late)
 	if count != -1 {
+		originalLong := longTests
 		originalKube := kubeTests
 		originalOpenshift := openshiftTests
 		originalStorage := storageTests
@@ -484,6 +616,7 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 		originalMustGather := mustGatherTests
 
 		for i := 1; i < count; i++ {
+			longTests = append(longTests, copyTests(originalLong)...)
 			kubeTests = append(kubeTests, copyTests(originalKube)...)
 			openshiftTests = append(openshiftTests, copyTests(originalOpenshift)...)
 			storageTests = append(storageTests, copyTests(originalStorage)...)
@@ -494,7 +627,7 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 			mustGatherTests = append(mustGatherTests, copyTests(originalMustGather)...)
 		}
 	}
-	expectedTestCount += len(openshiftTests) + len(kubeTests) + len(storageTests) + len(networkK8sTests) + len(hpaTests) + len(networkTests) + len(buildsTests) + len(mustGatherTests)
+	expectedTestCount += len(longTests) + len(openshiftTests) + len(kubeTests) + len(storageTests) + len(networkK8sTests) + len(hpaTests) + len(networkTests) + len(buildsTests) + len(mustGatherTests)
 
 	abortFn := neverAbort
 	testCtx := ctx
@@ -512,9 +645,14 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 	// TODO: will move to the monitor
 	pc.SetEvents([]string{upgradeEvent})
 
-	// Run kube, storage, openshift, and must-gather tests. If user specified a count of -1,
+	// Run long tests, kube, storage, openshift, and must-gather tests. If user specified a count of -1,
 	// we loop indefinitely.
 	for i := 0; (i < 1 || count == -1) && testCtx.Err() == nil; i++ {
+		// Run long tests first with full parallelism
+		longTestsCopy := copyTests(longTests)
+		q.Execute(testCtx, longTestsCopy, parallelism, testOutputConfig, abortFn)
+		tests = append(tests, longTestsCopy...)
+
 		kubeTestsCopy := copyTests(kubeTests)
 		q.Execute(testCtx, kubeTestsCopy, parallelism, testOutputConfig, abortFn)
 		tests = append(tests, kubeTestsCopy...)