@@ -878,6 +878,45 @@ def triggerJob(jobName, parameters, jenkinsUrl = "", credentials = "")
878878 return status
879879}
880880
/**
 * Trigger a downstream Jenkins job with the shared parameter set and fail the
 * calling stage unless the job reports "SUCCESS".
 *
 * Parameter precedence (later entries win on key collisions):
 *   1. getCommonParameters()
 *   2. 'enableFailFast' / 'globalVars'
 *   3. additionalParameters
 *   4. 'alternativeTRT', 'testPhase2StageName', 'reuseArtifactPath' set below
 *
 * NOTE(review): 'alternativeTRT' and 'testPhase2StageName' are forwarded to
 * every job launched through this helper whenever the matching env var is set,
 * including build jobs; confirm that all downstream jobs accept them.
 *
 * @param jobName              Name or path of the downstream job, passed to triggerJob.
 * @param reuseBuild           When truthy, used as the last path segment of
 *                             'reuseArtifactPath'; pass false to omit that parameter.
 * @param enableFailFast       Forwarded as the 'enableFailFast' parameter.
 * @param globalVars           Serialized to JSON and forwarded as 'globalVars'.
 * @param platform             "x86_64" (default) forwards env.alternativeTRT,
 *                             "SBSA" forwards env.alternativeTrtSBSA; any other
 *                             value forwards neither.
 * @param additionalParameters Job-specific parameters; null is treated as empty.
 * @param jenkinsUrl           Forwarded to triggerJob; defaults to "" like triggerJob itself.
 * @param credentials          Forwarded to triggerJob; defaults to "" like triggerJob itself.
 * @return The status returned by triggerJob (always "SUCCESS" when this returns).
 */
def launchJob(jobName, reuseBuild, enableFailFast, globalVars, platform = "x86_64", additionalParameters = [:], jenkinsUrl = "", credentials = "") {
    // presumably getCommonParameters() returns a Map; `+=` below builds a new
    // map rather than mutating the returned one.
    def parameters = getCommonParameters()
    String globalVarsJson = writeJSON returnText: true, json: globalVars
    parameters += [
        'enableFailFast': enableFailFast,
        'globalVars': globalVarsJson,
    ] + (additionalParameters ?: [:])  // tolerate an explicit null from callers

    // Both platforms use the same downstream parameter name; only the source
    // environment variable differs.
    if (env.alternativeTRT && platform == "x86_64") {
        parameters += [
            'alternativeTRT': env.alternativeTRT,
        ]
    }

    if (env.alternativeTrtSBSA && platform == "SBSA") {
        parameters += [
            'alternativeTRT': env.alternativeTrtSBSA,
        ]
    }

    if (env.testPhase2StageName) {
        parameters += [
            'testPhase2StageName': env.testPhase2StageName,
        ]
    }

    if (reuseBuild) {
        parameters['reuseArtifactPath'] = "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${reuseBuild}"
    }

    echo "Trigger ${jobName} job, params: ${parameters}"

    // Forward the remote-controller arguments so callers that used to pass
    // them to triggerJob directly can still do so through this helper.
    def status = triggerJob(jobName, parameters, jenkinsUrl, credentials)
    if (status != "SUCCESS") {
        error "Downstream job did not succeed"
    }
    return status
}
919+
881920def launchStages (pipeline , reuseBuild , testFilter , enableFailFast , globalVars )
882921{
883922 stages = [
@@ -889,78 +928,88 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
889928 " x86_64-linux" : {
890929 script {
891930 stage(" Build" ) {
892- def parameters = getCommonParameters()
893- String globalVarsJson = writeJSON returnText : true , json : globalVars
894- parameters + = [
895- ' enableFailFast' : enableFailFast,
931+ def additionalParameters = [
896932 ' dockerImage' : globalVars[" LLM_DOCKER_IMAGE" ],
897933 ' wheelDockerImagePy310' : globalVars[" LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" ],
898934 ' wheelDockerImagePy312' : globalVars[" LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" ],
899- ' globalVars' : globalVarsJson,
900935 ]
901-
902- if (env. alternativeTRT) {
903- parameters + = [
904- ' alternativeTRT' : env. alternativeTRT,
905- ]
906- }
907-
908- if (reuseBuild) {
909- parameters[' reuseArtifactPath' ] = " sw-tensorrt-generic/llm-artifacts/${ JOB_NAME} /${ reuseBuild} "
910- }
911-
912- echo " trigger x86_64 build job, params: ${ parameters} "
913-
914- def status = triggerJob(" /LLM/helpers/Build-x86_64" , parameters)
915- if (status != " SUCCESS" ) {
916- error " Downstream job did not succeed"
917- }
918-
919- }
920- def testStageName = " [Test-x86_64] Run"
921- if (env. localJobCredentials) {
922- testStageName = " [Test-x86_64] Remote Run"
936+ launchJob(" /LLM/helpers/Build-x86_64" , reuseBuild, enableFailFast, globalVars, " x86_64" , additionalParameters)
923937 }
938+ def testStageName = " [Test-x86_64-Single-GPU] ${ env.localJobCredentials ? "Remote Run" : "Run"} "
939+ def singleGpuTestFailed = false
924940 stage(testStageName) {
925941 if (X86_TEST_CHOICE == STAGE_CHOICE_SKIP ) {
926942 echo " x86_64 test job is skipped due to Jenkins configuration"
927943 return
928944 }
929945 try {
930- parameters = getCommonParameters()
931946 String testFilterJson = writeJSON returnText : true , json : testFilter
932- String globalVarsJson = writeJSON returnText : true , json : globalVars
933- parameters + = [
934- ' enableFailFast' : enableFailFast,
947+ def additionalParameters = [
935948 ' testFilter' : testFilterJson,
936949 ' dockerImage' : globalVars[" LLM_DOCKER_IMAGE" ],
937950 ' wheelDockerImagePy310' : globalVars[" LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" ],
938951 ' wheelDockerImagePy312' : globalVars[" LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" ],
939- ' globalVars' : globalVarsJson,
940952 ]
941953
942- if (env. alternativeTRT) {
943- parameters + = [
944- ' alternativeTRT' : env. alternativeTRT,
945- ]
954+ launchJob(" L0_Test-x86_64-Single-GPU" , false , enableFailFast, globalVars, " x86_64" , additionalParameters)
955+ } catch (InterruptedException e) {
956+ throw e
957+ } catch (Exception e) {
958+ if (X86_TEST_CHOICE == STAGE_CHOICE_IGNORE ) {
959+ catchError(
960+ buildResult : ' SUCCESS' ,
961+ stageResult : ' FAILURE' ) {
962+ error " x86_64 test failed but ignored due to Jenkins configuration"
963+ }
964+ } else {
965+ catchError(
966+ buildResult : ' FAILURE' ,
967+ stageResult : ' FAILURE' ) {
968+ error " x86_64 single-GPU test failed"
969+ }
970+ singleGpuTestFailed = true
946971 }
972+ }
973+ }
947974
948- if (env. testPhase2StageName) {
949- parameters + = [
950- ' testPhase2StageName' : env. testPhase2StageName,
951- ]
975+ def requireMultiGpuTesting = currentBuild. description?. contains(" Require Multi-GPU Testing" ) ?: false
976+ echo " requireMultiGpuTesting: ${ requireMultiGpuTesting} "
977+ if (! requireMultiGpuTesting) {
978+ return
979+ }
980+
981+ if (singleGpuTestFailed) {
982+ if (env. JOB_NAME ==~ / .*PostMerge.*/ ) {
983+ echo " In the official post-merge pipeline, single-GPU test failed, whereas multi-GPU test is still kept running."
984+ } else {
985+ stage(" [Test-x86_64-Multi-GPU] Blocked" ) {
986+ catchError(
987+ buildResult : ' FAILURE' ,
988+ stageResult : ' FAILURE' ) {
989+ error " This pipeline requires running multi-GPU test, but single-GPU test has failed."
990+ }
952991 }
992+ return
993+ }
994+ }
953995
954- echo " trigger x86_64 test job, params: ${ parameters} "
996+ testStageName = " [Test-x86_64-Multi-GPU] ${ env.localJobCredentials ? "Remote Run" : "Run"} "
997+ stage(testStageName) {
998+ if (X86_TEST_CHOICE == STAGE_CHOICE_SKIP ) {
999+ echo " x86_64 test job is skipped due to Jenkins configuration"
1000+ return
1001+ }
1002+ try {
1003+ def testFilterJson = writeJSON returnText : true , json : testFilter
1004+ def additionalParameters = [
1005+ ' testFilter' : testFilterJson,
1006+ ' dockerImage' : globalVars[" LLM_DOCKER_IMAGE" ],
1007+ ' wheelDockerImagePy310' : globalVars[" LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" ],
1008+ ' wheelDockerImagePy312' : globalVars[" LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" ],
1009+ ]
9551010
956- def status = triggerJob(
957- " L0_Test-x86_64" ,
958- parameters,
959- )
1011+ launchJob(" L0_Test-x86_64-Multi-GPU" , false , enableFailFast, globalVars, " x86_64" , additionalParameters)
9601012
961- if (status != " SUCCESS" ) {
962- error " Downstream job did not succeed"
963- }
9641013 } catch (InterruptedException e) {
9651014 throw e
9661015 } catch (Exception e) {
@@ -991,79 +1040,26 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
9911040 return
9921041 }
9931042
994- def stageName = " Build"
995- stage(stageName) {
996- def parameters = getCommonParameters()
997- String globalVarsJson = writeJSON returnText : true , json : globalVars
998- parameters + = [
999- ' enableFailFast' : enableFailFast,
1043+ stage(" Build" ) {
1044+ def additionalParameters = [
10001045 " dockerImage" : globalVars[" LLM_SBSA_DOCKER_IMAGE" ],
1001- ' globalVars' : globalVarsJson,
10021046 ]
1003-
1004- if (env. alternativeTrtSBSA) {
1005- parameters + = [
1006- " alternativeTRT" : env. alternativeTrtSBSA,
1007- ]
1008- }
1009-
1010- if (reuseBuild) {
1011- parameters[' reuseArtifactPath' ] = " sw-tensorrt-generic/llm-artifacts/${ JOB_NAME} /${ reuseBuild} "
1012- }
1013-
1014- echo " trigger SBSA build job, params: ${ parameters} "
1015-
1016- def status = triggerJob(
1017- " /LLM/helpers/Build-SBSA" ,
1018- parameters,
1019- jenkinsUrl,
1020- credentials,
1021- )
1022-
1023- if (status != " SUCCESS" ) {
1024- error " Downstream job did not succeed"
1025- }
1047+ launchJob(" /LLM/helpers/Build-SBSA" , reuseBuild, enableFailFast, globalVars, " SBSA" , additionalParameters)
10261048 }
10271049 stage(testStageName) {
10281050 if (SBSA_TEST_CHOICE == STAGE_CHOICE_SKIP ) {
10291051 echo " SBSA test job is skipped due to Jenkins configuration"
10301052 return
10311053 }
10321054 try {
1033- def parameters = getCommonParameters()
1034- String testFilterJson = writeJSON returnText : true , json : testFilter
1035- String globalVarsJson = writeJSON returnText : true , json : globalVars
1036- parameters + = [
1037- ' enableFailFast' : enableFailFast,
1055+ def testFilterJson = writeJSON returnText : true , json : testFilter
1056+ def additionalParameters = [
10381057 ' testFilter' : testFilterJson,
10391058 " dockerImage" : globalVars[" LLM_SBSA_DOCKER_IMAGE" ],
1040- ' globalVars' : globalVarsJson,
10411059 ]
10421060
1043- if (env. alternativeTrtSBSA) {
1044- parameters + = [
1045- " alternativeTRT" : env. alternativeTrtSBSA,
1046- ]
1047- }
1048-
1049- if (env. testPhase2StageName) {
1050- parameters + = [
1051- ' testPhase2StageName' : env. testPhase2StageName,
1052- ]
1053- }
1054-
1055- echo " trigger SBSA test job, params: ${ parameters} "
1061+ launchJob(" L0_Test-SBSA" , false , enableFailFast, globalVars, " SBSA" , additionalParameters)
10561062
1057- def status = triggerJob(
1058- " L0_Test-SBSA" ,
1059- parameters,
1060- jenkinsUrl,
1061- credentials,
1062- )
1063-
1064- if (status != " SUCCESS" ) {
1065- error " Downstream job did not succeed"
1066- }
10671063 } catch (InterruptedException e) {
10681064 throw e
10691065 } catch (Exception e) {
@@ -1085,31 +1081,23 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
10851081 " Build-Docker-Images" : {
10861082 script {
10871083 stage(" [Build-Docker-Images] Remote Run" ) {
1088- def parameters = getCommonParameters()
1089- String globalVarsJson = writeJSON returnText : true , json : globalVars
10901084 def branch = env. gitlabBranch ? env. gitlabBranch : " main"
10911085 if (globalVars[GITHUB_PR_API_URL ]) {
10921086 branch = " github-pr-" + globalVars[GITHUB_PR_API_URL ]. split(' /' ). last()
10931087 }
10941088
1095- parameters + = [
1096- ' enableFailFast' : enableFailFast,
1089+ def additionalParameters = [
10971090 ' branch' : branch,
10981091 ' action' : " push" ,
10991092 ' triggerType' : env. JOB_NAME ==~ / .*PostMerge.*/ ? " post-merge" : " pre-merge" ,
1100- ' globalVars' : globalVarsJson,
11011093 ]
11021094
1103- echo " trigger BuildDockerImages job, params: ${ parameters} "
1104-
1105- def status = triggerJob(" /LLM/helpers/BuildDockerImages" , parameters)
1106- if (status != " SUCCESS" ) {
1107- error " Downstream job did not succeed"
1108- }
1095+ launchJob(" /LLM/helpers/BuildDockerImages" , false , enableFailFast, globalVars, " x86_64" , additionalParameters)
11091096 }
11101097 }
11111098 }
11121099 ]
1100+
11131101 if (env. JOB_NAME ==~ / .*PostMerge.*/ ) {
11141102 stages + = dockerBuildJob
11151103 }
0 commit comments