From 022ca78f2c97c13a2cf00c09a2de56a0c751f232 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 5 Sep 2024 16:28:36 -0400 Subject: [PATCH 1/2] Jobs that failed in JobAccountant only persist logArch1 output --- .../WMComponent/JobAccountant/AccountantWorker.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/python/WMComponent/JobAccountant/AccountantWorker.py b/src/python/WMComponent/JobAccountant/AccountantWorker.py index 943cf90ca7..820392a29c 100644 --- a/src/python/WMComponent/JobAccountant/AccountantWorker.py +++ b/src/python/WMComponent/JobAccountant/AccountantWorker.py @@ -495,19 +495,16 @@ def handleJob(self, jobID, fwkJobReport): else: fileList = fwkJobReport.getAllFilesFromStep(step='logArch1') - # Make sure every file has a valid location - # see https://github.com/dmwm/WMCore/issues/9353 - newList = [] + # Workaround: make sure every file has a valid location. See: + # https://github.com/dmwm/WMCore/issues/9353 and https://github.com/dmwm/WMCore/issues/12092 for fwjrFile in fileList: # T0 has analysis file without any location, see: # https://github.com/dmwm/WMCore/issues/9497 if not fwjrFile.get("locations") and fwjrFile.get("lfn", "").endswith(".root"): logging.warning("The following file does not have any location: %s", fwjrFile) jobSuccess = False - else: - newList.append(fwjrFile) - # save the new list free of ill files (without any location) - fileList = newList + fileList = fwkJobReport.getAllFilesFromStep(step='logArch1') + break if jobSuccess: logging.info("Job %d , handle successful job", jobID) From 6c40c915f0f94b4c80e8045da8581e998d6e6b85 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 5 Sep 2024 18:50:44 -0400 Subject: [PATCH 2/2] unit test fixes --- .../JobAccountant_t/JobAccountant_t.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/python/WMComponent_t/JobAccountant_t/JobAccountant_t.py b/test/python/WMComponent_t/JobAccountant_t/JobAccountant_t.py index 029e44f229..35ad8b0089 100644 --- a/test/python/WMComponent_t/JobAccountant_t/JobAccountant_t.py +++ b/test/python/WMComponent_t/JobAccountant_t/JobAccountant_t.py @@ -1633,41 +1633,39 @@ def testNoLocation(self): self.assertFalse(jobSuccess, "Job should have failed because a file has no location") - from pprint import pformat # now verify the WMBS information to be inserted into the database self.assertEqual(accountantWorker.parentageBinds, [], "Job report has no parentage relationship to be defined") - self.assertEqual(len(accountantWorker.wmbsFilesToBuild), 2, + self.assertEqual(len(accountantWorker.wmbsFilesToBuild), 1, "Should have 3 files if all of them had valid location") - self.assertEqual(accountantWorker.wmbsMergeFilesToBuild, [], "Job report has no merge files to register") self.assertEqual(accountantWorker.parentageBindsForMerge, [], "Job report has no parentage files to register") + self.assertEqual(accountantWorker.mergedOutputFiles, [], + "Job report should have no merged output files") + self.assertEqual(len(accountantWorker.filesetAssoc), 1, + "Job report should have no file association") + self.assertTrue(accountantWorker.filesetAssoc[0]['lfn'].endswith("logArchive.tar.gz")) self.assertEqual(len(jobReport.listSteps()), 8) - #print("AMR all files: %s" % pformat(jobReport.getAllFiles())) # steps that do not have any output files for stepName in ['cmsRun1', 'cmsRun2', 'cmsRun3', 'cmsRun4', 'stageOut1']: filesForStep = jobReport.getAllFilesFromStep(stepName) - logging.info("AMR step: %s, had output: %s", stepName, filesForStep) self.assertEqual(filesForStep, []) # steps that produced output files not have any output files filesForStep = jobReport.getAllFilesFromStep('cmsRun5') - #logging.info("AMR step: cmsRun5, had output: %s", pformat(filesForStep)) self.assertEqual(len(filesForStep), 1) self.assertTrue('83BC5087-21BD-6140-9118-51204C0B64B9.root' in filesForStep[0]['lfn']) self.assertEqual({'T2_CH_CSCS'}, filesForStep[0]['locations']) filesForStep = jobReport.getAllFilesFromStep('cmsRun6') - #logging.info("AMR step: cmsRun6, had output: %s", pformat(filesForStep)) self.assertEqual(len(filesForStep), 1) self.assertTrue('CFC2B499-098E-0143-8A7D-BED766ED7D87.root' in filesForStep[0]['lfn']) self.assertEqual(set(), filesForStep[0]['locations']) filesForStep = jobReport.getAllFilesFromStep('logArch1') - #logging.info("AMR step: logArch1, had output: %s", pformat(filesForStep)) self.assertEqual(len(filesForStep), 1) self.assertTrue('e3230232-09ed-40c0-ac47-ddf926edcd57-64-3-logArchive.tar.gz' in filesForStep[0]['lfn']) self.assertEqual({'T2_CH_CSCS'}, filesForStep[0]['locations'])