Skip to content

Commit 81ce033

Browse files
committed
Adopt MSPileup data into PileupFetcher
Fix import. Fix PileupFetcher logging and kwargs. Support different Rucio URLs in PileupFetcher, according to the DBS instance. Custom pileup requires a custom scope as well. Another fix for custom pileup name. Remove blocks available in DBS but not in Rucio. Improve docstring.
1 parent cde16c2 commit 81ce033

File tree

2 files changed

+67
-18
lines changed

2 files changed

+67
-18
lines changed

src/python/Utils/Patterns.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Patterns module provides set of CS patterns
33
"""
4-
4+
import re
55

66
class Singleton(type):
77
"""Implementation of Singleton class"""
@@ -11,3 +11,14 @@ def __call__(cls, *args, **kwargs):
1111
cls._instances[cls] = \
1212
super(Singleton, cls).__call__(*args, **kwargs)
1313
return cls._instances[cls]
14+
15+
16+
def getDomainName(urlStr):
17+
"""
18+
Given a URL string, return the host name that precedes ".cern.ch".
19+
:param urlStr: URL string (http or https) under the cern.ch domain
20+
:return: a string with the domain name (e.g. "cmsweb-prod"), or an empty string if the URL does not match
21+
"""
22+
domainPattern = re.compile(r'https?://([^/]+)\.cern\.ch')
23+
match = domainPattern.search(urlStr)
24+
return match.group(1) if match else ""

src/python/WMCore/WMSpec/Steps/Fetchers/PileupFetcher.py

+55-17
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,6 @@
33
of pileup files in the job sandbox for the dataset.
44
55
"""
6-
from __future__ import print_function
7-
8-
from future.utils import viewitems
9-
106
import datetime
117
import os
128
import hashlib
@@ -15,8 +11,10 @@
1511
import logging
1612
from json import JSONEncoder
1713
import WMCore.WMSpec.WMStep as WMStep
14+
from Utils.Patterns import getDomainName
1815
from Utils.Utilities import encodeUnicodeToBytes
1916
from WMCore.Services.DBS.DBSReader import DBSReader
17+
from WMCore.Services.MSPileup.MSPileupUtils import getPileupDocs
2018
from WMCore.Services.Rucio.Rucio import Rucio
2119
from WMCore.WMSpec.Steps.Fetchers.FetcherInterface import FetcherInterface
2220

@@ -34,7 +32,6 @@ def __init__(self):
3432
Prepare module setup
3533
"""
3634
super(PileupFetcher, self).__init__()
37-
# FIXME: find a way to pass the Rucio account name to this fetcher module
3835
self.rucioAcct = "wmcore_pileup"
3936
self.rucio = None
4037

@@ -52,41 +49,81 @@ def _queryDbsAndGetPileupConfig(self, stepHelper, dbsReader):
5249
"BlockB": {"FileList": [], "PhEDExNodeName": []}, ....}
5350
"""
5451
resultDict = {}
52+
# first, figure out which instance of MSPileup and Rucio to use
53+
pileupInstance = getDomainName(dbsReader.dbsURL)
54+
msPileupUrl = f"https://{pileupInstance}.cern.ch/ms-pileup/data/pileup"
55+
# FIXME: this juggling with Rucio is tough! We can get away without it,
56+
# but for that we would have to use testbed MSPileup against Prod Rucio
57+
if pileupInstance == "cmsweb-prod" or pileupInstance == "cmsweb":
58+
rucioAuthUrl, rucioUrl = "cms-rucio-auth", "cms-rucio"
59+
else:
60+
rucioAuthUrl, rucioUrl = "cms-rucio-auth-int", "cms-rucio-int"
61+
# initialize Rucio here to avoid this authentication on T0-WMAgent
62+
self.rucio = Rucio(self.rucioAcct,
63+
authUrl=f"https://{rucioAuthUrl}.cern.ch",
64+
hostUrl=f"http://{rucioUrl}.cern.ch")
65+
5566
# iterate over input pileup types (e.g. "cosmics", "minbias")
5667
for pileupType in stepHelper.data.pileup.listSections_():
5768
# the format here is: step.data.pileup.cosmics.dataset = [/some/data/set]
5869
datasets = getattr(getattr(stepHelper.data.pileup, pileupType), "dataset")
5970
# each dataset input can generally be a list, iterate over dataset names
6071
blockDict = {}
6172
for dataset in datasets:
62-
73+
# using the original dataset, resolve blocks, files and number of events with DBS
74+
fCounter = 0
6375
for fileInfo in dbsReader.getFileListByDataset(dataset=dataset, detail=True):
6476
blockDict.setdefault(fileInfo['block_name'], {'FileList': [],
6577
'NumberOfEvents': 0,
6678
'PhEDExNodeNames': []})
6779
blockDict[fileInfo['block_name']]['FileList'].append(fileInfo['logical_file_name'])
6880
blockDict[fileInfo['block_name']]['NumberOfEvents'] += fileInfo['event_count']
81+
fCounter += 1
6982

70-
self._getDatasetLocation(dataset, blockDict)
83+
logging.info(f"Found {len(blockDict)} blocks in DBS for dataset {dataset} with {fCounter} files")
84+
self._getDatasetLocation(dataset, blockDict, msPileupUrl)
7185

7286
resultDict[pileupType] = blockDict
7387
return resultDict
7488

75-
def _getDatasetLocation(self, dset, blockDict):
89+
def _getDatasetLocation(self, dset, blockDict, msPileupUrl):
7690
"""
7791
Given a dataset name, query PhEDEx or Rucio and resolve the block location
7892
:param dset: string with the dataset name
7993
:param blockDict: dictionary with DBS summary info
94+
:param msPileupUrl: string with the MSPileup url
8095
:return: update blockDict in place
8196
"""
82-
# initialize Rucio here to avoid this authentication on T0-WMAgent
83-
self.rucio = Rucio(self.rucioAcct)
84-
blockReplicas = self.rucio.getPileupLockedAndAvailable(dset, account=self.rucioAcct)
85-
for blockName, blockLocation in viewitems(blockReplicas):
86-
try:
87-
blockDict[blockName]['PhEDExNodeNames'] = list(blockLocation)
88-
except KeyError:
89-
logging.warning("Block '%s' present in Rucio but not in DBS", blockName)
97+
# fetch the pileup configuration from MSPileup
98+
try:
99+
queryDict = {'query': {'pileupName': dset},
100+
'filters': ['pileupName', 'customName', 'containerFraction', 'currentRSEs']}
101+
doc = getPileupDocs(msPileupUrl, queryDict, method='POST')[0]
102+
msg = f'Pileup dataset {doc["pileupName"]} with:\n\tcustom name: {doc["customName"]},'
103+
msg += f'\n\tcurrent RSEs: {doc["currentRSEs"]}\n\tand container fraction: {doc["containerFraction"]}'
104+
logging.info(msg)
105+
except Exception as ex:
106+
logging.error(f'Error querying MSPileup for dataset {dset}. Details: {str(ex)}')
107+
raise ex
108+
109+
# custom dataset name means there was a container fraction change, use different scope
110+
puScope = 'cms'
111+
if doc["customName"]:
112+
dset = doc["customName"]
113+
puScope = 'group.wmcore'
114+
115+
blockReplicas = self.rucio.getBlocksInContainer(container=dset, scope=puScope)
116+
logging.info(f"Found {len(blockReplicas)} blocks in container {dset} for scope {puScope}")
117+
118+
# Finally, update blocks present in Rucio with the MSPileup currentRSEs.
119+
# Blocks not present in Rucio - hence only in DBS - are meant to be removed.
120+
for blockName in list(blockDict):
121+
if blockName not in blockReplicas:
122+
logging.warning(f"Block {blockName} present in DBS but not in Rucio. Removing it.")
123+
blockDict.pop(blockName)
124+
else:
125+
blockDict[blockName]['PhEDExNodeNames'] = doc["currentRSEs"]
126+
logging.info(f"Final pileup dataset {dset} has a total of {len(blockDict)} blocks.")
90127

91128
def _getCacheFilePath(self, stepHelper):
92129

@@ -171,7 +208,8 @@ def createPileupConfigFile(self, helper):
171208
"""
172209
Stores pileup JSON configuration file in the working
173210
directory / sandbox.
174-
211+
:param helper: WMStepHelper instance
212+
:return: None
175213
"""
176214
if self._isCacheValid(helper):
177215
# if file already exist don't make a new dbs call and overwrite the file.

0 commit comments

Comments
 (0)