From d4c850de1cafd7d436b2b997b46c42e5b10eec23 Mon Sep 17 00:00:00 2001 From: Stephen R Pietrowicz Date: Mon, 11 Mar 2024 17:24:52 -0500 Subject: [PATCH] Fix formatting issues --- bin.src/rucio-integration-register | 33 +++-- python/lsst/dm/rucio/register/app_parser.py | 136 ++++++++++-------- python/lsst/dm/rucio/register/dataset_map.py | 3 +- .../lsst/dm/rucio/register/resource_bundle.py | 1 + .../lsst/dm/rucio/register/rucio_interface.py | 36 +++-- .../rucio/register/rucio_register_config.py | 3 +- tests/test_app_parser.py | 18 ++- tests/test_dataset_map.py | 9 +- tests/test_interface.py | 108 +++++++------- tests/test_resource_bundle.py | 5 +- tests/test_rucio_config.py | 3 +- 11 files changed, 210 insertions(+), 145 deletions(-) diff --git a/bin.src/rucio-integration-register b/bin.src/rucio-integration-register index c576904..29e157e 100755 --- a/bin.src/rucio-integration-register +++ b/bin.src/rucio-integration-register @@ -14,9 +14,10 @@ logger = logging.getLogger(__name__) RUCIO_REGISTER_CONFIG = "RUCIO_REGISTER_CONFIG" _MSG = "environment variable not set, and no configuration was specified on the command line" + def main(): ap = AppParser(sys.argv) - + if RUCIO_REGISTER_CONFIG in os.environ: config_file = os.environ[RUCIO_REGISTER_CONFIG] elif ap.register_config is None: @@ -24,29 +25,33 @@ def main(): sys.exit(100) else: config_file = ap.register_config - + config = RucioRegisterConfig(config_file) - + rucio_rse = config.rucio_rse scope = config.scope rse_root = config.rse_root dtn_url = config.dtn_url - + butler = Butler(ap.butler_repo) - - ri = RucioInterface(butler=butler, - rucio_rse=rucio_rse, - scope=scope, - rse_root=rse_root, - dtn_url=dtn_url - ) - - dataset_refs = butler.registry.queryDatasets(ap.dataset_type, collections=ap.collections) - + + ri = RucioInterface( + butler=butler, + rucio_rse=rucio_rse, + scope=scope, + rse_root=rse_root, + dtn_url=dtn_url, + ) + + dataset_refs = butler.registry.queryDatasets( + ap.dataset_type, collections=ap.collections + ) + cnt = ri.register_as_replicas(ap.rucio_dataset, dataset_refs) if cnt == 0: print("no datasets to register") sys.exit(200) + if __name__ == "__main__": main() diff --git a/python/lsst/dm/rucio/register/app_parser.py b/python/lsst/dm/rucio/register/app_parser.py index 0d9de4c..8bbe733 100644 --- a/python/lsst/dm/rucio/register/app_parser.py +++ b/python/lsst/dm/rucio/register/app_parser.py @@ -33,74 +33,86 @@ class AppParser: argv: `list` list of program name and arguments """ + def __init__(self, argv): parser = argparse.ArgumentParser(prog=argv[0]) - parser.add_argument("-r", - "--repo", - action="store", - default=None, - dest="repo", - help="Butler repository", - type=str, - required=True - ) - parser.add_argument("-c", - "--collections", - action="store", - default=None, - dest="collections", - help="collections for lookup", - type=str, - required=True - ) - - parser.add_argument("-t", - "--dataset-type", - action="store", - default=None, - dest="dataset_type", - help="dataset type for lookup", - type=str, - required=True - ) - - parser.add_argument("-d", - "--rucio-dataset", - action="store", - default=None, - dest="rucio_dataset", - help="rucio dataset to register files to", - type=str, - required=True - ) - parser.add_argument("-C", - "--rucio-register-config", - action="store", - default=None, - dest="register_config", - help="configuration file used for registration", - type=str, - required=False, - ) + parser.add_argument( + "-r", + "--repo", + action="store", + default=None, + dest="repo", + help="Butler repository", + type=str, + required=True, + ) + parser.add_argument( + "-c", + "--collections", + action="store", + default=None, + dest="collections", + help="collections for lookup", + type=str, + required=True, + ) + + parser.add_argument( + "-t", + "--dataset-type", + action="store", + default=None, + dest="dataset_type", + help="dataset type for lookup", + type=str, + required=True, + ) + + parser.add_argument( + "-d", + "--rucio-dataset", + action="store", + default=None, + dest="rucio_dataset", + help="rucio dataset to register files to", + type=str, + required=True, + ) + parser.add_argument( + "-C", + "--rucio-register-config", + action="store", + default=None, + dest="register_config", + help="configuration file used for registration", + type=str, + required=False, + ) # the following arguments are for logging; # defaults to WARNING # -v sets to INFO # -D sets to DEBUG group = parser.add_mutually_exclusive_group() - group.add_argument('-v', - '--verbose', - help="set loglevel to INFO", - action="store_const", dest="loglevel", const=logging.INFO, - default=None, - ) - group.add_argument('-D', - '--debug', - help="set loglevel to DEBUG", - action="store_const", dest="loglevel", const=logging.DEBUG, - default=None, - ) - + group.add_argument( + "-v", + "--verbose", + help="set loglevel to INFO", + action="store_const", + dest="loglevel", + const=logging.INFO, + default=None, + ) + group.add_argument( + "-D", + "--debug", + help="set loglevel to DEBUG", + action="store_const", + dest="loglevel", + const=logging.DEBUG, + default=None, + ) + args = parser.parse_args(argv[1:]) if args.loglevel is None: @@ -108,7 +120,9 @@ def __init__(self, argv): else: loglevel = args.loglevel - logging.basicConfig(level=logging.INFO, format=(_FORMAT), datefmt="%Y-%m-%d %H:%M:%S") + logging.basicConfig( + level=logging.INFO, format=(_FORMAT), datefmt="%Y-%m-%d %H:%M:%S" + ) self.butler_repo = args.repo self.collections = args.collections diff --git a/python/lsst/dm/rucio/register/dataset_map.py b/python/lsst/dm/rucio/register/dataset_map.py index 3645362..1318ccf 100644 --- a/python/lsst/dm/rucio/register/dataset_map.py +++ b/python/lsst/dm/rucio/register/dataset_map.py @@ -32,12 +32,13 @@ class DatasetMap: mapping_yaml: `str` name of YAML file containing mapping """ + def __init__(self, mapping_yaml): with open(mapping_yaml) as f: self.config = yaml.safe_load(f) def get_dataset_template(self, typename) -> str: - """ Get the dataset_template associated with a dataset type + """Get the dataset_template associated with a dataset type Parameters ---------- diff --git a/python/lsst/dm/rucio/register/resource_bundle.py b/python/lsst/dm/rucio/register/resource_bundle.py index d54317b..ee63334 100644 --- a/python/lsst/dm/rucio/register/resource_bundle.py +++ b/python/lsst/dm/rucio/register/resource_bundle.py @@ -25,6 +25,7 @@ logger = logging.getLogger(__name__) + class ResourceBundle: def __init__(self, dataset_id: str, did: dict): """Create a resource bundle of a dataset_id and its metadata diff --git a/python/lsst/dm/rucio/register/rucio_interface.py b/python/lsst/dm/rucio/register/rucio_interface.py index 0807589..ed07abe 100644 --- a/python/lsst/dm/rucio/register/rucio_interface.py +++ b/python/lsst/dm/rucio/register/rucio_interface.py @@ -39,6 +39,7 @@ logger = logging.getLogger(__name__) + class RucioInterface: """Add files as replicas in Rucio, along with metadata, and attach them to datasets. @@ -56,7 +57,14 @@ class RucioInterface: Base URL of the data transfer node for the Rucio physical filename. """ - def __init__(self, butler: lsst.daf.butler.Butler, rucio_rse: str, scope: str, rse_root:str, dtn_url: str): + def __init__( + self, + butler: lsst.daf.butler.Butler, + rucio_rse: str, + scope: str, + rse_root: str, + dtn_url: str, + ): self.butler = butler self.rse = rucio_rse self.scope = scope @@ -104,10 +112,18 @@ def _make_did(self, resource_path, metadata: str) -> dict[str, str | int]: path = resource_path.path.removeprefix(self.rse_root) pfn = self.pfn_base + path logging.debug(f"pfn = {pfn}") - name = path.removeprefix("/"+self.scope+"/") + name = path.removeprefix("/" + self.scope + "/") - meta = { 'rubin_butler': 1, 'rubin_sidecar': metadata } - d = dict(pfn=pfn, bytes=size, adler32=adler32, md5=md5, name=name, scope=self.scope, meta=meta) + meta = {"rubin_butler": 1, "rubin_sidecar": metadata} + d = dict( + pfn=pfn, + bytes=size, + adler32=adler32, + md5=md5, + name=name, + scope=self.scope, + meta=meta, + ) return d @@ -121,11 +137,13 @@ def _add_replicas(self, bundles: list[ResourceBundle]) -> None: """ logger.info(f"rse={self.rse}, bundles={bundles}") dids = [bundle.get_did() for bundle in bundles] - logger.info(f'{dids[0]=}') + logger.info(f"{dids[0]=}") self.replica_client = ReplicaClient() self.replica_client.add_replicas(rse=self.rse, files=dids) - def _add_files_to_dataset(self, did_client, dataset_id: str, dids: list[dict]) -> None: + def _add_files_to_dataset( + self, did_client, dataset_id: str, dids: list[dict] + ) -> None: """Attach a list of files specified by Rucio DIDs to a Rucio dataset. Ignores already-attached files for idempotency. @@ -195,7 +213,9 @@ def register_to_dataset(self, bundles) -> None: dids = [] for rb in bundles: dids.append(rb.get_did()) - logger.info("Registering %s in dataset %s, RSE %s", dids, dataset_id, self.rse) + logger.info( + "Registering %s in dataset %s, RSE %s", dids, dataset_id, self.rse + ) self._add_files_to_dataset(did_client, dataset_id, dids) except rucio.common.exception.DataIdentifierNotFound: # No such dataset, so create it @@ -217,7 +237,7 @@ def register_to_dataset(self, bundles) -> None: logger.debug("Done with Rucio for %s", bundles) def register_as_replicas(self, dataset_id, dataset_refs) -> None: - """Register a list of DatasetRefs to a Rucio dataset + """Register a list of DatasetRefs to a Rucio dataset Parameters ---------- diff --git a/python/lsst/dm/rucio/register/rucio_register_config.py b/python/lsst/dm/rucio/register/rucio_register_config.py index 3ee891b..d4d047e 100644 --- a/python/lsst/dm/rucio/register/rucio_register_config.py +++ b/python/lsst/dm/rucio/register/rucio_register_config.py @@ -34,10 +34,11 @@ class RucioRegisterConfig: config_file: `str` path to configuration file """ + def __init__(self, config_file: str): with open(config_file) as f: config = yaml.safe_load(f) - + self.rucio_rse = config["rucio_rse"] self.scope = config["scope"] self.rse_root = config["rse_root"] diff --git a/tests/test_app_parser.py b/tests/test_app_parser.py index 52f7bda..ae44641 100644 --- a/tests/test_app_parser.py +++ b/tests/test_app_parser.py @@ -31,7 +31,20 @@ class AppParserTestCase(unittest.TestCase): def testAppParser(self): - argv = ["unittest", "-r", "repo", "-c", "collections", "-t", "type", "-d", "dataset", "-C", "config", "-v"] + argv = [ + "unittest", + "-r", + "repo", + "-c", + "collections", + "-t", + "type", + "-d", + "dataset", + "-C", + "config", + "-v", + ] ap = AppParser(argv) self.assertEqual(ap.butler_repo, "repo") @@ -49,9 +62,10 @@ def testMissingArguments(self): except SystemExit: pass + class MemoryTester(lsst.utils.tests.MemoryTestCase): pass - + def setup_module(module): lsst.utils.tests.init() diff --git a/tests/test_dataset_map.py b/tests/test_dataset_map.py index bbbd22a..c22cdaf 100644 --- a/tests/test_dataset_map.py +++ b/tests/test_dataset_map.py @@ -34,18 +34,19 @@ def testDatasetMap(self): map_file = os.path.join(test_dir, "data", "map.yaml") td = DatasetMap(map_file) - s = td.get_dataset_template('visitSummary') + s = td.get_dataset_template("visitSummary") self.assertEqual(s, "{visit}/one/two") - s = td.get_dataset_template('isolated_star_cat') + s = td.get_dataset_template("isolated_star_cat") self.assertEqual(s, "three/four/{tract}") - s = td.get_dataset_template('isolated_star_sources') + s = td.get_dataset_template("isolated_star_sources") self.assertEqual(s, "five/six/{tract}") + class MemoryTester(lsst.utils.tests.MemoryTestCase): pass - + def setup_module(module): lsst.utils.tests.init() diff --git a/tests/test_interface.py b/tests/test_interface.py index 42e2694..ee0ba1f 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -76,85 +76,91 @@ def testInterfaceTestCase(self): self.butler.registry.refresh() - dataset_refs = self.butler.registry.queryDatasets("visitSummary", collections="HSC/runs/RC2/w_2023_32/DM-40356/20230814T170253Z") + dataset_refs = self.butler.registry.queryDatasets( + "visitSummary", + collections="HSC/runs/RC2/w_2023_32/DM-40356/20230814T170253Z", + ) cnt = ri.register_as_replicas("mydataset", dataset_refs) self.assertEqual(cnt, 1) - dataset_refs = self.butler.registry.queryDatasets("visitSummary", collections="HSC/runs/RC2/w_2023_32/DM-40356/20230814T170253Z") + dataset_refs = self.butler.registry.queryDatasets( + "visitSummary", + collections="HSC/runs/RC2/w_2023_32/DM-40356/20230814T170253Z", + ) cnt = 0 for ref in dataset_refs: - self.assertEqual(cnt, 0, "There should have been only one dataset ref retrieved") - + self.assertEqual( + cnt, 0, "There should have been only one dataset ref retrieved" + ) + rb = ri._make_bundle("mydataset", ref) self.assertEqual(rb.dataset_id, "mydataset") did = rb.did - self.assertEqual(did['pfn'], f'{dtn_url}{self.data_file}') - self.assertEqual(did['bytes'], 1365120) - self.assertEqual(did['adler32'], '480be4de') - self.assertEqual(did['md5'], 'a7ee5c19f5717bcf8d772de202864244') - self.assertEqual(did['name'], f'{self.data_file}') - self.assertEqual(did['scope'], 'test') + self.assertEqual(did["pfn"], f"{dtn_url}{self.data_file}") + self.assertEqual(did["bytes"], 1365120) + self.assertEqual(did["adler32"], "480be4de") + self.assertEqual(did["md5"], "a7ee5c19f5717bcf8d772de202864244") + self.assertEqual(did["name"], f"{self.data_file}") + self.assertEqual(did["scope"], "test") - meta = did['meta'] - self.assertEqual(meta['rubin_butler'], 1) + meta = did["meta"] + self.assertEqual(meta["rubin_butler"], 1) - with open(self.json_file, 'r') as f: + with open(self.json_file, "r") as f: metadata = json.loads(f.readline()) - sidecar = json.loads(meta['rubin_sidecar']) + sidecar = json.loads(meta["rubin_sidecar"]) self.assertDictEqual(metadata, sidecar) cnt = cnt + 1 - def tearDown(self): shutil.rmtree(self.butler_repo, ignore_errors=True) - def create_file_dataset(self, datafile:str, jsonfile:str): - with open(jsonfile, 'r') as f: + def create_file_dataset(self, datafile: str, jsonfile: str): + with open(jsonfile, "r") as f: metadata = f.readline() ref = DatasetRef.from_json(metadata, registry=self.butler.registry) fds = FileDataset(ResourcePath(f"file://{datafile}"), ref) return fds def ingest(self, datasets: list): - """Ingest a list of Datasets - - Parameters - ---------- - filedataset : `FileDataset` - A FileDataset - """ - completed = False - while not completed: - try: - self.butler.ingest(*datasets, transfer="direct") - print("ingested") - completed = True - except DatasetTypeError: - print("DatasetTypeError") - dst_set = set() - for dataset in datasets: - for dst in {ref.datasetType for ref in dataset.refs}: - dst_set.add(dst) - for dst in dst_set: - self.butler.registry.registerDatasetType(dst) - except MissingCollectionError: - print("MissingCollectionError") - run_set = set() - for dataset in datasets: - for run in {ref.run for ref in dataset.refs}: - run_set.add(run) - for run in run_set: - self.butler.registry.registerRun(run) - except Exception as e: - print(f"Exception {e=}") - completed = True - + """Ingest a list of Datasets + + Parameters + ---------- + filedataset : `FileDataset` + A FileDataset + """ + completed = False + while not completed: + try: + self.butler.ingest(*datasets, transfer="direct") + print("ingested") + completed = True + except DatasetTypeError: + print("DatasetTypeError") + dst_set = set() + for dataset in datasets: + for dst in {ref.datasetType for ref in dataset.refs}: + dst_set.add(dst) + for dst in dst_set: + self.butler.registry.registerDatasetType(dst) + except MissingCollectionError: + print("MissingCollectionError") + run_set = set() + for dataset in datasets: + for run in {ref.run for ref in dataset.refs}: + run_set.add(run) + for run in run_set: + self.butler.registry.registerRun(run) + except Exception as e: + print(f"Exception {e=}") + completed = True class MemoryTester(lsst.utils.tests.MemoryTestCase): pass - + def setup_module(module): lsst.utils.tests.init() diff --git a/tests/test_resource_bundle.py b/tests/test_resource_bundle.py index 3bb27bc..0adc0c5 100644 --- a/tests/test_resource_bundle.py +++ b/tests/test_resource_bundle.py @@ -30,16 +30,17 @@ class ResourceBundleTestCase(unittest.TestCase): def testResourceBundle(self): - rb = ResourceBundle("12", {"1": 1, "2":2, "3": 3}) + rb = ResourceBundle("12", {"1": 1, "2": 2, "3": 3}) self.assertEqual(rb.dataset_id, "12") self.assertEqual(rb.did["1"], 1) self.assertEqual(rb.did["2"], 2) self.assertEqual(rb.did["3"], 3) + class MemoryTester(lsst.utils.tests.MemoryTestCase): pass - + def setup_module(module): lsst.utils.tests.init() diff --git a/tests/test_rucio_config.py b/tests/test_rucio_config.py index b9b9654..a374ccf 100644 --- a/tests/test_rucio_config.py +++ b/tests/test_rucio_config.py @@ -38,9 +38,10 @@ def testConfig1(self): self.assertEqual(rrc.rse_root, "/rse/root") self.assertEqual(rrc.dtn_url, "root://rse1:1094//rucio") + class MemoryTester(lsst.utils.tests.MemoryTestCase): pass - + def setup_module(module): lsst.utils.tests.init()