From d1cb17bb206aee8aad4a1dfce055cbcd903b63ae Mon Sep 17 00:00:00 2001 From: paulboon Date: Thu, 8 Feb 2024 16:39:23 +0100 Subject: [PATCH 01/11] Initial implementation of dv-dataverse-role-assignment --- pyproject.toml | 1 + src/datastation/dataverse/dataverse_api.py | 39 +++++- src/datastation/dataverse/dataverse_client.py | 4 +- .../dataverse/permissions_collect.py | 2 +- .../dv_dataverse_role_assignment.py | 127 ++++++++++++++++++ 5 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 src/datastation/dv_dataverse_role_assignment.py diff --git a/pyproject.toml b/pyproject.toml index 89e54c0..1bb242e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,3 +52,4 @@ ingest-flow = "datastation.ingest_flow:main" dv-dataverse-root-collect-storage-usage = "datastation.dv_dataverse_root_collect_storage_usage:main" dv-dataverse-root-collect-permission-overview = "datastation.dv_dataverse_root_collect_permission_overview:main" datastation-get-component-versions = "datastation.datastation_get_component_versions:main" +dv-dataverse-role-assignment = "datastation.dv_dataverse_role_assignment:main" diff --git a/src/datastation/dataverse/dataverse_api.py b/src/datastation/dataverse/dataverse_api.py index 023873c..cc6c7b5 100644 --- a/src/datastation/dataverse/dataverse_api.py +++ b/src/datastation/dataverse/dataverse_api.py @@ -1,13 +1,18 @@ import requests +import json from datastation.common.utils import print_dry_run_message class DataverseApi: - def __init__(self, server_url, api_token): + def __init__(self, server_url, api_token, alias=None): self.server_url = server_url self.api_token = api_token + self.alias = alias # Methods should use this one if specified + + def get_alias(self): + return self.alias # get json data for a specific dataverses API endpoint using an API token def get_resource_data(self, resource, alias="root", dry_run=False): @@ -30,7 +35,9 @@ def get_contents(self, alias="root", dry_run=False): def get_roles(self, alias="root", dry_run=False): return self.get_resource_data("roles", alias, dry_run) - def get_assignments(self, alias="root", dry_run=False): + def get_role_assignments(self, alias="root", dry_run=False): + if self.alias is not None: + alias = self.alias return self.get_resource_data("assignments", alias, dry_run) def get_groups(self, alias="root", dry_run=False): @@ -47,3 +54,31 @@ def get_storage_size(self, alias="root", dry_run=False): r = requests.get(url, headers=headers) r.raise_for_status() return r.json()['data']['message'] + + def add_role_assignment(self, assignee, role, alias=None, dry_run=False): + if self.alias is not None: + alias = self.alias + url = f'{self.server_url}/api/dataverses/{alias}/assignments' + headers = {'X-Dataverse-key': self.api_token, 'Content-type': 'application/json'} + role_assignment = {"assignee": assignee, "role": role} + if dry_run: + print_dry_run_message(method='POST', url=url, headers=headers, + data=json.dumps(role_assignment)) + return None + else: + r = requests.post(url, headers=headers, json=role_assignment) + r.raise_for_status() + return r + + def remove_role_assignment(self, assignment_id, alias=None, dry_run=False): + if self.alias is not None: + alias = self.alias + url = f'{self.server_url}/api/dataverses/{alias}/assignments/{assignment_id}' + headers = {'X-Dataverse-key': self.api_token, 'Content-type': 'application/json'} + if dry_run: + print_dry_run_message(method='DELETE', url=url, headers=headers) + return None + else: + r = requests.delete(url, headers=headers) + r.raise_for_status() + 
return r diff --git a/src/datastation/dataverse/dataverse_client.py b/src/datastation/dataverse/dataverse_client.py index f0355aa..39b5e67 100644 --- a/src/datastation/dataverse/dataverse_client.py +++ b/src/datastation/dataverse/dataverse_client.py @@ -27,8 +27,8 @@ def search_api(self): def dataset(self, pid): return DatasetApi(pid, self.server_url, self.api_token, self.unblock_key, self.safety_latch) - def dataverse(self): - return DataverseApi(self.server_url, self.api_token) + def dataverse(self, alias=None): + return DataverseApi(self.server_url, self.api_token, alias) def file(self, file_id): return FileApi(file_id, self.server_url, self.api_token, self.unblock_key, self.safety_latch) diff --git a/src/datastation/dataverse/permissions_collect.py b/src/datastation/dataverse/permissions_collect.py index ed2e578..f874fd0 100644 --- a/src/datastation/dataverse/permissions_collect.py +++ b/src/datastation/dataverse/permissions_collect.py @@ -61,7 +61,7 @@ def get_role_info(self, alias): return ', '.join(result_list) def get_assignment_info(self, alias): - resp_data = self.dataverse_client.dataverse().get_assignments(alias) + resp_data = self.dataverse_client.dataverse().get_role_assignments(alias) # flatten and compact it... no list comprehension though result_list = [] for assignment in resp_data: diff --git a/src/datastation/dv_dataverse_role_assignment.py b/src/datastation/dv_dataverse_role_assignment.py new file mode 100644 index 0000000..a546791 --- /dev/null +++ b/src/datastation/dv_dataverse_role_assignment.py @@ -0,0 +1,127 @@ +import argparse +from datetime import datetime + +import rich + +from datastation.common.batch_processing import get_pids, BatchProcessorWithReport +from datastation.common.config import init +from datastation.common.utils import add_batch_processor_args, add_dry_run_arg +from datastation.dataverse.dataset_api import DatasetApi +from datastation.dataverse.dataverse_api import DataverseApi +from datastation.dataverse.dataverse_client import DataverseClient + + +def add_role_assignments(args, dataverse_client: DataverseClient): + pids = get_pids(args.pid_or_pid_file) + batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + report_file=args.report_file, + headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_pids(pids, + lambda pid, csv_report: add_role_assignment(args.role_assignment, + dataverse_api=dataverse_client.dataverse(pid), + csv_report=csv_report)) + + +def add_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): + assignee = role_assignment.split('=')[0] + role = role_assignment.split('=')[1] + action = "None" + if in_current_assignments(assignee, role, dataverse_api): + print("{} is already {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) + else: + print( + "Adding {} as {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) + dataverse_api.add_role_assignment(assignee, role, dry_run=dry_run) + action = "Added" + csv_report.write( + {'DOI': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, + 'Change': action}) + + +def in_current_assignments(assignee, role, dataverse_api: DataverseApi): + current_assignments = dataverse_api.get_role_assignments() + found = False + for current_assignment in current_assignments: + if current_assignment.get('assignee') == assignee and current_assignment.get( + '_roleAlias') == role: + found = True + break + return found + + 
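+# Note: the add/remove subcommands take a role_assignment argument of the form
+# "<assignee>=<role>", e.g. "@dataverseAdmin=contributor"; existing assignments
+# returned by the /api/dataverses/{alias}/assignments endpoint are matched on
+# their 'assignee' and '_roleAlias' fields.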
+def list_role_assignments(args, dataverse_client): + r = dataverse_client.dataverse(args.pid).get_role_assignments() + if r is not None: + rich.print_json(data=r) + + +def remove_role_assignments(args, dataverse_client: DataverseClient): + pids = get_pids(args.pid_or_pid_file) + batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_pids(pids, + lambda pid, + csv_report: remove_role_assignment(args.role_assignment, + dataverse_api=dataverse_client.dataverse(pid), + csv_report=csv_report)) + +def remove_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): + assignee = role_assignment.split('=')[0] + role = role_assignment.split('=')[1] + action = "None" + if in_current_assignments(assignee, role, dataverse_api): + print("Removing {} as {} for dataverse {}".format(assignee, role, dataverse_api.get_alias())) + all_assignments = dataverse_api.get_role_assignments() + for assignment in all_assignments: + if assignment.get('assignee') == assignee and assignment.get('_roleAlias') == role: + dataverse_api.remove_role_assignment(assignment.get('id'), dry_run=dry_run) + action = "Removed" + break + else: + print("{} is not {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) + csv_report.write( + {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, + 'Change': action}) + + +def main(): + config = init() + dataverse_client = DataverseClient(config['dataverse']) + batch_processor = BatchProcessorWithReport(headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + + # Create main parser and subparsers + parser = argparse.ArgumentParser(description='Manage role assignments on one or more datasets.') + subparsers = parser.add_subparsers(help='subcommands', dest='subcommand') + + # Add role assignment + parser_add = subparsers.add_parser('add', help='add role assignment to specified dataset(s)') + parser_add.add_argument('role_assignment', + help='role assignee and alias (example: @dataverseAdmin=contributor) to add') + parser_add.add_argument('pid_or_pid_file', help='the dataset pid or the input file with the dataset pids') + add_batch_processor_args(parser_add) + add_dry_run_arg(parser_add) + + parser_add.set_defaults(func=lambda _: add_role_assignments(_, dataverse_client)) + + # Remove role assignment + parser_remove = subparsers.add_parser('remove', help='remove role assignment from specified dataset(s)') + parser_remove.add_argument('role_assignment', + help='role assignee and alias (example: @dataverseAdmin=contributor)') + parser_remove.add_argument('pid_or_pid_file', help='The dataset pid or the input file with the dataset pids') + add_batch_processor_args(parser_remove) + add_dry_run_arg(parser_remove) + parser_remove.set_defaults(func=lambda _: remove_role_assignments(_, dataverse_client)) + + # List role assignments + parser_list = subparsers.add_parser('list', + help='list role assignments for specified dataset (only one pid allowed)') + parser_list.add_argument('pid', help='the dataset pid') + add_dry_run_arg(parser_list) + parser_list.set_defaults(func=lambda _: list_role_assignments(_, dataverse_client)) + + args = parser.parse_args() + args.func(args) + + +if __name__ == '__main__': + main() From 8d4a65c4b6da1036864fd0860710561727937c82 Mon Sep 17 00:00:00 2001 From: paulboon Date: Tue, 13 Feb 2024 10:21:20 +0100 Subject: [PATCH 02/11] Completed 
functionality of dv-dataverse-role-assignment

---
 src/datastation/common/batch_processing.py   | 71 +++++++++++++++++++
 .../dv_dataverse_role_assignment.py          | 55 +++++++-------
 2 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/src/datastation/common/batch_processing.py b/src/datastation/common/batch_processing.py
index 9b6f07b..786ea59 100644
--- a/src/datastation/common/batch_processing.py
+++ b/src/datastation/common/batch_processing.py
@@ -34,6 +34,31 @@ def get_pids(pid_or_pids_file, search_api=None, query="*", subtree="root", objec
         return [pid_or_pids_file]
 
+def get_aliases(alias_or_aliases_file, dry_run=False):
+    """
+
+    Args:
+        alias_or_aliases_file: The dataverse alias, or a file with a list of aliases.
+        dry_run: Do not perform the action, but show what would be done.
+                 Only applicable if alias_or_aliases_file is None.
+
+    Returns: an iterator with aliases
+    """
+    if alias_or_aliases_file is None:
+        # The tree of all (published) dataverses could be retrieved and aliases could recursively be extracted
+        # from the tree, but this is not implemented yet.
+        logging.warning(f"No aliases provided, nothing to do.")
+        return None
+    elif os.path.isfile(os.path.expanduser(alias_or_aliases_file)):
+        aliases = []
+        with open(os.path.expanduser(alias_or_aliases_file)) as f:
+            for line in f:
+                aliases.append(line.strip())
+        return aliases
+    else:
+        return [alias_or_aliases_file]
+
+
 class BatchProcessor:
     def __init__(self, wait=0.1, fail_on_first_error=True):
         self.wait = wait
         self.fail_on_first_error = fail_on_first_error
@@ -75,3 +100,49 @@ def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error
     def process_pids(self, pids, callback):
         with CsvReport(os.path.expanduser(self.report_file), self.headers) as csv_report:
             super().process_pids(pids, lambda pid: callback(pid, csv_report))
+
+
+class DataverseBatchProcessor:
+    """
+    A batch processor for dataverses (collections) using their alias as unique identifier.
+ """ + def __init__(self, wait=0.1, fail_on_first_error=True): + self.wait = wait + self.fail_on_first_error = fail_on_first_error + + def process_aliases(self, aliases, callback): + if type(aliases) is list: + num_aliases = len(aliases) + logging.info(f"Start batch processing on {num_aliases} dataverse aliases") + else: + logging.info(f"Start batch processing on unknown number of dataverse aliases") + num_aliases = -1 + i = 0 + for alias in aliases: + i += 1 + try: + if self.wait > 0 and i > 1: + logging.debug(f"Waiting {self.wait} seconds before processing next dataverse alias") + time.sleep(self.wait) + logging.info(f"Processing {i} of {num_aliases}: {alias}") + callback(alias) + except Exception as e: + logging.exception("Exception occurred", exc_info=True) + if self.fail_on_first_error: + logging.error(f"Stop processing because of an exception: {e}") + break + logging.debug("fail_on_first_error is False, continuing...") + + +class DataverseBatchProcessorWithReport(DataverseBatchProcessor): + + def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error=True): + super().__init__(wait, fail_on_first_error) + if headers is None: + headers = ["alias", "Modified", "Change"] + self.report_file = report_file + self.headers = headers + + def process_aliases(self, aliases, callback): + with CsvReport(os.path.expanduser(self.report_file), self.headers) as csv_report: + super().process_aliases(aliases, lambda alias: callback(alias, csv_report)) diff --git a/src/datastation/dv_dataverse_role_assignment.py b/src/datastation/dv_dataverse_role_assignment.py index a546791..5b67b1e 100644 --- a/src/datastation/dv_dataverse_role_assignment.py +++ b/src/datastation/dv_dataverse_role_assignment.py @@ -3,23 +3,23 @@ import rich -from datastation.common.batch_processing import get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import DataverseBatchProcessorWithReport, get_aliases from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg -from datastation.dataverse.dataset_api import DatasetApi from datastation.dataverse.dataverse_api import DataverseApi from datastation.dataverse.dataverse_client import DataverseClient def add_role_assignments(args, dataverse_client: DataverseClient): - pids = get_pids(args.pid_or_pid_file) - batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, - headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) - batch_processor.process_pids(pids, - lambda pid, csv_report: add_role_assignment(args.role_assignment, - dataverse_api=dataverse_client.dataverse(pid), - csv_report=csv_report)) + aliases = get_aliases(args.alias_or_alias_file) + batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + report_file=args.report_file, + headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_aliases(aliases, + lambda alias, + csv_report: add_role_assignment(args.role_assignment, + dataverse_api=dataverse_client.dataverse(alias), + csv_report=csv_report)) def add_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): @@ -34,7 +34,7 @@ def add_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_report dataverse_api.add_role_assignment(assignee, role, dry_run=dry_run) action = "Added" csv_report.write( - {'DOI': dataverse_api.get_alias(), 'Modified': datetime.now(), 
'Assignee': assignee, 'Role': role, + {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, 'Change': action}) @@ -50,20 +50,21 @@ def in_current_assignments(assignee, role, dataverse_api: DataverseApi): def list_role_assignments(args, dataverse_client): - r = dataverse_client.dataverse(args.pid).get_role_assignments() + r = dataverse_client.dataverse(args.alias).get_role_assignments() if r is not None: rich.print_json(data=r) def remove_role_assignments(args, dataverse_client: DataverseClient): - pids = get_pids(args.pid_or_pid_file) - batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, - headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) - batch_processor.process_pids(pids, - lambda pid, - csv_report: remove_role_assignment(args.role_assignment, - dataverse_api=dataverse_client.dataverse(pid), - csv_report=csv_report)) + aliases = get_aliases(args.alias_or_alias_file) + batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_aliases(aliases, + lambda alias, + csv_report: remove_role_assignment(args.role_assignment, + dataverse_api=dataverse_client.dataverse(alias), + csv_report=csv_report)) + def remove_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): assignee = role_assignment.split('=')[0] @@ -78,7 +79,7 @@ def remove_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_rep action = "Removed" break else: - print("{} is not {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) + print("{} is not {} for dataverse {}".format(assignee, role, dataverse_api.get_alias())) csv_report.write( {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, 'Change': action}) @@ -87,7 +88,7 @@ def remove_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_rep def main(): config = init() dataverse_client = DataverseClient(config['dataverse']) - batch_processor = BatchProcessorWithReport(headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor = DataverseBatchProcessorWithReport(headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) # Create main parser and subparsers parser = argparse.ArgumentParser(description='Manage role assignments on one or more datasets.') @@ -97,7 +98,8 @@ def main(): parser_add = subparsers.add_parser('add', help='add role assignment to specified dataset(s)') parser_add.add_argument('role_assignment', help='role assignee and alias (example: @dataverseAdmin=contributor) to add') - parser_add.add_argument('pid_or_pid_file', help='the dataset pid or the input file with the dataset pids') + parser_add.add_argument('alias_or_alias_file', + help='The dataverse alias or the input file with the dataverse aliases') add_batch_processor_args(parser_add) add_dry_run_arg(parser_add) @@ -107,15 +109,16 @@ def main(): parser_remove = subparsers.add_parser('remove', help='remove role assignment from specified dataset(s)') parser_remove.add_argument('role_assignment', help='role assignee and alias (example: @dataverseAdmin=contributor)') - parser_remove.add_argument('pid_or_pid_file', help='The dataset pid or the input file with the dataset pids') + parser_remove.add_argument('alias_or_alias_file', + help='The dataverse alias or the input file with the dataverse aliases') add_batch_processor_args(parser_remove) 
add_dry_run_arg(parser_remove) parser_remove.set_defaults(func=lambda _: remove_role_assignments(_, dataverse_client)) # List role assignments parser_list = subparsers.add_parser('list', - help='list role assignments for specified dataset (only one pid allowed)') - parser_list.add_argument('pid', help='the dataset pid') + help='list role assignments for specified dataverse (only one alias allowed)') + parser_list.add_argument('alias', help='the dataverse alias') add_dry_run_arg(parser_list) parser_list.set_defaults(func=lambda _: list_role_assignments(_, dataverse_client)) From f2760e605e7c678cce5124b968121c3da5ad9201 Mon Sep 17 00:00:00 2001 From: paulboon Date: Tue, 13 Feb 2024 10:32:06 +0100 Subject: [PATCH 03/11] Renaming BatchProcessor to DatasetBatchProcessor --- src/datastation/common/batch_processing.py | 4 ++-- src/datastation/dv_dataset_delete_draft.py | 6 +++--- src/datastation/dv_dataset_destroy.py | 6 +++--- .../dv_dataset_destroy_migration_placeholder.py | 6 +++--- src/datastation/dv_dataset_get_attributes.py | 4 ++-- src/datastation/dv_dataset_get_metadata_export.py | 4 ++-- src/datastation/dv_dataset_publish.py | 8 ++++---- src/datastation/dv_dataset_reindex.py | 6 +++--- src/datastation/dv_dataset_reingest_tabular.py | 8 ++++---- src/datastation/dv_dataset_role_assignment.py | 14 +++++++------- src/datastation/dv_dataset_update_datacite.py | 6 +++--- src/tests/test_batch_processing.py | 8 ++++---- 12 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/datastation/common/batch_processing.py b/src/datastation/common/batch_processing.py index 786ea59..a3622f3 100644 --- a/src/datastation/common/batch_processing.py +++ b/src/datastation/common/batch_processing.py @@ -59,7 +59,7 @@ def get_aliases(alias_or_aliases_file, dry_run=False): return [alias_or_aliases_file] -class BatchProcessor: +class DatasetBatchProcessor: def __init__(self, wait=0.1, fail_on_first_error=True): self.wait = wait self.fail_on_first_error = fail_on_first_error @@ -88,7 +88,7 @@ def process_pids(self, pids, callback): logging.debug("fail_on_first_error is False, continuing...") -class BatchProcessorWithReport(BatchProcessor): +class DatasetBatchProcessorWithReport(DatasetBatchProcessor): def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error=True): super().__init__(wait, fail_on_first_error) diff --git a/src/datastation/dv_dataset_delete_draft.py b/src/datastation/dv_dataset_delete_draft.py index 50b656c..ea4812a 100644 --- a/src/datastation/dv_dataset_delete_draft.py +++ b/src/datastation/dv_dataset_delete_draft.py @@ -1,13 +1,13 @@ import argparse from datetime import datetime -from datastation.common.batch_processing import BatchProcessor, get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import DatasetBatchProcessor, get_pids, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient -def delete_dataset_drafts(args, dataverse_client: DataverseClient, batch_processor: BatchProcessor): +def delete_dataset_drafts(args, dataverse_client: DataverseClient, batch_processor: DatasetBatchProcessor): pids = get_pids(args.pid_or_pid_file) batch_processor.process_pids(pids, lambda pid, csv_report: delete_dataset_draft(pid, @@ -37,7 +37,7 @@ def main(): args = parser.parse_args() dataverse_client = DataverseClient(config['dataverse']) - batch_processor = 
BatchProcessorWithReport(wait=args.wait, report_file=args.report_file) + batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, report_file=args.report_file) delete_dataset_drafts(args, dataverse_client, batch_processor) diff --git a/src/datastation/dv_dataset_destroy.py b/src/datastation/dv_dataset_destroy.py index 85a085a..f7fb241 100644 --- a/src/datastation/dv_dataset_destroy.py +++ b/src/datastation/dv_dataset_destroy.py @@ -1,13 +1,13 @@ import argparse from datetime import datetime -from datastation.common.batch_processing import BatchProcessor, get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import DatasetBatchProcessor, get_pids, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient -def destroy_datasets(args, dataverse_client: DataverseClient, batch_processor: BatchProcessor, dry_run: bool): +def destroy_datasets(args, dataverse_client: DataverseClient, batch_processor: DatasetBatchProcessor, dry_run: bool): pids = get_pids(args.pid_or_pid_file) batch_processor.process_pids(pids, lambda pid, csv_report: destroy_dataset(pid, dataset_api=dataverse_client.dataset( @@ -35,7 +35,7 @@ def main(): args = parser.parse_args() dataverse_client = DataverseClient(config['dataverse']) - batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file) + batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, report_file=args.report_file) destroy_datasets(args, dataverse_client, batch_processor, dry_run=args.dry_run) diff --git a/src/datastation/dv_dataset_destroy_migration_placeholder.py b/src/datastation/dv_dataset_destroy_migration_placeholder.py index 1121e6b..5931f52 100644 --- a/src/datastation/dv_dataset_destroy_migration_placeholder.py +++ b/src/datastation/dv_dataset_destroy_migration_placeholder.py @@ -1,6 +1,6 @@ import argparse -from datastation.common.batch_processing import get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -21,8 +21,8 @@ def main(): add_dry_run_arg(parser) args = parser.parse_args() - batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, - headers=['PID', 'Destroyed', 'Messages']) + batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + headers=['PID', 'Destroyed', 'Messages']) pids = get_pids(args.pid_or_pids_file) description_text_pattern = config['migration_placeholders']['description_text_pattern'] batch_processor.process_pids(pids, diff --git a/src/datastation/dv_dataset_get_attributes.py b/src/datastation/dv_dataset_get_attributes.py index de6fe2c..939807e 100644 --- a/src/datastation/dv_dataset_get_attributes.py +++ b/src/datastation/dv_dataset_get_attributes.py @@ -1,7 +1,7 @@ import argparse import json -from datastation.common.batch_processing import get_pids, BatchProcessor, BatchProcessorWithReport +from datastation.common.batch_processing import get_pids, DatasetBatchProcessor, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.datasets import 
Datasets @@ -40,7 +40,7 @@ def main(): dataverse_client = DataverseClient(config["dataverse"]) datasets = Datasets(dataverse_client, dry_run=args.dry_run) - BatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast).process_pids( + DatasetBatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast).process_pids( get_pids(args.pid_or_pids_file, dataverse_client.search_api(), dry_run=args.dry_run), lambda pid: print(json.dumps(datasets.get_dataset_attributes(pid, **attribute_options), skipkeys=True))) diff --git a/src/datastation/dv_dataset_get_metadata_export.py b/src/datastation/dv_dataset_get_metadata_export.py index 050df93..1b99893 100644 --- a/src/datastation/dv_dataset_get_metadata_export.py +++ b/src/datastation/dv_dataset_get_metadata_export.py @@ -1,7 +1,7 @@ import argparse import os -from datastation.common.batch_processing import BatchProcessor, get_pids +from datastation.common.batch_processing import DatasetBatchProcessor, get_pids from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -53,7 +53,7 @@ def main(): add_dry_run_arg(parser) args = parser.parse_args() - batch_processor = BatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast) + batch_processor = DatasetBatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast) pids = get_pids(args.pid_or_pids_file) batch_processor.process_pids(pids, callback=lambda pid: get_metadata_export(args, pid, dataverse)) diff --git a/src/datastation/dv_dataset_publish.py b/src/datastation/dv_dataset_publish.py index 8392db1..bbdbdea 100644 --- a/src/datastation/dv_dataset_publish.py +++ b/src/datastation/dv_dataset_publish.py @@ -2,7 +2,7 @@ import json from datetime import datetime -from datastation.common.batch_processing import BatchProcessorWithReport, get_pids +from datastation.common.batch_processing import DatasetBatchProcessorWithReport, get_pids from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -10,9 +10,9 @@ def publish_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = BatchProcessorWithReport(report_file=args.report_file, wait=args.wait, - fail_on_first_error=args.fail_fast, - headers=['DOI', 'Modified', 'Change', 'Messages']) + batch_processor = DatasetBatchProcessorWithReport(report_file=args.report_file, wait=args.wait, + fail_on_first_error=args.fail_fast, + headers=['DOI', 'Modified', 'Change', 'Messages']) batch_processor.process_pids(pids, lambda pid, csv_report: publish(pid, dataverse_client, update_type=args.update_type, diff --git a/src/datastation/dv_dataset_reindex.py b/src/datastation/dv_dataset_reindex.py index 6d970da..bfaee4a 100644 --- a/src/datastation/dv_dataset_reindex.py +++ b/src/datastation/dv_dataset_reindex.py @@ -3,7 +3,7 @@ from requests import HTTPError -from datastation.common.batch_processing import get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.csv import CsvReport from datastation.common.utils import add_batch_processor_args, add_dry_run_arg @@ -12,8 +12,8 @@ def reindex_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = 
BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, headers=["PID", "Status", "Message"]) + batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + report_file=args.report_file, headers=["PID", "Status", "Message"]) batch_processor.process_pids(pids, lambda pid, csv_report: reindex_dataset(pid, dataverse_client, csv_report=csv_report, dry_run=args.dry_run)) diff --git a/src/datastation/dv_dataset_reingest_tabular.py b/src/datastation/dv_dataset_reingest_tabular.py index a958204..628463b 100644 --- a/src/datastation/dv_dataset_reingest_tabular.py +++ b/src/datastation/dv_dataset_reingest_tabular.py @@ -4,7 +4,7 @@ import requests -from datastation.common.batch_processing import get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -12,9 +12,9 @@ def reingest_tabular_files_in_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = BatchProcessorWithReport(report_file=args.report_file, wait=args.wait, - fail_on_first_error=args.fail_fast, - headers=['DOI', 'Modified', 'Change', 'Messages']) + batch_processor = DatasetBatchProcessorWithReport(report_file=args.report_file, wait=args.wait, + fail_on_first_error=args.fail_fast, + headers=['DOI', 'Modified', 'Change', 'Messages']) batch_processor.process_pids(pids, lambda pid, csv_report: reingest_tabular_files_in_dataset(pid, dataverse_client, csv_report=csv_report, diff --git a/src/datastation/dv_dataset_role_assignment.py b/src/datastation/dv_dataset_role_assignment.py index 2f847c0..3ddec80 100644 --- a/src/datastation/dv_dataset_role_assignment.py +++ b/src/datastation/dv_dataset_role_assignment.py @@ -3,7 +3,7 @@ import rich -from datastation.common.batch_processing import get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataset_api import DatasetApi @@ -12,9 +12,9 @@ def add_role_assignments(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, - headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + report_file=args.report_file, + headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) batch_processor.process_pids(pids, lambda pid, csv_report: add_role_assignment(args.role_assignment, dataset_api=dataverse_client.dataset(pid), @@ -56,8 +56,8 @@ def list_role_assignments(args, dataverse_client): def remove_role_assignments(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, - headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) batch_processor.process_pids(pids, 
lambda pid, csv_report: remove_role_assignment(args.role_assignment, dataset_api=dataverse_client.dataset( @@ -87,7 +87,7 @@ def remove_role_assignment(role_assignment, dataset_api: DatasetApi, csv_report, def main(): config = init() dataverse_client = DataverseClient(config['dataverse']) - batch_processor = BatchProcessorWithReport(headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor = DatasetBatchProcessorWithReport(headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) # Create main parser and subparsers parser = argparse.ArgumentParser(description='Manage role assignments on one or more datasets.') diff --git a/src/datastation/dv_dataset_update_datacite.py b/src/datastation/dv_dataset_update_datacite.py index 60be081..809a6b9 100644 --- a/src/datastation/dv_dataset_update_datacite.py +++ b/src/datastation/dv_dataset_update_datacite.py @@ -3,7 +3,7 @@ from requests import HTTPError -from datastation.common.batch_processing import get_pids, BatchProcessorWithReport +from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport from datastation.common.config import init from datastation.common.csv import CsvReport from datastation.common.utils import add_batch_processor_args, add_dry_run_arg @@ -12,8 +12,8 @@ def update_datacite_records(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pids_file) - batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, headers=["PID", "Status", "Message"]) + batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + report_file=args.report_file, headers=["PID", "Status", "Message"]) batch_processor.process_pids(pids, lambda pid, csv_report: update_datacite_record(pid, dataverse_client, csv_report=csv_report, diff --git a/src/tests/test_batch_processing.py b/src/tests/test_batch_processing.py index 2dd3759..bcd3791 100644 --- a/src/tests/test_batch_processing.py +++ b/src/tests/test_batch_processing.py @@ -2,13 +2,13 @@ import time from datetime import datetime -from datastation.common.batch_processing import BatchProcessor, get_pids +from datastation.common.batch_processing import DatasetBatchProcessor, get_pids class TestBatchProcessor: def test_process_pids(self, capsys): - batch_processor = BatchProcessor() + batch_processor = DatasetBatchProcessor() pids = ["1", "2", "3"] callback = lambda pid: print(pid) batch_processor.process_pids(pids, callback) @@ -16,7 +16,7 @@ def test_process_pids(self, capsys): assert captured.out == "1\n2\n3\n" def test_process_pids_with_wait_on_iterator(self, capsys): - batch_processor = BatchProcessor(wait=0.1) + batch_processor = DatasetBatchProcessor(wait=0.1) def as_is(rec): time.sleep(0.1) @@ -37,7 +37,7 @@ def as_is(rec): time.sleep(0.1) print(f"lazy-{rec}") return rec - batch_processor = BatchProcessor(wait=0.1) + batch_processor = DatasetBatchProcessor(wait=0.1) pids = [as_is(rec) for rec in ["1", "2", "3"]] callback = lambda pid: print(pid) start_time = datetime.now() From ebcad95d5163f1aa3017f3b17614d06f18d3e365 Mon Sep 17 00:00:00 2001 From: paulboon Date: Tue, 13 Feb 2024 11:32:02 +0100 Subject: [PATCH 04/11] Refactoring of the BatchProcessor classes --- src/datastation/common/batch_processing.py | 121 ++++++++++----------- src/datastation/common/utils.py | 8 ++ src/tests/test_utils.py | 11 +- 3 files changed, 74 insertions(+), 66 deletions(-) diff --git a/src/datastation/common/batch_processing.py 
b/src/datastation/common/batch_processing.py index a3622f3..d9838ca 100644 --- a/src/datastation/common/batch_processing.py +++ b/src/datastation/common/batch_processing.py @@ -3,6 +3,52 @@ import time from datastation.common.csv import CsvReport +from datastation.common.utils import plural + + +# Base class for batch processing of items +class BatchProcessor: + def __init__(self, item_name="item", wait=0.1, fail_on_first_error=True): + self.item_name = item_name + self.wait = wait + self.fail_on_first_error = fail_on_first_error + + def process_items(self, items, callback): + if type(items) is list: + num_items = len(items) + logging.info(f"Start batch processing on {num_items} {plural(self.item_name)}") + else: + logging.info(f"Start batch processing on unknown number of {plural(self.item_name)}") + num_items = -1 + i = 0 + for item in items: + i += 1 + try: + if self.wait > 0 and i > 1: + logging.debug(f"Waiting {self.wait} seconds before processing next {self.item_name}") + time.sleep(self.wait) + logging.info(f"Processing {i} of {num_items}: {item}") + callback(item) + except Exception as e: + logging.exception("Exception occurred", exc_info=True) + if self.fail_on_first_error: + logging.error(f"Stop processing because of an exception: {e}") + break + logging.debug("fail_on_first_error is False, continuing...") + + +def get_provided_items_iterator(item_or_items_file, item_name="item"): + if item_or_items_file is None: + logging.debug(f"No {plural(item_name)} provided.") + return None + elif os.path.isfile(os.path.expanduser(item_or_items_file)): + items = [] + with open(os.path.expanduser(item_or_items_file)) as f: + for line in f: + items.append(line.strip()) + return items + else: + return [item_or_items_file] def get_pids(pid_or_pids_file, search_api=None, query="*", subtree="root", object_type="dataset", dry_run=False): @@ -24,14 +70,8 @@ def get_pids(pid_or_pids_file, search_api=None, query="*", subtree="root", objec if pid_or_pids_file is None: result = search_api.search(query=query, subtree=subtree, object_type=object_type, dry_run=dry_run) return map(lambda rec: rec['global_id'], result) - elif os.path.isfile(os.path.expanduser(pid_or_pids_file)): - pids = [] - with open(os.path.expanduser(pid_or_pids_file)) as f: - for line in f: - pids.append(line.strip()) - return pids else: - return [pid_or_pids_file] + return get_provided_items_iterator(pid_or_pids_file, "pid") def get_aliases(alias_or_aliases_file, dry_run=False): @@ -49,43 +89,17 @@ def get_aliases(alias_or_aliases_file, dry_run=False): # from the tree, but this is not implemented yet. 
logging.warning(f"No aliases provided, nothing to do.") return None - elif os.path.isfile(os.path.expanduser(alias_or_aliases_file)): - aliases = [] - with open(os.path.expanduser(alias_or_aliases_file)) as f: - for line in f: - aliases.append(line.strip()) - return aliases else: - return [alias_or_aliases_file] + return get_provided_items_iterator(alias_or_aliases_file, "alias") + +class DatasetBatchProcessor(BatchProcessor): -class DatasetBatchProcessor: def __init__(self, wait=0.1, fail_on_first_error=True): - self.wait = wait - self.fail_on_first_error = fail_on_first_error + super().__init__("pid", wait, fail_on_first_error) def process_pids(self, pids, callback): - if type(pids) is list: - num_pids = len(pids) - logging.info(f"Start batch processing on {num_pids} pids") - else: - logging.info(f"Start batch processing on unknown number of pids") - num_pids = -1 - i = 0 - for pid in pids: - i += 1 - try: - if self.wait > 0 and i > 1: - logging.debug(f"Waiting {self.wait} seconds before processing next pid") - time.sleep(self.wait) - logging.info(f"Processing {i} of {num_pids}: {pid}") - callback(pid) - except Exception as e: - logging.exception("Exception occurred", exc_info=True) - if self.fail_on_first_error: - logging.error(f"Stop processing because of an exception: {e}") - break - logging.debug("fail_on_first_error is False, continuing...") + super().process_items(pids, callback) class DatasetBatchProcessorWithReport(DatasetBatchProcessor): @@ -102,36 +116,13 @@ def process_pids(self, pids, callback): super().process_pids(pids, lambda pid: callback(pid, csv_report)) -class DataverseBatchProcessor: - """ - A batch processor for dataverses (collections) using their alias as unique identifier. - """ +class DataverseBatchProcessor(BatchProcessor): + def __init__(self, wait=0.1, fail_on_first_error=True): - self.wait = wait - self.fail_on_first_error = fail_on_first_error + super().__init__("alias", wait, fail_on_first_error) def process_aliases(self, aliases, callback): - if type(aliases) is list: - num_aliases = len(aliases) - logging.info(f"Start batch processing on {num_aliases} dataverse aliases") - else: - logging.info(f"Start batch processing on unknown number of dataverse aliases") - num_aliases = -1 - i = 0 - for alias in aliases: - i += 1 - try: - if self.wait > 0 and i > 1: - logging.debug(f"Waiting {self.wait} seconds before processing next dataverse alias") - time.sleep(self.wait) - logging.info(f"Processing {i} of {num_aliases}: {alias}") - callback(alias) - except Exception as e: - logging.exception("Exception occurred", exc_info=True) - if self.fail_on_first_error: - logging.error(f"Stop processing because of an exception: {e}") - break - logging.debug("fail_on_first_error is False, continuing...") + super().process_items(aliases, callback) class DataverseBatchProcessorWithReport(DataverseBatchProcessor): diff --git a/src/datastation/common/utils.py b/src/datastation/common/utils.py index b818e0f..d3c0b8c 100644 --- a/src/datastation/common/utils.py +++ b/src/datastation/common/utils.py @@ -110,3 +110,11 @@ def sizeof_fmt(num, suffix='B'): return "%3.1f%s%s" % (num, unit, suffix) num /= 1024.0 return "%.1f%s%s" % (num, 'Yi', suffix) + +def plural(word: str): + if word.endswith('s'): + return word + "es" + elif word.endswith('y'): + return word[:-1] + "ies" + else: + return word + "s" diff --git a/src/tests/test_utils.py b/src/tests/test_utils.py index 1ffdb39..ecc8b6c 100644 --- a/src/tests/test_utils.py +++ b/src/tests/test_utils.py @@ -2,7 +2,8 @@ import argparse 
import unittest -from datastation.common.utils import is_sub_path_of, has_dirtree_pred, set_permissions, positive_int_argument_converter +from datastation.common.utils import is_sub_path_of, has_dirtree_pred, set_permissions, positive_int_argument_converter, \ + plural class TestIsSubPathOf: @@ -104,3 +105,11 @@ def test_positive_int_argument_converter(self): positive_int_argument_converter("-5") with self.assertRaises(argparse.ArgumentTypeError): positive_int_argument_converter("abc") + + +class TestPlural(unittest.TestCase): + def test_plural(self): + self.assertEqual(plural("pid"), "pids") + self.assertEqual(plural("alias"), "aliases") + self.assertEqual(plural(":-)lolly"), ":-)lollies") + From cbedcf3c82fda9820a2e53fbd1ef283c15759c1a Mon Sep 17 00:00:00 2001 From: paulboon Date: Tue, 13 Feb 2024 16:30:59 +0100 Subject: [PATCH 05/11] Refactoring; dv_dataverse_role_assignment uses new DataverseRole class --- src/datastation/dataverse/roles.py | 88 +++++++++++++++++++ .../dv_dataverse_role_assignment.py | 87 ++---------------- 2 files changed, 93 insertions(+), 82 deletions(-) create mode 100644 src/datastation/dataverse/roles.py diff --git a/src/datastation/dataverse/roles.py b/src/datastation/dataverse/roles.py new file mode 100644 index 0000000..9835e96 --- /dev/null +++ b/src/datastation/dataverse/roles.py @@ -0,0 +1,88 @@ +import rich +from datetime import datetime + +from datastation.common.batch_processing import DataverseBatchProcessorWithReport, get_aliases +from datastation.dataverse.dataverse_api import DataverseApi +from datastation.dataverse.dataverse_client import DataverseClient + + +class DataverseRole: + + def __init__(self, dataverse_client: DataverseClient, dry_run: bool = False): + self.dataverse_client = dataverse_client + self.dry_run = dry_run + + def list_role_assignments(self, args): + r = self.dataverse_client.dataverse(args.alias).get_role_assignments() + if r is not None: + rich.print_json(data=r) + + def add_role_assignments(self, args): + aliases = get_aliases(args.alias_or_alias_file) + batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + report_file=args.report_file, + headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_aliases(aliases, + lambda alias, + csv_report: self.add_role_assignment(args.role_assignment, + dataverse_api= + self.dataverse_client.dataverse( + alias), + csv_report=csv_report, + dry_run=args.dry_run)) + + def add_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): + assignee = role_assignment.split('=')[0] + role = role_assignment.split('=')[1] + action = "None" + if self.in_current_assignments(assignee, role, dataverse_api): + print("{} is already {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) + else: + print( + "Adding {} as {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) + dataverse_api.add_role_assignment(assignee, role, dry_run=dry_run) + action = "Added" + csv_report.write( + {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, + 'Change': action}) + + def in_current_assignments(self, assignee, role, dataverse_api: DataverseApi): + current_assignments = dataverse_api.get_role_assignments() + found = False + for current_assignment in current_assignments: + if current_assignment.get('assignee') == assignee and current_assignment.get( + '_roleAlias') == role: + found = True + break + return found + + def 
remove_role_assignments(self, args): + aliases = get_aliases(args.alias_or_alias_file) + batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_aliases(aliases, + lambda alias, + csv_report: self.remove_role_assignment(args.role_assignment, + dataverse_api= + self.dataverse_client.dataverse( + alias), + csv_report=csv_report, + dry_run=args.dry_run)) + + def remove_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): + assignee = role_assignment.split('=')[0] + role = role_assignment.split('=')[1] + action = "None" + if self.in_current_assignments(assignee, role, dataverse_api): + print("Removing {} as {} for dataverse {}".format(assignee, role, dataverse_api.get_alias())) + all_assignments = dataverse_api.get_role_assignments() + for assignment in all_assignments: + if assignment.get('assignee') == assignee and assignment.get('_roleAlias') == role: + dataverse_api.remove_role_assignment(assignment.get('id'), dry_run=dry_run) + action = "Removed" + break + else: + print("{} is not {} for dataverse {}".format(assignee, role, dataverse_api.get_alias())) + csv_report.write( + {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, + 'Change': action}) diff --git a/src/datastation/dv_dataverse_role_assignment.py b/src/datastation/dv_dataverse_role_assignment.py index 5b67b1e..9be8b21 100644 --- a/src/datastation/dv_dataverse_role_assignment.py +++ b/src/datastation/dv_dataverse_role_assignment.py @@ -1,94 +1,17 @@ import argparse -from datetime import datetime -import rich from datastation.common.batch_processing import DataverseBatchProcessorWithReport, get_aliases from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg -from datastation.dataverse.dataverse_api import DataverseApi from datastation.dataverse.dataverse_client import DataverseClient - - -def add_role_assignments(args, dataverse_client: DataverseClient): - aliases = get_aliases(args.alias_or_alias_file) - batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, - headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) - batch_processor.process_aliases(aliases, - lambda alias, - csv_report: add_role_assignment(args.role_assignment, - dataverse_api=dataverse_client.dataverse(alias), - csv_report=csv_report)) - - -def add_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): - assignee = role_assignment.split('=')[0] - role = role_assignment.split('=')[1] - action = "None" - if in_current_assignments(assignee, role, dataverse_api): - print("{} is already {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) - else: - print( - "Adding {} as {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) - dataverse_api.add_role_assignment(assignee, role, dry_run=dry_run) - action = "Added" - csv_report.write( - {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, - 'Change': action}) - - -def in_current_assignments(assignee, role, dataverse_api: DataverseApi): - current_assignments = dataverse_api.get_role_assignments() - found = False - for current_assignment in current_assignments: - if current_assignment.get('assignee') == assignee and 
current_assignment.get(
-                '_roleAlias') == role:
-            found = True
-            break
-    return found
-
-
-def list_role_assignments(args, dataverse_client):
-    r = dataverse_client.dataverse(args.alias).get_role_assignments()
-    if r is not None:
-        rich.print_json(data=r)
-
-
-def remove_role_assignments(args, dataverse_client: DataverseClient):
-    aliases = get_aliases(args.alias_or_alias_file)
-    batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, report_file=args.report_file,
-                                                        headers=['alias', 'Modified', 'Assignee', 'Role', 'Change'])
-    batch_processor.process_aliases(aliases,
-                                    lambda alias,
-                                           csv_report: remove_role_assignment(args.role_assignment,
-                                                                              dataverse_api=dataverse_client.dataverse(alias),
-                                                                              csv_report=csv_report))
-
-
-def remove_role_assignment(role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False):
-    assignee = role_assignment.split('=')[0]
-    role = role_assignment.split('=')[1]
-    action = "None"
-    if in_current_assignments(assignee, role, dataverse_api):
-        print("Removing {} as {} for dataverse {}".format(assignee, role, dataverse_api.get_alias()))
-        all_assignments = dataverse_api.get_role_assignments()
-        for assignment in all_assignments:
-            if assignment.get('assignee') == assignee and assignment.get('_roleAlias') == role:
-                dataverse_api.remove_role_assignment(assignment.get('id'), dry_run=dry_run)
-                action = "Removed"
-                break
-    else:
-        print("{} is not {} for dataverse {}".format(assignee, role, dataverse_api.get_alias()))
-    csv_report.write(
-        {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role,
-         'Change': action})
+from datastation.dataverse.roles import DataverseRole


 def main():
     config = init()
     dataverse_client = DataverseClient(config['dataverse'])
-    batch_processor = DataverseBatchProcessorWithReport(headers=['alias', 'Modified', 'Assignee', 'Role', 'Change'])
+    role_assignment = DataverseRole(dataverse_client)

     # Create main parser and subparsers
     parser = argparse.ArgumentParser(description='Manage role assignments on one or more datasets.')
@@ -103,7 +26,7 @@ def main():
     parser_add = subparsers.add_parser('add', help='add role assignment to specified dataset(s)')
     parser_add.add_argument('role_assignment',
                             help='role assignee and alias (example: @dataverseAdmin=contributor) to add')
@@ -113,14 +36,14 @@ def main():
     add_batch_processor_args(parser_add)
     add_dry_run_arg(parser_add)

-    parser_add.set_defaults(func=lambda _: add_role_assignments(_, dataverse_client))
+    parser_add.set_defaults(func=lambda _: role_assignment.add_role_assignments(_))

     # Remove role assignment
     parser_remove = subparsers.add_parser('remove', help='remove role assignment from specified dataset(s)')
     parser_remove.add_argument('role_assignment',
                                help='role assignee and alias (example: @dataverseAdmin=contributor)')
     parser_remove.add_argument('alias_or_alias_file',
                                help='The dataverse alias or the input file with the dataverse aliases')
     add_batch_processor_args(parser_remove)
     add_dry_run_arg(parser_remove)
-    parser_remove.set_defaults(func=lambda _: remove_role_assignments(_, dataverse_client))
+    parser_remove.set_defaults(func=lambda _: role_assignment.remove_role_assignments(_))

     # List role assignments
     parser_list = subparsers.add_parser('list',
                                         help='list role assignments for specified dataverse (only one alias allowed)')
     parser_list.add_argument('alias', help='the dataverse alias')
     add_dry_run_arg(parser_list)
-    parser_list.set_defaults(func=lambda _: list_role_assignments(_, dataverse_client))
+    parser_list.set_defaults(func=lambda _: role_assignment.list_role_assignments(_))

     args = parser.parse_args()
     args.func(args)

From 65db8eeb6c3a14e78ef88cfbe3d8187c4a44666c Mon Sep 17 00:00:00 2001
From: paulboon
Date: Wed, 14 Feb 2024 11:06:52 +0100
Subject: [PATCH 06/11] Refactoring; removed alias as input parameter for the
 assignment related members of the dataverse_api

---
src/datastation/dataverse/dataverse_api.py | 18 ++++++------------ .../dataverse/permissions_collect.py | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/datastation/dataverse/dataverse_api.py b/src/datastation/dataverse/dataverse_api.py index cc6c7b5..9be5535 100644 --- a/src/datastation/dataverse/dataverse_api.py +++ b/src/datastation/dataverse/dataverse_api.py @@ -35,10 +35,8 @@ def get_contents(self, alias="root", dry_run=False): def get_roles(self, alias="root", dry_run=False): return self.get_resource_data("roles", alias, dry_run) - def get_role_assignments(self, alias="root", dry_run=False): - if self.alias is not None: - alias = self.alias - return self.get_resource_data("assignments", alias, dry_run) + def get_role_assignments(self, dry_run=False): + return self.get_resource_data("assignments", self.alias, dry_run) def get_groups(self, alias="root", dry_run=False): return self.get_resource_data("groups", alias, dry_run) @@ -55,10 +53,8 @@ def get_storage_size(self, alias="root", dry_run=False): r.raise_for_status() return r.json()['data']['message'] - def add_role_assignment(self, assignee, role, alias=None, dry_run=False): - if self.alias is not None: - alias = self.alias - url = f'{self.server_url}/api/dataverses/{alias}/assignments' + def add_role_assignment(self, assignee, role, dry_run=False): + url = f'{self.server_url}/api/dataverses/{self.alias}/assignments' headers = {'X-Dataverse-key': self.api_token, 'Content-type': 'application/json'} role_assignment = {"assignee": assignee, "role": role} if dry_run: @@ -70,10 +66,8 @@ def add_role_assignment(self, assignee, role, alias=None, dry_run=False): r.raise_for_status() return r - def remove_role_assignment(self, assignment_id, alias=None, dry_run=False): - if self.alias is not None: - alias = self.alias - url = f'{self.server_url}/api/dataverses/{alias}/assignments/{assignment_id}' + def remove_role_assignment(self, assignment_id, dry_run=False): + url = f'{self.server_url}/api/dataverses/{self.alias}/assignments/{assignment_id}' headers = {'X-Dataverse-key': self.api_token, 'Content-type': 'application/json'} if dry_run: print_dry_run_message(method='DELETE', url=url, headers=headers) diff --git a/src/datastation/dataverse/permissions_collect.py b/src/datastation/dataverse/permissions_collect.py index f874fd0..ddadf43 100644 --- a/src/datastation/dataverse/permissions_collect.py +++ b/src/datastation/dataverse/permissions_collect.py @@ -61,7 +61,7 @@ def get_role_info(self, alias): return ', '.join(result_list) def get_assignment_info(self, alias): - resp_data = self.dataverse_client.dataverse().get_role_assignments(alias) + resp_data = self.dataverse_client.dataverse(alias).get_role_assignments() # flatten and compact it... 
no list comprehension though result_list = [] for assignment in resp_data: From 1d4805982600e095ad69305e0a11da41623533e8 Mon Sep 17 00:00:00 2001 From: paulboon Date: Wed, 14 Feb 2024 13:32:57 +0100 Subject: [PATCH 07/11] Refactoring; removed alias as input parameter for all members of the dataverse_api --- src/datastation/dataverse/dataverse_api.py | 24 +++++++++---------- src/datastation/dataverse/metrics_collect.py | 2 +- .../dataverse/permissions_collect.py | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/datastation/dataverse/dataverse_api.py b/src/datastation/dataverse/dataverse_api.py index 9be5535..c5cf940 100644 --- a/src/datastation/dataverse/dataverse_api.py +++ b/src/datastation/dataverse/dataverse_api.py @@ -6,7 +6,7 @@ class DataverseApi: - def __init__(self, server_url, api_token, alias=None): + def __init__(self, server_url, api_token, alias): self.server_url = server_url self.api_token = api_token self.alias = alias # Methods should use this one if specified @@ -15,9 +15,9 @@ def get_alias(self): return self.alias # get json data for a specific dataverses API endpoint using an API token - def get_resource_data(self, resource, alias="root", dry_run=False): + def get_resource_data(self, resource, dry_run=False): headers = {"X-Dataverse-key": self.api_token} - url = f"{self.server_url}/api/dataverses/{alias}/{resource}" + url = f"{self.server_url}/api/dataverses/{self.alias}/{resource}" if dry_run: print_dry_run_message(method="GET", url=url, headers=headers) @@ -29,21 +29,21 @@ def get_resource_data(self, resource, alias="root", dry_run=False): resp_data = dv_resp.json()["data"] return resp_data - def get_contents(self, alias="root", dry_run=False): - return self.get_resource_data("contents", alias, dry_run) + def get_contents(self, dry_run=False): + return self.get_resource_data("contents", dry_run) - def get_roles(self, alias="root", dry_run=False): - return self.get_resource_data("roles", alias, dry_run) + def get_roles(self, dry_run=False): + return self.get_resource_data("roles", dry_run) def get_role_assignments(self, dry_run=False): - return self.get_resource_data("assignments", self.alias, dry_run) + return self.get_resource_data("assignments", dry_run) - def get_groups(self, alias="root", dry_run=False): - return self.get_resource_data("groups", alias, dry_run) + def get_groups(self, dry_run=False): + return self.get_resource_data("groups", dry_run) - def get_storage_size(self, alias="root", dry_run=False): + def get_storage_size(self, dry_run=False): """ Get dataverse storage size (bytes). 
""" - url = f'{self.server_url}/api/dataverses/{alias}/storagesize' + url = f'{self.server_url}/api/dataverses/{self.alias}/storagesize' headers = {'X-Dataverse-key': self.api_token} if dry_run: print_dry_run_message(method='GET', url=url, headers=headers) diff --git a/src/datastation/dataverse/metrics_collect.py b/src/datastation/dataverse/metrics_collect.py index 1fdb31b..30a9f28 100644 --- a/src/datastation/dataverse/metrics_collect.py +++ b/src/datastation/dataverse/metrics_collect.py @@ -46,7 +46,7 @@ def write_result_row(self, row): def get_result_row(self, parent_alias, child_alias, child_name, depth): logging.info(f'Retrieving size for dataverse: {parent_alias} / {child_alias} ...') - msg = self.dataverse_client.dataverse().get_storage_size(child_alias) + msg = self.dataverse_client.dataverse(child_alias).get_storage_size() storage_size = extract_size_str(msg) logging.info(f'size: {storage_size}') row = {'depth': depth, 'parentalias': parent_alias, 'alias': child_alias, 'name': child_name, diff --git a/src/datastation/dataverse/permissions_collect.py b/src/datastation/dataverse/permissions_collect.py index ddadf43..a1e7d73 100644 --- a/src/datastation/dataverse/permissions_collect.py +++ b/src/datastation/dataverse/permissions_collect.py @@ -43,7 +43,7 @@ def get_result_row(self, parent_alias, child_alias, child_name, id, vpath, depth return row def get_group_info(self, alias): - resp_data = self.dataverse_client.dataverse().get_groups(alias) + resp_data = self.dataverse_client.dataverse(alias).get_groups() # flatten and compact it... no list comprehension though result_list = [] for group in resp_data: @@ -52,7 +52,7 @@ def get_group_info(self, alias): return ', '.join(result_list) def get_role_info(self, alias): - resp_data = self.dataverse_client.dataverse().get_roles(alias) + resp_data = self.dataverse_client.dataverse(alias).get_roles() # flatten and compact it... 
no list comprehension though result_list = [] for role in resp_data: From 5075e94bd229fbb7c5622b988d02e5dce3095237 Mon Sep 17 00:00:00 2001 From: paulboon Date: Thu, 15 Feb 2024 11:45:55 +0100 Subject: [PATCH 08/11] Refactoring; moved and renamed new BatchProcessor out of the way --- src/datastation/common/batch_processing.py | 126 ++++------------ .../common/common_batch_processing.py | 139 ++++++++++++++++++ src/datastation/dataverse/roles.py | 2 +- src/datastation/dv_dataset_delete_draft.py | 6 +- src/datastation/dv_dataset_destroy.py | 6 +- ...v_dataset_destroy_migration_placeholder.py | 4 +- src/datastation/dv_dataset_get_attributes.py | 4 +- .../dv_dataset_get_metadata_export.py | 4 +- src/datastation/dv_dataset_publish.py | 4 +- src/datastation/dv_dataset_reindex.py | 4 +- .../dv_dataset_reingest_tabular.py | 4 +- src/datastation/dv_dataset_role_assignment.py | 8 +- src/datastation/dv_dataset_update_datacite.py | 4 +- .../dv_dataverse_role_assignment.py | 1 - src/tests/test_batch_processing.py | 8 +- 15 files changed, 200 insertions(+), 124 deletions(-) create mode 100644 src/datastation/common/common_batch_processing.py diff --git a/src/datastation/common/batch_processing.py b/src/datastation/common/batch_processing.py index d9838ca..9b6f07b 100644 --- a/src/datastation/common/batch_processing.py +++ b/src/datastation/common/batch_processing.py @@ -3,52 +3,6 @@ import time from datastation.common.csv import CsvReport -from datastation.common.utils import plural - - -# Base class for batch processing of items -class BatchProcessor: - def __init__(self, item_name="item", wait=0.1, fail_on_first_error=True): - self.item_name = item_name - self.wait = wait - self.fail_on_first_error = fail_on_first_error - - def process_items(self, items, callback): - if type(items) is list: - num_items = len(items) - logging.info(f"Start batch processing on {num_items} {plural(self.item_name)}") - else: - logging.info(f"Start batch processing on unknown number of {plural(self.item_name)}") - num_items = -1 - i = 0 - for item in items: - i += 1 - try: - if self.wait > 0 and i > 1: - logging.debug(f"Waiting {self.wait} seconds before processing next {self.item_name}") - time.sleep(self.wait) - logging.info(f"Processing {i} of {num_items}: {item}") - callback(item) - except Exception as e: - logging.exception("Exception occurred", exc_info=True) - if self.fail_on_first_error: - logging.error(f"Stop processing because of an exception: {e}") - break - logging.debug("fail_on_first_error is False, continuing...") - - -def get_provided_items_iterator(item_or_items_file, item_name="item"): - if item_or_items_file is None: - logging.debug(f"No {plural(item_name)} provided.") - return None - elif os.path.isfile(os.path.expanduser(item_or_items_file)): - items = [] - with open(os.path.expanduser(item_or_items_file)) as f: - for line in f: - items.append(line.strip()) - return items - else: - return [item_or_items_file] def get_pids(pid_or_pids_file, search_api=None, query="*", subtree="root", object_type="dataset", dry_run=False): @@ -70,39 +24,46 @@ def get_pids(pid_or_pids_file, search_api=None, query="*", subtree="root", objec if pid_or_pids_file is None: result = search_api.search(query=query, subtree=subtree, object_type=object_type, dry_run=dry_run) return map(lambda rec: rec['global_id'], result) + elif os.path.isfile(os.path.expanduser(pid_or_pids_file)): + pids = [] + with open(os.path.expanduser(pid_or_pids_file)) as f: + for line in f: + pids.append(line.strip()) + return pids else: - return 
get_provided_items_iterator(pid_or_pids_file, "pid") - - -def get_aliases(alias_or_aliases_file, dry_run=False): - """ - - Args: - alias_or_aliases_file: The dataverse alias, or a file with a list of aliases. - dry_run: Do not perform the action, but show what would be done. - Only applicable if pid_or_pids_file is None. - - Returns: an iterator with aliases - """ - if alias_or_aliases_file is None: - # The tree of all (published) dataverses could be retrieved and aliases could recursively be extracted - # from the tree, but this is not implemented yet. - logging.warning(f"No aliases provided, nothing to do.") - return None - else: - return get_provided_items_iterator(alias_or_aliases_file, "alias") - + return [pid_or_pids_file] -class DatasetBatchProcessor(BatchProcessor): +class BatchProcessor: def __init__(self, wait=0.1, fail_on_first_error=True): - super().__init__("pid", wait, fail_on_first_error) + self.wait = wait + self.fail_on_first_error = fail_on_first_error def process_pids(self, pids, callback): - super().process_items(pids, callback) + if type(pids) is list: + num_pids = len(pids) + logging.info(f"Start batch processing on {num_pids} pids") + else: + logging.info(f"Start batch processing on unknown number of pids") + num_pids = -1 + i = 0 + for pid in pids: + i += 1 + try: + if self.wait > 0 and i > 1: + logging.debug(f"Waiting {self.wait} seconds before processing next pid") + time.sleep(self.wait) + logging.info(f"Processing {i} of {num_pids}: {pid}") + callback(pid) + except Exception as e: + logging.exception("Exception occurred", exc_info=True) + if self.fail_on_first_error: + logging.error(f"Stop processing because of an exception: {e}") + break + logging.debug("fail_on_first_error is False, continuing...") -class DatasetBatchProcessorWithReport(DatasetBatchProcessor): +class BatchProcessorWithReport(BatchProcessor): def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error=True): super().__init__(wait, fail_on_first_error) @@ -114,26 +75,3 @@ def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error def process_pids(self, pids, callback): with CsvReport(os.path.expanduser(self.report_file), self.headers) as csv_report: super().process_pids(pids, lambda pid: callback(pid, csv_report)) - - -class DataverseBatchProcessor(BatchProcessor): - - def __init__(self, wait=0.1, fail_on_first_error=True): - super().__init__("alias", wait, fail_on_first_error) - - def process_aliases(self, aliases, callback): - super().process_items(aliases, callback) - - -class DataverseBatchProcessorWithReport(DataverseBatchProcessor): - - def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error=True): - super().__init__(wait, fail_on_first_error) - if headers is None: - headers = ["alias", "Modified", "Change"] - self.report_file = report_file - self.headers = headers - - def process_aliases(self, aliases, callback): - with CsvReport(os.path.expanduser(self.report_file), self.headers) as csv_report: - super().process_aliases(aliases, lambda alias: callback(alias, csv_report)) diff --git a/src/datastation/common/common_batch_processing.py b/src/datastation/common/common_batch_processing.py new file mode 100644 index 0000000..7954c3f --- /dev/null +++ b/src/datastation/common/common_batch_processing.py @@ -0,0 +1,139 @@ +import logging +import os +import time + +from datastation.common.csv import CsvReport +from datastation.common.utils import plural + + +# Base class for batch processing of items +class CommonBatchProcessor: + 
def __init__(self, item_name="item", wait=0.1, fail_on_first_error=True): + self.item_name = item_name + self.wait = wait + self.fail_on_first_error = fail_on_first_error + + def process_items(self, items, callback): + if type(items) is list: + num_items = len(items) + logging.info(f"Start batch processing on {num_items} {plural(self.item_name)}") + else: + logging.info(f"Start batch processing on unknown number of {plural(self.item_name)}") + num_items = -1 + i = 0 + for item in items: + i += 1 + try: + if self.wait > 0 and i > 1: + logging.debug(f"Waiting {self.wait} seconds before processing next {self.item_name}") + time.sleep(self.wait) + logging.info(f"Processing {i} of {num_items}: {item}") + callback(item) + except Exception as e: + logging.exception("Exception occurred", exc_info=True) + if self.fail_on_first_error: + logging.error(f"Stop processing because of an exception: {e}") + break + logging.debug("fail_on_first_error is False, continuing...") + + +def get_provided_items_iterator(item_or_items_file, item_name="item"): + if item_or_items_file is None: + logging.debug(f"No {plural(item_name)} provided.") + return None + elif os.path.isfile(os.path.expanduser(item_or_items_file)): + items = [] + with open(os.path.expanduser(item_or_items_file)) as f: + for line in f: + items.append(line.strip()) + return items + else: + return [item_or_items_file] + + +def get_pids(pid_or_pids_file, search_api=None, query="*", subtree="root", object_type="dataset", dry_run=False): + """ + + Args: + pid_or_pids_file: The dataset pid, or a file with a list of pids. + search_api: must be provided if pid_or_pids_file is None + query: passed on to search_api().search + object_type: passed on to search_api().search + subtree (object): passed on to search_api().search + dry_run: Do not perform the action, but show what would be done. + Only applicable if pid_or_pids_file is None. + + Returns: an iterator with pids, + if pid_or_pids_file is not provided, it searches for all datasets + and extracts their pids, fetching the result pages lazy. + """ + if pid_or_pids_file is None: + result = search_api.search(query=query, subtree=subtree, object_type=object_type, dry_run=dry_run) + return map(lambda rec: rec['global_id'], result) + else: + return get_provided_items_iterator(pid_or_pids_file, "pid") + + +def get_aliases(alias_or_aliases_file, dry_run=False): + """ + + Args: + alias_or_aliases_file: The dataverse alias, or a file with a list of aliases. + dry_run: Do not perform the action, but show what would be done. + Only applicable if pid_or_pids_file is None. + + Returns: an iterator with aliases + """ + if alias_or_aliases_file is None: + # The tree of all (published) dataverses could be retrieved and aliases could recursively be extracted + # from the tree, but this is not implemented yet. 
+ logging.warning(f"No aliases provided, nothing to do.") + return None + else: + return get_provided_items_iterator(alias_or_aliases_file, "alias") + + +class DatasetBatchProcessor(CommonBatchProcessor): + + def __init__(self, wait=0.1, fail_on_first_error=True): + super().__init__("pid", wait, fail_on_first_error) + + def process_pids(self, pids, callback): + super().process_items(pids, callback) + + +class DatasetBatchProcessorWithReport(DatasetBatchProcessor): + + def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error=True): + super().__init__(wait, fail_on_first_error) + if headers is None: + headers = ["DOI", "Modified", "Change"] + self.report_file = report_file + self.headers = headers + + def process_pids(self, pids, callback): + with CsvReport(os.path.expanduser(self.report_file), self.headers) as csv_report: + super().process_pids(pids, lambda pid: callback(pid, csv_report)) + + +class DataverseBatchProcessor(CommonBatchProcessor): + + def __init__(self, wait=0.1, fail_on_first_error=True): + super().__init__("alias", wait, fail_on_first_error) + + def process_aliases(self, aliases, callback): + super().process_items(aliases, callback) + + +class DataverseBatchProcessorWithReport(DataverseBatchProcessor): + + def __init__(self, report_file=None, headers=None, wait=0.1, fail_on_first_error=True): + super().__init__(wait, fail_on_first_error) + if headers is None: + headers = ["alias", "Modified", "Change"] + self.report_file = report_file + self.headers = headers + + def process_aliases(self, aliases, callback): + with CsvReport(os.path.expanduser(self.report_file), self.headers) as csv_report: + super().process_aliases(aliases, lambda alias: callback(alias, csv_report)) diff --git a/src/datastation/dataverse/roles.py b/src/datastation/dataverse/roles.py index 9835e96..c28420f 100644 --- a/src/datastation/dataverse/roles.py +++ b/src/datastation/dataverse/roles.py @@ -1,7 +1,7 @@ import rich from datetime import datetime -from datastation.common.batch_processing import DataverseBatchProcessorWithReport, get_aliases +from datastation.common.common_batch_processing import DataverseBatchProcessorWithReport, get_aliases from datastation.dataverse.dataverse_api import DataverseApi from datastation.dataverse.dataverse_client import DataverseClient diff --git a/src/datastation/dv_dataset_delete_draft.py b/src/datastation/dv_dataset_delete_draft.py index ea4812a..50b656c 100644 --- a/src/datastation/dv_dataset_delete_draft.py +++ b/src/datastation/dv_dataset_delete_draft.py @@ -1,13 +1,13 @@ import argparse from datetime import datetime -from datastation.common.batch_processing import DatasetBatchProcessor, get_pids, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import BatchProcessor, get_pids, BatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient -def delete_dataset_drafts(args, dataverse_client: DataverseClient, batch_processor: DatasetBatchProcessor): +def delete_dataset_drafts(args, dataverse_client: DataverseClient, batch_processor: BatchProcessor): pids = get_pids(args.pid_or_pid_file) batch_processor.process_pids(pids, lambda pid, csv_report: delete_dataset_draft(pid, @@ -37,7 +37,7 @@ def main(): args = parser.parse_args() dataverse_client = DataverseClient(config['dataverse']) - batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, 
report_file=args.report_file) + batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file) delete_dataset_drafts(args, dataverse_client, batch_processor) diff --git a/src/datastation/dv_dataset_destroy.py b/src/datastation/dv_dataset_destroy.py index f7fb241..85a085a 100644 --- a/src/datastation/dv_dataset_destroy.py +++ b/src/datastation/dv_dataset_destroy.py @@ -1,13 +1,13 @@ import argparse from datetime import datetime -from datastation.common.batch_processing import DatasetBatchProcessor, get_pids, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import BatchProcessor, get_pids, BatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient -def destroy_datasets(args, dataverse_client: DataverseClient, batch_processor: DatasetBatchProcessor, dry_run: bool): +def destroy_datasets(args, dataverse_client: DataverseClient, batch_processor: BatchProcessor, dry_run: bool): pids = get_pids(args.pid_or_pid_file) batch_processor.process_pids(pids, lambda pid, csv_report: destroy_dataset(pid, dataset_api=dataverse_client.dataset( @@ -35,7 +35,7 @@ def main(): args = parser.parse_args() dataverse_client = DataverseClient(config['dataverse']) - batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, report_file=args.report_file) + batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file) destroy_datasets(args, dataverse_client, batch_processor, dry_run=args.dry_run) diff --git a/src/datastation/dv_dataset_destroy_migration_placeholder.py b/src/datastation/dv_dataset_destroy_migration_placeholder.py index 59d6bff..3fa385c 100644 --- a/src/datastation/dv_dataset_destroy_migration_placeholder.py +++ b/src/datastation/dv_dataset_destroy_migration_placeholder.py @@ -1,6 +1,6 @@ import argparse -from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import get_pids, BatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -21,7 +21,7 @@ def main(): add_dry_run_arg(parser) args = parser.parse_args() - batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, headers=['PID', 'Destroyed', 'Messages']) pids = get_pids(args.pid_or_pids_file) description_text_pattern = config['migration_placeholders']['description_text_pattern'] diff --git a/src/datastation/dv_dataset_get_attributes.py b/src/datastation/dv_dataset_get_attributes.py index 939807e..de6fe2c 100644 --- a/src/datastation/dv_dataset_get_attributes.py +++ b/src/datastation/dv_dataset_get_attributes.py @@ -1,7 +1,7 @@ import argparse import json -from datastation.common.batch_processing import get_pids, DatasetBatchProcessor, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import get_pids, BatchProcessor, BatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.datasets import Datasets @@ -40,7 +40,7 @@ def main(): dataverse_client = DataverseClient(config["dataverse"]) datasets = Datasets(dataverse_client, 
dry_run=args.dry_run) - DatasetBatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast).process_pids( + BatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast).process_pids( get_pids(args.pid_or_pids_file, dataverse_client.search_api(), dry_run=args.dry_run), lambda pid: print(json.dumps(datasets.get_dataset_attributes(pid, **attribute_options), skipkeys=True))) diff --git a/src/datastation/dv_dataset_get_metadata_export.py b/src/datastation/dv_dataset_get_metadata_export.py index 1b99893..050df93 100644 --- a/src/datastation/dv_dataset_get_metadata_export.py +++ b/src/datastation/dv_dataset_get_metadata_export.py @@ -1,7 +1,7 @@ import argparse import os -from datastation.common.batch_processing import DatasetBatchProcessor, get_pids +from datastation.common.batch_processing import BatchProcessor, get_pids from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -53,7 +53,7 @@ def main(): add_dry_run_arg(parser) args = parser.parse_args() - batch_processor = DatasetBatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast) + batch_processor = BatchProcessor(wait=args.wait, fail_on_first_error=args.fail_fast) pids = get_pids(args.pid_or_pids_file) batch_processor.process_pids(pids, callback=lambda pid: get_metadata_export(args, pid, dataverse)) diff --git a/src/datastation/dv_dataset_publish.py b/src/datastation/dv_dataset_publish.py index bbdbdea..98d56fe 100644 --- a/src/datastation/dv_dataset_publish.py +++ b/src/datastation/dv_dataset_publish.py @@ -2,7 +2,7 @@ import json from datetime import datetime -from datastation.common.batch_processing import DatasetBatchProcessorWithReport, get_pids +from datastation.common.batch_processing import BatchProcessorWithReport, get_pids from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -10,7 +10,7 @@ def publish_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = DatasetBatchProcessorWithReport(report_file=args.report_file, wait=args.wait, + batch_processor = BatchProcessorWithReport(report_file=args.report_file, wait=args.wait, fail_on_first_error=args.fail_fast, headers=['DOI', 'Modified', 'Change', 'Messages']) batch_processor.process_pids(pids, diff --git a/src/datastation/dv_dataset_reindex.py b/src/datastation/dv_dataset_reindex.py index bfaee4a..63a4bea 100644 --- a/src/datastation/dv_dataset_reindex.py +++ b/src/datastation/dv_dataset_reindex.py @@ -3,7 +3,7 @@ from requests import HTTPError -from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import get_pids, BatchProcessorWithReport from datastation.common.config import init from datastation.common.csv import CsvReport from datastation.common.utils import add_batch_processor_args, add_dry_run_arg @@ -12,7 +12,7 @@ def reindex_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, report_file=args.report_file, headers=["PID", "Status", "Message"]) batch_processor.process_pids(pids, lambda pid, csv_report: 
reindex_dataset(pid, dataverse_client, csv_report=csv_report, diff --git a/src/datastation/dv_dataset_reingest_tabular.py b/src/datastation/dv_dataset_reingest_tabular.py index 628463b..80dd1c4 100644 --- a/src/datastation/dv_dataset_reingest_tabular.py +++ b/src/datastation/dv_dataset_reingest_tabular.py @@ -4,7 +4,7 @@ import requests -from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import get_pids, BatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient @@ -12,7 +12,7 @@ def reingest_tabular_files_in_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = DatasetBatchProcessorWithReport(report_file=args.report_file, wait=args.wait, + batch_processor = BatchProcessorWithReport(report_file=args.report_file, wait=args.wait, fail_on_first_error=args.fail_fast, headers=['DOI', 'Modified', 'Change', 'Messages']) batch_processor.process_pids(pids, diff --git a/src/datastation/dv_dataset_role_assignment.py b/src/datastation/dv_dataset_role_assignment.py index 3ddec80..cf960da 100644 --- a/src/datastation/dv_dataset_role_assignment.py +++ b/src/datastation/dv_dataset_role_assignment.py @@ -3,7 +3,7 @@ import rich -from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import get_pids, BatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataset_api import DatasetApi @@ -12,7 +12,7 @@ def add_role_assignments(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, report_file=args.report_file, headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) batch_processor.process_pids(pids, @@ -56,7 +56,7 @@ def list_role_assignments(args, dataverse_client): def remove_role_assignments(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) - batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) batch_processor.process_pids(pids, lambda pid, csv_report: remove_role_assignment(args.role_assignment, @@ -87,7 +87,7 @@ def remove_role_assignment(role_assignment, dataset_api: DatasetApi, csv_report, def main(): config = init() dataverse_client = DataverseClient(config['dataverse']) - batch_processor = DatasetBatchProcessorWithReport(headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor = BatchProcessorWithReport(headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) # Create main parser and subparsers parser = argparse.ArgumentParser(description='Manage role assignments on one or more datasets.') diff --git a/src/datastation/dv_dataset_update_datacite.py b/src/datastation/dv_dataset_update_datacite.py index 809a6b9..25be017 100644 --- a/src/datastation/dv_dataset_update_datacite.py +++ b/src/datastation/dv_dataset_update_datacite.py @@ -3,7 +3,7 @@ from requests 
import HTTPError -from datastation.common.batch_processing import get_pids, DatasetBatchProcessorWithReport +from datastation.common.batch_processing import get_pids, BatchProcessorWithReport from datastation.common.config import init from datastation.common.csv import CsvReport from datastation.common.utils import add_batch_processor_args, add_dry_run_arg @@ -12,7 +12,7 @@ def update_datacite_records(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pids_file) - batch_processor = DatasetBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, report_file=args.report_file, headers=["PID", "Status", "Message"]) batch_processor.process_pids(pids, lambda pid, csv_report: update_datacite_record(pid, dataverse_client, diff --git a/src/datastation/dv_dataverse_role_assignment.py b/src/datastation/dv_dataverse_role_assignment.py index 9be8b21..ae2df55 100644 --- a/src/datastation/dv_dataverse_role_assignment.py +++ b/src/datastation/dv_dataverse_role_assignment.py @@ -1,7 +1,6 @@ import argparse -from datastation.common.batch_processing import DataverseBatchProcessorWithReport, get_aliases from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient diff --git a/src/tests/test_batch_processing.py b/src/tests/test_batch_processing.py index bcd3791..2dd3759 100644 --- a/src/tests/test_batch_processing.py +++ b/src/tests/test_batch_processing.py @@ -2,13 +2,13 @@ import time from datetime import datetime -from datastation.common.batch_processing import DatasetBatchProcessor, get_pids +from datastation.common.batch_processing import BatchProcessor, get_pids class TestBatchProcessor: def test_process_pids(self, capsys): - batch_processor = DatasetBatchProcessor() + batch_processor = BatchProcessor() pids = ["1", "2", "3"] callback = lambda pid: print(pid) batch_processor.process_pids(pids, callback) @@ -16,7 +16,7 @@ def test_process_pids(self, capsys): assert captured.out == "1\n2\n3\n" def test_process_pids_with_wait_on_iterator(self, capsys): - batch_processor = DatasetBatchProcessor(wait=0.1) + batch_processor = BatchProcessor(wait=0.1) def as_is(rec): time.sleep(0.1) @@ -37,7 +37,7 @@ def as_is(rec): time.sleep(0.1) print(f"lazy-{rec}") return rec - batch_processor = DatasetBatchProcessor(wait=0.1) + batch_processor = BatchProcessor(wait=0.1) pids = [as_is(rec) for rec in ["1", "2", "3"]] callback = lambda pid: print(pid) start_time = datetime.now() From a93934605e440b8c04ce5ae18685c2a060f891ab Mon Sep 17 00:00:00 2001 From: paulboon Date: Thu, 15 Feb 2024 12:41:19 +0100 Subject: [PATCH 09/11] Cleanup; removed whitespace --- src/datastation/dv_dataset_destroy_migration_placeholder.py | 5 ++--- src/datastation/dv_dataset_publish.py | 4 ++-- src/datastation/dv_dataset_reindex.py | 2 +- src/datastation/dv_dataset_reingest_tabular.py | 4 ++-- src/datastation/dv_dataset_role_assignment.py | 6 +++--- src/datastation/dv_dataset_update_datacite.py | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/datastation/dv_dataset_destroy_migration_placeholder.py b/src/datastation/dv_dataset_destroy_migration_placeholder.py index 3fa385c..1121e6b 100644 --- a/src/datastation/dv_dataset_destroy_migration_placeholder.py +++ b/src/datastation/dv_dataset_destroy_migration_placeholder.py @@ -22,14 +22,13 @@ def main(): args = 
parser.parse_args() batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, - headers=['PID', 'Destroyed', 'Messages']) + headers=['PID', 'Destroyed', 'Messages']) pids = get_pids(args.pid_or_pids_file) description_text_pattern = config['migration_placeholders']['description_text_pattern'] batch_processor.process_pids(pids, callback=lambda pid, csv_report: destroy_placeholder_dataset(dataverse.dataset(pid), description_text_pattern, - csv_report, - dry_run=args.dry_run)) + csv_report)) if __name__ == '__main__': diff --git a/src/datastation/dv_dataset_publish.py b/src/datastation/dv_dataset_publish.py index 98d56fe..8392db1 100644 --- a/src/datastation/dv_dataset_publish.py +++ b/src/datastation/dv_dataset_publish.py @@ -11,8 +11,8 @@ def publish_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) batch_processor = BatchProcessorWithReport(report_file=args.report_file, wait=args.wait, - fail_on_first_error=args.fail_fast, - headers=['DOI', 'Modified', 'Change', 'Messages']) + fail_on_first_error=args.fail_fast, + headers=['DOI', 'Modified', 'Change', 'Messages']) batch_processor.process_pids(pids, lambda pid, csv_report: publish(pid, dataverse_client, update_type=args.update_type, diff --git a/src/datastation/dv_dataset_reindex.py b/src/datastation/dv_dataset_reindex.py index 63a4bea..6d970da 100644 --- a/src/datastation/dv_dataset_reindex.py +++ b/src/datastation/dv_dataset_reindex.py @@ -13,7 +13,7 @@ def reindex_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, headers=["PID", "Status", "Message"]) + report_file=args.report_file, headers=["PID", "Status", "Message"]) batch_processor.process_pids(pids, lambda pid, csv_report: reindex_dataset(pid, dataverse_client, csv_report=csv_report, dry_run=args.dry_run)) diff --git a/src/datastation/dv_dataset_reingest_tabular.py b/src/datastation/dv_dataset_reingest_tabular.py index 80dd1c4..a958204 100644 --- a/src/datastation/dv_dataset_reingest_tabular.py +++ b/src/datastation/dv_dataset_reingest_tabular.py @@ -13,8 +13,8 @@ def reingest_tabular_files_in_datasets(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) batch_processor = BatchProcessorWithReport(report_file=args.report_file, wait=args.wait, - fail_on_first_error=args.fail_fast, - headers=['DOI', 'Modified', 'Change', 'Messages']) + fail_on_first_error=args.fail_fast, + headers=['DOI', 'Modified', 'Change', 'Messages']) batch_processor.process_pids(pids, lambda pid, csv_report: reingest_tabular_files_in_dataset(pid, dataverse_client, csv_report=csv_report, diff --git a/src/datastation/dv_dataset_role_assignment.py b/src/datastation/dv_dataset_role_assignment.py index cf960da..2f847c0 100644 --- a/src/datastation/dv_dataset_role_assignment.py +++ b/src/datastation/dv_dataset_role_assignment.py @@ -13,8 +13,8 @@ def add_role_assignments(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, - headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) + report_file=args.report_file, + headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) batch_processor.process_pids(pids, lambda pid, csv_report: add_role_assignment(args.role_assignment, 
dataset_api=dataverse_client.dataset(pid), @@ -57,7 +57,7 @@ def list_role_assignments(args, dataverse_client): def remove_role_assignments(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pid_file) batch_processor = BatchProcessorWithReport(wait=args.wait, report_file=args.report_file, - headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) + headers=['DOI', 'Modified', 'Assignee', 'Role', 'Change']) batch_processor.process_pids(pids, lambda pid, csv_report: remove_role_assignment(args.role_assignment, dataset_api=dataverse_client.dataset( diff --git a/src/datastation/dv_dataset_update_datacite.py b/src/datastation/dv_dataset_update_datacite.py index 25be017..60be081 100644 --- a/src/datastation/dv_dataset_update_datacite.py +++ b/src/datastation/dv_dataset_update_datacite.py @@ -13,7 +13,7 @@ def update_datacite_records(args, dataverse_client: DataverseClient): pids = get_pids(args.pid_or_pids_file) batch_processor = BatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, headers=["PID", "Status", "Message"]) + report_file=args.report_file, headers=["PID", "Status", "Message"]) batch_processor.process_pids(pids, lambda pid, csv_report: update_datacite_record(pid, dataverse_client, csv_report=csv_report, From 328e872cc7cff4e153cf24b0bc3ff1e5d96c5381 Mon Sep 17 00:00:00 2001 From: paulboon Date: Thu, 15 Feb 2024 13:45:09 +0100 Subject: [PATCH 10/11] Refactoring; moved part of assignment code from roles class back to commandline interface --- src/datastation/dataverse/roles.py | 30 +------------ .../dv_dataverse_role_assignment.py | 44 ++++++++++++++++--- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/datastation/dataverse/roles.py b/src/datastation/dataverse/roles.py index c28420f..6fa482c 100644 --- a/src/datastation/dataverse/roles.py +++ b/src/datastation/dataverse/roles.py @@ -12,25 +12,11 @@ def __init__(self, dataverse_client: DataverseClient, dry_run: bool = False): self.dataverse_client = dataverse_client self.dry_run = dry_run - def list_role_assignments(self, args): - r = self.dataverse_client.dataverse(args.alias).get_role_assignments() + def list_role_assignments(self, alias): + r = self.dataverse_client.dataverse(alias).get_role_assignments() if r is not None: rich.print_json(data=r) - def add_role_assignments(self, args): - aliases = get_aliases(args.alias_or_alias_file) - batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, - report_file=args.report_file, - headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) - batch_processor.process_aliases(aliases, - lambda alias, - csv_report: self.add_role_assignment(args.role_assignment, - dataverse_api= - self.dataverse_client.dataverse( - alias), - csv_report=csv_report, - dry_run=args.dry_run)) - def add_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): assignee = role_assignment.split('=')[0] role = role_assignment.split('=')[1] @@ -56,18 +42,6 @@ def in_current_assignments(self, assignee, role, dataverse_api: DataverseApi): break return found - def remove_role_assignments(self, args): - aliases = get_aliases(args.alias_or_alias_file) - batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, - headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) - batch_processor.process_aliases(aliases, - lambda alias, - csv_report: self.remove_role_assignment(args.role_assignment, - 
dataverse_api= - self.dataverse_client.dataverse( - alias), - csv_report=csv_report, - dry_run=args.dry_run)) def remove_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): assignee = role_assignment.split('=')[0] diff --git a/src/datastation/dv_dataverse_role_assignment.py b/src/datastation/dv_dataverse_role_assignment.py index ae2df55..5f6166b 100644 --- a/src/datastation/dv_dataverse_role_assignment.py +++ b/src/datastation/dv_dataverse_role_assignment.py @@ -1,16 +1,50 @@ import argparse - +from datastation.common.common_batch_processing import get_aliases, DataverseBatchProcessorWithReport from datastation.common.config import init from datastation.common.utils import add_batch_processor_args, add_dry_run_arg from datastation.dataverse.dataverse_client import DataverseClient from datastation.dataverse.roles import DataverseRole +def list_role_assignments(args, dataverse_client: DataverseClient): + role_assignment = DataverseRole(dataverse_client) + role_assignment.list_role_assignments(args.alias) + +def add_role_assignments(args, dataverse_client: DataverseClient): + role_assignment = DataverseRole(dataverse_client, args.dry_run) + aliases = get_aliases(args.alias_or_alias_file) + batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, fail_on_first_error=args.fail_fast, + report_file=args.report_file, + headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_aliases(aliases, + lambda alias, + csv_report: role_assignment.add_role_assignment(args.role_assignment, + dataverse_api= + dataverse_client.dataverse( + alias), + csv_report=csv_report, + dry_run=args.dry_run)) + + +def remove_role_assignments(args, dataverse_client: DataverseClient): + role_assignment = DataverseRole(dataverse_client, args.dry_run) + aliases = get_aliases(args.alias_or_alias_file) + batch_processor = DataverseBatchProcessorWithReport(wait=args.wait, report_file=args.report_file, + headers=['alias', 'Modified', 'Assignee', 'Role', 'Change']) + batch_processor.process_aliases(aliases, + lambda alias, + csv_report: role_assignment.remove_role_assignment(args.role_assignment, + dataverse_api= + dataverse_client.dataverse( + alias), + csv_report=csv_report, + dry_run=args.dry_run)) + + def main(): config = init() dataverse_client = DataverseClient(config['dataverse']) - role_assignment = DataverseRole(dataverse_client) # Create main parser and subparsers parser = argparse.ArgumentParser(description='Manage role assignments on one or more datasets.') @@ -25,7 +59,7 @@ def main(): add_batch_processor_args(parser_add) add_dry_run_arg(parser_add) - parser_add.set_defaults(func=lambda _: role_assignment.add_role_assignments(_)) + parser_add.set_defaults(func=lambda _: add_role_assignments(_, dataverse_client)) # Remove role assignment parser_remove = subparsers.add_parser('remove', help='remove role assignment from specified dataset(s)') @@ -35,14 +69,14 @@ def main(): help='The dataverse alias or the input file with the dataverse aliases') add_batch_processor_args(parser_remove) add_dry_run_arg(parser_remove) - parser_remove.set_defaults(func=lambda _: role_assignment.remove_role_assignments(_)) + parser_remove.set_defaults(func=lambda _: remove_role_assignments(_, dataverse_client)) # List role assignments parser_list = subparsers.add_parser('list', help='list role assignments for specified dataverse (only one alias allowed)') parser_list.add_argument('alias', help='the dataverse alias') add_dry_run_arg(parser_list) - 
parser_list.set_defaults(func=lambda _: role_assignment.list_role_assignments(_)) + parser_list.set_defaults(func=lambda _: list_role_assignments(_, dataverse_client)) args = parser.parse_args() args.func(args) From 03c099a4d6dc444df334996a828af07d0e32ae27 Mon Sep 17 00:00:00 2001 From: paulboon Date: Thu, 15 Feb 2024 13:52:48 +0100 Subject: [PATCH 11/11] Refactoring; role assignment in DataverseRole uses the self.dry_run instead of input param --- src/datastation/dataverse/roles.py | 8 ++++---- src/datastation/dv_dataverse_role_assignment.py | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/datastation/dataverse/roles.py b/src/datastation/dataverse/roles.py index 6fa482c..d6c605a 100644 --- a/src/datastation/dataverse/roles.py +++ b/src/datastation/dataverse/roles.py @@ -17,7 +17,7 @@ def list_role_assignments(self, alias): if r is not None: rich.print_json(data=r) - def add_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): + def add_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report): assignee = role_assignment.split('=')[0] role = role_assignment.split('=')[1] action = "None" @@ -26,7 +26,7 @@ def add_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_ else: print( "Adding {} as {} for dataset {}".format(assignee, role, dataverse_api.get_alias())) - dataverse_api.add_role_assignment(assignee, role, dry_run=dry_run) + dataverse_api.add_role_assignment(assignee, role, dry_run=self.dry_run) action = "Added" csv_report.write( {'alias': dataverse_api.get_alias(), 'Modified': datetime.now(), 'Assignee': assignee, 'Role': role, @@ -43,7 +43,7 @@ def in_current_assignments(self, assignee, role, dataverse_api: DataverseApi): return found - def remove_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report, dry_run: bool = False): + def remove_role_assignment(self, role_assignment, dataverse_api: DataverseApi, csv_report): assignee = role_assignment.split('=')[0] role = role_assignment.split('=')[1] action = "None" @@ -52,7 +52,7 @@ def remove_role_assignment(self, role_assignment, dataverse_api: DataverseApi, c all_assignments = dataverse_api.get_role_assignments() for assignment in all_assignments: if assignment.get('assignee') == assignee and assignment.get('_roleAlias') == role: - dataverse_api.remove_role_assignment(assignment.get('id'), dry_run=dry_run) + dataverse_api.remove_role_assignment(assignment.get('id'), dry_run=self.dry_run) action = "Removed" break else: diff --git a/src/datastation/dv_dataverse_role_assignment.py b/src/datastation/dv_dataverse_role_assignment.py index 5f6166b..1a4ae96 100644 --- a/src/datastation/dv_dataverse_role_assignment.py +++ b/src/datastation/dv_dataverse_role_assignment.py @@ -23,8 +23,7 @@ def add_role_assignments(args, dataverse_client: DataverseClient): dataverse_api= dataverse_client.dataverse( alias), - csv_report=csv_report, - dry_run=args.dry_run)) + csv_report=csv_report)) def remove_role_assignments(args, dataverse_client: DataverseClient): @@ -38,8 +37,7 @@ def remove_role_assignments(args, dataverse_client: DataverseClient): dataverse_api= dataverse_client.dataverse( alias), - csv_report=csv_report, - dry_run=args.dry_run)) + csv_report=csv_report)) def main():
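A closing usage sketch (illustrative only; the report file name, the 'root' alias and '@exampleUser=admin' are placeholder inputs, not values taken from these patches): after patches 08 through 11 the batch-processing and role-assignment pieces compose roughly like this.

    from datastation.common.common_batch_processing import get_aliases, DataverseBatchProcessorWithReport
    from datastation.common.config import init
    from datastation.dataverse.dataverse_client import DataverseClient
    from datastation.dataverse.roles import DataverseRole

    config = init()
    client = DataverseClient(config['dataverse'])
    roles = DataverseRole(client, dry_run=True)   # dry_run now lives on DataverseRole itself

    aliases = get_aliases('root')                 # a single alias, or a file with one alias per line
    processor = DataverseBatchProcessorWithReport(report_file='role-assignments.csv',
                                                  headers=['alias', 'Modified', 'Assignee', 'Role', 'Change'])
    processor.process_aliases(aliases,
                              lambda alias, csv_report: roles.add_role_assignment(
                                  '@exampleUser=admin',
                                  dataverse_api=client.dataverse(alias),
                                  csv_report=csv_report))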