Merge remote-tracking branch 'origin/master' into DD-1467-edit-metadata
# Conflicts:
#	poetry.lock
#	pyproject.toml
jo-pol committed Feb 12, 2024
2 parents 4bbd644 + 0b4debf commit cadaa1d
Showing 8 changed files with 206 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yml
@@ -40,7 +40,7 @@ jobs:

steps:
- name: Checkout repository
-  uses: actions/checkout@v3
+  uses: actions/checkout@v4

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
4 changes: 2 additions & 2 deletions .github/workflows/coverage.yml
@@ -7,12 +7,12 @@ jobs:
OS: ubuntu-latest
PYTHON: '3.9'
steps:
-  - uses: actions/checkout@v3
+  - uses: actions/checkout@v4
with:
fetch-depth: '2'

- name: Setup Python
-  uses: actions/setup-python@master
+  uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Generate Report
6 changes: 3 additions & 3 deletions .github/workflows/docs.yml
@@ -10,13 +10,13 @@ jobs:
runs-on: ubuntu-latest

steps:
-  - uses: actions/checkout@v2
+  - uses: actions/checkout@v4
- name: Set up Python
-  uses: actions/setup-python@v2
+  uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Cache pip
-  uses: actions/cache@v2
+  uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
46 changes: 8 additions & 38 deletions poetry.lock

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
@@ -16,6 +16,7 @@ pyYAML = "^6.0"
psycopg = "^3.1.8"
rich = "^13.7.0"
bs4 = "^0.0.1"
+argparse-formatter = "^1.4"

[tool.poetry.dev-dependencies]
pytest = "^7.3.1"
@@ -51,4 +52,5 @@ dv-user-import = "datastation.dv_user_import:main"
dv-notifications-cleanup="datastation.dv_notifications_cleanup:main"
ingest-flow = "datastation.ingest_flow:main"
dv-dataverse-root-collect-storage-usage = "datastation.dv_dataverse_root_collect_storage_usage:main"
+dv-dataverse-root-collect-permission-overview = "datastation.dv_dataverse_root_collect_permission_overview:main"
datastation-get-component-versions = "datastation.datastation_get_component_versions:main"
17 changes: 15 additions & 2 deletions src/datastation/dataverse/dataverse_api.py
@@ -9,9 +9,10 @@ def __init__(self, server_url, api_token):
        self.server_url = server_url
        self.api_token = api_token

-    def get_contents(self, alias="root", dry_run=False):
+    # get json data for a specific dataverses API endpoint using an API token
+    def get_resource_data(self, resource, alias="root", dry_run=False):
        headers = {"X-Dataverse-key": self.api_token}
-        url = f"{self.server_url}/api/dataverses/{alias}/contents"
+        url = f"{self.server_url}/api/dataverses/{alias}/{resource}"

        if dry_run:
            print_dry_run_message(method="GET", url=url, headers=headers)
@@ -23,6 +24,18 @@ def get_contents(self, alias="root", dry_run=False):
        resp_data = dv_resp.json()["data"]
        return resp_data

+    def get_contents(self, alias="root", dry_run=False):
+        return self.get_resource_data("contents", alias, dry_run)
+
+    def get_roles(self, alias="root", dry_run=False):
+        return self.get_resource_data("roles", alias, dry_run)
+
+    def get_assignments(self, alias="root", dry_run=False):
+        return self.get_resource_data("assignments", alias, dry_run)
+
+    def get_groups(self, alias="root", dry_run=False):
+        return self.get_resource_data("groups", alias, dry_run)
+
    def get_storage_size(self, alias="root", dry_run=False):
        """ Get dataverse storage size (bytes). """
        url = f'{self.server_url}/api/dataverses/{alias}/storagesize'
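For orientation, here is a minimal usage sketch of the refactored resource getters. The class name DataverseApi is assumed from the module name, and the server URL and token are placeholders; only the methods visible in the diff above are relied on.

# Hypothetical usage of the generalized resource getters; server URL and API token are placeholders.
from datastation.dataverse.dataverse_api import DataverseApi  # class name assumed from the module

api = DataverseApi("https://demo.dataverse.example", "xxxx-api-token")

# Each convenience method delegates to get_resource_data(), which requests
# {server_url}/api/dataverses/{alias}/{resource} and returns the response's "data" field.
groups = api.get_groups("root")            # GET .../api/dataverses/root/groups
roles = api.get_roles("root")              # GET .../api/dataverses/root/roles
assignments = api.get_assignments("root")  # GET .../api/dataverses/root/assignments

# With dry_run=True the request is only printed, not sent.
api.get_contents("root", dry_run=True)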
129 changes: 129 additions & 0 deletions src/datastation/dataverse/permissions_collect.py
@@ -0,0 +1,129 @@
from datastation.common.result_writer import CsvResultWriter, YamlResultWriter, JsonResultWriter
from datastation.dataverse.dataverse_client import DataverseClient
import logging
import re
import csv
import sys
import json
import rich
from datetime import timedelta


class PermissionsCollect:

    def __init__(self, dataverse_client: DataverseClient, output_file, output_format, dry_run: bool = False):
        self.dataverse_client = dataverse_client
        self.output_file = output_file
        self.output_format = output_format
        self.dry_run = dry_run

        self.writer = None
        self.is_first = True  # Would be nicer if the Writer did the bookkeeping
        self.vpath_delimiter = ' > '  # Note that '/' would tempt people to use it as a real path

    def create_result_writer(self, out_stream):
        logging.info(f'Writing output: {self.output_file}, with format : {self.output_format}')
        csv_columns = ['depth', 'parentalias', 'alias', 'name', 'id', 'vpath', 'groups', 'roles', 'assignments']
        if self.output_format == 'csv':
            return CsvResultWriter(headers=csv_columns, out_stream=out_stream)
        else:
            return JsonResultWriter(out_stream)

    def write_result_row(self, row):
        self.writer.write(row, self.is_first)
        self.is_first = False  # Only the first time it can be True

    def get_result_row(self, parent_alias, child_alias, child_name, id, vpath, depth):
        logging.info(f'Retrieving permission info for dataverse: {parent_alias} / {child_alias} ...')
        group_info = self.get_group_info(child_alias)
        role_info = self.get_role_info(child_alias)
        assignment_info = self.get_assignment_info(child_alias)
        row = {'depth': depth, 'parentalias': parent_alias, 'alias': child_alias, 'name': child_name,
               'id': id, 'vpath': vpath, 'groups': group_info, 'roles': role_info, 'assignments': assignment_info}
        return row

    def get_group_info(self, alias):
        resp_data = self.dataverse_client.dataverse().get_groups(alias)
        # flatten and compact it... no list comprehension though
        result_list = []
        for group in resp_data:
            # append the number of assignees in braces
            result_list.append(group['identifier'] + ' (' + str(len(group['containedRoleAssignees'])) + ')')
        return ', '.join(result_list)

    def get_role_info(self, alias):
        resp_data = self.dataverse_client.dataverse().get_roles(alias)
        # flatten and compact it... no list comprehension though
        result_list = []
        for role in resp_data:
            # append the number of permissions in braces
            result_list.append(role['alias'] + ' (' + str(len(role['permissions'])) + ')')
        return ', '.join(result_list)

    def get_assignment_info(self, alias):
        resp_data = self.dataverse_client.dataverse().get_assignments(alias)
        # flatten and compact it... no list comprehension though
        result_list = []
        for assignment in resp_data:
            # append the role alias in braces
            result_list.append(assignment['assignee'] + ' (' + (assignment['_roleAlias']) + ')')
        return ', '.join(result_list)

    def collect_permissions_info(self, tree_data, parent_vpath, parent_alias, depth=1):
        alias = tree_data['alias']
        name = tree_data['name']
        id = tree_data['id']
        vpath = parent_vpath + self.vpath_delimiter + alias
        row = self.get_result_row(parent_alias, alias, name, id, vpath, depth)
        self.write_result_row(row)
        # only direct descendants (children)
        if 'children' in tree_data:
            for child_tree_data in tree_data['children']:
                self.collect_permissions_info(child_tree_data, vpath, alias, depth + 1)  # recurse

    def find_child(self, parent, alias):
        result = None
        for child in parent['children']:
            if child["alias"] == alias:
                result = child
                break
        return result

    def collect_permissions_info_overview(self, selected_dataverse=None):
        out_stream = sys.stdout
        if self.output_file != '-':
            try:
                out_stream = open(self.output_file, "w")
            except:
                logging.error(f"Could not open file: {self.output_file}")
                raise

        self.writer = self.create_result_writer(out_stream)

        logging.info(f'Extracting tree for server: {self.dataverse_client.server_url} ...')
        tree_data = self.dataverse_client.metrics().get_tree()
        alias = tree_data['alias']
        name = tree_data['name']
        id = tree_data['id']
        vpath = alias
        logging.info(f'Extracted the tree for the toplevel dataverse: {name} ({alias})')
        logging.info("Retrieving the info for this dataverse instance...")

        if selected_dataverse is None:
            # do whole tree
            logging.info("Retrieving the info for all the dataverse collections...")
            self.collect_permissions_info(tree_data, vpath, alias, 1)
        else:
            # always the 'root' dataverse
            row = self.get_result_row("-", alias, name, id, vpath, 0)  # The root has no parent
            self.write_result_row(row)
            # then the selected sub-verse tree
            logging.info("Retrieving the info for a selected dataverse collection sub-tree...")
            selected_tree_data = self.find_child(tree_data, selected_dataverse)
            if selected_tree_data is not None:
                self.collect_permissions_info(selected_tree_data, vpath, alias, 1)
            else:
                logging.error(f"Could not find the selected dataverse: {selected_dataverse}")

        self.writer.close()
        self.is_first = True
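To make the flattened columns concrete, here is a purely illustrative row as get_result_row() assembles it; every value below is invented, and the real identifiers depend on the installation.

# Illustrative only: all values are made up to show the column format.
example_row = {
    'depth': 1,
    'parentalias': 'root',
    'alias': 'examplecollection',         # hypothetical child dataverse
    'name': 'Example Collection',
    'id': 42,
    'vpath': 'root > examplecollection',  # joined with the ' > ' vpath delimiter
    'groups': 'editors (3)',              # identifier (number of containedRoleAssignees)
    'roles': 'curator (12), member (6)',  # alias (number of permissions)
    'assignments': '@jdoe (curator)',     # assignee (_roleAlias)
}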
46 changes: 46 additions & 0 deletions src/datastation/dv_dataverse_root_collect_permission_overview.py
@@ -0,0 +1,46 @@
import argparse
from argparse_formatter import FlexiFormatter

from datastation.common.config import init
from datastation.common.utils import add_dry_run_arg
from datastation.dataverse.dataverse_client import DataverseClient
from datastation.dataverse.permissions_collect import PermissionsCollect


def main():
    config = init()

    output_explanation = '''
    The output has the following information:
    * depth: The depth of the dataverse in the tree. The top-level ('root') has depth 0.
    * parentalias: The alias of the parent dataverse
    * alias: The alias of the dataverse
    * name: The name of the dataverse
    * id: The id of the dataverse, sometimes used in places where the alias is not used
    * vpath: The virtual path of the collection, i.e. the path from the root to the dataverse
    * groups: A comma-separated list of the explicit groups under the dataverse.
      For each group there is the 'identifier' with the number of 'containedRoleAssignees' in braces appended.
    * roles: A comma-separated list of the roles defined in the dataverse.
      For each role there is the 'alias' with the number of 'permissions' in braces appended.
    * assignments: A comma-separated list of the assignments of roles on the dataverse.
      For each assignment there is the 'assignee' with the '_roleAlias' in braces appended.
    '''
    parser = argparse.ArgumentParser(description='Collect the permissions overview for the dataverses (collections) in a Dataverse installation.',
                                     epilog=output_explanation, formatter_class=FlexiFormatter)
    parser.add_argument('-o', '--output-file', dest='output_file', default='-',
                        help='The file to write the output to or - for stdout')
    parser.add_argument('-f', '--format', dest='format',
                        help='Output format, one of: csv, json (default: json)')
    parser.add_argument('-s', '--selected-dataverse', dest='selected_dataverse', default=None,
                        help='The dataverse (top-level) sub-tree to collect the permissions for, by default all dataverses are collected')
    add_dry_run_arg(parser)
    args = parser.parse_args()

    selected_dataverse = args.selected_dataverse
    dataverse_client = DataverseClient(config['dataverse'])
    collector = PermissionsCollect(dataverse_client, args.output_file, args.format, args.dry_run)
    collector.collect_permissions_info_overview(selected_dataverse)

if __name__ == '__main__':
    main()
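As a rough sketch, driving the same collection programmatically amounts to the following; it simply mirrors the wiring in main() above, and the contents of config['dataverse'] are taken on trust from the script rather than spelled out here.

# Rough programmatic equivalent of running:
#   dv-dataverse-root-collect-permission-overview -f csv -o permissions.csv
from datastation.common.config import init
from datastation.dataverse.dataverse_client import DataverseClient
from datastation.dataverse.permissions_collect import PermissionsCollect

config = init()
client = DataverseClient(config['dataverse'])
collector = PermissionsCollect(client, output_file='permissions.csv', output_format='csv', dry_run=False)
collector.collect_permissions_info_overview(selected_dataverse=None)  # None collects the whole tree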
