From 7b0aa6068f48f4b5b2c49181b8667c518a90c0b8 Mon Sep 17 00:00:00 2001 From: emalinowski Date: Fri, 26 Jul 2024 09:37:35 -0500 Subject: [PATCH] Prometheus metrics support for logins and presigned URLs (#1156) --- .secrets.baseline | 6 +- clear_prometheus_multiproc | 6 +- fence/__init__.py | 64 ++--- fence/blueprints/data/indexd.py | 52 +++- fence/blueprints/ga4gh.py | 2 + fence/blueprints/login/base.py | 9 + fence/blueprints/login/google.py | 1 + fence/config-default.yaml | 1 + fence/metrics.py | 199 ++++++++++++++ openapis/swagger.yaml | 12 + poetry.lock | 129 +++++---- pyproject.toml | 4 +- tests/ci_commands_script.sh | 2 + tests/conftest.py | 42 +++ tests/login/test_base.py | 4 +- tests/test_app_config.py | 1 - ...{test_audit_service.py => test_metrics.py} | 248 +++++++++++++++++- tests/utils/metrics.py | 223 ++++++++++++++++ 18 files changed, 864 insertions(+), 141 deletions(-) create mode 100644 fence/metrics.py rename tests/{test_audit_service.py => test_metrics.py} (71%) create mode 100644 tests/utils/metrics.py diff --git a/.secrets.baseline b/.secrets.baseline index 8c97f29fb..9aa531c19 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -268,14 +268,14 @@ "filename": "tests/conftest.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 1561 + "line_number": 1569 }, { "type": "Base64 High Entropy String", "filename": "tests/conftest.py", "hashed_secret": "227dea087477346785aefd575f91dd13ab86c108", "is_verified": false, - "line_number": 1583 + "line_number": 1593 } ], "tests/credentials/google/test_credentials.py": [ @@ -422,5 +422,5 @@ } ] }, - "generated_at": "2024-03-16T00:09:27Z" + "generated_at": "2024-07-25T17:19:58Z" } diff --git a/clear_prometheus_multiproc b/clear_prometheus_multiproc index 4bb2b425f..af1ba6d18 100755 --- a/clear_prometheus_multiproc +++ b/clear_prometheus_multiproc @@ -4,6 +4,8 @@ set -ex rm -Rf $1 -mkdir $1 +mkdir -p $1 chmod 755 $1 -chown 100:101 $1 +if id -u nginx &>/dev/null; then + chown $(id -u nginx):$(id -g nginx) $1 +fi diff --git a/fence/__init__.py b/fence/__init__.py index fc0fe97cf..31cb76eda 100755 --- a/fence/__init__.py +++ b/fence/__init__.py @@ -1,21 +1,17 @@ from collections import OrderedDict import os -import tempfile from urllib.parse import urljoin -import flask -from flask_cors import CORS -from sqlalchemy.orm import scoped_session -from flask import current_app -from werkzeug.local import LocalProxy from authutils.oauth2.client import OAuthClient -from cdislogging import get_logger -from gen3authz.client.arborist.client import ArboristClient -from flask_wtf.csrf import validate_csrf -from werkzeug.middleware.dispatcher import DispatcherMiddleware from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceNotFoundError -from urllib.parse import urlparse +from cdislogging import get_logger +import flask +from flask_cors import CORS +from flask_wtf.csrf import validate_csrf +from gen3authz.client.arborist.client import ArboristClient +from sqlalchemy.orm import scoped_session + # Can't read config yet. Just set to debug for now, else no handlers. 
# Later, in app_config(), will actually set level based on config @@ -31,6 +27,7 @@ ) from fence.auth import logout, build_redirect_url +from fence.metrics import metrics from fence.blueprints.data.indexd import S3IndexedFileLocation from fence.blueprints.login.utils import allowed_login_redirects, domain from fence.errors import UserError @@ -67,11 +64,6 @@ import fence.blueprints.ga4gh -# for some reason the temp dir does not get created properly if we move -# this statement to `_setup_prometheus()` -PROMETHEUS_TMP_COUNTER_DIR = tempfile.TemporaryDirectory() - - app = flask.Flask(__name__) CORS(app=app, headers=["content-type", "accept"], expose_headers="*") @@ -102,6 +94,9 @@ def app_init( app_sessions(app) app_register_blueprints(app) server.init_app(app, query_client=query_client) + logger.info( + f"Prometheus metrics are{'' if config['ENABLE_PROMETHEUS_METRICS'] else ' NOT'} enabled." + ) def app_sessions(app): @@ -206,6 +201,15 @@ def public_keys(): {"keys": [(keypair.kid, keypair.public_key) for keypair in app.keypairs]} ) + @app.route("/metrics") + def metrics_endpoint(): + """ + /!\ There is no authz control on this endpoint! + In cloud-automation setups, access to this endpoint is blocked at the revproxy level. + """ + data, content_type = metrics.get_latest_metrics() + return flask.Response(data, content_type=content_type) + def _check_azure_storage(app): """ @@ -365,13 +369,6 @@ def app_config( _setup_data_endpoint_and_boto(app) _load_keys(app, root_dir) - app.prometheus_counters = {} - if config["ENABLE_PROMETHEUS_METRICS"]: - logger.info("Enabling Prometheus metrics...") - _setup_prometheus(app) - else: - logger.info("Prometheus metrics are NOT enabled.") - app.storage_manager = StorageManager(config["STORAGE_CREDENTIALS"], logger=logger) app.debug = config["DEBUG"] @@ -495,27 +492,6 @@ def _setup_audit_service_client(app): ) -def _setup_prometheus(app): - # This environment variable MUST be declared before importing the - # prometheus modules (or unit tests fail) - # More details on this awkwardness: https://github.com/prometheus/client_python/issues/250 - os.environ["prometheus_multiproc_dir"] = PROMETHEUS_TMP_COUNTER_DIR.name - - from prometheus_client import ( - CollectorRegistry, - multiprocess, - make_wsgi_app, - ) - - app.prometheus_registry = CollectorRegistry() - multiprocess.MultiProcessCollector(app.prometheus_registry) - - # Add prometheus wsgi middleware to route /metrics requests - app.wsgi_app = DispatcherMiddleware( - app.wsgi_app, {"/metrics": make_wsgi_app(registry=app.prometheus_registry)} - ) - - @app.errorhandler(Exception) def handle_error(error): """ diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index f7b9488f6..380fcd43e 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -49,6 +49,7 @@ from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports from fence.resources.audit.utils import enable_audit_logging from fence.utils import get_valid_expiration_from_request +from fence.metrics import metrics from . 
import multipart_upload from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id @@ -77,6 +78,7 @@ def get_signed_url_for_file( ga4gh_passports=None, db_session=None, bucket=None, + drs="False", ): requested_protocol = requested_protocol or flask.request.args.get("protocol", None) r_pays_project = flask.request.args.get("userProject", None) @@ -164,12 +166,33 @@ def get_signed_url_for_file( user_sub=flask.g.audit_data.get("sub", ""), client_id=_get_client_id(), requested_protocol=requested_protocol, + action=action, + drs=drs, ) return {"url": signed_url} -def _log_signed_url_data_info(indexed_file, user_sub, client_id, requested_protocol): +def get_bucket_from_urls(urls, protocol): + """ + Return the bucket name from the first of the provided URLs that starts with the given protocol (usually `gs`, `s3`, `az`...) + """ + bucket = "" + for url in urls: + if "://" in url: + # Extract the protocol and the rest of the URL + bucket_protocol, rest_of_url = url.split("://", 1) + + if bucket_protocol == protocol: + # Extract bucket name + bucket = f"{bucket_protocol}://{rest_of_url.split('/')[0]}" + break + return bucket + + +def _log_signed_url_data_info( + indexed_file, user_sub, client_id, requested_protocol, action, drs="False" +): size_in_kibibytes = (indexed_file.index_document.get("size") or 0) / 1024 acl = indexed_file.index_document.get("acl") authz = indexed_file.index_document.get("authz") @@ -180,23 +203,25 @@ def _log_signed_url_data_info(indexed_file, user_sub, client_id, requested_proto protocol = indexed_file.indexed_file_locations[0].protocol # figure out which bucket was used based on the protocol - bucket = "" - for url in indexed_file.index_document.get("urls", []): - bucket_name = None - if "://" in url: - # Extract the protocol and the rest of the URL - bucket_protocol, rest_of_url = url.split("://", 1) - - if bucket_protocol == protocol: - # Extract bucket name - bucket = f"{bucket_protocol}://{rest_of_url.split('/')[0]}" - break + bucket = get_bucket_from_urls(indexed_file.index_document.get("urls", []), protocol) logger.info( - f"Signed URL Generated. size_in_kibibytes={size_in_kibibytes} " + f"Signed URL Generated. action={action} size_in_kibibytes={size_in_kibibytes} " f"acl={acl} authz={authz} bucket={bucket} user_sub={user_sub} client_id={client_id}" ) + metrics.add_signed_url_event( + action, + protocol, + acl, + authz, + bucket, + user_sub, + client_id, + drs, + size_in_kibibytes, + ) + def _get_client_id(): client_id = "Unknown Client" @@ -208,6 +233,7 @@ def _get_client_id(): return client_id + def prepare_presigned_url_audit_log(protocol, indexed_file): """ Store in `flask.g.audit_data` the data needed to record an audit log. 
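The bucket-parsing loop that previously lived inline in `_log_signed_url_data_info` is extracted above into the standalone `get_bucket_from_urls` helper, so the log line and the new Prometheus event can share it. A minimal sketch of its behavior, using made-up record URLs (only the helper itself comes from this patch):

# Illustrative only: exercises the `get_bucket_from_urls` helper added above.
from fence.blueprints.data.indexd import get_bucket_from_urls

urls = [
    "gs://some-bucket/object-key",  # hypothetical indexd record URLs
    "s3://other-bucket/folder/object-key",
]
# returns the scheme-qualified bucket of the first URL matching the requested protocol
assert get_bucket_from_urls(urls, "s3") == "s3://other-bucket"
assert get_bucket_from_urls(urls, "gs") == "gs://some-bucket"
# no URL matches the protocol: the helper falls back to an empty string
assert get_bucket_from_urls(urls, "az") == ""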
diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py
index 7b4ef0603..890da83f6 100644
--- a/fence/blueprints/ga4gh.py
+++ b/fence/blueprints/ga4gh.py
@@ -41,5 +41,7 @@ def get_ga4gh_signed_url(object_id, access_id):
         object_id,
         requested_protocol=access_id,
         ga4gh_passports=ga4gh_passports,
+        drs="True",
     )
+
     return flask.jsonify(result)
diff --git a/fence/blueprints/login/base.py b/fence/blueprints/login/base.py
index 0b6ae3f95..08fcab61d 100644
--- a/fence/blueprints/login/base.py
+++ b/fence/blueprints/login/base.py
@@ -7,6 +7,7 @@
 from fence.blueprints.login.redirect import validate_redirect
 from fence.config import config
 from fence.errors import UserError
+from fence.metrics import metrics
 
 logger = get_logger(__name__)
 
@@ -133,6 +134,14 @@ def get(self):
     def post_login(self, user=None, token_result=None, **kwargs):
         prepare_login_log(self.idp_name)
 
+        metrics.add_login_event(
+            user_sub=flask.g.user.id,
+            idp=self.idp_name,
+            fence_idp=flask.session.get("fence_idp"),
+            shib_idp=flask.session.get("shib_idp"),
+            client_id=flask.session.get("client_id"),
+        )
+
         if token_result:
             username = token_result.get(self.username_field)
             if self.is_mfa_enabled:
diff --git a/fence/blueprints/login/google.py b/fence/blueprints/login/google.py
index 0fa3e4cb5..2c7570795 100644
--- a/fence/blueprints/login/google.py
+++ b/fence/blueprints/login/google.py
@@ -25,4 +25,5 @@ def get(self):
             config.get("BASE_URL", "")
             + "/link/google/callback?code={}".format(flask.request.args.get("code"))
         )
+
         return super(GoogleCallback, self).get()
diff --git a/fence/config-default.yaml b/fence/config-default.yaml
index a73ebb976..5e43e21dc 100755
--- a/fence/config-default.yaml
+++ b/fence/config-default.yaml
@@ -62,6 +62,7 @@ MOCK_STORAGE: true
 # WARNING: ONLY set to true when fence will be deployed in such a way that it will
 # ONLY receive traffic from internal clients and can safely use HTTP.
 AUTHLIB_INSECURE_TRANSPORT: true
+
 # enable Prometheus Metrics for observability purposes
 #
 # WARNING: Any counters, gauges, histograms, etc. should be carefully
diff --git a/fence/metrics.py b/fence/metrics.py
new file mode 100644
index 000000000..acdb200a9
--- /dev/null
+++ b/fence/metrics.py
@@ -0,0 +1,199 @@
+"""
+Metrics are collected by the Prometheus client and exposed at the `/metrics` endpoint.
+
+To add a new metric:
+- Add a new method to the `Metrics` class below (see `add_login_event` and `add_signed_url_event`
+for examples).
+- The new method should call the `_increment_counter` and/or `_set_gauge` methods with the
+appropriate metric name and labels.
+- Call the new method from the code where relevant, for example:
+  from fence.metrics import metrics
+  metrics.add_login_event(...)
+- Add unit tests to the `tests/test_metrics.py` file.
+""" + + +import os +import pathlib + +from cdislogging import get_logger +from prometheus_client import ( + CollectorRegistry, + multiprocess, + Counter, + Gauge, + generate_latest, + CONTENT_TYPE_LATEST, +) + +from fence.config import config + + +logger = get_logger(__name__) + + +class Metrics: + """ + Class to handle Prometheus metrics + Attributes: + registry (CollectorRegistry): Prometheus registry + metrics (dict): Dictionary to store Prometheus metrics + """ + + def __init__(self, prometheus_dir="/var/tmp/uwsgi_flask_metrics"): + pathlib.Path(prometheus_dir).mkdir(parents=True, exist_ok=True) + os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_dir + + self._registry = CollectorRegistry() + multiprocess.MultiProcessCollector(self._registry) + self._metrics = {} + + # set the descriptions of new metrics here. Descriptions not specified here + # will default to the metric name. + self._counter_descriptions = { + "gen3_fence_presigned_url": "Fence presigned urls", + "gen3_fence_login": "Fence logins", + } + self._gauge_descriptions = { + "gen3_fence_presigned_url_size": "Fence presigned urls", + } + + def get_latest_metrics(self): + """ + Generate the latest Prometheus metrics + Returns: + str: Latest Prometheus metrics + str: Content type of the latest Prometheus metrics + """ + # When metrics gathering is not enabled, the metrics endpoint should not error, but it should + # not return any data. + if not config["ENABLE_PROMETHEUS_METRICS"]: + return "", None + + return generate_latest(self._registry), CONTENT_TYPE_LATEST + + def _increment_counter(self, name, labels): + """ + Increment a Prometheus counter metric. + Note that this function should not be called directly - implement a function like + `add_login_event` instead. A metric's labels should always be consistent. + Args: + name (str): Name of the metric + labels (dict): Dictionary of labels for the metric + """ + # create the counter if it doesn't already exist + if name not in self._metrics: + description = self._counter_descriptions.get(name, name) + logger.info( + f"Creating counter '{name}' with description '{description}' and labels: {labels}" + ) + self._metrics[name] = Counter(name, description, [*labels.keys()]) + elif type(self._metrics[name]) != Counter: + raise ValueError( + f"Trying to create counter '{name}' but a {type(self._metrics[name])} with this name already exists" + ) + + logger.debug(f"Incrementing counter '{name}' with labels: {labels}") + self._metrics[name].labels(*labels.values()).inc() + + def _set_gauge(self, name, labels, value): + """ + Set a Prometheus gauge metric. + Note that this function should not be called directly - implement a function like + `add_signed_url_event` instead. A metric's labels should always be consistent. 
+        Args:
+            name (str): Name of the metric
+            labels (dict): Dictionary of labels for the metric
+            value (float): Value to set the metric to
+        """
+        # create the gauge if it doesn't already exist
+        if name not in self._metrics:
+            description = self._gauge_descriptions.get(name, name)
+            logger.info(
+                f"Creating gauge '{name}' with description '{description}' and labels: {labels}"
+            )
+            self._metrics[name] = Gauge(name, description, [*labels.keys()])
+        elif type(self._metrics[name]) != Gauge:
+            raise ValueError(
+                f"Trying to create gauge '{name}' but a {type(self._metrics[name])} with this name already exists"
+            )
+
+        logger.debug(f"Setting gauge '{name}' with labels: {labels}")
+        self._metrics[name].labels(*labels.values()).set(value)
+
+    def add_login_event(self, user_sub, idp, fence_idp, shib_idp, client_id):
+        """
+        Record a login event
+        """
+        if not config["ENABLE_PROMETHEUS_METRICS"]:
+            return
+        self._increment_counter(
+            "gen3_fence_login",
+            {
+                "user_sub": user_sub,
+                "idp": idp,
+                "client_id": client_id,
+                "fence_idp": fence_idp,
+                "shib_idp": shib_idp,
+            },
+        )
+        self._increment_counter(
+            "gen3_fence_login",
+            {
+                "user_sub": user_sub,
+                "idp": "all",
+                "client_id": client_id,
+                # when counting all IDPs, we don't care about the fence and shib IDP values
+                "fence_idp": None,
+                "shib_idp": None,
+            },
+        )
+
+    def add_signed_url_event(
+        self,
+        action,
+        protocol,
+        acl,
+        authz,
+        bucket,
+        user_sub,
+        client_id,
+        drs,
+        size_in_kibibytes,
+    ):
+        """
+        Record a signed URL event
+        """
+        if not config["ENABLE_PROMETHEUS_METRICS"]:
+            return
+        self._increment_counter(
+            "gen3_fence_presigned_url",
+            {
+                "action": action,
+                "protocol": protocol,
+                "acl": acl,
+                "authz": authz,
+                "bucket": bucket,
+                "user_sub": user_sub,
+                "client_id": client_id,
+                "drs": drs,
+            },
+        )
+        self._set_gauge(
+            "gen3_fence_presigned_url_size",
+            {
+                "action": action,
+                "protocol": protocol,
+                "acl": acl,
+                "authz": authz,
+                "bucket": bucket,
+                "user_sub": user_sub,
+                "client_id": client_id,
+                "drs": drs,
+            },
+            size_in_kibibytes,
+        )
+
+
+# Initialize the Metrics instance
+metrics = Metrics()
diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml
index 70d1f8e1a..0336fec54 100644
--- a/openapis/swagger.yaml
+++ b/openapis/swagger.yaml
@@ -68,6 +68,18 @@ paths:
         description: successful operation
         schema:
           $ref: '#/definitions/SystemVersionOutputRef'
+  /metrics:
+    get:
+      tags:
+        - system
+      summary: Get Prometheus metrics
+      description: >-
+        Returns Prometheus metrics if the `ENABLE_PROMETHEUS_METRICS` setting is `True`.
+        By default, this endpoint is public. Authorization controls can be set up externally;
+        in cloud-automation setups, access to this endpoint is blocked at the revproxy level.
+      responses:
+        '200':
+          description: successful operation
   /oauth2/authorize:
     get:
       tags:
diff --git a/poetry.lock b/poetry.lock
index fb705cd5c..b3ea2a211 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]] name = "addict" @@ -170,13 +170,13 @@ aio = ["aiohttp (>=3.0)"] [[package]] name = "azure-storage-blob" -version = "12.20.0" +version = "12.21.0" description = "Microsoft Azure Blob Storage Client Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure-storage-blob-12.20.0.tar.gz", hash = "sha256:eeb91256e41d4b5b9bad6a87fd0a8ade07dd58aa52344e2c8d2746e27a017d3b"}, - {file = "azure_storage_blob-12.20.0-py3-none-any.whl", hash = "sha256:de6b3bf3a90e9341a6bcb96a2ebe981dffff993e9045818f6549afea827a52a9"}, + {file = "azure-storage-blob-12.21.0.tar.gz", hash = "sha256:b9722725072f5b7373c0f4dd6d78fbae2bb37bffc5c3e01731ab8c750ee8dd7e"}, + {file = "azure_storage_blob-12.21.0-py3-none-any.whl", hash = "sha256:f9ede187dd5a0ef296b583a7c1861c6938ddd6708d6e70f4203a163c2ab42d43"}, ] [package.dependencies] @@ -250,17 +250,17 @@ files = [ [[package]] name = "boto3" -version = "1.34.143" +version = "1.34.147" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.143-py3-none-any.whl", hash = "sha256:0d16832f23e6bd3ae94e35ea8e625529850bfad9baccd426de96ad8f445d8e03"}, - {file = "boto3-1.34.143.tar.gz", hash = "sha256:b590ce80c65149194def43ebf0ea1cf0533945502507837389a8d22e3ecbcf05"}, + {file = "boto3-1.34.147-py3-none-any.whl", hash = "sha256:e1cef9a1a301866bcdee32ae0c699465eb2345f9a8e613a5835821430165ff6d"}, + {file = "boto3-1.34.147.tar.gz", hash = "sha256:9ec1c6ab22588242a47549f51a63dfc7c21fdf95a94820fc6e629ab060c38bd9"}, ] [package.dependencies] -botocore = ">=1.34.143,<1.35.0" +botocore = ">=1.34.147,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -269,13 +269,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.143" +version = "1.34.147" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.143-py3-none-any.whl", hash = "sha256:094aea179e8aaa1bc957ad49cc27d93b189dd3a1f3075d8b0ca7c445a2a88430"}, - {file = "botocore-1.34.143.tar.gz", hash = "sha256:059f032ec05733a836e04e869c5a15534420102f93116f3bc9a5b759b0651caf"}, + {file = "botocore-1.34.147-py3-none-any.whl", hash = "sha256:be94a2f4874b1d1705cae2bd512c475047497379651678593acb6c61c50d91de"}, + {file = "botocore-1.34.147.tar.gz", hash = "sha256:2e8f000b77e4ca345146cb2edab6403769a517b564f627bb084ab335417f3dbe"}, ] [package.dependencies] @@ -313,13 +313,13 @@ files = [ [[package]] name = "cachetools" -version = "5.3.3" +version = "5.4.0" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" files = [ - {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"}, - {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, + {file = "cachetools-5.4.0-py3-none-any.whl", hash = "sha256:3ae3b49a3d5e28a77a0be2b37dbcb89005058959cb2323858c2657c4a8cab474"}, + {file = "cachetools-5.4.0.tar.gz", hash = "sha256:b8adc2e7c07f105ced7bc56dbb6dfbe7c4a00acce20e2227b3f355be89bc6827"}, ] [[package]] @@ -684,43 +684,38 @@ yaml = ["PyYAML (>=3.10)"] [[package]] name = "cryptography" -version = "42.0.8" +version = "43.0.0" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e"}, - {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7"}, - {file = "cryptography-42.0.8-cp37-abi3-win32.whl", hash = "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2"}, - {file = "cryptography-42.0.8-cp37-abi3-win_amd64.whl", hash = "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba"}, - {file = "cryptography-42.0.8-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14"}, - {file = "cryptography-42.0.8-cp39-abi3-win32.whl", hash = "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c"}, - {file = "cryptography-42.0.8-cp39-abi3-win_amd64.whl", hash = 
"sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad"}, - {file = "cryptography-42.0.8.tar.gz", hash = "sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2"}, + {file = "cryptography-43.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:64c3f16e2a4fc51c0d06af28441881f98c5d91009b8caaff40cf3548089e9c74"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dcdedae5c7710b9f97ac6bba7e1052b95c7083c9d0e9df96e02a1932e777895"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d9a1eca329405219b605fac09ecfc09ac09e595d6def650a437523fcd08dd22"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ea9e57f8ea880eeea38ab5abf9fbe39f923544d7884228ec67d666abd60f5a47"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9a8d6802e0825767476f62aafed40532bd435e8a5f7d23bd8b4f5fd04cc80ecf"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cc70b4b581f28d0a254d006f26949245e3657d40d8857066c2ae22a61222ef55"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a997df8c1c2aae1e1e5ac49c2e4f610ad037fc5a3aadc7b64e39dea42249431"}, + {file = "cryptography-43.0.0-cp37-abi3-win32.whl", hash = "sha256:6e2b11c55d260d03a8cf29ac9b5e0608d35f08077d8c087be96287f43af3ccdc"}, + {file = "cryptography-43.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:31e44a986ceccec3d0498e16f3d27b2ee5fdf69ce2ab89b52eaad1d2f33d8778"}, + {file = "cryptography-43.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:7b3f5fe74a5ca32d4d0f302ffe6680fcc5c28f8ef0dc0ae8f40c0f3a1b4fca66"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac1955ce000cb29ab40def14fd1bbfa7af2017cca696ee696925615cafd0dce5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:299d3da8e00b7e2b54bb02ef58d73cd5f55fb31f33ebbf33bd00d9aa6807df7e"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ee0c405832ade84d4de74b9029bedb7b31200600fa524d218fc29bfa371e97f5"}, + {file = 
"cryptography-43.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb013933d4c127349b3948aa8aaf2f12c0353ad0eccd715ca789c8a0f671646f"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fdcb265de28585de5b859ae13e3846a8e805268a823a12a4da2597f1f5afc9f0"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2905ccf93a8a2a416f3ec01b1a7911c3fe4073ef35640e7ee5296754e30b762b"}, + {file = "cryptography-43.0.0-cp39-abi3-win32.whl", hash = "sha256:47ca71115e545954e6c1d207dd13461ab81f4eccfcb1345eac874828b5e3eaaf"}, + {file = "cryptography-43.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:0663585d02f76929792470451a5ba64424acc3cd5227b03921dab0e2f27b1709"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c6d112bf61c5ef44042c253e4859b3cbbb50df2f78fa8fae6747a7814484a70"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:844b6d608374e7d08f4f6e6f9f7b951f9256db41421917dfb2d003dde4cd6b66"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:51956cf8730665e2bdf8ddb8da0056f699c1a5715648c1b0144670c1ba00b48f"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:aae4d918f6b180a8ab8bf6511a419473d107df4dbb4225c7b48c5c9602c38c7f"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:232ce02943a579095a339ac4b390fbbe97f5b5d5d107f8a08260ea2768be8cc2"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5bcb8a5620008a8034d39bce21dc3e23735dfdb6a33a06974739bfa04f853947"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:08a24a7070b2b6804c1940ff0f910ff728932a9d0e80e7814234269f9d46d069"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e9c5266c432a1e23738d178e51c2c7a5e2ddf790f248be939448c0ba2021f9d1"}, + {file = "cryptography-43.0.0.tar.gz", hash = "sha256:b88075ada2d51aa9f18283532c9f60e72170041bba88d7f37e49cbb10275299e"}, ] [package.dependencies] @@ -733,7 +728,7 @@ nox = ["nox"] pep8test = ["check-sdist", "click", "mypy", "ruff"] sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi", "cryptography-vectors (==43.0.0)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] [[package]] @@ -834,13 +829,13 @@ idna = ">=2.0.0" [[package]] name = "exceptiongroup" -version = "1.2.1" +version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, - {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] [package.extras] @@ -1102,13 +1097,13 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] [[package]] name = "google-cloud-storage" -version = "2.17.0" +version = "2.18.0" description = "Google Cloud Storage API client library" optional = false 
python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-2.17.0.tar.gz", hash = "sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388"}, - {file = "google_cloud_storage-2.17.0-py2.py3-none-any.whl", hash = "sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1"}, + {file = "google_cloud_storage-2.18.0-py2.py3-none-any.whl", hash = "sha256:e8e1a9577952143c3fca8163005ecfadd2d70ec080fa158a8b305000e2c22fbb"}, + {file = "google_cloud_storage-2.18.0.tar.gz", hash = "sha256:0aa3f7c57f3632f81b455d91558d2b27ada96eee2de3aaa17f689db1470d9578"}, ] [package.dependencies] @@ -1120,7 +1115,8 @@ google-resumable-media = ">=2.6.0" requests = ">=2.18.0,<3.0.0dev" [package.extras] -protobuf = ["protobuf (<5.0.0dev)"] +protobuf = ["protobuf (<6.0.0dev)"] +tracing = ["opentelemetry-api (>=1.1.0)"] [[package]] name = "google-crc32c" @@ -1391,13 +1387,13 @@ files = [ [[package]] name = "importlib-metadata" -version = "8.0.0" +version = "8.1.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-8.0.0-py3-none-any.whl", hash = "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f"}, - {file = "importlib_metadata-8.0.0.tar.gz", hash = "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812"}, + {file = "importlib_metadata-8.1.0-py3-none-any.whl", hash = "sha256:3cd29f739ed65973840b068e3132135ce954c254d48b5b640484467ef7ab3c8c"}, + {file = "importlib_metadata-8.1.0.tar.gz", hash = "sha256:fcdcb1d5ead7bdf3dd32657bb94ebe9d2aabfe89a19782ddc32da5041d6ebfb4"}, ] [package.dependencies] @@ -1734,13 +1730,13 @@ dev = ["pre-commit", "tox"] [[package]] name = "prometheus-client" -version = "0.9.0" +version = "0.20.0" description = "Python client for the Prometheus monitoring system." 
optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "prometheus_client-0.9.0-py2.py3-none-any.whl", hash = "sha256:b08c34c328e1bf5961f0b4352668e6c8f145b4a087e09b7296ef62cbe4693d35"}, - {file = "prometheus_client-0.9.0.tar.gz", hash = "sha256:9da7b32f02439d8c04f7777021c304ed51d9ec180604700c1ba72a4d44dceb03"}, + {file = "prometheus_client-0.20.0-py3-none-any.whl", hash = "sha256:cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7"}, + {file = "prometheus_client-0.20.0.tar.gz", hash = "sha256:287629d00b147a32dcb2be0b9df905da599b2d82f80377083ec8463309a4bb89"}, ] [package.extras] @@ -1818,13 +1814,13 @@ files = [ [[package]] name = "pyaml" -version = "24.4.0" +version = "24.7.0" description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" optional = false python-versions = ">=3.8" files = [ - {file = "pyaml-24.4.0-py3-none-any.whl", hash = "sha256:acc2b39c55cb0cbe4f694a6d3886f89ad3d2a5b3efcece526202f8de9a6b27de"}, - {file = "pyaml-24.4.0.tar.gz", hash = "sha256:0e483d9289010e747a325dc43171bcc39d6562dd1dd4719e8cc7e7c96c99fce6"}, + {file = "pyaml-24.7.0-py3-none-any.whl", hash = "sha256:6b06596cb5ac438a3fad1e1bf5775088c4d3afb927e2b03a29305d334835deb2"}, + {file = "pyaml-24.7.0.tar.gz", hash = "sha256:5d0fdf9e681036fb263a783d0298fc3af580a6e2a6cf1a3314ffc48dc3d91ccb"}, ] [package.dependencies] @@ -2127,6 +2123,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2581,4 +2578,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0.0" -content-hash = "47af6b1308f26c35e92b7574c7f2c82746273d206f0d7b728de7158937cc5c1d" +content-hash = "d003418dcc0d68257a215186d21776941f8739bd3b4f898762a93e7895d7c89e" diff --git a/pyproject.toml b/pyproject.toml index e3b57ede2..143ca2940 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fence" -version = "10.0.0" +version = "10.1.0" description = "Gen3 AuthN/AuthZ OIDC Service" authors = ["CTDS UChicago "] license = "Apache-2.0" @@ -38,7 +38,7 @@ markdown = "^3.1.1" markupsafe = "^2.0.1" paramiko = ">=2.6.0" -prometheus-client = "^0.9.0" +prometheus-client = "<1" psycopg2 = "^2.8.3" PyJWT = "^2.4.0" python_dateutil = "^2.6.1" diff --git a/tests/ci_commands_script.sh b/tests/ci_commands_script.sh index 5ab1d6c6c..fef98a668 100755 --- a/tests/ci_commands_script.sh +++ b/tests/ci_commands_script.sh @@ -1,3 +1,5 @@ #!/usr/bin/env bash +mkdir -p /var/tmp/uwsgi_flask_metrics/ || true +export 
PROMETHEUS_MULTIPROC_DIR="/var/tmp/uwsgi_flask_metrics/"
poetry run pytest -vv --cov=fence --cov-report xml tests
diff --git a/tests/conftest.py b/tests/conftest.py
index f7b9d432d..273f4a496 100755
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -552,6 +552,13 @@ def drop_all():
     return app.db
 
 
+@pytest.fixture
+def prometheus_metrics_before(client):
+    resp = client.get("/metrics")
+    assert resp.status_code == 200, "Could not get prometheus metrics initial state"
+    yield resp.text
+
+
 @fence.app.route("/protected")
 @fence.auth.login_required({"access"})
 def protected_endpoint(methods=["GET"]):
@@ -966,6 +973,7 @@ def do_patch(authz):
         "mocker": mocker,
         # only gs or s3 for location, ignore specifiers after the _
         "indexed_file_location": protocol.split("_")[0],
+        "record": record,
     }
     return output
 
@@ -1734,3 +1742,24 @@ def get_all_shib_idps_patcher():
     yield mock
 
     get_all_shib_idps_patch.stop()
+
+
+@pytest.fixture(scope="function")
+def mock_authn_user_flask_context(app):
+    """
+    Mock `flask.g.user` to simulate a simple user who has authenticated.
+
+    This is primarily to ensure that tests which mock the start of authN (where sessions
+    get set) can still test the callbacks (where metrics logging relies on session data).
+    """
+    from flask import g
+
+    user_mock = MagicMock()
+    user_mock.id = 1
+    # assign the mocked user directly so `flask.g.user.id` resolves to the fake user's id
+    g.user = user_mock
+
+    yield
+
+    # remove the mocked user so tests that run after this one see an unmodified `g`
+    g.pop("user", None)
diff --git a/tests/login/test_base.py b/tests/login/test_base.py
index a9bfff7ec..a32452b2c 100644
--- a/tests/login/test_base.py
+++ b/tests/login/test_base.py
@@ -4,7 +4,7 @@
 
 
 @patch("fence.blueprints.login.base.prepare_login_log")
-def test_post_login_set_mfa(app, monkeypatch):
+def test_post_login_set_mfa(app, monkeypatch, mock_authn_user_flask_context):
     """
     Verifies the arborist is called with the mfa_policy if a given token
     contains the claims found in the configured multifactor_auth_claim_info
@@ -37,7 +37,7 @@
 
 @patch("fence.blueprints.login.base.prepare_login_log")
-def test_post_login_no_mfa_enabled(app, monkeypatch):
+def test_post_login_no_mfa_enabled(app, monkeypatch, mock_authn_user_flask_context):
     """
     Verifies arborist is not called when there is no multifactor_auth_claim_info
     defined for the given IDP.
""" diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 3c3af9fd0..ec7b7b8b7 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -81,7 +81,6 @@ def test_app_config(): {"patch_name": "fence.app_sessions"}, {"patch_name": "fence.app_register_blueprints"}, {"patch_name": "fence.oidc.oidc_server.OIDCServer.init_app"}, - {"patch_name": "fence._setup_prometheus"}, { "patch_name": "fence.resources.storage.StorageManager.__init__", "return_value": None, diff --git a/tests/test_audit_service.py b/tests/test_metrics.py similarity index 71% rename from tests/test_audit_service.py rename to tests/test_metrics.py index cbf5c8cf2..be7d6b2ab 100644 --- a/tests/test_audit_service.py +++ b/tests/test_metrics.py @@ -1,9 +1,15 @@ """ +Tests for the metrics features (Audit Service and Prometheus) + Tests for the Audit Service integration: - test the creation of presigned URL audit logs - test the creation of login audit logs - test the SQS flow +In Audit Service tests where it makes sense, we also test that Prometheus +metrics are created as expected. The last section tests Prometheus metrics +independently. + Note 1: there is no test for the /oauth2 endpoint: the /oauth2 endpoint should redirect the user to the /login endpoint (tested in `test_redirect_oauth2_authorize`), and the login endpoint should @@ -16,7 +22,6 @@ tests looking at users are not affected. """ - import boto3 import flask import json @@ -27,11 +32,17 @@ from unittest.mock import ANY, MagicMock, patch import fence +from fence.metrics import metrics from fence.config import config +from fence.blueprints.data.indexd import get_bucket_from_urls +from fence.models import User from fence.resources.audit.utils import _clean_authorization_request_url from tests import utils from tests.conftest import LOGIN_IDPS +# `reset_prometheus_metrics` must be imported even if not used so the autorun fixture gets triggered +from tests.utils.metrics import assert_prometheus_metrics, reset_prometheus_metrics + def test_clean_authorization_request_url(): """ @@ -111,6 +122,7 @@ def __init__(self, data, status_code=200): @pytest.mark.parametrize("protocol", ["gs", None]) def test_presigned_url_log( endpoint, + prometheus_metrics_before, protocol, client, user_client, @@ -126,7 +138,7 @@ def test_presigned_url_log( """ Get a presigned URL from Fence and make sure a call to the Audit Service was made to create an audit log. Test with and without a requested - protocol. + protocol. Also check that a prometheus metric is created. 
""" mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) audit_service_mocker = mock.patch( @@ -142,7 +154,7 @@ def test_presigned_url_log( else: path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol or 's3'}" resource_paths = ["/my/resource/path1", "/path2"] - indexd_client_with_arborist(resource_paths) + record = indexd_client_with_arborist(resource_paths)["record"] headers = { "Authorization": "Bearer " + jwt.encode( @@ -183,6 +195,39 @@ def test_presigned_url_log( }, ) + # check prometheus metrics + resp = client.get("/metrics") + assert resp.status_code == 200 + bucket = get_bucket_from_urls(record["urls"], expected_protocol) + size_in_kibibytes = record["size"] / 1024 + expected_metrics = [ + { + "name": "gen3_fence_presigned_url_total", + "labels": { + "action": "download", + "authz": resource_paths, + "bucket": bucket, + "drs": endpoint == "ga4gh-drs", + "protocol": expected_protocol, + "user_sub": user_client.user_id, + }, + "value": 1.0, + }, + { + "name": "gen3_fence_presigned_url_size", + "labels": { + "action": "download", + "authz": resource_paths, + "bucket": bucket, + "drs": endpoint == "ga4gh-drs", + "protocol": expected_protocol, + "user_sub": user_client.user_id, + }, + "value": size_in_kibibytes, + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) + @pytest.mark.parametrize( "indexd_client_with_arborist", ["s3_and_gs_acl_no_authz"], indirect=True @@ -411,10 +456,11 @@ def test_login_log_login_endpoint( rsa_private_key, db_session, # do not remove :-) See note at top of file monkeypatch, + prometheus_metrics_before, ): """ Test that logging in via any of the existing IDPs triggers the creation - of a login audit log. + of a login audit log and of a prometheus metric. 
""" mock_arborist_requests() audit_service_mocker = mock.patch( @@ -493,13 +539,14 @@ def test_login_log_login_endpoint( path = f"/login/{idp}/{callback_endpoint}" # SEE fence/blueprints/login/fence_login.py L91 response = client.get(path, headers=headers) assert response.status_code == 200, response + user_sub = db_session.query(User).filter(User.username == username).first().id audit_service_requests.post.assert_called_once_with( "http://audit-service/log/login", json={ "request_url": path, "status_code": 200, "username": username, - "sub": ANY, + "sub": user_sub, "idp": idp_name, "fence_idp": None, "shib_idp": None, @@ -510,10 +557,27 @@ def test_login_log_login_endpoint( if get_auth_info_patch: get_auth_info_patch.stop() + # check prometheus metrics + resp = client.get("/metrics") + assert resp.status_code == 200 + expected_metrics = [ + { + "name": "gen3_fence_login_total", + "labels": {"idp": "all", "user_sub": user_sub}, + "value": 1.0, + }, + { + "name": "gen3_fence_login_total", + "labels": {"idp": idp_name, "user_sub": user_sub}, + "value": 1.0, + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) + -########################## -# Push audit logs to SQS # -########################## +########################################## +# Audit Service - Push audit logs to SQS # +########################################## def mock_audit_service_sqs(app): @@ -638,3 +702,171 @@ def test_login_log_push_to_sqs( mocked_sqs.send_message.assert_called_once() get_auth_info_patch.stop() + + +###################### +# Prometheus metrics # +###################### + + +def test_disabled_prometheus_metrics(client, monkeypatch): + """ + When metrics gathering is not enabled, the metrics endpoint should not error, but it should + not return any data. + """ + monkeypatch.setitem(config, "ENABLE_PROMETHEUS_METRICS", False) + metrics.add_login_event( + user_sub="123", + idp="test_idp", + fence_idp="shib", + shib_idp="university", + client_id="test_azp", + ) + resp = client.get("/metrics") + assert resp.status_code == 200 + assert resp.text == "" + + +def test_record_prometheus_events(prometheus_metrics_before, client): + """ + Validate the returned value of the metrics endpoint before any event is logged, after an event + is logged, and after more events (one identical to the 1st one, and two different) are logged. + """ + # NOTE: To update later. The metrics utils don't support this yet. The gauges are not handled correctly. + # resp = client.get("/metrics") + # assert resp.status_code == 200 + # # no metrics have been recorded yet + # assert_prometheus_metrics(prometheus_metrics_before, resp.text, []) + + # record a login event and check that we get both a metric for the specific IDP, and an + # IDP-agnostic metric for the total number of login events. The latter should have no IDP + # information (no `fence_idp` or `shib_idp`). 
+    metrics.add_login_event(
+        user_sub="123",
+        idp="test_idp",
+        fence_idp="shib",
+        shib_idp="university",
+        client_id="test_azp",
+    )
+    resp = client.get("/metrics")
+    assert resp.status_code == 200
+    expected_metrics = [
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "test_idp",
+                "fence_idp": "shib",
+                "shib_idp": "university",
+                "client_id": "test_azp",
+            },
+            "value": 1.0,
+        },
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "all",
+                "fence_idp": "None",
+                "shib_idp": "None",
+                "client_id": "test_azp",
+            },
+            "value": 1.0,
+        },
+    ]
+    assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics)
+
+    # same login: should increase the existing counter by 1
+    metrics.add_login_event(
+        user_sub="123",
+        idp="test_idp",
+        fence_idp="shib",
+        shib_idp="university",
+        client_id="test_azp",
+    )
+    # login with different IDP labels: should create a new metric
+    metrics.add_login_event(
+        user_sub="123",
+        idp="another_idp",
+        fence_idp=None,
+        shib_idp=None,
+        client_id="test_azp",
+    )
+    # new signed URL event: should create a new metric
+    metrics.add_signed_url_event(
+        action="upload",
+        protocol="s3",
+        acl=None,
+        authz=["/test/path"],
+        bucket="s3://test-bucket",
+        user_sub="123",
+        client_id="test_azp",
+        drs=True,
+        size_in_kibibytes=1.2,
+    )
+    resp = client.get("/metrics")
+    assert resp.status_code == 200
+    expected_metrics = [
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "all",
+                "fence_idp": "None",
+                "shib_idp": "None",
+                "client_id": "test_azp",
+            },
+            "value": 3.0,  # recorded login events since the beginning of the test
+        },
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "test_idp",
+                "fence_idp": "shib",
+                "shib_idp": "university",
+                "client_id": "test_azp",
+            },
+            "value": 2.0,  # recorded login events for this idp, fence_idp and shib_idp combo
+        },
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "another_idp",
+                "fence_idp": "None",
+                "shib_idp": "None",
+                "client_id": "test_azp",
+            },
+            "value": 1.0,  # recorded login events for the different idp
+        },
+        {
+            "name": "gen3_fence_presigned_url_total",
+            "labels": {
+                "action": "upload",
+                "protocol": "s3",
+                "authz": ["/test/path"],
+                "bucket": "s3://test-bucket",
+                "user_sub": "123",
+                "client_id": "test_azp",
+                "drs": True,
+            },
+            "value": 1.0,  # recorded presigned URL events
+        },
+        {
+            "name": "gen3_fence_presigned_url_size",
+            "labels": {
+                "action": "upload",
+                "protocol": "s3",
+                "authz": ["/test/path"],
+                "bucket": "s3://test-bucket",
+                "user_sub": "123",
+                "client_id": "test_azp",
+                "drs": True,
+            },
+            "value": 1.2,  # presigned URL gauge with the file size as value
+        },
+    ]
+    assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics)
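For reference, the login counters asserted above surface in the `/metrics` response in the standard Prometheus exposition format, which the helpers in the new file below parse back into dicts. A hypothetical sample (the two lines mirror the docstring examples in `tests/utils/metrics.py`; the values are illustrative):

# Illustrative only: raw exposition text for the dual login counters (the per-IDP
# counter plus the `idp="all"` aggregate recorded by `add_login_event`).
EXAMPLE_LOGIN_METRICS = (
    "# HELP gen3_fence_login_total Fence logins\n"
    "# TYPE gen3_fence_login_total counter\n"
    'gen3_fence_login_total{client_id="test_azp",fence_idp="shib",idp="test_idp",shib_idp="university",user_sub="123"} 2.0\n'
    'gen3_fence_login_total{client_id="test_azp",fence_idp="None",idp="all",shib_idp="None",user_sub="123"} 3.0\n'
)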
diff --git a/tests/utils/metrics.py b/tests/utils/metrics.py
new file mode 100644
index 000000000..0443589a0
--- /dev/null
+++ b/tests/utils/metrics.py
@@ -0,0 +1,223 @@
+"""
+At the time of writing, Prometheus metrics out of the box can't be reset between each
+unit test. To be able to write independent unit tests, we have to manually save the "previous
+state" (see `prometheus_metrics_before` fixture) and compare it to the new state. This involves
+manually parsing the "previous state" (a python object) and the "current state" (raw text) into
+the same format so they can be compared:
+{ "name": "", "labels": {}, "value": 0 }
+
+The utility functions below can be used to check that the expected metrics have been recorded,
+while discarding any previous metrics.
+
+https://stackoverflow.com/questions/73198616/how-do-i-reset-a-prometheus-python-client-python-runtime-between-pytest-test-fun
+"""
+
+
+import os
+import shutil
+
+import pytest
+
+
+@pytest.fixture(autouse=True, scope="session")
+def reset_prometheus_metrics():
+    """
+    Delete the prometheus files after all the tests have run.
+    Without this, when running the tests locally, we would keep reading the metrics from
+    previous test runs.
+    So why not run this in-between the unit tests instead of the `assert_prometheus_metrics`
+    logic? Because it doesn't work: the prometheus client also keeps state, and the mismatch
+    causes errors. This only works when the client is reset too (new process).
+    """
+    yield
+
+    folder = os.environ["PROMETHEUS_MULTIPROC_DIR"]
+    for filename in os.listdir(folder):
+        file_path = os.path.join(folder, filename)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+            elif os.path.isdir(file_path):
+                shutil.rmtree(file_path)
+        except Exception as e:
+            print(f"Failed to delete Prometheus metrics file '{file_path}': {e}")
+
+
+def _diff_new_metrics_from_old_metrics(new_metrics, old_metrics):
+    """
+    Return a list of "current metrics" by comparing the "new metrics" (current state) to the "old metrics" (previous state).
+
+    Input metric format example: {
+        'gen3_fence_login_total{client_id="test_azp",fence_idp="shib",idp="test_idp",shib_idp="university",user_sub="123"}': 2.0,
+        'gen3_fence_login_total{client_id="test_azp",fence_idp="None",idp="all",shib_idp="None",user_sub="123"}': 3.0,
+    }
+
+    Functionality example:
+        old_metrics = { 'counter1': 1, 'counter2': 2, 'gauge1': 1 }
+        new_metrics = { 'counter1': 2, 'counter3': 1 }
+        Returned value = [
+            ('counter1', 1) (difference between 2 and 1),
+            ('counter3', 1)
+        ] (counter2 and gauge1 omitted since they are not part of the current state)
+
+    Args:
+        new_metrics (dict): format { <metric name + labels>: <value> }
+        old_metrics (dict): format { <metric name + labels>: <value> }
+
+    Return:
+        list< (<metric name + labels>, <value>) >
+    """
+
+    def metric_is_gauge(metric_name):
+        return not metric_name.endswith("_total") and not metric_name.endswith(
+            "_created"
+        )
+
+    diff = []
+    for long_metric_name, old_value in old_metrics.items():
+        # long_metric_name = metric name + labels (see example in docstring)
+        metric_name = long_metric_name.split("{")[0]
+        if long_metric_name not in new_metrics or metric_is_gauge(metric_name):
+            # ignore all old metrics that are not also present in the new metrics
+            continue
+        # the metric value generated by the current test is the difference between the previous
+        # value and the current value
+        val = new_metrics[long_metric_name] - old_value
+        if val != 0:
+            diff.append((long_metric_name, val))
+    for long_metric_name, new_value in new_metrics.items():
+        metric_name = long_metric_name.split("{")[0]
+        if metric_is_gauge(metric_name):  # all gauge metrics must be listed
+            diff.append((long_metric_name, new_value))
+        elif long_metric_name not in old_metrics:
+            diff.append((long_metric_name, new_value))
+    return diff
+
+
+def _parse_raw_metrics_to_dict(text_metric):
+    """
+    Parse raw text metrics into a dictionary of metric (metric name + labels) to value,
+    ignoring lines that are not metrics.
+
+    Args:
+        text_metric (str)
+            Example:
+                # TYPE gen3_fence_login_total counter
+                gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"} 2.0
+                # HELP gen3_fence_presigned_url_total Fence presigned urls
+                # TYPE gen3_fence_presigned_url_total counter
+                gen3_fence_presigned_url_total{client_id="test_azp",drs="True",user_sub="123"} 1.0
+
+    Return:
+        dict
+            Example:
+            {
+                "gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"}": 2.0,
+                "gen3_fence_presigned_url_total{client_id="test_azp",drs="True",user_sub="123"}": 1.0,
+            }
+    """
+    if not text_metric:
+        return {}
+    return {
+        " ".join(m.split(" ")[:-1]): float(m.split(" ")[-1])
+        for m in text_metric.strip().split("\n")
+        if not m.startswith("#")
+    }
+
+
+def _parse_raw_name_to_labels(text_metric_name):
+    """
+    Parse a raw metric name into a name and a dict of labels.
+
+    Example:
+        text_metric_name = `metric_name{param1="None",param2="upload",param3="['/test/path']"}`
+        Returned value = {
+            "name": "metric_name",
+            "labels": { "param1": "None", "param2": "upload", "param3": "['/test/path']" }
+        }
+
+    Args:
+        text_metric_name (str)
+
+    Returns:
+        dict
+    """
+    name = text_metric_name.split("{")[0]
+    labels = text_metric_name.split("{")[1].split("}")[0].split('",')
+    labels = {l.split("=")[0]: l.split("=")[1].strip('"') for l in labels}
+    return {"name": name, "labels": labels}
+
+
+def assert_prometheus_metrics(
+    previous_text_metrics, current_text_metrics, expected_metrics
+):
+    """
+    Compare the previous and current states of the prometheus metrics, and check that the
+    difference between the two matches the new metrics a test expects to have recorded.
+
+    Expected metrics: only provide the labels we need to check for; the rest will be ignored.
+
+    Args:
+        previous_text_metrics (str): previous state of prometheus metrics
+        current_text_metrics (str): current state
+        Example `previous_text_metrics` or `current_text_metrics`:
+            # TYPE gen3_fence_login_total counter
+            gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"} 2.0
+            # HELP gen3_fence_presigned_url_total Fence presigned urls
+            # TYPE gen3_fence_presigned_url_total counter
+            gen3_fence_presigned_url_total{acl="None",action="upload",authz="['/test/path']",bucket="s3://test-bucket",client_id="test_azp",drs="True",protocol="s3",user_sub="123"} 1.0
+        expected_metrics (list): the expected difference between previous state and current state.
+            Only provide the labels we need to check; omitted labels will be ignored even if they
+            are present in the current state.
+            Example: [
+                {
+                    'name': 'gen3_fence_login_total',
+                    'labels': {
+                        'idp': 'test_idp', 'shib_idp': 'university', 'user_sub': '123'
+                    },
+                    'value': 2.0
+                }
+            ]
+    """
+    old_metrics = _parse_raw_metrics_to_dict(previous_text_metrics)
+    print("Old metrics:")
+    for k, v in old_metrics.items():
+        print(f"- {k} = {v}")
+
+    new_metrics = _parse_raw_metrics_to_dict(current_text_metrics)
+    print("Received metrics:")
+    for k, v in new_metrics.items():
+        print(f"- {k} = {v}")
+
+    diff_metrics = _diff_new_metrics_from_old_metrics(new_metrics, old_metrics)
+    current_metrics = []
+    print("Diff:")
+    for (metric_name, val) in diff_metrics:
+        parsed_m = _parse_raw_name_to_labels(metric_name)
+        parsed_m["value"] = val
+        current_metrics.append(parsed_m)
+        print(f"- {parsed_m}")
+
+    print("Expecting metrics:")
+    # check that for each metric+label combination, the value is identical to the expected value
+    for expected_m in expected_metrics:
+        found = False
+        print(f"- {expected_m}")
+        for current_m in current_metrics:  # look for the right metric
+            if current_m["name"] != expected_m["name"]:
+                continue
+            # if the metric name is identical, check the labels
+            right_labels = True
+            for label_k, label_v in expected_m["labels"].items():
+                if current_m["labels"].get(label_k) != str(label_v):
+                    right_labels = False
+                    break
+            # if both the name and the labels are identical, this is the right metric:
+            # check that the value is the same as expected
+            if right_labels:
+                assert (
+                    current_m["value"] == expected_m["value"]
+                ), f"Wrong value for metric: {expected_m}"
+                found = True
+                break  # we found the right metric and it has the right value: moving on
+        assert found, f"Missing metric: {expected_m}"
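To make the helper contracts concrete, here is a small end-to-end sketch under invented metric values; only the three function names come from the file above, while the exposition text itself is made up:

# Illustrative only: round-trips invented exposition text through the helpers above.
from tests.utils.metrics import (
    _diff_new_metrics_from_old_metrics,
    _parse_raw_metrics_to_dict,
    _parse_raw_name_to_labels,
)

previous_state = """
# TYPE gen3_fence_login_total counter
gen3_fence_login_total{idp="test_idp",user_sub="123"} 2.0
"""
current_state = """
# TYPE gen3_fence_login_total counter
gen3_fence_login_total{idp="test_idp",user_sub="123"} 3.0
# TYPE gen3_fence_presigned_url_size gauge
gen3_fence_presigned_url_size{protocol="s3",user_sub="123"} 1.2
"""

old = _parse_raw_metrics_to_dict(previous_state)
new = _parse_raw_metrics_to_dict(current_state)

# counters are reported as the increment over the previous state (3.0 - 2.0),
# while gauges always surface at their absolute value
diff = dict(_diff_new_metrics_from_old_metrics(new, old))
assert diff['gen3_fence_login_total{idp="test_idp",user_sub="123"}'] == 1.0
assert diff['gen3_fence_presigned_url_size{protocol="s3",user_sub="123"}'] == 1.2

# a raw metric name splits back into the {"name": ..., "labels": ...} shape
parsed = _parse_raw_name_to_labels('gen3_fence_login_total{idp="test_idp",user_sub="123"}')
assert parsed == {
    "name": "gen3_fence_login_total",
    "labels": {"idp": "test_idp", "user_sub": "123"},
}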