From 7b0aa6068f48f4b5b2c49181b8667c518a90c0b8 Mon Sep 17 00:00:00 2001 From: emalinowski Date: Fri, 26 Jul 2024 09:37:35 -0500 Subject: [PATCH] Prometheus metrics support for logins and presigned URLs (#1156) --- .secrets.baseline | 6 +- clear_prometheus_multiproc | 6 +- fence/__init__.py | 64 ++--- fence/blueprints/data/indexd.py | 52 +++- fence/blueprints/ga4gh.py | 2 + fence/blueprints/login/base.py | 9 + fence/blueprints/login/google.py | 1 + fence/config-default.yaml | 1 + fence/metrics.py | 199 ++++++++++++++ openapis/swagger.yaml | 12 + poetry.lock | 129 +++++---- pyproject.toml | 4 +- tests/ci_commands_script.sh | 2 + tests/conftest.py | 42 +++ tests/login/test_base.py | 4 +- tests/test_app_config.py | 1 - ...{test_audit_service.py => test_metrics.py} | 248 +++++++++++++++++- tests/utils/metrics.py | 223 ++++++++++++++++ 18 files changed, 864 insertions(+), 141 deletions(-) create mode 100644 fence/metrics.py rename tests/{test_audit_service.py => test_metrics.py} (71%) create mode 100644 tests/utils/metrics.py diff --git a/.secrets.baseline b/.secrets.baseline index 8c97f29fb..9aa531c19 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -268,14 +268,14 @@ "filename": "tests/conftest.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 1561 + "line_number": 1569 }, { "type": "Base64 High Entropy String", "filename": "tests/conftest.py", "hashed_secret": "227dea087477346785aefd575f91dd13ab86c108", "is_verified": false, - "line_number": 1583 + "line_number": 1593 } ], "tests/credentials/google/test_credentials.py": [ @@ -422,5 +422,5 @@ } ] }, - "generated_at": "2024-03-16T00:09:27Z" + "generated_at": "2024-07-25T17:19:58Z" } diff --git a/clear_prometheus_multiproc b/clear_prometheus_multiproc index 4bb2b425f..af1ba6d18 100755 --- a/clear_prometheus_multiproc +++ b/clear_prometheus_multiproc @@ -4,6 +4,8 @@ set -ex rm -Rf $1 -mkdir $1 +mkdir -p $1 chmod 755 $1 -chown 100:101 $1 +if id -u nginx &>/dev/null; then + chown $(id -u nginx):$(id -g nginx) $1 +fi diff --git a/fence/__init__.py b/fence/__init__.py index fc0fe97cf..31cb76eda 100755 --- a/fence/__init__.py +++ b/fence/__init__.py @@ -1,21 +1,17 @@ from collections import OrderedDict import os -import tempfile from urllib.parse import urljoin -import flask -from flask_cors import CORS -from sqlalchemy.orm import scoped_session -from flask import current_app -from werkzeug.local import LocalProxy from authutils.oauth2.client import OAuthClient -from cdislogging import get_logger -from gen3authz.client.arborist.client import ArboristClient -from flask_wtf.csrf import validate_csrf -from werkzeug.middleware.dispatcher import DispatcherMiddleware from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceNotFoundError -from urllib.parse import urlparse +from cdislogging import get_logger +import flask +from flask_cors import CORS +from flask_wtf.csrf import validate_csrf +from gen3authz.client.arborist.client import ArboristClient +from sqlalchemy.orm import scoped_session + # Can't read config yet. Just set to debug for now, else no handlers. 
# Later, in app_config(), will actually set level based on config @@ -31,6 +27,7 @@ ) from fence.auth import logout, build_redirect_url +from fence.metrics import metrics from fence.blueprints.data.indexd import S3IndexedFileLocation from fence.blueprints.login.utils import allowed_login_redirects, domain from fence.errors import UserError @@ -67,11 +64,6 @@ import fence.blueprints.ga4gh -# for some reason the temp dir does not get created properly if we move -# this statement to `_setup_prometheus()` -PROMETHEUS_TMP_COUNTER_DIR = tempfile.TemporaryDirectory() - - app = flask.Flask(__name__) CORS(app=app, headers=["content-type", "accept"], expose_headers="*") @@ -102,6 +94,9 @@ def app_init( app_sessions(app) app_register_blueprints(app) server.init_app(app, query_client=query_client) + logger.info( + f"Prometheus metrics are{'' if config['ENABLE_PROMETHEUS_METRICS'] else ' NOT'} enabled." + ) def app_sessions(app): @@ -206,6 +201,15 @@ def public_keys(): {"keys": [(keypair.kid, keypair.public_key) for keypair in app.keypairs]} ) + @app.route("/metrics") + def metrics_endpoint(): + """ + /!\ There is no authz control on this endpoint! + In cloud-automation setups, access to this endpoint is blocked at the revproxy level. + """ + data, content_type = metrics.get_latest_metrics() + return flask.Response(data, content_type=content_type) + def _check_azure_storage(app): """ @@ -365,13 +369,6 @@ def app_config( _setup_data_endpoint_and_boto(app) _load_keys(app, root_dir) - app.prometheus_counters = {} - if config["ENABLE_PROMETHEUS_METRICS"]: - logger.info("Enabling Prometheus metrics...") - _setup_prometheus(app) - else: - logger.info("Prometheus metrics are NOT enabled.") - app.storage_manager = StorageManager(config["STORAGE_CREDENTIALS"], logger=logger) app.debug = config["DEBUG"] @@ -495,27 +492,6 @@ def _setup_audit_service_client(app): ) -def _setup_prometheus(app): - # This environment variable MUST be declared before importing the - # prometheus modules (or unit tests fail) - # More details on this awkwardness: https://github.com/prometheus/client_python/issues/250 - os.environ["prometheus_multiproc_dir"] = PROMETHEUS_TMP_COUNTER_DIR.name - - from prometheus_client import ( - CollectorRegistry, - multiprocess, - make_wsgi_app, - ) - - app.prometheus_registry = CollectorRegistry() - multiprocess.MultiProcessCollector(app.prometheus_registry) - - # Add prometheus wsgi middleware to route /metrics requests - app.wsgi_app = DispatcherMiddleware( - app.wsgi_app, {"/metrics": make_wsgi_app(registry=app.prometheus_registry)} - ) - - @app.errorhandler(Exception) def handle_error(error): """ diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index f7b9488f6..380fcd43e 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -49,6 +49,7 @@ from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports from fence.resources.audit.utils import enable_audit_logging from fence.utils import get_valid_expiration_from_request +from fence.metrics import metrics from . 
import multipart_upload from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id @@ -77,6 +78,7 @@ def get_signed_url_for_file( ga4gh_passports=None, db_session=None, bucket=None, + drs="False", ): requested_protocol = requested_protocol or flask.request.args.get("protocol", None) r_pays_project = flask.request.args.get("userProject", None) @@ -164,12 +166,33 @@ def get_signed_url_for_file( user_sub=flask.g.audit_data.get("sub", ""), client_id=_get_client_id(), requested_protocol=requested_protocol, + action=action, + drs=drs, ) return {"url": signed_url} -def _log_signed_url_data_info(indexed_file, user_sub, client_id, requested_protocol): +def get_bucket_from_urls(urls, protocol): + """ + Return the bucket name from the first of the provided URLs that starts with the given protocol (usually `gs`, `s3`, `az`...) + """ + bucket = "" + for url in urls: + if "://" in url: + # Extract the protocol and the rest of the URL + bucket_protocol, rest_of_url = url.split("://", 1) + + if bucket_protocol == protocol: + # Extract bucket name + bucket = f"{bucket_protocol}://{rest_of_url.split('/')[0]}" + break + return bucket + + +def _log_signed_url_data_info( + indexed_file, user_sub, client_id, requested_protocol, action, drs="False" +): size_in_kibibytes = (indexed_file.index_document.get("size") or 0) / 1024 acl = indexed_file.index_document.get("acl") authz = indexed_file.index_document.get("authz") @@ -180,23 +203,25 @@ def _log_signed_url_data_info(indexed_file, user_sub, client_id, requested_proto protocol = indexed_file.indexed_file_locations[0].protocol # figure out which bucket was used based on the protocol - bucket = "" - for url in indexed_file.index_document.get("urls", []): - bucket_name = None - if "://" in url: - # Extract the protocol and the rest of the URL - bucket_protocol, rest_of_url = url.split("://", 1) - - if bucket_protocol == protocol: - # Extract bucket name - bucket = f"{bucket_protocol}://{rest_of_url.split('/')[0]}" - break + bucket = get_bucket_from_urls(indexed_file.index_document.get("urls", []), protocol) logger.info( - f"Signed URL Generated. size_in_kibibytes={size_in_kibibytes} " + f"Signed URL Generated. action={action} size_in_kibibytes={size_in_kibibytes} " f"acl={acl} authz={authz} bucket={bucket} user_sub={user_sub} client_id={client_id}" ) + metrics.add_signed_url_event( + action, + protocol, + acl, + authz, + bucket, + user_sub, + client_id, + drs, + size_in_kibibytes, + ) + def _get_client_id(): client_id = "Unknown Client" @@ -208,6 +233,7 @@ def _get_client_id(): return client_id + def prepare_presigned_url_audit_log(protocol, indexed_file): """ Store in `flask.g.audit_data` the data needed to record an audit log. 
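The bucket-parsing loop that previously lived inline in `_log_signed_url_data_info` is extracted above into the standalone `get_bucket_from_urls` helper, so the log line and the new Prometheus event can share it. A minimal sketch of its behavior, using made-up record URLs (only the helper itself comes from this patch):

# Illustrative only: exercises the `get_bucket_from_urls` helper added above.
from fence.blueprints.data.indexd import get_bucket_from_urls

urls = [
    "gs://some-bucket/object-key",  # hypothetical indexd record URLs
    "s3://other-bucket/folder/object-key",
]
# returns the scheme-qualified bucket of the first URL matching the requested protocol
assert get_bucket_from_urls(urls, "s3") == "s3://other-bucket"
assert get_bucket_from_urls(urls, "gs") == "gs://some-bucket"
# no URL matches the protocol: the helper falls back to an empty string
assert get_bucket_from_urls(urls, "az") == ""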
diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py
index 7b4ef0603..890da83f6 100644
--- a/fence/blueprints/ga4gh.py
+++ b/fence/blueprints/ga4gh.py
@@ -41,5 +41,7 @@ def get_ga4gh_signed_url(object_id, access_id):
         object_id,
         requested_protocol=access_id,
         ga4gh_passports=ga4gh_passports,
+        drs="True",
     )
+
     return flask.jsonify(result)
diff --git a/fence/blueprints/login/base.py b/fence/blueprints/login/base.py
index 0b6ae3f95..08fcab61d 100644
--- a/fence/blueprints/login/base.py
+++ b/fence/blueprints/login/base.py
@@ -7,6 +7,7 @@
 from fence.blueprints.login.redirect import validate_redirect
 from fence.config import config
 from fence.errors import UserError
+from fence.metrics import metrics
 
 logger = get_logger(__name__)
 
@@ -133,6 +134,14 @@ def get(self):
     def post_login(self, user=None, token_result=None, **kwargs):
         prepare_login_log(self.idp_name)
 
+        metrics.add_login_event(
+            user_sub=flask.g.user.id,
+            idp=self.idp_name,
+            fence_idp=flask.session.get("fence_idp"),
+            shib_idp=flask.session.get("shib_idp"),
+            client_id=flask.session.get("client_id"),
+        )
+
         if token_result:
             username = token_result.get(self.username_field)
             if self.is_mfa_enabled:
diff --git a/fence/blueprints/login/google.py b/fence/blueprints/login/google.py
index 0fa3e4cb5..2c7570795 100644
--- a/fence/blueprints/login/google.py
+++ b/fence/blueprints/login/google.py
@@ -25,4 +25,5 @@ def get(self):
             config.get("BASE_URL", "")
             + "/link/google/callback?code={}".format(flask.request.args.get("code"))
         )
+
         return super(GoogleCallback, self).get()
diff --git a/fence/config-default.yaml b/fence/config-default.yaml
index a73ebb976..5e43e21dc 100755
--- a/fence/config-default.yaml
+++ b/fence/config-default.yaml
@@ -62,6 +62,7 @@ MOCK_STORAGE: true
 # WARNING: ONLY set to true when fence will be deployed in such a way that it will
 # ONLY receive traffic from internal clients and can safely use HTTP.
 AUTHLIB_INSECURE_TRANSPORT: true
+
 # enable Prometheus Metrics for observability purposes
 #
 # WARNING: Any counters, gauges, histograms, etc. should be carefully
diff --git a/fence/metrics.py b/fence/metrics.py
new file mode 100644
index 000000000..acdb200a9
--- /dev/null
+++ b/fence/metrics.py
@@ -0,0 +1,199 @@
+"""
+Metrics are collected by the Prometheus client and exposed at the `/metrics` endpoint.
+
+To add a new metric:
+- Add a new method to the `Metrics` class below (see `add_login_event` and `add_signed_url_event`
+for examples).
+- The new method should call the `_increment_counter` and/or `_set_gauge` methods with the
+appropriate metric name and labels.
+- Call the new method from the code where relevant, for example:
+  from fence.metrics import metrics
+  metrics.add_login_event(...)
+- Add unit tests to the `tests/test_metrics.py` file.
+""" + + +import os +import pathlib + +from cdislogging import get_logger +from prometheus_client import ( + CollectorRegistry, + multiprocess, + Counter, + Gauge, + generate_latest, + CONTENT_TYPE_LATEST, +) + +from fence.config import config + + +logger = get_logger(__name__) + + +class Metrics: + """ + Class to handle Prometheus metrics + Attributes: + registry (CollectorRegistry): Prometheus registry + metrics (dict): Dictionary to store Prometheus metrics + """ + + def __init__(self, prometheus_dir="/var/tmp/uwsgi_flask_metrics"): + pathlib.Path(prometheus_dir).mkdir(parents=True, exist_ok=True) + os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_dir + + self._registry = CollectorRegistry() + multiprocess.MultiProcessCollector(self._registry) + self._metrics = {} + + # set the descriptions of new metrics here. Descriptions not specified here + # will default to the metric name. + self._counter_descriptions = { + "gen3_fence_presigned_url": "Fence presigned urls", + "gen3_fence_login": "Fence logins", + } + self._gauge_descriptions = { + "gen3_fence_presigned_url_size": "Fence presigned urls", + } + + def get_latest_metrics(self): + """ + Generate the latest Prometheus metrics + Returns: + str: Latest Prometheus metrics + str: Content type of the latest Prometheus metrics + """ + # When metrics gathering is not enabled, the metrics endpoint should not error, but it should + # not return any data. + if not config["ENABLE_PROMETHEUS_METRICS"]: + return "", None + + return generate_latest(self._registry), CONTENT_TYPE_LATEST + + def _increment_counter(self, name, labels): + """ + Increment a Prometheus counter metric. + Note that this function should not be called directly - implement a function like + `add_login_event` instead. A metric's labels should always be consistent. + Args: + name (str): Name of the metric + labels (dict): Dictionary of labels for the metric + """ + # create the counter if it doesn't already exist + if name not in self._metrics: + description = self._counter_descriptions.get(name, name) + logger.info( + f"Creating counter '{name}' with description '{description}' and labels: {labels}" + ) + self._metrics[name] = Counter(name, description, [*labels.keys()]) + elif type(self._metrics[name]) != Counter: + raise ValueError( + f"Trying to create counter '{name}' but a {type(self._metrics[name])} with this name already exists" + ) + + logger.debug(f"Incrementing counter '{name}' with labels: {labels}") + self._metrics[name].labels(*labels.values()).inc() + + def _set_gauge(self, name, labels, value): + """ + Set a Prometheus gauge metric. + Note that this function should not be called directly - implement a function like + `add_signed_url_event` instead. A metric's labels should always be consistent. 
+        Args:
+            name (str): Name of the metric
+            labels (dict): Dictionary of labels for the metric
+            value (float): Value to set the metric to
+        """
+        # create the gauge if it doesn't already exist
+        if name not in self._metrics:
+            description = self._gauge_descriptions.get(name, name)
+            logger.info(
+                f"Creating gauge '{name}' with description '{description}' and labels: {labels}"
+            )
+            self._metrics[name] = Gauge(name, description, [*labels.keys()])
+        elif type(self._metrics[name]) != Gauge:
+            raise ValueError(
+                f"Trying to create gauge '{name}' but a {type(self._metrics[name])} with this name already exists"
+            )
+
+        logger.debug(f"Setting gauge '{name}' with labels: {labels}")
+        self._metrics[name].labels(*labels.values()).set(value)
+
+    def add_login_event(self, user_sub, idp, fence_idp, shib_idp, client_id):
+        """
+        Record a login event
+        """
+        if not config["ENABLE_PROMETHEUS_METRICS"]:
+            return
+        self._increment_counter(
+            "gen3_fence_login",
+            {
+                "user_sub": user_sub,
+                "idp": idp,
+                "client_id": client_id,
+                "fence_idp": fence_idp,
+                "shib_idp": shib_idp,
+            },
+        )
+        self._increment_counter(
+            "gen3_fence_login",
+            {
+                "user_sub": user_sub,
+                "idp": "all",
+                "client_id": client_id,
+                # when counting all IDPs, we don't care about the fence and shib IDP values
+                "fence_idp": None,
+                "shib_idp": None,
+            },
+        )
+
+    def add_signed_url_event(
+        self,
+        action,
+        protocol,
+        acl,
+        authz,
+        bucket,
+        user_sub,
+        client_id,
+        drs,
+        size_in_kibibytes,
+    ):
+        """
+        Record a signed URL event
+        """
+        if not config["ENABLE_PROMETHEUS_METRICS"]:
+            return
+        self._increment_counter(
+            "gen3_fence_presigned_url",
+            {
+                "action": action,
+                "protocol": protocol,
+                "acl": acl,
+                "authz": authz,
+                "bucket": bucket,
+                "user_sub": user_sub,
+                "client_id": client_id,
+                "drs": drs,
+            },
+        )
+        self._set_gauge(
+            "gen3_fence_presigned_url_size",
+            {
+                "action": action,
+                "protocol": protocol,
+                "acl": acl,
+                "authz": authz,
+                "bucket": bucket,
+                "user_sub": user_sub,
+                "client_id": client_id,
+                "drs": drs,
+            },
+            size_in_kibibytes,
+        )
+
+
+# Initialize the Metrics instance
+metrics = Metrics()
diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml
index 70d1f8e1a..0336fec54 100644
--- a/openapis/swagger.yaml
+++ b/openapis/swagger.yaml
@@ -68,6 +68,18 @@ paths:
         description: successful operation
         schema:
           $ref: '#/definitions/SystemVersionOutputRef'
+  /metrics:
+    get:
+      tags:
+        - system
+      summary: Get Prometheus metrics
+      description: >-
+        Returns Prometheus metrics if the `ENABLE_PROMETHEUS_METRICS` setting is `True`.
+        By default, this endpoint is public. Authorization controls can be set up externally;
+        in cloud-automation setups, access to this endpoint is blocked at the revproxy level.
+      responses:
+        '200':
+          description: successful operation
   /oauth2/authorize:
     get:
       tags:
diff --git a/poetry.lock b/poetry.lock
index fb705cd5c..b3ea2a211 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]] name = "addict" @@ -170,13 +170,13 @@ aio = ["aiohttp (>=3.0)"] [[package]] name = "azure-storage-blob" -version = "12.20.0" +version = "12.21.0" description = "Microsoft Azure Blob Storage Client Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure-storage-blob-12.20.0.tar.gz", hash = "sha256:eeb91256e41d4b5b9bad6a87fd0a8ade07dd58aa52344e2c8d2746e27a017d3b"}, - {file = "azure_storage_blob-12.20.0-py3-none-any.whl", hash = "sha256:de6b3bf3a90e9341a6bcb96a2ebe981dffff993e9045818f6549afea827a52a9"}, + {file = "azure-storage-blob-12.21.0.tar.gz", hash = "sha256:b9722725072f5b7373c0f4dd6d78fbae2bb37bffc5c3e01731ab8c750ee8dd7e"}, + {file = "azure_storage_blob-12.21.0-py3-none-any.whl", hash = "sha256:f9ede187dd5a0ef296b583a7c1861c6938ddd6708d6e70f4203a163c2ab42d43"}, ] [package.dependencies] @@ -250,17 +250,17 @@ files = [ [[package]] name = "boto3" -version = "1.34.143" +version = "1.34.147" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.143-py3-none-any.whl", hash = "sha256:0d16832f23e6bd3ae94e35ea8e625529850bfad9baccd426de96ad8f445d8e03"}, - {file = "boto3-1.34.143.tar.gz", hash = "sha256:b590ce80c65149194def43ebf0ea1cf0533945502507837389a8d22e3ecbcf05"}, + {file = "boto3-1.34.147-py3-none-any.whl", hash = "sha256:e1cef9a1a301866bcdee32ae0c699465eb2345f9a8e613a5835821430165ff6d"}, + {file = "boto3-1.34.147.tar.gz", hash = "sha256:9ec1c6ab22588242a47549f51a63dfc7c21fdf95a94820fc6e629ab060c38bd9"}, ] [package.dependencies] -botocore = ">=1.34.143,<1.35.0" +botocore = ">=1.34.147,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -269,13 +269,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.143" +version = "1.34.147" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.143-py3-none-any.whl", hash = "sha256:094aea179e8aaa1bc957ad49cc27d93b189dd3a1f3075d8b0ca7c445a2a88430"}, - {file = "botocore-1.34.143.tar.gz", hash = "sha256:059f032ec05733a836e04e869c5a15534420102f93116f3bc9a5b759b0651caf"}, + {file = "botocore-1.34.147-py3-none-any.whl", hash = "sha256:be94a2f4874b1d1705cae2bd512c475047497379651678593acb6c61c50d91de"}, + {file = "botocore-1.34.147.tar.gz", hash = "sha256:2e8f000b77e4ca345146cb2edab6403769a517b564f627bb084ab335417f3dbe"}, ] [package.dependencies] @@ -313,13 +313,13 @@ files = [ [[package]] name = "cachetools" -version = "5.3.3" +version = "5.4.0" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" files = [ - {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"}, - {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, + {file = "cachetools-5.4.0-py3-none-any.whl", hash = "sha256:3ae3b49a3d5e28a77a0be2b37dbcb89005058959cb2323858c2657c4a8cab474"}, + {file = "cachetools-5.4.0.tar.gz", hash = "sha256:b8adc2e7c07f105ced7bc56dbb6dfbe7c4a00acce20e2227b3f355be89bc6827"}, ] [[package]] @@ -684,43 +684,38 @@ yaml = ["PyYAML (>=3.10)"] [[package]] name = "cryptography" -version = "42.0.8" +version = "43.0.0" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e"}, - {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7"}, - {file = "cryptography-42.0.8-cp37-abi3-win32.whl", hash = "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2"}, - {file = "cryptography-42.0.8-cp37-abi3-win_amd64.whl", hash = "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba"}, - {file = "cryptography-42.0.8-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14"}, - {file = "cryptography-42.0.8-cp39-abi3-win32.whl", hash = "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c"}, - {file = "cryptography-42.0.8-cp39-abi3-win_amd64.whl", hash = 
"sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad"}, - {file = "cryptography-42.0.8.tar.gz", hash = "sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2"}, + {file = "cryptography-43.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:64c3f16e2a4fc51c0d06af28441881f98c5d91009b8caaff40cf3548089e9c74"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dcdedae5c7710b9f97ac6bba7e1052b95c7083c9d0e9df96e02a1932e777895"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d9a1eca329405219b605fac09ecfc09ac09e595d6def650a437523fcd08dd22"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ea9e57f8ea880eeea38ab5abf9fbe39f923544d7884228ec67d666abd60f5a47"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9a8d6802e0825767476f62aafed40532bd435e8a5f7d23bd8b4f5fd04cc80ecf"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cc70b4b581f28d0a254d006f26949245e3657d40d8857066c2ae22a61222ef55"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a997df8c1c2aae1e1e5ac49c2e4f610ad037fc5a3aadc7b64e39dea42249431"}, + {file = "cryptography-43.0.0-cp37-abi3-win32.whl", hash = "sha256:6e2b11c55d260d03a8cf29ac9b5e0608d35f08077d8c087be96287f43af3ccdc"}, + {file = "cryptography-43.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:31e44a986ceccec3d0498e16f3d27b2ee5fdf69ce2ab89b52eaad1d2f33d8778"}, + {file = "cryptography-43.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:7b3f5fe74a5ca32d4d0f302ffe6680fcc5c28f8ef0dc0ae8f40c0f3a1b4fca66"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac1955ce000cb29ab40def14fd1bbfa7af2017cca696ee696925615cafd0dce5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:299d3da8e00b7e2b54bb02ef58d73cd5f55fb31f33ebbf33bd00d9aa6807df7e"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ee0c405832ade84d4de74b9029bedb7b31200600fa524d218fc29bfa371e97f5"}, + {file = 
"cryptography-43.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb013933d4c127349b3948aa8aaf2f12c0353ad0eccd715ca789c8a0f671646f"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fdcb265de28585de5b859ae13e3846a8e805268a823a12a4da2597f1f5afc9f0"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2905ccf93a8a2a416f3ec01b1a7911c3fe4073ef35640e7ee5296754e30b762b"}, + {file = "cryptography-43.0.0-cp39-abi3-win32.whl", hash = "sha256:47ca71115e545954e6c1d207dd13461ab81f4eccfcb1345eac874828b5e3eaaf"}, + {file = "cryptography-43.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:0663585d02f76929792470451a5ba64424acc3cd5227b03921dab0e2f27b1709"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c6d112bf61c5ef44042c253e4859b3cbbb50df2f78fa8fae6747a7814484a70"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:844b6d608374e7d08f4f6e6f9f7b951f9256db41421917dfb2d003dde4cd6b66"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:51956cf8730665e2bdf8ddb8da0056f699c1a5715648c1b0144670c1ba00b48f"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:aae4d918f6b180a8ab8bf6511a419473d107df4dbb4225c7b48c5c9602c38c7f"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:232ce02943a579095a339ac4b390fbbe97f5b5d5d107f8a08260ea2768be8cc2"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5bcb8a5620008a8034d39bce21dc3e23735dfdb6a33a06974739bfa04f853947"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:08a24a7070b2b6804c1940ff0f910ff728932a9d0e80e7814234269f9d46d069"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e9c5266c432a1e23738d178e51c2c7a5e2ddf790f248be939448c0ba2021f9d1"}, + {file = "cryptography-43.0.0.tar.gz", hash = "sha256:b88075ada2d51aa9f18283532c9f60e72170041bba88d7f37e49cbb10275299e"}, ] [package.dependencies] @@ -733,7 +728,7 @@ nox = ["nox"] pep8test = ["check-sdist", "click", "mypy", "ruff"] sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi", "cryptography-vectors (==43.0.0)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] [[package]] @@ -834,13 +829,13 @@ idna = ">=2.0.0" [[package]] name = "exceptiongroup" -version = "1.2.1" +version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, - {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] [package.extras] @@ -1102,13 +1097,13 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] [[package]] name = "google-cloud-storage" -version = "2.17.0" +version = "2.18.0" description = "Google Cloud Storage API client library" optional = false 
python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-2.17.0.tar.gz", hash = "sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388"}, - {file = "google_cloud_storage-2.17.0-py2.py3-none-any.whl", hash = "sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1"}, + {file = "google_cloud_storage-2.18.0-py2.py3-none-any.whl", hash = "sha256:e8e1a9577952143c3fca8163005ecfadd2d70ec080fa158a8b305000e2c22fbb"}, + {file = "google_cloud_storage-2.18.0.tar.gz", hash = "sha256:0aa3f7c57f3632f81b455d91558d2b27ada96eee2de3aaa17f689db1470d9578"}, ] [package.dependencies] @@ -1120,7 +1115,8 @@ google-resumable-media = ">=2.6.0" requests = ">=2.18.0,<3.0.0dev" [package.extras] -protobuf = ["protobuf (<5.0.0dev)"] +protobuf = ["protobuf (<6.0.0dev)"] +tracing = ["opentelemetry-api (>=1.1.0)"] [[package]] name = "google-crc32c" @@ -1391,13 +1387,13 @@ files = [ [[package]] name = "importlib-metadata" -version = "8.0.0" +version = "8.1.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-8.0.0-py3-none-any.whl", hash = "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f"}, - {file = "importlib_metadata-8.0.0.tar.gz", hash = "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812"}, + {file = "importlib_metadata-8.1.0-py3-none-any.whl", hash = "sha256:3cd29f739ed65973840b068e3132135ce954c254d48b5b640484467ef7ab3c8c"}, + {file = "importlib_metadata-8.1.0.tar.gz", hash = "sha256:fcdcb1d5ead7bdf3dd32657bb94ebe9d2aabfe89a19782ddc32da5041d6ebfb4"}, ] [package.dependencies] @@ -1734,13 +1730,13 @@ dev = ["pre-commit", "tox"] [[package]] name = "prometheus-client" -version = "0.9.0" +version = "0.20.0" description = "Python client for the Prometheus monitoring system." 
optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "prometheus_client-0.9.0-py2.py3-none-any.whl", hash = "sha256:b08c34c328e1bf5961f0b4352668e6c8f145b4a087e09b7296ef62cbe4693d35"}, - {file = "prometheus_client-0.9.0.tar.gz", hash = "sha256:9da7b32f02439d8c04f7777021c304ed51d9ec180604700c1ba72a4d44dceb03"}, + {file = "prometheus_client-0.20.0-py3-none-any.whl", hash = "sha256:cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7"}, + {file = "prometheus_client-0.20.0.tar.gz", hash = "sha256:287629d00b147a32dcb2be0b9df905da599b2d82f80377083ec8463309a4bb89"}, ] [package.extras] @@ -1818,13 +1814,13 @@ files = [ [[package]] name = "pyaml" -version = "24.4.0" +version = "24.7.0" description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" optional = false python-versions = ">=3.8" files = [ - {file = "pyaml-24.4.0-py3-none-any.whl", hash = "sha256:acc2b39c55cb0cbe4f694a6d3886f89ad3d2a5b3efcece526202f8de9a6b27de"}, - {file = "pyaml-24.4.0.tar.gz", hash = "sha256:0e483d9289010e747a325dc43171bcc39d6562dd1dd4719e8cc7e7c96c99fce6"}, + {file = "pyaml-24.7.0-py3-none-any.whl", hash = "sha256:6b06596cb5ac438a3fad1e1bf5775088c4d3afb927e2b03a29305d334835deb2"}, + {file = "pyaml-24.7.0.tar.gz", hash = "sha256:5d0fdf9e681036fb263a783d0298fc3af580a6e2a6cf1a3314ffc48dc3d91ccb"}, ] [package.dependencies] @@ -2127,6 +2123,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2581,4 +2578,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0.0" -content-hash = "47af6b1308f26c35e92b7574c7f2c82746273d206f0d7b728de7158937cc5c1d" +content-hash = "d003418dcc0d68257a215186d21776941f8739bd3b4f898762a93e7895d7c89e" diff --git a/pyproject.toml b/pyproject.toml index e3b57ede2..143ca2940 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fence" -version = "10.0.0" +version = "10.1.0" description = "Gen3 AuthN/AuthZ OIDC Service" authors = ["CTDS UChicago "] license = "Apache-2.0" @@ -38,7 +38,7 @@ markdown = "^3.1.1" markupsafe = "^2.0.1" paramiko = ">=2.6.0" -prometheus-client = "^0.9.0" +prometheus-client = "<1" psycopg2 = "^2.8.3" PyJWT = "^2.4.0" python_dateutil = "^2.6.1" diff --git a/tests/ci_commands_script.sh b/tests/ci_commands_script.sh index 5ab1d6c6c..fef98a668 100755 --- a/tests/ci_commands_script.sh +++ b/tests/ci_commands_script.sh @@ -1,3 +1,5 @@ #!/usr/bin/env bash +mkdir -p /var/tmp/uwsgi_flask_metrics/ || true +export 
PROMETHEUS_MULTIPROC_DIR="/var/tmp/uwsgi_flask_metrics/"
poetry run pytest -vv --cov=fence --cov-report xml tests
diff --git a/tests/conftest.py b/tests/conftest.py
index f7b9d432d..273f4a496 100755
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -552,6 +552,13 @@ def drop_all():
     return app.db
 
 
+@pytest.fixture
+def prometheus_metrics_before(client):
+    resp = client.get("/metrics")
+    assert resp.status_code == 200, "Could not get prometheus metrics initial state"
+    yield resp.text
+
+
 @fence.app.route("/protected")
 @fence.auth.login_required({"access"})
 def protected_endpoint(methods=["GET"]):
@@ -966,6 +973,7 @@ def do_patch(authz):
         "mocker": mocker,
         # only gs or s3 for location, ignore specifiers after the _
         "indexed_file_location": protocol.split("_")[0],
+        "record": record,
     }
     return output
 
@@ -1734,3 +1742,24 @@ def get_all_shib_idps_patcher():
     yield mock
 
     get_all_shib_idps_patch.stop()
+
+
+@pytest.fixture(scope="function")
+def mock_authn_user_flask_context(app):
+    """
+    Mock `flask.g.user` to simulate a simple user who has authenticated.
+
+    This is primarily to ensure that tests which mock the start of authN (where sessions
+    get set) can still test the callbacks (where metrics logging relies on session data).
+    """
+    from flask import g
+
+    user_mock = MagicMock()
+    user_mock.id = 1
+    # assign the mocked user directly so `flask.g.user.id` resolves to the fake user's id
+    g.user = user_mock
+
+    yield
+
+    # remove the mocked user so tests that run after this one see an unmodified `g`
+    g.pop("user", None)
diff --git a/tests/login/test_base.py b/tests/login/test_base.py
index a9bfff7ec..a32452b2c 100644
--- a/tests/login/test_base.py
+++ b/tests/login/test_base.py
@@ -4,7 +4,7 @@
 
 
 @patch("fence.blueprints.login.base.prepare_login_log")
-def test_post_login_set_mfa(app, monkeypatch):
+def test_post_login_set_mfa(app, monkeypatch, mock_authn_user_flask_context):
     """
     Verifies the arborist is called with the mfa_policy if a given token
     contains the claims found in the configured multifactor_auth_claim_info
@@ -37,7 +37,7 @@
 
 @patch("fence.blueprints.login.base.prepare_login_log")
-def test_post_login_no_mfa_enabled(app, monkeypatch):
+def test_post_login_no_mfa_enabled(app, monkeypatch, mock_authn_user_flask_context):
     """
     Verifies arborist is not called when there is no multifactor_auth_claim_info
     defined for the given IDP.
""" diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 3c3af9fd0..ec7b7b8b7 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -81,7 +81,6 @@ def test_app_config(): {"patch_name": "fence.app_sessions"}, {"patch_name": "fence.app_register_blueprints"}, {"patch_name": "fence.oidc.oidc_server.OIDCServer.init_app"}, - {"patch_name": "fence._setup_prometheus"}, { "patch_name": "fence.resources.storage.StorageManager.__init__", "return_value": None, diff --git a/tests/test_audit_service.py b/tests/test_metrics.py similarity index 71% rename from tests/test_audit_service.py rename to tests/test_metrics.py index cbf5c8cf2..be7d6b2ab 100644 --- a/tests/test_audit_service.py +++ b/tests/test_metrics.py @@ -1,9 +1,15 @@ """ +Tests for the metrics features (Audit Service and Prometheus) + Tests for the Audit Service integration: - test the creation of presigned URL audit logs - test the creation of login audit logs - test the SQS flow +In Audit Service tests where it makes sense, we also test that Prometheus +metrics are created as expected. The last section tests Prometheus metrics +independently. + Note 1: there is no test for the /oauth2 endpoint: the /oauth2 endpoint should redirect the user to the /login endpoint (tested in `test_redirect_oauth2_authorize`), and the login endpoint should @@ -16,7 +22,6 @@ tests looking at users are not affected. """ - import boto3 import flask import json @@ -27,11 +32,17 @@ from unittest.mock import ANY, MagicMock, patch import fence +from fence.metrics import metrics from fence.config import config +from fence.blueprints.data.indexd import get_bucket_from_urls +from fence.models import User from fence.resources.audit.utils import _clean_authorization_request_url from tests import utils from tests.conftest import LOGIN_IDPS +# `reset_prometheus_metrics` must be imported even if not used so the autorun fixture gets triggered +from tests.utils.metrics import assert_prometheus_metrics, reset_prometheus_metrics + def test_clean_authorization_request_url(): """ @@ -111,6 +122,7 @@ def __init__(self, data, status_code=200): @pytest.mark.parametrize("protocol", ["gs", None]) def test_presigned_url_log( endpoint, + prometheus_metrics_before, protocol, client, user_client, @@ -126,7 +138,7 @@ def test_presigned_url_log( """ Get a presigned URL from Fence and make sure a call to the Audit Service was made to create an audit log. Test with and without a requested - protocol. + protocol. Also check that a prometheus metric is created. 
""" mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) audit_service_mocker = mock.patch( @@ -142,7 +154,7 @@ def test_presigned_url_log( else: path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol or 's3'}" resource_paths = ["/my/resource/path1", "/path2"] - indexd_client_with_arborist(resource_paths) + record = indexd_client_with_arborist(resource_paths)["record"] headers = { "Authorization": "Bearer " + jwt.encode( @@ -183,6 +195,39 @@ def test_presigned_url_log( }, ) + # check prometheus metrics + resp = client.get("/metrics") + assert resp.status_code == 200 + bucket = get_bucket_from_urls(record["urls"], expected_protocol) + size_in_kibibytes = record["size"] / 1024 + expected_metrics = [ + { + "name": "gen3_fence_presigned_url_total", + "labels": { + "action": "download", + "authz": resource_paths, + "bucket": bucket, + "drs": endpoint == "ga4gh-drs", + "protocol": expected_protocol, + "user_sub": user_client.user_id, + }, + "value": 1.0, + }, + { + "name": "gen3_fence_presigned_url_size", + "labels": { + "action": "download", + "authz": resource_paths, + "bucket": bucket, + "drs": endpoint == "ga4gh-drs", + "protocol": expected_protocol, + "user_sub": user_client.user_id, + }, + "value": size_in_kibibytes, + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) + @pytest.mark.parametrize( "indexd_client_with_arborist", ["s3_and_gs_acl_no_authz"], indirect=True @@ -411,10 +456,11 @@ def test_login_log_login_endpoint( rsa_private_key, db_session, # do not remove :-) See note at top of file monkeypatch, + prometheus_metrics_before, ): """ Test that logging in via any of the existing IDPs triggers the creation - of a login audit log. + of a login audit log and of a prometheus metric. 
""" mock_arborist_requests() audit_service_mocker = mock.patch( @@ -493,13 +539,14 @@ def test_login_log_login_endpoint( path = f"/login/{idp}/{callback_endpoint}" # SEE fence/blueprints/login/fence_login.py L91 response = client.get(path, headers=headers) assert response.status_code == 200, response + user_sub = db_session.query(User).filter(User.username == username).first().id audit_service_requests.post.assert_called_once_with( "http://audit-service/log/login", json={ "request_url": path, "status_code": 200, "username": username, - "sub": ANY, + "sub": user_sub, "idp": idp_name, "fence_idp": None, "shib_idp": None, @@ -510,10 +557,27 @@ def test_login_log_login_endpoint( if get_auth_info_patch: get_auth_info_patch.stop() + # check prometheus metrics + resp = client.get("/metrics") + assert resp.status_code == 200 + expected_metrics = [ + { + "name": "gen3_fence_login_total", + "labels": {"idp": "all", "user_sub": user_sub}, + "value": 1.0, + }, + { + "name": "gen3_fence_login_total", + "labels": {"idp": idp_name, "user_sub": user_sub}, + "value": 1.0, + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) + -########################## -# Push audit logs to SQS # -########################## +########################################## +# Audit Service - Push audit logs to SQS # +########################################## def mock_audit_service_sqs(app): @@ -638,3 +702,171 @@ def test_login_log_push_to_sqs( mocked_sqs.send_message.assert_called_once() get_auth_info_patch.stop() + + +###################### +# Prometheus metrics # +###################### + + +def test_disabled_prometheus_metrics(client, monkeypatch): + """ + When metrics gathering is not enabled, the metrics endpoint should not error, but it should + not return any data. + """ + monkeypatch.setitem(config, "ENABLE_PROMETHEUS_METRICS", False) + metrics.add_login_event( + user_sub="123", + idp="test_idp", + fence_idp="shib", + shib_idp="university", + client_id="test_azp", + ) + resp = client.get("/metrics") + assert resp.status_code == 200 + assert resp.text == "" + + +def test_record_prometheus_events(prometheus_metrics_before, client): + """ + Validate the returned value of the metrics endpoint before any event is logged, after an event + is logged, and after more events (one identical to the 1st one, and two different) are logged. + """ + # NOTE: To update later. The metrics utils don't support this yet. The gauges are not handled correctly. + # resp = client.get("/metrics") + # assert resp.status_code == 200 + # # no metrics have been recorded yet + # assert_prometheus_metrics(prometheus_metrics_before, resp.text, []) + + # record a login event and check that we get both a metric for the specific IDP, and an + # IDP-agnostic metric for the total number of login events. The latter should have no IDP + # information (no `fence_idp` or `shib_idp`). 
+    metrics.add_login_event(
+        user_sub="123",
+        idp="test_idp",
+        fence_idp="shib",
+        shib_idp="university",
+        client_id="test_azp",
+    )
+    resp = client.get("/metrics")
+    assert resp.status_code == 200
+    expected_metrics = [
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "test_idp",
+                "fence_idp": "shib",
+                "shib_idp": "university",
+                "client_id": "test_azp",
+            },
+            "value": 1.0,
+        },
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "all",
+                "fence_idp": "None",
+                "shib_idp": "None",
+                "client_id": "test_azp",
+            },
+            "value": 1.0,
+        },
+    ]
+    assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics)
+
+    # same login: should increase the existing counter by 1
+    metrics.add_login_event(
+        user_sub="123",
+        idp="test_idp",
+        fence_idp="shib",
+        shib_idp="university",
+        client_id="test_azp",
+    )
+    # login with different IDP labels: should create a new metric
+    metrics.add_login_event(
+        user_sub="123",
+        idp="another_idp",
+        fence_idp=None,
+        shib_idp=None,
+        client_id="test_azp",
+    )
+    # new signed URL event: should create a new metric
+    metrics.add_signed_url_event(
+        action="upload",
+        protocol="s3",
+        acl=None,
+        authz=["/test/path"],
+        bucket="s3://test-bucket",
+        user_sub="123",
+        client_id="test_azp",
+        drs=True,
+        size_in_kibibytes=1.2,
+    )
+    resp = client.get("/metrics")
+    assert resp.status_code == 200
+    expected_metrics = [
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "all",
+                "fence_idp": "None",
+                "shib_idp": "None",
+                "client_id": "test_azp",
+            },
+            "value": 3.0,  # recorded login events since the beginning of the test
+        },
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "test_idp",
+                "fence_idp": "shib",
+                "shib_idp": "university",
+                "client_id": "test_azp",
+            },
+            "value": 2.0,  # recorded login events for this idp, fence_idp and shib_idp combo
+        },
+        {
+            "name": "gen3_fence_login_total",
+            "labels": {
+                "user_sub": "123",
+                "idp": "another_idp",
+                "fence_idp": "None",
+                "shib_idp": "None",
+                "client_id": "test_azp",
+            },
+            "value": 1.0,  # recorded login events for the different idp
+        },
+        {
+            "name": "gen3_fence_presigned_url_total",
+            "labels": {
+                "action": "upload",
+                "protocol": "s3",
+                "authz": ["/test/path"],
+                "bucket": "s3://test-bucket",
+                "user_sub": "123",
+                "client_id": "test_azp",
+                "drs": True,
+            },
+            "value": 1.0,  # recorded presigned URL events
+        },
+        {
+            "name": "gen3_fence_presigned_url_size",
+            "labels": {
+                "action": "upload",
+                "protocol": "s3",
+                "authz": ["/test/path"],
+                "bucket": "s3://test-bucket",
+                "user_sub": "123",
+                "client_id": "test_azp",
+                "drs": True,
+            },
+            "value": 1.2,  # presigned URL gauge with the file size as value
+        },
+    ]
+    assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics)
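For reference, the login counters asserted above surface in the `/metrics` response in the standard Prometheus exposition format, which the helpers in the new file below parse back into dicts. A hypothetical sample (the two lines mirror the docstring examples in `tests/utils/metrics.py`; the values are illustrative):

# Illustrative only: raw exposition text for the dual login counters (the per-IDP
# counter plus the `idp="all"` aggregate recorded by `add_login_event`).
EXAMPLE_LOGIN_METRICS = (
    "# HELP gen3_fence_login_total Fence logins\n"
    "# TYPE gen3_fence_login_total counter\n"
    'gen3_fence_login_total{client_id="test_azp",fence_idp="shib",idp="test_idp",shib_idp="university",user_sub="123"} 2.0\n'
    'gen3_fence_login_total{client_id="test_azp",fence_idp="None",idp="all",shib_idp="None",user_sub="123"} 3.0\n'
)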
diff --git a/tests/utils/metrics.py b/tests/utils/metrics.py
new file mode 100644
index 000000000..0443589a0
--- /dev/null
+++ b/tests/utils/metrics.py
@@ -0,0 +1,223 @@
+"""
+At the time of writing, Prometheus metrics out of the box can't be reset between each
+unit test. To be able to write independent unit tests, we have to manually save the "previous
+state" (see `prometheus_metrics_before` fixture) and compare it to the new state. This involves
+manually parsing the "previous state" (a python object) and the "current state" (raw text) into
+the same format so they can be compared:
+{ "name": "", "labels": {}, "value": 0 }
+
+The utility functions below can be used to check that the expected metrics have been recorded,
+while discarding any previous metrics.
+
+https://stackoverflow.com/questions/73198616/how-do-i-reset-a-prometheus-python-client-python-runtime-between-pytest-test-fun
+"""
+
+
+import os
+import shutil
+
+import pytest
+
+
+@pytest.fixture(autouse=True, scope="session")
+def reset_prometheus_metrics():
+    """
+    Delete the prometheus files after all the tests have run.
+    Without this, when running the tests locally, we would keep reading the metrics from
+    previous test runs.
+    So why not run this in-between the unit tests instead of the `assert_prometheus_metrics`
+    logic? Because it doesn't work: the prometheus client also keeps state, and the mismatch
+    causes errors. This only works when the client is reset too (new process).
+    """
+    yield
+
+    folder = os.environ["PROMETHEUS_MULTIPROC_DIR"]
+    for filename in os.listdir(folder):
+        file_path = os.path.join(folder, filename)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+            elif os.path.isdir(file_path):
+                shutil.rmtree(file_path)
+        except Exception as e:
+            print(f"Failed to delete Prometheus metrics file '{file_path}': {e}")
+
+
+def _diff_new_metrics_from_old_metrics(new_metrics, old_metrics):
+    """
+    Return a list of "current metrics" by comparing the "new metrics" (current state) to the "old metrics" (previous state).
+
+    Input metric format example: {
+        'gen3_fence_login_total{client_id="test_azp",fence_idp="shib",idp="test_idp",shib_idp="university",user_sub="123"}': 2.0,
+        'gen3_fence_login_total{client_id="test_azp",fence_idp="None",idp="all",shib_idp="None",user_sub="123"}': 3.0,
+    }
+
+    Functionality example:
+        old_metrics = { 'counter1': 1, 'counter2': 2, 'gauge1': 1 }
+        new_metrics = { 'counter1': 2, 'counter3': 1 }
+        Returned value = [
+            ('counter1', 1) (difference between 2 and 1),
+            ('counter3', 1)
+        ] (counter2 and gauge1 omitted since they are not part of the current state)
+
+    Args:
+        new_metrics (dict): format { <metric name + labels>: <value> }
+        old_metrics (dict): format { <metric name + labels>: <value> }
+
+    Return:
+        list< (<metric name + labels>, <value>) >
+    """
+
+    def metric_is_gauge(metric_name):
+        return not metric_name.endswith("_total") and not metric_name.endswith(
+            "_created"
+        )
+
+    diff = []
+    for long_metric_name, old_value in old_metrics.items():
+        # long_metric_name = metric name + labels (see example in docstring)
+        metric_name = long_metric_name.split("{")[0]
+        if long_metric_name not in new_metrics or metric_is_gauge(metric_name):
+            # ignore all old metrics that are not also present in the new metrics
+            continue
+        # the metric value generated by the current test is the difference between the previous
+        # value and the current value
+        val = new_metrics[long_metric_name] - old_value
+        if val != 0:
+            diff.append((long_metric_name, val))
+    for long_metric_name, new_value in new_metrics.items():
+        metric_name = long_metric_name.split("{")[0]
+        if metric_is_gauge(metric_name):  # all gauge metrics must be listed
+            diff.append((long_metric_name, new_value))
+        elif long_metric_name not in old_metrics:
+            diff.append((long_metric_name, new_value))
+    return diff
+
+
+def _parse_raw_metrics_to_dict(text_metric):
+    """
+    Parse raw text metrics into a dictionary of metric (metric name + labels) to value,
+    ignoring lines that are not metrics.
+
+    Args:
+        text_metric (str)
+            Example:
+                # TYPE gen3_fence_login_total counter
+                gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"} 2.0
+                # HELP gen3_fence_presigned_url_total Fence presigned urls
+                # TYPE gen3_fence_presigned_url_total counter
+                gen3_fence_presigned_url_total{client_id="test_azp",drs="True",user_sub="123"} 1.0
+
+    Return:
+        dict
+            Example:
+            {
+                "gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"}": 2.0,
+                "gen3_fence_presigned_url_total{client_id="test_azp",drs="True",user_sub="123"}": 1.0,
+            }
+    """
+    if not text_metric:
+        return {}
+    return {
+        " ".join(m.split(" ")[:-1]): float(m.split(" ")[-1])
+        for m in text_metric.strip().split("\n")
+        if not m.startswith("#")
+    }
+
+
+def _parse_raw_name_to_labels(text_metric_name):
+    """
+    Parse a raw metric name into a name and a dict of labels.
+
+    Example:
+        text_metric_name = `metric_name{param1="None",param2="upload",param3="['/test/path']"}`
+        Returned value = {
+            "name": "metric_name",
+            "labels": { "param1": "None", "param2": "upload", "param3": "['/test/path']" }
+        }
+
+    Args:
+        text_metric_name (str)
+
+    Returns:
+        dict
+    """
+    name = text_metric_name.split("{")[0]
+    labels = text_metric_name.split("{")[1].split("}")[0].split('",')
+    labels = {l.split("=")[0]: l.split("=")[1].strip('"') for l in labels}
+    return {"name": name, "labels": labels}
+
+
+def assert_prometheus_metrics(
+    previous_text_metrics, current_text_metrics, expected_metrics
+):
+    """
+    Compare the previous and current states of the prometheus metrics, and check that the
+    difference between the two matches the new metrics a test expects to have recorded.
+
+    Expected metrics: only provide the labels we need to check for; the rest will be ignored.
+
+    Args:
+        previous_text_metrics (str): previous state of prometheus metrics
+        current_text_metrics (str): current state
+        Example `previous_text_metrics` or `current_text_metrics`:
+            # TYPE gen3_fence_login_total counter
+            gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"} 2.0
+            # HELP gen3_fence_presigned_url_total Fence presigned urls
+            # TYPE gen3_fence_presigned_url_total counter
+            gen3_fence_presigned_url_total{acl="None",action="upload",authz="['/test/path']",bucket="s3://test-bucket",client_id="test_azp",drs="True",protocol="s3",user_sub="123"} 1.0
+        expected_metrics (list): the expected difference between previous state and current state.
+            Only provide the labels we need to check; omitted labels will be ignored even if they
+            are present in the current state.
+            Example: [
+                {
+                    'name': 'gen3_fence_login_total',
+                    'labels': {
+                        'idp': 'test_idp', 'shib_idp': 'university', 'user_sub': '123'
+                    },
+                    'value': 2.0
+                }
+            ]
+    """
+    old_metrics = _parse_raw_metrics_to_dict(previous_text_metrics)
+    print("Old metrics:")
+    for k, v in old_metrics.items():
+        print(f"- {k} = {v}")
+
+    new_metrics = _parse_raw_metrics_to_dict(current_text_metrics)
+    print("Received metrics:")
+    for k, v in new_metrics.items():
+        print(f"- {k} = {v}")
+
+    diff_metrics = _diff_new_metrics_from_old_metrics(new_metrics, old_metrics)
+    current_metrics = []
+    print("Diff:")
+    for (metric_name, val) in diff_metrics:
+        parsed_m = _parse_raw_name_to_labels(metric_name)
+        parsed_m["value"] = val
+        current_metrics.append(parsed_m)
+        print(f"- {parsed_m}")
+
+    print("Expecting metrics:")
+    # check that for each metric+label combination, the value is identical to the expected value
+    for expected_m in expected_metrics:
+        found = False
+        print(f"- {expected_m}")
+        for current_m in current_metrics:  # look for the right metric
+            if current_m["name"] != expected_m["name"]:
+                continue
+            # if the metric name is identical, check the labels
+            right_labels = True
+            for label_k, label_v in expected_m["labels"].items():
+                if current_m["labels"].get(label_k) != str(label_v):
+                    right_labels = False
+                    break
+            # if both the name and the labels are identical, this is the right metric:
+            # check that the value is the same as expected
+            if right_labels:
+                assert (
+                    current_m["value"] == expected_m["value"]
+                ), f"Wrong value for metric: {expected_m}"
+                found = True
+                break  # we found the right metric and it has the right value: moving on
+        assert found, f"Missing metric: {expected_m}"
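To make the helper contracts concrete, here is a small end-to-end sketch under invented metric values; only the three function names come from the file above, while the exposition text itself is made up:

# Illustrative only: round-trips invented exposition text through the helpers above.
from tests.utils.metrics import (
    _diff_new_metrics_from_old_metrics,
    _parse_raw_metrics_to_dict,
    _parse_raw_name_to_labels,
)

previous_state = """
# TYPE gen3_fence_login_total counter
gen3_fence_login_total{idp="test_idp",user_sub="123"} 2.0
"""
current_state = """
# TYPE gen3_fence_login_total counter
gen3_fence_login_total{idp="test_idp",user_sub="123"} 3.0
# TYPE gen3_fence_presigned_url_size gauge
gen3_fence_presigned_url_size{protocol="s3",user_sub="123"} 1.2
"""

old = _parse_raw_metrics_to_dict(previous_state)
new = _parse_raw_metrics_to_dict(current_state)

# counters are reported as the increment over the previous state (3.0 - 2.0),
# while gauges always surface at their absolute value
diff = dict(_diff_new_metrics_from_old_metrics(new, old))
assert diff['gen3_fence_login_total{idp="test_idp",user_sub="123"}'] == 1.0
assert diff['gen3_fence_presigned_url_size{protocol="s3",user_sub="123"}'] == 1.2

# a raw metric name splits back into the {"name": ..., "labels": ...} shape
parsed = _parse_raw_name_to_labels('gen3_fence_login_total{idp="test_idp",user_sub="123"}')
assert parsed == {
    "name": "gen3_fence_login_total",
    "labels": {"idp": "test_idp", "user_sub": "123"},
}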