Skip to content

Commit

Permalink
Merge pull request #107 from bento-platform/feat/authorized-katsu-calls
Browse files Browse the repository at this point in the history
feat: beacon client-authorized requests to private data endpoints
  • Loading branch information
davidlougheed authored Sep 16, 2024
2 parents df6a78c + 0dd00fc commit d78d2ea
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 25 deletions.
2 changes: 1 addition & 1 deletion bento_beacon/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,5 @@ def generic_exception_handler(e):
current_app.logger.error(f"HTTP Exception: {e}")
return beacon_error_response(e.name, e.code), e.code

current_app.logger.error(f"Server Error: {e}")
current_app.logger.error(f"Server Error: {repr(e)}")
return beacon_error_response("Server Error", 500), 500
68 changes: 68 additions & 0 deletions bento_beacon/authz/access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import functools
import requests
from flask import current_app

from .headers import auth_header_from_request

__all__ = [
"get_access_token",
"create_access_header_or_fall_back",
]


@functools.cache
def get_token_endpoint_from_openid_config_url(url: str, validate_ssl: bool = True, timeout: float = 30.0):
    """
    Fetch an OpenID Connect configuration document and return its token endpoint.

    Results are memoized by `functools.cache` per distinct argument combination, so the
    configuration URL is only contacted once per successful lookup. Calls which raise are
    not cached and will be retried on the next invocation.

    :param url: URL of the OpenID configuration document.
    :param validate_ssl: Passed to `requests` as `verify`; set to False to skip certificate validation.
    :param timeout: Seconds to wait for the server before giving up. Without a timeout, `requests`
                    will wait indefinitely on an unresponsive server.
    :return: The value of the `token_endpoint` key of the JSON response.
    :raises Exception: If the response status is not OK. Errors raised by `requests` itself (connection
                       failure, timeout), by JSON decoding, or by a missing `token_endpoint` key propagate
                       unchanged.
    """
    r = requests.get(url, verify=validate_ssl, timeout=timeout)
    if not r.ok:
        raise Exception(f"Received not-OK response from OIDC config URL: {r.status_code}")
    return r.json()["token_endpoint"]


def get_access_token() -> str | None:
    """
    Obtain an access token for this service using the OAuth2 client credentials grant.

    Reads OPENID_CONFIG_URL, CLIENT_ID, CLIENT_SECRET and BENTO_VALIDATE_SSL from the Flask app
    config, resolves the token endpoint from the OpenID configuration document, and POSTs the
    client credentials to it.

    Every failure mode is logged and reported as `None` rather than raised, so that callers can
    fall back to another source of authorization headers.

    :return: The access token string, or None if the configuration is incomplete or the token
             could not be retrieved.
    """
    logger = current_app.logger

    oidc_config_url = current_app.config["OPENID_CONFIG_URL"]
    client_id = current_app.config["CLIENT_ID"]
    client_secret = current_app.config["CLIENT_SECRET"]
    validate_ssl = current_app.config["BENTO_VALIDATE_SSL"]

    if not all((oidc_config_url, client_id, client_secret)):
        logger.error("Could not retrieve access token; one of OPENID_CONFIG_URL | CLIENT_ID | CLIENT_SECRET is not set")
        return None

    try:
        token_endpoint = get_token_endpoint_from_openid_config_url(oidc_config_url, validate_ssl=validate_ssl)
    except Exception as e:
        logger.error(f"Could not retrieve access token; got exception from OpenID config URL: {e}")
        return None

    try:
        token_res = requests.post(
            token_endpoint,
            verify=validate_ssl,
            # Without a timeout, an unresponsive IdP would block this request handler indefinitely.
            timeout=30,
            data={
                "grant_type": "client_credentials",
                "client_id": client_id,
                "client_secret": client_secret,
            },
        )
    except requests.RequestException as e:
        # Connection errors / timeouts must not escape: returning None lets the caller fall back.
        logger.error(f"Could not retrieve access token; got exception from token endpoint: {e!r}")
        return None

    if not token_res.ok:
        # Error bodies are not guaranteed to be JSON (e.g., an HTML page from a proxy); logging the
        # failure must not itself raise.
        try:
            error_body = token_res.json()
        except ValueError:
            error_body = token_res.text
        logger.error(f"Could not retrieve access token; got error response: {error_body}")
        return None

    try:
        return token_res.json()["access_token"]
    except (ValueError, KeyError, TypeError) as e:
        # OK status, but the body was not JSON or did not contain an access token.
        logger.error(f"Could not retrieve access token; malformed token response: {e!r}")
        return None


def create_access_header_or_fall_back():
    """
    Build the authorization headers to send with an outgoing service request.

    When AUTHZ_BENTO_REQUESTS_ENABLED is set and `get_access_token()` yields a token, a
    `{"Authorization": "Bearer <token>"}` dict is returned. In every other case (feature
    disabled, or no token available) the result of `auth_header_from_request()` is returned
    instead, after logging why the fallback happened.
    """
    log = current_app.logger

    if current_app.config["AUTHZ_BENTO_REQUESTS_ENABLED"]:
        token = get_access_token()
        if token is not None:
            return {"Authorization": f"Bearer {token}"}
        # Token retrieval failed; get_access_token() has already logged the cause.
        log.error("create_access_header_or_fall_back: falling back to request headers")
    else:
        log.warning("AUTHZ_BENTO_REQUESTS_ENABLED is false; falling back to request headers")

    # Single fallback exit shared by both the "disabled" and "no token" paths.
    return auth_header_from_request()
38 changes: 29 additions & 9 deletions bento_beacon/config_files/config.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
import json
import os
import urllib3


GA4GH_BEACON_REPO_URL = "https://raw.githubusercontent.com/ga4gh-beacon/beacon-v2"


def str_to_bool(value: str) -> bool:
    """
    Interpret a string (e.g., an environment variable value) as a boolean flag.

    Surrounding whitespace and letter case are ignored. Only "true", "1", "t" and "yes"
    are considered true; any other string, including the empty string, is false.
    """
    truthy_values = ("true", "1", "t", "yes")
    normalized = value.strip().lower()
    return normalized in truthy_values


# Debug flag: BENTO_DEBUG takes precedence; FLASK_DEBUG is only consulted when BENTO_DEBUG is unset, and the flag
# is off when neither is set.
BENTO_DEBUG = str_to_bool(os.environ.get("BENTO_DEBUG", os.environ.get("FLASK_DEBUG", "false")))
# SSL validation defaults to the inverse of the debug flag (str(bool) yields "True"/"False", which str_to_bool
# accepts case-insensitively) unless BENTO_VALIDATE_SSL is set explicitly.
BENTO_VALIDATE_SSL = str_to_bool(os.environ.get("BENTO_VALIDATE_SSL", str(not BENTO_DEBUG)))

if not BENTO_VALIDATE_SSL:
    # Don't let urllib3 spam us with SSL validation warnings if we're operating with SSL validation off, most likely in
    # a development/test context where we're using self-signed certificates.
    # NOTE: this runs at import time and affects warning filters process-wide, not just for this module.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class Config:
BEACON_SPEC_VERSION = "v2.0.0"

Expand All @@ -22,11 +37,8 @@ class Config:

DEFAULT_PAGINATION_PAGE_SIZE = 10

BENTO_DEBUG = os.environ.get("BENTO_DEBUG", os.environ.get("FLASK_DEBUG", "false")).strip().lower() in (
"true",
"1",
"t",
)
BENTO_DEBUG = BENTO_DEBUG
BENTO_VALIDATE_SSL = BENTO_VALIDATE_SSL

BENTO_DOMAIN = os.environ.get("BENTOV2_DOMAIN")
BEACON_BASE_URL = os.environ.get("BEACON_BASE_URL")
Expand All @@ -36,7 +48,7 @@ class Config:
BEACON_ID = ".".join(reversed(BENTO_DOMAIN.split("."))) + ".beacon"

BEACON_NAME = os.environ.get("BENTO_PUBLIC_CLIENT_NAME", "Bento") + " Beacon"
BEACON_UI_ENABLED = os.environ.get("BENTO_BEACON_UI_ENABLED").strip().lower() in ("true", "1", "t")
BEACON_UI_ENABLED = str_to_bool(os.environ.get("BENTO_BEACON_UI_ENABLED", ""))
BEACON_UI_URL = BENTO_PUBLIC_URL + "/#/en/beacon"

ENTRY_TYPES_DETAILS = {
Expand Down Expand Up @@ -145,14 +157,14 @@ class Config:
KATSU_PUBLIC_CONFIG_ENDPOINT = "/api/public_search_fields"
KATSU_INDIVIDUAL_SCHEMA_ENDPOINT = "/api/schemas/phenopacket"
KATSU_EXPERIMENT_SCHEMA_ENDPOINT = "/api/schemas/experiment"
KATSU_BEACON_SEARCH = "/api/beacon_search"
KATSU_BEACON_SEARCH = "/api/public"
KATSU_SEARCH_OVERVIEW = "/api/search_overview"
KATSU_PRIVATE_OVERVIEW = "/api/overview"
KATSU_PUBLIC_OVERVIEW = "/api/public_overview"
KATSU_PUBLIC_RULES = "/api/public_rules"
KATSU_TIMEOUT = int(os.environ.get("BEACON_KATSU_TIMEOUT", 180))

MAP_EXTRA_PROPERTIES_TO_INFO = os.environ.get("MAP_EXTRA_PROPERTIES_TO_INFO", True)
MAP_EXTRA_PROPERTIES_TO_INFO = str_to_bool(os.environ.get("MAP_EXTRA_PROPERTIES_TO_INFO", ""))

PHENOPACKETS_SCHEMA_REFERENCE = {"entityType": "individual", "schema": "phenopackets v1"}

Expand All @@ -174,15 +186,23 @@ class Config:
# -------------------
# authorization

# - for contacting the Bento authorization service
AUTHZ_URL: str = os.environ.get("BENTO_AUTHZ_SERVICE_URL", "")
AUTHZ_ENABLED: bool = os.environ.get("AUTHZ_ENABLED", "true").strip().lower() in ("true", "1", "yes")
AUTHZ_ENABLED: bool = str_to_bool(os.environ.get("AUTHZ_ENABLED", "true"))
# - for retrieving a token from an OAuth2 IdP in order to make authorized requests to Katsu
# --> if this is disabled, <Authorization: ...> headers from the requestor will be forwarded instead.
AUTHZ_BENTO_REQUESTS_ENABLED: bool = str_to_bool(os.environ.get("BEACON_AUTHZ_BENTO_REQUESTS_ENABLED", "true"))
OPENID_CONFIG_URL: str = os.environ.get("BENTO_OPENID_CONFIG_URL", "")
CLIENT_ID: str = os.environ.get("BEACON_CLIENT_ID", "")
CLIENT_SECRET: str = os.environ.get("BEACON_CLIENT_SECRET", "")

# -------------------
# handle injected config files
# a) obtain reference to the expected configuration files' location by
# using the programmable env variable `CONFIG_ABSOLUTE_PATH` if it exists, or
# b) default to using "this file's directory" as the reference to where
# configuration files are expected to be located
@staticmethod
def retrieve_config_json(filename):
# TODO: abstract out CONFIG_PATH if needed
config_path = os.environ.get("CONFIG_ABSOLUTE_PATH", os.path.dirname(os.path.abspath(__file__)))
Expand Down
4 changes: 2 additions & 2 deletions bento_beacon/endpoints/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ def beacon_overview():
@info.route("/individual_schema", methods=["GET", "POST"])
@authz_middleware.deco_public_endpoint
def get_individual_schema():
return katsu_get(current_app.config["KATSU_INDIVIDUAL_SCHEMA_ENDPOINT"])
return katsu_get(current_app.config["KATSU_INDIVIDUAL_SCHEMA_ENDPOINT"], requires_auth="none")


@info.route("/experiment_schema", methods=["GET", "POST"])
@authz_middleware.deco_public_endpoint
def get_experiment_schema():
return katsu_get(current_app.config["KATSU_EXPERIMENT_SCHEMA_ENDPOINT"])
return katsu_get(current_app.config["KATSU_EXPERIMENT_SCHEMA_ENDPOINT"], requires_auth="none")


# -------------------------------------------------------
Expand Down
10 changes: 8 additions & 2 deletions bento_beacon/utils/gohan_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from flask import current_app
from .exceptions import APIException, InvalidQuery, NotImplemented
from ..authz.headers import auth_header_from_request
from ..authz.access import create_access_header_or_fall_back
import requests

# -------------------------------------------------------
Expand Down Expand Up @@ -178,7 +178,13 @@ def gohan_results(url, gohan_args):
def gohan_network_call(url, gohan_args):
c = current_app.config
try:
r = requests.get(url, headers=auth_header_from_request(), timeout=c["GOHAN_TIMEOUT"], params=gohan_args)
r = requests.get(
url,
headers=create_access_header_or_fall_back(),
params=gohan_args,
timeout=c["GOHAN_TIMEOUT"],
verify=c["BENTO_VALIDATE_SSL"],
)

# handle gohan errors or any bad responses
if not r.ok:
Expand Down
39 changes: 28 additions & 11 deletions bento_beacon/utils/katsu_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import requests
from flask import current_app
from functools import reduce
from json import JSONDecodeError
from urllib.parse import urlsplit, urlunsplit
from typing import Literal
from .exceptions import APIException, InvalidQuery
from functools import reduce
from ..authz.access import create_access_header_or_fall_back
from ..authz.headers import auth_header_from_request


Expand Down Expand Up @@ -52,7 +54,13 @@ def katsu_network_call(payload, endpoint=None):
current_app.logger.debug(f"calling katsu url {url}")

try:
r = requests.post(url, headers=auth_header_from_request(), timeout=c["KATSU_TIMEOUT"], json=payload)
r = requests.post(
url,
headers=create_access_header_or_fall_back(),
json=payload,
timeout=c["KATSU_TIMEOUT"],
verify=c["BENTO_VALIDATE_SSL"],
)

katsu_response = r.json()
if not r.ok:
Expand All @@ -73,10 +81,10 @@ def katsu_network_call(payload, endpoint=None):


# used for GET calls at particular katsu endpoints, eg /biosamples
def katsu_get(endpoint, id=None, query=""):
def katsu_get(endpoint, id=None, query="", requires_auth: Literal["none", "forwarded", "full"] = "none"):
c = current_app.config
katsu_base_url = c["KATSU_BASE_URL"]
timeout = current_app.config["KATSU_TIMEOUT"]
timeout = c["KATSU_TIMEOUT"]

# construct request url
url_components = urlsplit(katsu_base_url)
Expand All @@ -92,7 +100,12 @@ def katsu_get(endpoint, id=None, query=""):
)

try:
r = requests.get(query_url, timeout=timeout)
headers = {}
if requires_auth == "forwarded":
headers = auth_header_from_request()
elif requires_auth == "full":
headers = create_access_header_or_fall_back()
r = requests.get(query_url, headers=headers, timeout=timeout, verify=c["BENTO_VALIDATE_SSL"])
katsu_response = r.json()

except JSONDecodeError:
Expand All @@ -114,12 +127,13 @@ def katsu_get(endpoint, id=None, query=""):
def search_from_config(config_filters):
# query error checking handled in katsu
query_string = "&".join(f'{cf["id"]}{cf["operator"]}{cf["value"]}' for cf in config_filters)
response = katsu_get(current_app.config["KATSU_BEACON_SEARCH"], query=query_string)
response = katsu_get(current_app.config["KATSU_BEACON_SEARCH"], query=query_string, requires_auth="full")
return response.get("matches", [])


def get_katsu_config_search_fields():
fields = katsu_get(current_app.config["KATSU_PUBLIC_CONFIG_ENDPOINT"])
# Use forwarded auth for getting available search fields, which may be limited based on access level
fields = katsu_get(current_app.config["KATSU_PUBLIC_CONFIG_ENDPOINT"], requires_auth="forwarded")
current_app.config["KATSU_CONFIG_SEARCH_FIELDS"] = fields
return fields

Expand Down Expand Up @@ -252,7 +266,7 @@ def get_filtering_terms():
def katsu_total_individuals_count():
c = current_app.config
endpoint = c["KATSU_INDIVIDUALS_ENDPOINT"]
count_response = katsu_get(endpoint, query="page_size=1")
count_response = katsu_get(endpoint, query="page_size=1", requires_auth="full")
count = count_response.get("count")
return count

Expand All @@ -261,7 +275,8 @@ def katsu_datasets(id=None):
c = current_app.config
endpoint = c["KATSU_DATASETS_ENDPOINT"]
try:
response = katsu_get(endpoint, id, query="format=phenopackets")
# right now, the datasets endpoint doesn't need any authorization for listing
response = katsu_get(endpoint, id, query="format=phenopackets", requires_auth="none")
except APIException:
return {}

Expand Down Expand Up @@ -295,12 +310,14 @@ def search_summary_statistics(ids):


def overview_statistics():
return katsu_get(current_app.config["KATSU_PRIVATE_OVERVIEW"])
return katsu_get(current_app.config["KATSU_PRIVATE_OVERVIEW"], requires_auth="full")


def katsu_censorship_settings() -> tuple[int | None, int | None]:
# TODO: should be project-dataset scoped
rules = katsu_get(current_app.config["KATSU_PUBLIC_RULES"])
# TODO: should be called on-the-fly and pass request authorization headers onward, since this can change based on
# scoping and the token's particular permissions.
rules = katsu_get(current_app.config["KATSU_PUBLIC_RULES"], requires_auth="none")
max_filters = rules.get("max_query_parameters")
count_threshold = rules.get("count_threshold")
# return even if None
Expand Down

0 comments on commit d78d2ea

Please sign in to comment.