-
Notifications
You must be signed in to change notification settings - Fork 260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Abstract Secret Management Support #703
base: main
Are you sure you want to change the base?
Changes from all commits
c31906c
e96459a
2d6c135
f616522
cdcd612
aa5fdda
a6870d9
997a2b1
8be6a42
e617b99
435e24f
9cb332c
218123f
07a8cf0
44a3ce0
87cd083
b9a004a
4f1f98d
9551473
7054d07
6c2a415
0617f41
b4cdfa1
dc79000
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from abc import ABC, abstractmethod | ||
|
||
from typing import Any, Dict, List, Optional, Tuple | ||
|
||
|
||
class FeathrSecretsManagementClient(ABC): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the file should not be named as abc.py? |
||
"""This is the abstract class for all the secrets management service, which are used to store the credentials that Feathr might use. | ||
""" | ||
|
||
@abstractmethod | ||
def __init__(self, secret_namespace: str, secret_client) -> None: | ||
"""Initialize the FeathrSecretsManagementClient class. | ||
|
||
Args: | ||
secret_namespace (str): a namespace that Feathr needs to get secrets from. | ||
For Azure Key Vault, it is something like the key vault name. | ||
For AWS secrets manager, it is something like a secret name. | ||
|
||
secret_client: A client that will be used to retrieve Feathr secrets. | ||
""" | ||
pass | ||
|
||
@abstractmethod | ||
def get_feathr_secret(self, secret_name: str) -> str: | ||
"""Get Feathr Secrets from a certain secret management service, such as Azure Key Vault or AWS Secrets Manager. | ||
|
||
Returns: | ||
str: returned secret from secret management service | ||
""" | ||
pass |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,31 +1,37 @@ | ||
from azure.keyvault.secrets import SecretClient | ||
from azure.identity import DefaultAzureCredential | ||
from loguru import logger | ||
from azure.core.exceptions import ResourceNotFoundError | ||
from feathr.secrets.abc import FeathrSecretsManagementClient | ||
|
||
class AzureKeyVaultClient: | ||
def __init__(self, akv_name: str): | ||
self.akv_name = akv_name | ||
self.secret_client = None | ||
|
||
def get_feathr_akv_secret(self, secret_name: str): | ||
class AzureKeyVaultClient(FeathrSecretsManagementClient): | ||
def __init__(self, secret_namespace: str, secret_client: SecretClient = None): | ||
"""Initializes the AzureKeyVaultClient. Note that `secret_namespace` is not used, since the namespace information will be included in secret_client. | ||
""" | ||
self.secret_client = secret_client | ||
if self.secret_client is not None and not isinstance(secret_client, SecretClient): | ||
raise RuntimeError( | ||
"You need to pass an azure.keyvault.secrets.SecretClient instance.") | ||
|
||
def get_feathr_secret(self, secret_name: str) -> str: | ||
"""Get Feathr Secrets from Azure Key Vault. Note that this function will replace '_' in `secret_name` with '-' since Azure Key Vault doesn't support it | ||
|
||
Returns: | ||
_type_: _description_ | ||
str: returned secret from secret management service | ||
""" | ||
if self.secret_client is None: | ||
self.secret_client = SecretClient( | ||
vault_url = f"https://{self.akv_name}.vault.azure.net", | ||
credential=DefaultAzureCredential() | ||
) | ||
raise RuntimeError("You need to pass an azure.keyvault.secrets.SecretClient instance when initializing FeathrClient.") | ||
|
||
try: | ||
# replace '_' with '-' since Azure Key Vault doesn't support it | ||
variable_replaced = secret_name.replace('_','-') #.upper() | ||
logger.info('Fetching the secret {} from Key Vault {}.', variable_replaced, self.akv_name) | ||
variable_replaced = secret_name.replace('_', '-') # .upper() | ||
logger.info('Fetching the secret {} from Key Vault {}.', | ||
variable_replaced, self.secret_client.vault_url) | ||
secret = self.secret_client.get_secret(variable_replaced) | ||
logger.info('Secret {} fetched from Key Vault {}.', variable_replaced, self.akv_name) | ||
logger.info('Secret {} fetched from Key Vault {}.', | ||
variable_replaced, self.secret_client.vault_url) | ||
return secret.value | ||
except ResourceNotFoundError as e: | ||
logger.error(f"Secret {secret_name} cannot be found in Key Vault {self.akv_name}.") | ||
raise | ||
except ResourceNotFoundError: | ||
logger.error( | ||
f"Secret {secret_name} cannot be found in Key Vault {self.secret_client.vault_url}.") | ||
raise |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from loguru import logger | ||
import json | ||
from feathr.secrets.abc import FeathrSecretsManagementClient | ||
from aws_secretsmanager_caching.secret_cache import SecretCache | ||
|
||
|
||
class AWSSecretManagerClient(FeathrSecretsManagementClient): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the file name: aws_secret_manager |
||
def __init__(self, secret_namespace: str = None, secret_client: SecretCache = None): | ||
self.secret_id = secret_namespace | ||
self.secret_client = secret_client | ||
# make sure secret_client is a SecretCache type | ||
if secret_client is not None and not isinstance(secret_client, SecretCache): | ||
raise RuntimeError( | ||
"You need to pass a aws_secretsmanager_caching.secret_cache.SecretCache instance. Please refer to https://docs.aws.amazon.com/secretsmanager/latest/userguide/retrieving-secrets_cache-python.html for more details.") | ||
|
||
def get_feathr_secret(self, secret_name: str): | ||
"""Get Feathr Secrets from AWS Secrets manager. It's also recommended that the client passes a cache objects to reduce cost. | ||
See more details here: https://docs.aws.amazon.com/secretsmanager/latest/userguide/retrieving-secrets_cache-python.html | ||
""" | ||
if self.secret_client is None: | ||
raise RuntimeError( | ||
"You need to pass a aws_secretsmanager_caching.secret_cache.SecretCache instance when initializing FeathrClient.") | ||
|
||
try: | ||
get_secret_value_response = self.secret_client.get_secret_string( | ||
self.secret_id) | ||
# result is in str format, so we need to load it as a dict | ||
secret = json.loads(get_secret_value_response) | ||
return secret[secret_name] | ||
except KeyError as e: | ||
logger.error( | ||
f"Secret {secret_name} cannot be found in secretsmanager {self.secret_id}.") | ||
raise e |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import os | ||
import yaml | ||
from loguru import logger | ||
from feathr.secrets.akv_client import AzureKeyVaultClient | ||
from azure.core.exceptions import ResourceNotFoundError | ||
from feathr.secrets.aws_secretmanager import AWSSecretManagerClient | ||
|
||
class _EnvVaraibleUtil(object): | ||
def __init__(self, config_path: str, secret_manager_client = None): | ||
"""Initialize the environment variable utils client | ||
|
||
Args: | ||
config_path (str): configuration path, if users want to use YAML to load all the configs | ||
secret_manager_client: the secret manager client type. currently only Azure key vault and AWS secret manager is supported. | ||
""" | ||
self.config_path = config_path | ||
# Set to none first to avoid invalid reference | ||
self.secret_manager_client = None | ||
if secret_manager_client and self.get_environment_variable_with_default('secrets', 'azure_key_vault', 'name'): | ||
self.secret_manager_client = AzureKeyVaultClient( | ||
secret_namespace=self.get_environment_variable_with_default('secrets', 'azure_key_vault', 'name'), | ||
secret_client=secret_manager_client) | ||
elif secret_manager_client and self.get_environment_variable_with_default('secrets', 'aws_secrets_manager', 'secret_id'): | ||
self.secret_manager_client = AWSSecretManagerClient( | ||
secret_namespace=self.get_environment_variable_with_default('secrets', 'aws_secrets_manager', 'secret_id'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this introduce an else and error out in the case if the secrets manager client is not supported by feathr? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch. I'll update |
||
secret_client=secret_manager_client) | ||
|
||
def get_environment_variable_with_default(self, *args): | ||
"""Gets the environment variable for the variable key. | ||
Args: | ||
*args: list of keys in feathr_config.yaml file | ||
Return: | ||
A environment variable for the variable key. It will retrieve the value of the environment variables in the following order: | ||
If the key is set in the environment variable, Feathr will use the value of that environment variable | ||
If it's not set in the environment, then a default is retrieved from the feathr_config.yaml file with the same config key. | ||
If it's not available in the feathr_config.yaml file, Feathr will try to retrieve the value from key vault | ||
If not found, an empty string will be returned with a warning error message. | ||
""" | ||
|
||
# if envs exist, just return the existing env variable without reading the file | ||
env_keyword = "__".join(args) | ||
upper_env_keyword = env_keyword.upper() | ||
# make it work for lower case and upper case. | ||
env_variable = os.environ.get( | ||
env_keyword, os.environ.get(upper_env_keyword)) | ||
|
||
# If the key is set in the environment variable, Feathr will use the value of that environment variable | ||
if env_variable: | ||
return env_variable | ||
|
||
# If it's not set in the environment, then a default is retrieved from the feathr_config.yaml file with the same config key. | ||
if os.path.exists(os.path.abspath(self.config_path)): | ||
with open(os.path.abspath(self.config_path), 'r') as stream: | ||
try: | ||
yaml_config = yaml.safe_load(stream) | ||
# concat all layers and check in environment variable | ||
yaml_layer = yaml_config | ||
|
||
# resolve one layer after another | ||
for arg in args: | ||
yaml_layer = yaml_layer[arg] | ||
return yaml_layer | ||
except KeyError as exc: | ||
logger.info( | ||
"{} not found in the config file.", env_keyword) | ||
except yaml.YAMLError as exc: | ||
logger.warning(exc) | ||
|
||
# If it's not available in the feathr_config.yaml file, Feathr will try to retrieve the value from key vault | ||
if self.secret_manager_client: | ||
try: | ||
return self.secret_manager_client.get_feathr_secret(env_keyword) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this check be case insensitive (upper case and lower case)? |
||
except ResourceNotFoundError: | ||
# print out warning message if cannot find the env variable in all the resources | ||
logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', env_keyword) | ||
return None | ||
except KeyError: | ||
# print out warning message if cannot find the env variable in all the resources | ||
logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', env_keyword) | ||
return None | ||
|
||
def get_environment_variable(self, variable_key): | ||
"""Gets the environment variable for the variable key. | ||
|
||
Args: | ||
variable_key: environment variable key that is used to retrieve the environment variable | ||
Return: | ||
A environment variable for the variable key. It will retrieve the value of the environment variables in the following order: | ||
If the key is set in the environment variable, Feathr will use the value of that environment variable | ||
If it's not available in the environment variable file, Feathr will try to retrieve the value from key vault | ||
If not found, an empty string will be returned with a warning error message. | ||
""" | ||
env_var_value = os.environ.get(variable_key) | ||
|
||
if env_var_value: | ||
return env_var_value | ||
|
||
# If it's not available in the environment variable file, Feathr will try to retrieve the value from key vault | ||
logger.info(variable_key + ' is not set in the environment variables.') | ||
|
||
if self.secret_manager_client: | ||
try: | ||
return self.secret_manager_client.get_feathr_secret(variable_key) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
except ResourceNotFoundError: | ||
# print out warning message if cannot find the env variable in all the resources | ||
logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', variable_key) | ||
return None | ||
except KeyError: | ||
# print out warning message if cannot find the env variable in all the resources | ||
logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', variable_key) | ||
return None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As an alternative, we can include in the documentation, that IRSA based authentication is possible as well if the users are running feathr in k8s. More documentation here: https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html
I don't believe any code changes are required for this auth to be supported.