From 99f10324d16c45e2b4af4e0dbd3b133ba7cb92f4 Mon Sep 17 00:00:00 2001 From: Chi Chang Date: Wed, 23 Aug 2023 08:29:53 -0700 Subject: [PATCH 1/2] Support explicitly setting spark app id --- service_configuration_lib/spark_config.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/service_configuration_lib/spark_config.py b/service_configuration_lib/spark_config.py index 69ccc5b..b480afc 100644 --- a/service_configuration_lib/spark_config.py +++ b/service_configuration_lib/spark_config.py @@ -6,7 +6,9 @@ import logging import math import os +import random import re +import string import time from typing import Any from typing import Dict @@ -38,7 +40,6 @@ NON_CONFIGURABLE_SPARK_OPTS = { 'spark.master', - 'spark.app.id', 'spark.ui.port', 'spark.mesos.principal', 'spark.mesos.secret', @@ -1029,6 +1030,7 @@ def get_spark_conf( aws_region: Optional[str] = None, service_account_name: Optional[str] = None, force_spark_resource_configs: bool = True, + spark_app_id: str = '', # to be removed once verified all applications are not explicitly setting app id ) -> Dict[str, str]: """Build spark config dict to run with spark on paasta @@ -1057,6 +1059,7 @@ If not provided, it uses cert files at {K8S_AUTH_FOLDER} to authenticate. :param force_spark_resource_configs: skip the resource/instances recalculation. This is strongly not recommended. + :param spark_app_id: explicitly set spark.app.id (not recommended; support to be removed once no application sets it) :returns: spark opts in a dict. 
""" # Mesos deprecation @@ -1095,11 +1098,19 @@ def get_spark_conf( # in all places for metric systems: # - since in the Promehteus metrics endpoint those will be converted to '_' # - while the 'spark-app-selector' executor pod label will keep the original app id - if is_jupyter: - raw_app_id = app_name + if len(spark_app_id) == 0: + if is_jupyter: + raw_app_id = app_name + else: + random_postfix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(4)) + raw_app_id = f'{paasta_service}__{paasta_instance}__{random_postfix}' + app_id = re.sub(r'[\.,-]', '_', _get_k8s_resource_name_limit_size_with_hash(raw_app_id)) else: - raw_app_id = f'{paasta_service}__{paasta_instance}__{int(time.time()) % 10000}' - app_id = re.sub(r'[\.,-]', '_', _get_k8s_resource_name_limit_size_with_hash(raw_app_id)) + log.warning( + 'We do not recommend users to set spark.app.id, as it could diminish the clarity of identification ' + 'and can potentially cause the monitoring dashboard to not work properly.', + ) + app_id = spark_app_id spark_conf.update({ 'spark.app.name': app_name, From 9a85e3e08d2ea5041c2da51d6f37c951202fa264 Mon Sep 17 00:00:00 2001 From: Chi Chang Date: Wed, 23 Aug 2023 08:59:32 -0700 Subject: [PATCH 2/2] Bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0be675b..ceb5fa3 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ setup( name='service-configuration-lib', - version='2.18.5', + version='2.18.6', provides=['service_configuration_lib'], description='Start, stop, and inspect Yelp SOA services', url='https://github.com/Yelp/service_configuration_lib',