From 63df91cfeb47cd04b31f4b77881e9176a8506219 Mon Sep 17 00:00:00 2001 From: Chi Chang Date: Wed, 16 Aug 2023 17:44:56 -0700 Subject: [PATCH 1/3] Setting spark.app.id to a more intuitive name --- service_configuration_lib/spark_config.py | 15 +++++++++++---- setup.py | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/service_configuration_lib/spark_config.py b/service_configuration_lib/spark_config.py index 1ae9bc4..df71e8e 100644 --- a/service_configuration_lib/spark_config.py +++ b/service_configuration_lib/spark_config.py @@ -37,6 +37,7 @@ NON_CONFIGURABLE_SPARK_OPTS = { 'spark.master', + 'spark.app.id', 'spark.ui.port', 'spark.mesos.principal', 'spark.mesos.secret', @@ -1076,6 +1077,11 @@ def get_spark_conf( _pick_random_port(PREFERRED_SPARK_UI_PORT), ) + spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)} + + if aws_creds[2] is not None: + spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_ENV_CREDENTIALS_PROVIDER + # app_name from env is already appended port and time to make it unique app_name = (spark_opts_from_env or {}).get('spark.app.name') if not app_name: @@ -1083,13 +1089,14 @@ def get_spark_conf( # from history server. app_name = f'{app_base_name}_{ui_port}_{int(time.time())}' - spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)} - - if aws_creds[2] is not None: - spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_ENV_CREDENTIALS_PROVIDER + # Explicitly setting app id: replace '-' to '_' to make it consistent in all places for metric systems: + # - since the Spark app id in Promehteus metrics will be converted to underscores, + # - while the 'spark-app-selector' executor pod label will keep the original app id. 
+ app_id = app_name.replace('-', '_') spark_conf.update({ 'spark.app.name': app_name, + 'spark.app.id': app_id, 'spark.ui.port': str(ui_port), }) diff --git a/setup.py b/setup.py index 1de2655..f262250 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ setup( name='service-configuration-lib', - version='2.18.3', + version='2.18.4', provides=['service_configuration_lib'], description='Start, stop, and inspect Yelp SOA services', url='https://github.com/Yelp/service_configuration_lib', From dbc83017dc1a38182a9aa56cf3eb19c026b06ff6 Mon Sep 17 00:00:00 2001 From: Chi Chang Date: Wed, 16 Aug 2023 18:00:49 -0700 Subject: [PATCH 2/3] Update tests --- tests/spark_config_test.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/spark_config_test.py b/tests/spark_config_test.py index 92b46aa..bda4127 100644 --- a/tests/spark_config_test.py +++ b/tests/spark_config_test.py @@ -1138,6 +1138,14 @@ def verify(output): return [key] return verify + @pytest.fixture + def assert_app_id(self): + def verify(output): + key = 'spark.app.id' + assert output[key] == output['spark.app.name'].replace('-', '_') + return [key] + return verify + @pytest.fixture def assert_mesos_conf(self): def verify(output): @@ -1230,6 +1238,7 @@ def test_leaders_get_spark_conf_kubernetes( mock_time, assert_ui_port, assert_app_name, + assert_app_id, assert_kubernetes_conf, mock_log, ): @@ -1262,6 +1271,7 @@ def test_leaders_get_spark_conf_kubernetes( verified_keys = set( assert_ui_port(output) + assert_app_name(output) + + assert_app_id(output) + assert_kubernetes_conf(output) + list(other_spark_opts.keys()) + list(mock_adjust_spark_requested_resources_kubernetes.return_value.keys()) + @@ -1321,6 +1331,7 @@ def test_show_console_progress_jupyter( mock_time, assert_ui_port, assert_app_name, + assert_app_id, assert_local_conf, mock_log, ): @@ -1361,6 +1372,7 @@ def test_local_spark( mock_time, assert_ui_port, assert_app_name, + assert_app_id, assert_local_conf, mock_log, ): @@ -1385,6 
+1397,7 @@ def test_local_spark( verified_keys = set( assert_ui_port(output) + assert_app_name(output) + + assert_app_id(output) + assert_local_conf(output) + list(mock_append_spark_prometheus_conf.return_value.keys()) + list(mock_append_event_log_conf.return_value.keys()) + From c0ef07907844ce49bedcec7aaaa0b6d15f846139 Mon Sep 17 00:00:00 2001 From: Chi Chang Date: Thu, 17 Aug 2023 04:27:32 -0700 Subject: [PATCH 3/3] Replace more special characters --- service_configuration_lib/spark_config.py | 10 ++++++---- tests/spark_config_test.py | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/service_configuration_lib/spark_config.py b/service_configuration_lib/spark_config.py index df71e8e..cd0a22d 100644 --- a/service_configuration_lib/spark_config.py +++ b/service_configuration_lib/spark_config.py @@ -6,6 +6,7 @@ import logging import math import os +import re import time from typing import Any from typing import Dict @@ -1089,10 +1090,11 @@ def get_spark_conf( # from history server. app_name = f'{app_base_name}_{ui_port}_{int(time.time())}' - # Explicitly setting app id: replace '-' to '_' to make it consistent in all places for metric systems: - # - since the Spark app id in Promehteus metrics will be converted to underscores, - # - while the 'spark-app-selector' executor pod label will keep the original app id. 
- app_id = app_name.replace('-', '_') + # Explicitly setting app id: replace special characters with '_' to make it consistent + # in all places for metric systems: + # - since in the Prometheus metrics endpoint those will be converted to '_' + # - while the 'spark-app-selector' executor pod label will keep the original app id + app_id = re.sub(r'[\.,-]', '_', app_name) spark_conf.update({ 'spark.app.name': app_name, + 'spark.app.id': app_id, 'spark.ui.port': str(ui_port), }) diff --git a/tests/spark_config_test.py b/tests/spark_config_test.py index bda4127..f99c5d9 100644 --- a/tests/spark_config_test.py +++ b/tests/spark_config_test.py @@ -2,6 +2,7 @@ import itertools import json import os +import re import sys from unittest import mock @@ -1142,7 +1143,7 @@ def assert_app_id(self): def verify(output): key = 'spark.app.id' - assert output[key] == output['spark.app.name'].replace('-', '_') + assert output[key] == re.sub(r'[\.,-]', '_', output['spark.app.name']) return [key] return verify