diff --git a/service_configuration_lib/spark_config.py b/service_configuration_lib/spark_config.py index df71e8e..cd0a22d 100644 --- a/service_configuration_lib/spark_config.py +++ b/service_configuration_lib/spark_config.py @@ -6,6 +6,7 @@ import logging import math import os +import re import time from typing import Any from typing import Dict @@ -1089,10 +1090,11 @@ def get_spark_conf( # from history server. app_name = f'{app_base_name}_{ui_port}_{int(time.time())}' - # Explicitly setting app id: replace '-' to '_' to make it consistent in all places for metric systems: - # - since the Spark app id in Promehteus metrics will be converted to underscores, - # - while the 'spark-app-selector' executor pod label will keep the original app id. - app_id = app_name.replace('-', '_') + # Explicitly setting app id: replace special characters with '_' to make it consistent + # in all places for metric systems: + # - since in the Prometheus metrics endpoint those will be converted to '_' + # - while the 'spark-app-selector' executor pod label will keep the original app id + app_id = re.sub(r'[\.,-]', '_', app_name) spark_conf.update({ 'spark.app.name': app_name, diff --git a/tests/spark_config_test.py b/tests/spark_config_test.py index bda4127..f99c5d9 100644 --- a/tests/spark_config_test.py +++ b/tests/spark_config_test.py @@ -2,6 +2,7 @@ import itertools import json import os +import re import sys from unittest import mock @@ -1142,7 +1143,7 @@ def verify(output): def assert_app_id(self): def verify(output): key = 'spark.app.id' - assert output[key] == output['spark.app.name'].replace('-', '_') + assert output[key] == re.sub(r'[\.,-]', '_', output['spark.app.name']) return [key] return verify