Skip to content

Commit

Permalink
Setting spark.app.id to a more intuitive name (#124)
Browse files Browse the repository at this point in the history
* Setting spark.app.id to a more intuitive name

* Update tests

* Replace more special characters
  • Loading branch information
edingroot authored Aug 17, 2023
1 parent d62f4fe commit 457b5d1
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 5 deletions.
17 changes: 13 additions & 4 deletions service_configuration_lib/spark_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import math
import os
import re
import time
from typing import Any
from typing import Dict
Expand Down Expand Up @@ -37,6 +38,7 @@

NON_CONFIGURABLE_SPARK_OPTS = {
'spark.master',
'spark.app.id',
'spark.ui.port',
'spark.mesos.principal',
'spark.mesos.secret',
Expand Down Expand Up @@ -1076,20 +1078,27 @@ def get_spark_conf(
_pick_random_port(PREFERRED_SPARK_UI_PORT),
)

spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)}

if aws_creds[2] is not None:
spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_ENV_CREDENTIALS_PROVIDER

# app_name from env is already appended port and time to make it unique
app_name = (spark_opts_from_env or {}).get('spark.app.name')
if not app_name:
# We want to make the app name more unique so that we can search it
# from history server.
app_name = f'{app_base_name}_{ui_port}_{int(time.time())}'

spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)}

if aws_creds[2] is not None:
spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_ENV_CREDENTIALS_PROVIDER
# Explicitly setting app id: replace special characters with '_' to make it consistent
# in all places for metric systems:
# - since in the Prometheus metrics endpoint those will be converted to '_'
# - while the 'spark-app-selector' executor pod label will keep the original app id
app_id = re.sub(r'[\.,-]', '_', app_name)

spark_conf.update({
'spark.app.name': app_name,
'spark.app.id': app_id,
'spark.ui.port': str(ui_port),
})

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

setup(
name='service-configuration-lib',
version='2.18.3',
version='2.18.4',
provides=['service_configuration_lib'],
description='Start, stop, and inspect Yelp SOA services',
url='https://github.com/Yelp/service_configuration_lib',
Expand Down
14 changes: 14 additions & 0 deletions tests/spark_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import itertools
import json
import os
import re
import sys
from unittest import mock

Expand Down Expand Up @@ -1138,6 +1139,14 @@ def verify(output):
return [key]
return verify

@pytest.fixture
def assert_app_id(self):
def verify(output):
key = 'spark.app.id'
assert output[key] == re.sub(r'[\.,-]', '_', output['spark.app.name'])
return [key]
return verify

@pytest.fixture
def assert_mesos_conf(self):
def verify(output):
Expand Down Expand Up @@ -1230,6 +1239,7 @@ def test_leaders_get_spark_conf_kubernetes(
mock_time,
assert_ui_port,
assert_app_name,
assert_app_id,
assert_kubernetes_conf,
mock_log,
):
Expand Down Expand Up @@ -1262,6 +1272,7 @@ def test_leaders_get_spark_conf_kubernetes(
verified_keys = set(
assert_ui_port(output) +
assert_app_name(output) +
assert_app_id(output) +
assert_kubernetes_conf(output) +
list(other_spark_opts.keys()) +
list(mock_adjust_spark_requested_resources_kubernetes.return_value.keys()) +
Expand Down Expand Up @@ -1321,6 +1332,7 @@ def test_show_console_progress_jupyter(
mock_time,
assert_ui_port,
assert_app_name,
assert_app_id,
assert_local_conf,
mock_log,
):
Expand Down Expand Up @@ -1361,6 +1373,7 @@ def test_local_spark(
mock_time,
assert_ui_port,
assert_app_name,
assert_app_id,
assert_local_conf,
mock_log,
):
Expand All @@ -1385,6 +1398,7 @@ def test_local_spark(
verified_keys = set(
assert_ui_port(output) +
assert_app_name(output) +
assert_app_id(output) +
assert_local_conf(output) +
list(mock_append_spark_prometheus_conf.return_value.keys()) +
list(mock_append_event_log_conf.return_value.keys()) +
Expand Down

0 comments on commit 457b5d1

Please sign in to comment.