From 363d4357eebdf8174bcf4366a33913c02b6c14a0 Mon Sep 17 00:00:00 2001
From: Sameer Sharma <30409342+CaptainSame@users.noreply.github.com>
Date: Thu, 29 Aug 2024 19:45:37 +0100
Subject: [PATCH] MLCOMPUTE-1496 | change logging levels to reduce logs (#147)

* MLCOMPUTE-1496 | change logging levels to reduce logs

---------

Co-authored-by: Sameer Sharma
---
 service_configuration_lib/spark_config.py | 32 +++++++++++------------
 setup.py                                  |  2 +-
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/service_configuration_lib/spark_config.py b/service_configuration_lib/spark_config.py
index 8e959f5..5c11293 100644
--- a/service_configuration_lib/spark_config.py
+++ b/service_configuration_lib/spark_config.py
@@ -76,7 +76,7 @@
 DEFAULT_SPARK_RUN_CONFIG = '/nail/srv/configs/spark.yaml'
 
 log = logging.Logger(__name__)
-log.setLevel(logging.INFO)
+log.setLevel(logging.WARN)
 
 
 class UnsupportedClusterManagerException(Exception):
@@ -455,7 +455,7 @@ def get_dra_configs(self, spark_opts: Dict[str, str]) -> Dict[str, str]:
         spark_app_name = spark_opts.get('spark.app.name', '')
 
-        log.warning(
+        log.info(
             TextColors.yellow(
                 '\nSpark Dynamic Resource Allocation (DRA) enabled for this batch. More info: y/spark-dra.\n',
             ),
         )
@@ -473,7 +473,7 @@ def get_dra_configs(self, spark_opts: Dict[str, str]) -> Dict[str, str]:
             if _is_jupyterhub_job(spark_app_name):
                 # increase cachedExecutorIdleTimeout by 15 minutes in case of Jupyterhub
                 cached_executor_idle_timeout = str(int(cached_executor_idle_timeout[:-1]) + 900) + 's'
-            log.warning(
+            log.info(
                 f'\nSetting {TextColors.yellow("spark.dynamicAllocation.cachedExecutorIdleTimeout")} as '
                 f'{cached_executor_idle_timeout}. Executor with cached data block will be released '
                 f'if it has been idle for this duration. If you wish to change the value of '
@@ -524,7 +524,7 @@ def get_dra_configs(self, spark_opts: Dict[str, str]) -> Dict[str, str]:
                 warn_msg = f'{warn_msg} {min_executors}'
 
             spark_opts['spark.dynamicAllocation.minExecutors'] = str(min_executors)
-            log.warning(
+            log.info(
                 f'\n{warn_msg}. If you wish to change the value of minimum executors, please provide '
                 f'the exact value of spark.dynamicAllocation.minExecutors in your spark args\n',
             )
@@ -550,7 +550,7 @@ def get_dra_configs(self, spark_opts: Dict[str, str]) -> Dict[str, str]:
                 max_executors = max(max_executors, int(spark_opts['spark.dynamicAllocation.initialExecutors']))
 
             spark_opts['spark.dynamicAllocation.maxExecutors'] = str(max_executors)
-            log.warning(
+            log.info(
                 f'\nSetting {TextColors.yellow("spark.dynamicAllocation.maxExecutors")} as {max_executors}. '
                 f'If you wish to change the value of maximum executors, please provide the exact value of '
                 f'spark.dynamicAllocation.maxExecutors in your spark args\n',
@@ -567,7 +567,7 @@ def get_dra_configs(self, spark_opts: Dict[str, str]) -> Dict[str, str]:
                 initial_executors = int(spark_opts['spark.dynamicAllocation.minExecutors'])
 
             spark_opts['spark.dynamicAllocation.initialExecutors'] = str(initial_executors)
-            log.warning(
+            log.info(
                 f'\nSetting {TextColors.yellow("spark.dynamicAllocation.initialExecutors")} as {initial_executors}. '
                 f'If you wish to change the value of initial executors, please provide the exact value of '
                 f'spark.dynamicAllocation.initialExecutors in your spark args\n',
@@ -590,16 +590,16 @@ def _cap_executor_resources(
 
         if memory_mb > max_memory_gb * 1024:
             executor_memory = f'{max_memory_gb}g'
-            log.warning(warning_title)
-            log.warning(
+            log.info(warning_title)
+            log.info(
                 f' - spark.executor.memory: {int(memory_mb / 1024):3}g → {executor_memory}',
             )
             warning_title_printed = True
 
         if executor_cores > max_cores:
             if not warning_title_printed:
-                log.warning(warning_title)
-            log.warning(
+                log.info(warning_title)
+            log.info(
                 f' - spark.executor.cores: {executor_cores:3}c → {max_cores}c\n',
             )
             executor_cores = max_cores
@@ -706,7 +706,7 @@ def _calculate_resources(
                 new_memory = new_cpu * target_mem_cpu_ratio
 
         if cpu != new_cpu or memory != new_memory or instances != new_instances:
-            log.warning(
+            log.info(
                 f'Adjust Executor Resources based on recommended mem:core:: 7:1 and Bucket: '
                 f'{new_memory}g, {new_cpu}cores to better fit aws nodes\n'
                 f' - spark.executor.cores: {cpu:3}c → {new_cpu}c\n'
@@ -718,7 +718,7 @@ def _calculate_resources(
             )
 
         if new_cpu < task_cpus:
-            log.warning(
+            log.info(
                 f'Given spark.task.cpus is {task_cpus}, '
                 f'=> adjusted to {new_cpu} to keep it within the limits of adjust spark.executor.cores.\n',
             )
@@ -740,7 +740,7 @@ def _calculate_resources(
         elif memory_gb > max_memory_gb or executor_cores > max_cores:
             executor_cores, executor_memory = self._cap_executor_resources(executor_cores, executor_memory, memory_mb)
         elif force_spark_resource_configs:
-            log.warning(
+            log.info(
                 '--force-spark-resource-configs is set to true: this can result in non-optimal bin-packing '
                 'of executors on aws nodes or can lead to wastage the resources. '
                 "Please use this flag only if you have tested that standard memory/cpu configs won't work for "
@@ -900,7 +900,7 @@ def _adjust_spark_requested_resources(
             total_cpus = cpus_per_gpu * num_gpus
             num_cpus = int(executor_instances) * int(executor_cores)
             if num_cpus != total_cpus:
-                log.warning(
+                log.info(
                     f'spark.cores.max has been adjusted to {total_cpus}. '
                     'See y/horovod for sizing of GPU pools.',
                 )
@@ -1021,14 +1021,14 @@ def compute_approx_hourly_cost_dollars(
         min_dollars = round(min_cores * cost_factor, 5)
         max_dollars = round(max_cores * cost_factor, 5)
         if max_dollars * 24 > self.spark_constants['high_cost_threshold_daily']:
-            log.warning(
+            log.info(
                 TextColors.red(
                     TextColors.bold(
                         '\n!!!!! HIGH COST ALERT !!!!!',
                     ),
                 ),
             )
-            log.warning(
+            log.info(
                 TextColors.magenta(
                     TextColors.bold(
                         f'\nExpected {"maximum" if min_dollars != max_dollars else ""} cost based on requested resources: '
diff --git a/setup.py b/setup.py
index 7f0d2d5..ca85bfa 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
 
 setup(
     name='service-configuration-lib',
-    version='2.18.20',
+    version='2.18.21',
     provides=['service_configuration_lib'],
     description='Start, stop, and inspect Yelp SOA services',
     url='https://github.com/Yelp/service_configuration_lib',
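
For context on the behavior change: the patch is two-fold. Per-message calls drop from warning() to info(), and the module logger itself rises from INFO to WARN, so the demoted messages are suppressed by default rather than merely re-labelled. Below is a minimal standalone sketch of that interaction; the StreamHandler is an assumption added only so the demo prints (spark_config.py constructs its logger the same way but attaches no handler of its own):

    import logging

    # Same construction as spark_config.py after this patch: a directly
    # instantiated Logger whose level is raised to WARN.
    log = logging.Logger(__name__)
    log.addHandler(logging.StreamHandler())  # assumption: demo-only, so output is visible
    log.setLevel(logging.WARN)               # WARN is an alias of WARNING

    log.info('Spark DRA enabled for this batch.')  # suppressed: INFO < WARN
    log.warning('executor resources were capped')  # still emitted

    # Lowering the level restores the informational output:
    log.setLevel(logging.INFO)
    log.info('visible again')

Callers that still want the DRA, resource-adjustment, and cost hints can therefore lower the logger level themselves, instead of the library emitting that output on every run.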