From 189c1c1b0e29e3234ee1b56c161fb63883e7d8d1 Mon Sep 17 00:00:00 2001
From: Yonatan Goldschmidt
Date: Mon, 23 Sep 2024 10:57:53 +0300
Subject: [PATCH] databricks: Add is_databricks() based on /databricks/spark/conf/metrics.properties

In some Databricks versions / images, /databricks/DBR_VERSION is missing
but /databricks/spark/conf/metrics.properties exists. Add is_databricks(),
which checks for that file, and use it in get_bigdata_info() as a fallback
after get_databricks_version(), reporting the version as "unknown".
---
 granulate_utils/metadata/bigdata/bigdatainfo.py |  4 +++-
 granulate_utils/metadata/bigdata/databricks.py  | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/granulate_utils/metadata/bigdata/bigdatainfo.py b/granulate_utils/metadata/bigdata/bigdatainfo.py
index 7f739f49..d11eafe9 100644
--- a/granulate_utils/metadata/bigdata/bigdatainfo.py
+++ b/granulate_utils/metadata/bigdata/bigdatainfo.py
@@ -1,7 +1,7 @@
 from typing import Optional
 
 from granulate_utils.metadata.bigdata.cloudera import get_cloudera_version
-from granulate_utils.metadata.bigdata.databricks import get_databricks_version
+from granulate_utils.metadata.bigdata.databricks import get_databricks_version, is_databricks
 from granulate_utils.metadata.bigdata.dataproc import get_dataproc_version
 from granulate_utils.metadata.bigdata.emr import get_emr_version
 from granulate_utils.metadata.bigdata.interfaces import BigDataInfo
@@ -15,6 +15,8 @@ def get_bigdata_info() -> Optional[BigDataInfo]:
         return BigDataInfo("emr", emr_version)
     elif databricks_version := get_databricks_version():
         return BigDataInfo("databricks", databricks_version)
+    elif is_databricks():
+        return BigDataInfo("databricks", "unknown")
     elif dataproc_version := get_dataproc_version():
         return BigDataInfo("dataproc", dataproc_version)
     elif cloudera_version := get_cloudera_version():
diff --git a/granulate_utils/metadata/bigdata/databricks.py b/granulate_utils/metadata/bigdata/databricks.py
index 0085818d..025fa81a 100644
--- a/granulate_utils/metadata/bigdata/databricks.py
+++ b/granulate_utils/metadata/bigdata/databricks.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from typing import TYPE_CHECKING, Optional, Union
 
 if TYPE_CHECKING:
@@ -6,6 +7,8 @@
 else:
     _LoggerAdapter = logging.LoggerAdapter
 
+DATABRICKS_METRICS_PROP_PATH = "/databricks/spark/conf/metrics.properties"
+
 
 def get_databricks_version() -> Optional[str]:
     try:
@@ -15,6 +18,13 @@ def get_databricks_version() -> Optional[str]:
         return None
 
 
+def is_databricks() -> bool:
+    """
+    In some Databricks versions / images, /databricks/DBR_VERSION is missing but this file exists.
+    """
+    return os.path.exists(DATABRICKS_METRICS_PROP_PATH)
+
+
 def get_hadoop_version(logger: Optional[Union[logging.Logger, _LoggerAdapter]]) -> Optional[str]:
     try:
         with open("/databricks/spark/HADOOP_VERSION", "r") as f: