diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e06dff3..e45bcfc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- +- Fixed device name retrieval without hlsmi ([#240](https://github.com/Lightning-AI/lightning-Habana/pull/240)) ### Removed diff --git a/src/lightning_habana/__about__.py b/src/lightning_habana/__about__.py index e91ce2f7..813519e0 100644 --- a/src/lightning_habana/__about__.py +++ b/src/lightning_habana/__about__.py @@ -1,4 +1,4 @@ -__version__ = "1.7.0" +__version__ = "1.7.0.rc0" __author__ = "Lightning-AI et al." __author_email__ = "name@lightning.ai" __license__ = "Apache-2.0" diff --git a/src/lightning_habana/utils/resources.py b/src/lightning_habana/utils/resources.py index 1e079fbe..94d78158 100644 --- a/src/lightning_habana/utils/resources.py +++ b/src/lightning_habana/utils/resources.py @@ -94,6 +94,17 @@ def get_hpu_synapse_version() -> str: return hl or "0.0.0" +@lru_cache +def get_device_name_from_backend() -> str: + """Return the name of the HPU device.""" + try: + # this opens up a device to retrieve the name + return torch_hpu.get_device_name() + except (AttributeError, NameError): + # return GAUDI as default name + return "GAUDI" + + def _parse_for_device_name(line: str) -> str: """Parse the CMD output with version capture. @@ -123,9 +134,9 @@ def get_device_name_from_hlsmi() -> str: """Get hpu device name from hl-smi.""" try: proc = subprocess.Popen(["hl-smi", "-L"], stdout=subprocess.PIPE) - # TODO: FileNotFoundError: No such file or directory: 'hl-smi' except (FileNotFoundError, NotADirectoryError): - return "GAUDI" + # if hl-smi is not present, we open a device to get the name + return get_device_name_from_backend() out = proc.communicate()[0] return _parse_for_device_name(out.decode("utf-8"))