Skip to content

Commit

Permalink
Add engine parameter to hopsworks.login (4.1) (#384)
Browse files Browse the repository at this point in the history
* Add engine parameter to hopsworks.login

* Fix the docstring for the engine parameter

* Fix typing

* Remove engine param from get_feature_store

* Remove redundant code from get_feature_store
  • Loading branch information
aversey authored Dec 4, 2024
1 parent 64c246d commit 03fc722
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 24 deletions.
13 changes: 12 additions & 1 deletion python/hopsworks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import tempfile
import warnings
from pathlib import Path
from typing import Literal, Union

from hopsworks import client, constants, project, version
from hopsworks.client.exceptions import (
Expand Down Expand Up @@ -83,6 +84,7 @@ def login(
api_key_file: str = None,
hostname_verification: bool = False,
trust_store_path: str = None,
engine: Union[None, Literal["spark"], Literal["python"], Literal["training"]] = None,
) -> project.Project:
"""Connect to [Serverless Hopsworks](https://app.hopsworks.ai) by calling the `hopsworks.login()` function with no arguments.
Expand Down Expand Up @@ -122,6 +124,13 @@ def login(
api_key_file: Path to file with Api Key
hostname_verification: Whether to verify Hopsworks' certificate
trust_store_path: Path on the file system containing the Hopsworks certificates
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
which initializes the engine to Spark if the environment provides Spark, for
example on Hopsworks and Databricks, or falls back to Python if Spark is not
available, e.g. on local Python environments or AWS SageMaker. This option
allows you to override this behaviour. The `"training"` engine is useful when only
feature store metadata is needed, for example training dataset location and label
information when a Hopsworks training experiment is conducted.
# Returns
`Project`: The Project object to perform operations on
# Raises
Expand All @@ -138,7 +147,7 @@ def login(

# If inside hopsworks, just return the current project for now
if "REST_ENDPOINT" in os.environ:
_hw_connection = _hw_connection(hostname_verification=hostname_verification)
_hw_connection = _hw_connection(hostname_verification=hostname_verification, engine=engine)
_connected_project = _hw_connection.get_project()
_initialize_module_apis()
print("\nLogged in to project, explore it here " + _connected_project.get_url())
Expand Down Expand Up @@ -207,6 +216,7 @@ def login(
_hw_connection = _hw_connection(
host=host,
port=port,
engine=engine,
api_key_file=api_key_path,
hostname_verification=hostname_verification,
trust_store_path=trust_store_path,
Expand Down Expand Up @@ -246,6 +256,7 @@ def login(
_hw_connection = _hw_connection(
host=host,
port=port,
engine=engine,
api_key_value=api_key,
hostname_verification=hostname_verification,
trust_store_path=trust_store_path,
Expand Down
20 changes: 2 additions & 18 deletions python/hopsworks_common/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class Connection:
Defaults to `None`.
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
which initializes the engine to Spark if the environment provides Spark, for
example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not
example on Hopsworks and Databricks, or falls back to Python if Spark is not
available, e.g. on local Python environments or AWS SageMaker. This option
allows you to override this behaviour. `"training"` engine is useful when only
feature store metadata is needed, for example training dataset location and label
Expand Down Expand Up @@ -151,7 +151,6 @@ def __init__(
def get_feature_store(
self,
name: Optional[str] = None,
engine: Optional[str] = None,
): # -> feature_store.FeatureStore
# the typing is commented out due to circular dependency, it breaks auto_doc.py
"""Get a reference to a feature store to perform operations on.
Expand All @@ -161,25 +160,10 @@ def get_feature_store(
# Arguments
name: The name of the feature store, defaults to `None`.
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
which initializes the engine to Spark if the environment provides Spark, for
example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not
available, e.g. on local Python environments or AWS SageMaker. This option
allows you to override this behaviour. `"training"` engine is useful when only
feature store metadata is needed, for example training dataset location and label
information when Hopsworks training experiment is conducted.
# Returns
`FeatureStore`. A feature store handle object to perform operations on.
"""
# Ensure the engine is initialized and of right type
from hsfs import engine as hsfs_engine

if engine:
global _hsfs_engine_type
_hsfs_engine_type = engine
hsfs_engine.get_instance()

if not name:
name = client.get_instance()._project_name
return self._feature_store_api.get(util.append_feature_store_suffix(name))
Expand Down Expand Up @@ -532,7 +516,7 @@ def connection(
Defaults to `None`.
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
which initializes the engine to Spark if the environment provides Spark, for
example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not
example on Hopsworks and Databricks, or falls back to Python if Spark is not
available, e.g. on local Python environments or AWS SageMaker. This option
allows you to override this behaviour. `"training"` engine is useful when only
feature store metadata is needed, for example training dataset location and label
Expand Down
7 changes: 2 additions & 5 deletions python/hopsworks_common/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def project_namespace(self):
return self._project_namespace

def get_feature_store(
self, name: Optional[str] = None, engine: Optional[str] = None
self, name: Optional[str] = None
): # -> hsfs.feature_store.FeatureStore
"""Connect to Project's Feature Store.
Expand All @@ -127,15 +127,12 @@ def get_feature_store(
# Arguments
name: Project name of the feature store.
engine: Which engine to use, `"spark"`, `"python"` or `"training"`.
Defaults to `"python"` when connected to [Serverless Hopsworks](https://app.hopsworks.ai).
See [`hopsworks.connection`](connection.md#connection) documentation for more information.
# Returns
`hsfs.feature_store.FeatureStore`: The Feature Store API
# Raises
`RestAPIError`: If unable to connect
"""
return client.get_connection().get_feature_store(name, engine)
return client.get_connection().get_feature_store(name)

def get_model_registry(self):
"""Connect to Project's Model Registry API.
Expand Down

0 comments on commit 03fc722

Please sign in to comment.