Add engine parameter to hopsworks.login (#378)
* Add engine parameter to hopsworks.login

* Remove engine param from get_feature_store

* Remove redundant code from get_feature_store

* Bump version to 4.0.0-RC1
aversey authored Oct 30, 2024
1 parent 426f730 commit 3ec2248
Showing 10 changed files with 23 additions and 31 deletions.
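
For context, a minimal usage sketch of the new API surface (assuming the `hopsworks` Python package from this repository and valid credentials; not taken from the diff itself):

    import hopsworks

    # engine may be "spark", "python" or "training"; None keeps the previous
    # auto-detection (Spark where available, otherwise Python).
    project = hopsworks.login(engine="python")

    # get_feature_store() no longer takes an engine argument; the engine chosen
    # at login applies to the returned handle.
    fs = project.get_feature_store()
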
2 changes: 1 addition & 1 deletion java/beam/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
- <version>4.0.0-RC0</version>
+ <version>4.0.0-RC1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion java/flink/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
- <version>4.0.0-RC0</version>
+ <version>4.0.0-RC1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion java/hsfs/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
- <version>4.0.0-RC0</version>
+ <version>4.0.0-RC1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion java/pom.xml
@@ -7,7 +7,7 @@
<groupId>com.logicalclocks</groupId>
<artifactId>hsfs-parent</artifactId>
<packaging>pom</packaging>
- <version>4.0.0-RC0</version>
+ <version>4.0.0-RC1</version>
<modules>
<module>hsfs</module>
<module>spark</module>
2 changes: 1 addition & 1 deletion java/spark/pom.xml
@@ -22,7 +22,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
- <version>4.0.0-RC0</version>
+ <version>4.0.0-RC1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

13 changes: 12 additions & 1 deletion python/hopsworks/__init__.py
@@ -22,6 +22,7 @@
import tempfile
import warnings
from pathlib import Path
+ from typing import Literal, Union

from hopsworks import client, constants, project, version
from hopsworks.client.exceptions import (
@@ -83,6 +84,7 @@ def login(
api_key_file: str = None,
hostname_verification: bool = False,
trust_store_path: str = None,
+ engine: Union[None, Literal["spark"], Literal["python"], Literal["training"]] = None,
) -> project.Project:
"""Connect to [Serverless Hopsworks](https://app.hopsworks.ai) by calling the `hopsworks.login()` function with no arguments.
@@ -122,6 +124,13 @@ def login(
api_key_file: Path to file wih Api Key
hostname_verification: Whether to verify Hopsworks' certificate
trust_store_path: Path on the file system containing the Hopsworks certificates
+ engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
+ which initializes the engine to Spark if the environment provides Spark, for
+ example on Hopsworks and Databricks, or falls back to Python if Spark is not
+ available, e.g. on local Python environments or AWS SageMaker. This option
+ allows you to override this behaviour. `"training"` engine is useful when only
+ feature store metadata is needed, for example training dataset location and label
+ information when Hopsworks training experiment is conducted.
# Returns
`Project`: The Project object to perform operations on
# Raises
@@ -138,7 +147,7 @@ def login(

# If inside hopsworks, just return the current project for now
if "REST_ENDPOINT" in os.environ:
- _hw_connection = _hw_connection(hostname_verification=hostname_verification)
+ _hw_connection = _hw_connection(hostname_verification=hostname_verification, engine=engine)
_connected_project = _hw_connection.get_project()
_initialize_module_apis()
print("\nLogged in to project, explore it here " + _connected_project.get_url())
@@ -207,6 +216,7 @@ def login(
_hw_connection = _hw_connection(
host=host,
port=port,
+ engine=engine,
api_key_file=api_key_path,
hostname_verification=hostname_verification,
trust_store_path=trust_store_path,
@@ -246,6 +256,7 @@ def login(
_hw_connection = _hw_connection(
host=host,
port=port,
+ engine=engine,
api_key_value=api_key,
hostname_verification=hostname_verification,
trust_store_path=trust_store_path,
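
For reference, the annotation added to `login()` above can also be written more compactly; a small equivalent-form sketch (the alias name `Engine` is hypothetical and not part of this diff):

    from typing import Literal, Optional

    # Equivalent to Union[None, Literal["spark"], Literal["python"], Literal["training"]]
    Engine = Optional[Literal["spark", "python", "training"]]
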
20 changes: 2 additions & 18 deletions python/hopsworks_common/connection.py
@@ -99,7 +99,7 @@ class Connection:
Defaults to `None`.
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
which initializes the engine to Spark if the environment provides Spark, for
- example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not
+ example on Hopsworks and Databricks, or falls back to Python if Spark is not
available, e.g. on local Python environments or AWS SageMaker. This option
allows you to override this behaviour. `"training"` engine is useful when only
feature store metadata is needed, for example training dataset location and label
@@ -150,7 +150,6 @@ def __init__(
def get_feature_store(
self,
name: Optional[str] = None,
- engine: Optional[str] = None,
): # -> feature_store.FeatureStore
# the typing is commented out due to circular dependency, it breaks auto_doc.py
"""Get a reference to a feature store to perform operations on.
@@ -160,25 +159,10 @@ def get_feature_store(
# Arguments
name: The name of the feature store, defaults to `None`.
- engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
- which initializes the engine to Spark if the environment provides Spark, for
- example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not
- available, e.g. on local Python environments or AWS SageMaker. This option
- allows you to override this behaviour. `"training"` engine is useful when only
- feature store metadata is needed, for example training dataset location and label
- information when Hopsworks training experiment is conducted.
# Returns
`FeatureStore`. A feature store handle object to perform operations on.
"""
- # Ensure the engine is initialized and of right type
- from hsfs import engine as hsfs_engine
-
- if engine:
- global _hsfs_engine_type
- _hsfs_engine_type = engine
- hsfs_engine.get_instance()
-
if not name:
name = client.get_instance()._project_name
return self._feature_store_api.get(util.append_feature_store_suffix(name))
@@ -525,7 +509,7 @@ def connection(
Defaults to `None`.
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
which initializes the engine to Spark if the environment provides Spark, for
- example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not
+ example on Hopsworks and Databricks, or falls back to Python if Spark is not
available, e.g. on local Python environments or AWS SageMaker. This option
allows you to override this behaviour. `"training"` engine is useful when only
feature store metadata is needed, for example training dataset location and label
7 changes: 2 additions & 5 deletions python/hopsworks_common/project.py
@@ -109,7 +109,7 @@ def project_namespace(self):
return self._project_namespace

def get_feature_store(
- self, name: Optional[str] = None, engine: Optional[str] = None
+ self, name: Optional[str] = None
): # -> hsfs.feature_store.FeatureStore
"""Connect to Project's Feature Store.
@@ -127,15 +127,12 @@ def get_feature_store(
# Arguments
name: Project name of the feature store.
- engine: Which engine to use, `"spark"`, `"python"` or `"training"`.
- Defaults to `"python"` when connected to [Serverless Hopsworks](https://app.hopsworks.ai).
- See [`hopsworks.connection`](connection.md#connection) documentation for more information.
# Returns
`hsfs.feature_store.FeatureStore`: The Feature Store API
# Raises
`RestAPIError`: If unable to connect
"""
- return client.get_connection().get_feature_store(name, engine)
+ return client.get_connection().get_feature_store(name)

def get_model_registry(self):
"""Connect to Project's Model Registry API.
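
With the `engine` override removed from `get_feature_store()`, the engine selected at login applies to every feature store handle in the session. A sketch under the same assumptions as above; the project names are hypothetical:

    import hopsworks

    # The engine is fixed once, at login, for the whole session.
    project = hopsworks.login(project="demo_project", engine="spark")

    # Both handles use the engine chosen at login; there is no per-call override.
    fs = project.get_feature_store()
    shared_fs = project.get_feature_store(name="shared_fs_project")
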
2 changes: 1 addition & 1 deletion python/hopsworks_common/version.py
@@ -14,4 +14,4 @@
# limitations under the License.
#

__version__ = "4.0.0rc0"
__version__ = "4.0.0rc1"
2 changes: 1 addition & 1 deletion utils/java/pom.xml
@@ -5,7 +5,7 @@

<groupId>com.logicalclocks</groupId>
<artifactId>hsfs-utils</artifactId>
- <version>4.0.0-RC0</version>
+ <version>4.0.0-RC1</version>

<properties>
<hops.version>3.2.0.0-SNAPSHOT</hops.version>
