diff --git a/java/beam/pom.xml b/java/beam/pom.xml index daafc3199..a2850ee89 100644 --- a/java/beam/pom.xml +++ b/java/beam/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 4.0.0-RC0 + 4.0.0-RC1 4.0.0 diff --git a/java/flink/pom.xml b/java/flink/pom.xml index c749d8209..9e9b560fd 100644 --- a/java/flink/pom.xml +++ b/java/flink/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 4.0.0-RC0 + 4.0.0-RC1 4.0.0 diff --git a/java/hsfs/pom.xml b/java/hsfs/pom.xml index cc7684b32..6bcb84265 100644 --- a/java/hsfs/pom.xml +++ b/java/hsfs/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 4.0.0-RC0 + 4.0.0-RC1 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index 08d5a7a0b..97dcb9aac 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -7,7 +7,7 @@ com.logicalclocks hsfs-parent pom - 4.0.0-RC0 + 4.0.0-RC1 hsfs spark diff --git a/java/spark/pom.xml b/java/spark/pom.xml index f0dd0b2ce..f9e9b7a20 100644 --- a/java/spark/pom.xml +++ b/java/spark/pom.xml @@ -22,7 +22,7 @@ hsfs-parent com.logicalclocks - 4.0.0-RC0 + 4.0.0-RC1 4.0.0 diff --git a/python/hopsworks/__init__.py b/python/hopsworks/__init__.py index 79d500769..220dcadb8 100644 --- a/python/hopsworks/__init__.py +++ b/python/hopsworks/__init__.py @@ -22,6 +22,7 @@ import tempfile import warnings from pathlib import Path +from typing import Literal, Union from hopsworks import client, constants, project, version from hopsworks.client.exceptions import ( @@ -83,6 +84,7 @@ def login( api_key_file: str = None, hostname_verification: bool = False, trust_store_path: str = None, + engine: Union[None, Literal["spark"], Literal["python"], Literal["training"]] = None, ) -> project.Project: """Connect to [Serverless Hopsworks](https://app.hopsworks.ai) by calling the `hopsworks.login()` function with no arguments. @@ -122,6 +124,13 @@ def login( api_key_file: Path to file wih Api Key hostname_verification: Whether to verify Hopsworks' certificate trust_store_path: Path on the file system containing the Hopsworks certificates + engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, + which initializes the engine to Spark if the environment provides Spark, for + example on Hopsworks and Databricks, or falls back to Python if Spark is not + available, e.g. on local Python environments or AWS SageMaker. This option + allows you to override this behaviour. `"training"` engine is useful when only + feature store metadata is needed, for example training dataset location and label + information when Hopsworks training experiment is conducted. # Returns `Project`: The Project object to perform operations on # Raises @@ -138,7 +147,7 @@ def login( # If inside hopsworks, just return the current project for now if "REST_ENDPOINT" in os.environ: - _hw_connection = _hw_connection(hostname_verification=hostname_verification) + _hw_connection = _hw_connection(hostname_verification=hostname_verification, engine=engine) _connected_project = _hw_connection.get_project() _initialize_module_apis() print("\nLogged in to project, explore it here " + _connected_project.get_url()) @@ -207,6 +216,7 @@ def login( _hw_connection = _hw_connection( host=host, port=port, + engine=engine, api_key_file=api_key_path, hostname_verification=hostname_verification, trust_store_path=trust_store_path, @@ -246,6 +256,7 @@ def login( _hw_connection = _hw_connection( host=host, port=port, + engine=engine, api_key_value=api_key, hostname_verification=hostname_verification, trust_store_path=trust_store_path, diff --git a/python/hopsworks_common/connection.py b/python/hopsworks_common/connection.py index 08ad1f8a2..1d0e075ad 100644 --- a/python/hopsworks_common/connection.py +++ b/python/hopsworks_common/connection.py @@ -99,7 +99,7 @@ class Connection: Defaults to `None`. engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, which initializes the engine to Spark if the environment provides Spark, for - example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not + example on Hopsworks and Databricks, or falls back to Python if Spark is not available, e.g. on local Python environments or AWS SageMaker. This option allows you to override this behaviour. `"training"` engine is useful when only feature store metadata is needed, for example training dataset location and label @@ -150,7 +150,6 @@ def __init__( def get_feature_store( self, name: Optional[str] = None, - engine: Optional[str] = None, ): # -> feature_store.FeatureStore # the typing is commented out due to circular dependency, it breaks auto_doc.py """Get a reference to a feature store to perform operations on. @@ -160,25 +159,10 @@ def get_feature_store( # Arguments name: The name of the feature store, defaults to `None`. - engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, - which initializes the engine to Spark if the environment provides Spark, for - example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not - available, e.g. on local Python environments or AWS SageMaker. This option - allows you to override this behaviour. `"training"` engine is useful when only - feature store metadata is needed, for example training dataset location and label - information when Hopsworks training experiment is conducted. # Returns `FeatureStore`. A feature store handle object to perform operations on. """ - # Ensure the engine is initialized and of right type - from hsfs import engine as hsfs_engine - - if engine: - global _hsfs_engine_type - _hsfs_engine_type = engine - hsfs_engine.get_instance() - if not name: name = client.get_instance()._project_name return self._feature_store_api.get(util.append_feature_store_suffix(name)) @@ -525,7 +509,7 @@ def connection( Defaults to `None`. engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, which initializes the engine to Spark if the environment provides Spark, for - example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not + example on Hopsworks and Databricks, or falls back to Python if Spark is not available, e.g. on local Python environments or AWS SageMaker. This option allows you to override this behaviour. `"training"` engine is useful when only feature store metadata is needed, for example training dataset location and label diff --git a/python/hopsworks_common/project.py b/python/hopsworks_common/project.py index 7705b603b..b35cac288 100644 --- a/python/hopsworks_common/project.py +++ b/python/hopsworks_common/project.py @@ -109,7 +109,7 @@ def project_namespace(self): return self._project_namespace def get_feature_store( - self, name: Optional[str] = None, engine: Optional[str] = None + self, name: Optional[str] = None ): # -> hsfs.feature_store.FeatureStore """Connect to Project's Feature Store. @@ -127,15 +127,12 @@ def get_feature_store( # Arguments name: Project name of the feature store. - engine: Which engine to use, `"spark"`, `"python"` or `"training"`. - Defaults to `"python"` when connected to [Serverless Hopsworks](https://app.hopsworks.ai). - See [`hopsworks.connection`](connection.md#connection) documentation for more information. # Returns `hsfs.feature_store.FeatureStore`: The Feature Store API # Raises `RestAPIError`: If unable to connect """ - return client.get_connection().get_feature_store(name, engine) + return client.get_connection().get_feature_store(name) def get_model_registry(self): """Connect to Project's Model Registry API. diff --git a/python/hopsworks_common/version.py b/python/hopsworks_common/version.py index 2964f0e77..3bbf053be 100644 --- a/python/hopsworks_common/version.py +++ b/python/hopsworks_common/version.py @@ -14,4 +14,4 @@ # limitations under the License. # -__version__ = "4.0.0rc0" +__version__ = "4.0.0rc1" diff --git a/utils/java/pom.xml b/utils/java/pom.xml index 4a5fbeb8a..141443b6f 100644 --- a/utils/java/pom.xml +++ b/utils/java/pom.xml @@ -5,7 +5,7 @@ com.logicalclocks hsfs-utils - 4.0.0-RC0 + 4.0.0-RC1 3.2.0.0-SNAPSHOT