diff --git a/.github/workflows/mkdocs-main.yml b/.github/workflows/mkdocs-main.yml index 001f1fad1..28d9d747b 100644 --- a/.github/workflows/mkdocs-main.yml +++ b/.github/workflows/mkdocs-main.yml @@ -24,7 +24,7 @@ jobs: run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev] - name: generate autodoc - run: python3 auto_doc.py + run: python3 ./python/auto_doc.py - name: setup git run: | diff --git a/.github/workflows/mkdocs-release.yml b/.github/workflows/mkdocs-release.yml index e2b4b2b3f..35f2fbe54 100644 --- a/.github/workflows/mkdocs-release.yml +++ b/.github/workflows/mkdocs-release.yml @@ -29,7 +29,7 @@ jobs: run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[dev] - name: generate autodoc - run: python3 auto_doc.py + run: python3 ./python/auto_doc.py - name: setup git run: | diff --git a/hsfs/python/pyproject.toml b/hsfs/python/pyproject.toml deleted file mode 100644 index 4869bf25b..000000000 --- a/hsfs/python/pyproject.toml +++ /dev/null @@ -1,173 +0,0 @@ -[project] -name = "hsfs" -dynamic = ["version"] -requires-python = ">=3.8,<3.13" -readme = "README.md" -description = "HSFS Python SDK to interact with Hopsworks Feature Store" -keywords = [ - "Hopsworks", - "Feature Store", - "hsfs", - "Spark", - "Machine Learning", - "MLOps", - "DataOps", -] -authors = [{ name = "Hopsworks AB", email = "robin@hopsworks.ai" }] -license = { text = "Apache-2.0" } - -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Topic :: Utilities", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Intended Audience :: Developers", -] - -dependencies = [ - "pyhumps==1.6.1", - "requests", - "furl", - "boto3", - "pandas<2.2.0", - "numpy<2", - "pyjks", - "mock", - "avro==1.11.3", - "sqlalchemy", - "PyMySQL[rsa]", - "tzlocal", - "fsspec", - "retrying", - "hopsworks_aiomysql[sa]==0.2.1", - "polars>=0.20.18,<=0.21.0", - "opensearch-py>=1.1.0,<=2.4.2", -] - -[project.optional-dependencies] -python = [ - "pyarrow>=10.0", - "confluent-kafka<=2.3.0", - "fastavro>=1.4.11,<=1.8.4", - "tqdm", -] -great-expectations = ["great_expectations==0.18.12"] -dev-no-opt = [ - "pytest==7.4.4", - "pytest-mock==3.12.0", - "ruff", - "pyspark==3.1.1", - "moto[s3]==5.0.0", - "typeguard==4.2.1", -] -dev-pandas1 = [ - "pytest==7.4.4", - "pytest-mock==3.12.0", - "ruff", - "pyspark==3.1.1", - "moto[s3]==5.0.0", - "pandas<=1.5.3", - "sqlalchemy<=1.4.48", -] -dev = ["hsfs[dev-no-opt]", "hsfs[great-expectations]"] - - -[build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" - -[tool.setuptools.packages.find] -exclude = ["tests*"] -include = ["../Readme.md", "../LICENSE", "hsfs", "hsfs.*"] - -[tool.setuptools.dynamic] -version = { attr = "hsfs.version.__version__" } - -[project.urls] -Documentation = "https://docs.hopsworks.ai/latest" -Repository = "https://github.com/logicalclocks/feature-store-api" -Homepage = "https://www.hopsworks.ai" -Community = "https://community.hopsworks.ai" - - -[tool.ruff] -# Exclude a variety of commonly ignored directories. -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".ipynb_checkpoints", - ".mypy_cache", - ".nox", - ".pants.d", - ".pyenv", - ".pytest_cache", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - ".vscode", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "site-packages", - "venv", - "java" -] - -# Same as Black. -line-length = 88 -indent-width = 4 - -# Assume Python 3.8+ syntax. -target-version = "py38" - -[tool.ruff.lint] -# 1. Enable flake8-bugbear (`B`) rules, in addition to the defaults. -select = ["E4", "E7", "E9", "F", "B", "I", "W"] #, "ANN"] -ignore = [ - "B905", # zip has no strict kwarg until Python 3.10 - "ANN101", # Missing type annotation for self in method - "ANN102", # Missing type annotation for cls in classmethod - "ANN003", # Missing type annotation for **kwarg in function - "ANN002", # Missing type annotation for *args in function - "ANN401", # Allow Any in type annotations - "W505", # Doc line too long -] - -# Allow fix for all enabled rules (when `--fix`) is provided. -fixable = ["ALL"] -unfixable = [] - -# Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -[tool.ruff.lint.isort] -lines-after-imports = 2 -known-third-party = ["hopsworks", "hsfs", "hsml"] - - -[tool.ruff.format] -# Like Black, use double quotes for strings. -quote-style = "double" - -# Like Black, indent with spaces, rather than tabs. -indent-style = "space" - -# Like Black, respect magic trailing commas. -skip-magic-trailing-comma = false - -# Like Black, automatically detect the appropriate line ending. -line-ending = "auto" diff --git a/hsml/python/pyproject.toml b/hsml/python/pyproject.toml deleted file mode 100644 index e4770cd4a..000000000 --- a/hsml/python/pyproject.toml +++ /dev/null @@ -1,136 +0,0 @@ -[project] -name="hsml" -dynamic = ["version"] -requires-python = ">=3.8,<3.13" -readme = "README.md" -description = "HSML Python SDK to interact with Hopsworks Model Registry" -keywords = ["Hopsworks", "Model Registry", "hsml", "Models", "ML", "Machine Learning Models", "TensorFlow", "PyTorch", "Machine Learning", "MLOps", "DataOps"] -authors = [{name = "Hopsworks AB", email = "robin@hopswors.ai"}] -license = { text = "Apache-2.0" } - -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Topic :: Utilities", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Intended Audience :: Developers", -] - -dependencies = [ - "pyhumps==1.6.1", - "requests", - "furl", - "boto3", - "pandas", - "numpy", - "pyjks", - "mock", - "tqdm", - "grpcio>=1.49.1,<2.0.0", # ^1.49.1 - "protobuf>=3.19.0,<4.0.0", # ^3.19.0 -] - -[project.optional-dependencies] -dev = ["pytest==7.4.4", "pytest-mock==3.12.0", "ruff"] - -[build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" - - -[tool.setuptools.packages.find] -exclude = ["tests*"] -include = ["../Readme.md", "../LICENSE", "hsml", "hsml.*"] - - -[tool.setuptools.dynamic] -version = {attr = "hsml.version.__version__"} - -[project.urls] -Documentation = "https://docs.hopsworks.ai/latest" -Repository = "https://github.com/logicalclocks/machine-learning-api" -Homepage = "https://www.hopsworks.ai" -Community = "https://community.hopsworks.ai" - - -[tool.ruff] -# Exclude a variety of commonly ignored directories. -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".ipynb_checkpoints", - ".mypy_cache", - ".nox", - ".pants.d", - ".pyenv", - ".pytest_cache", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - ".vscode", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "site-packages", - "venv", - "java", -] - -# Same as Black. -line-length = 88 -indent-width = 4 - -# Assume Python 3.8+ syntax. -target-version = "py38" - -[tool.ruff.lint] -# 1. Enable flake8-bugbear (`B`) rules, in addition to the defaults. -select = ["E4", "E7", "E9", "F", "B", "I", "W"]#, "ANN"] -ignore = [ - "B905", # zip has no strict kwarg until Python 3.10 - "ANN101", # Missing type annotation for self in method - "ANN102", # Missing type annotation for cls in classmethod - "ANN003", # Missing type annotation for **kwarg in function - "ANN002", # Missing type annotation for *args in function - "ANN401", # Allow Any in type annotations - "W505", # Doc line too long -] - -# Allow fix for all enabled rules (when `--fix`) is provided. -fixable = ["ALL"] -unfixable = [] - -# Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -[tool.ruff.lint.isort] -lines-after-imports = 2 -known-third-party = ["hopsworks", "hsfs", "hsml"] - - -[tool.ruff.format] -# Like Black, use double quotes for strings. -quote-style = "double" - -# Like Black, indent with spaces, rather than tabs. -indent-style = "space" - -# Like Black, respect magic trailing commas. -skip-magic-trailing-comma = false - -# Like Black, automatically detect the appropriate line ending. -line-ending = "auto" diff --git a/auto_doc.py b/python/auto_doc.py similarity index 99% rename from auto_doc.py rename to python/auto_doc.py index 91d3f358d..342a7dcfd 100644 --- a/auto_doc.py +++ b/python/auto_doc.py @@ -19,6 +19,7 @@ import keras_autodoc + JSON_METHODS = [ "extract_fields_from_json", "from_json", @@ -682,7 +683,7 @@ }, } -hw_dir = pathlib.Path(__file__).resolve().parents[0] +hw_dir = pathlib.Path(__file__).resolve().parents[1] if "GITHUB_SHA" in os.environ: commit_sha = os.environ["GITHUB_SHA"] project_url = ( diff --git a/python/hopsworks/project.py b/python/hopsworks/project.py index 294a69435..79ccff369 100644 --- a/python/hopsworks/project.py +++ b/python/hopsworks/project.py @@ -17,6 +17,7 @@ import json +import hsfs.feature_store import humps from hopsworks import client, constants, util from hopsworks.client.external import Client @@ -29,7 +30,6 @@ kafka_api, opensearch_api, ) -from hsfs import feature_store class Project: @@ -103,7 +103,9 @@ def created(self): """Timestamp when the project was created""" return self._created - def get_feature_store(self, name: str = None, engine: str = None) -> feature_store.FeatureStore: + def get_feature_store( + self, name: str = None, engine: str = None + ) -> hsfs.feature_store.FeatureStore: """Connect to Project's Feature Store. Defaulting to the project name of default feature store. To get a @@ -142,7 +144,9 @@ def get_feature_store(self, name: str = None, engine: str = None) -> feature_sto engine=engine, ).get_feature_store(name) else: - return connection(engine=engine).get_feature_store(name) # If internal client + return connection(engine=engine).get_feature_store( + name + ) # If internal client def get_model_registry(self): """Connect to Project's Model Registry API. diff --git a/python/hsfs/core/feature_store_api.py b/python/hsfs/core/feature_store_api.py index 377a3f934..2b77d6aca 100644 --- a/python/hsfs/core/feature_store_api.py +++ b/python/hsfs/core/feature_store_api.py @@ -17,12 +17,12 @@ from typing import Union +import hsfs.feature_store from hsfs import client -from hsfs.feature_store import FeatureStore class FeatureStoreApi: - def get(self, identifier: Union[int, str]) -> FeatureStore: + def get(self, identifier: Union[int, str]) -> hsfs.feature_store.FeatureStore: """Get feature store with specific id or name. :param identifier: id or name of the feature store @@ -32,6 +32,6 @@ def get(self, identifier: Union[int, str]) -> FeatureStore: """ _client = client.get_instance() path_params = ["project", _client._project_id, "featurestores", identifier] - return FeatureStore.from_response_json( + return hsfs.feature_store.FeatureStore.from_response_json( _client._send_request("GET", path_params) ) diff --git a/python/hsfs/feature_group.py b/python/hsfs/feature_group.py index bbd92c2f1..409ae6ecd 100644 --- a/python/hsfs/feature_group.py +++ b/python/hsfs/feature_group.py @@ -39,6 +39,7 @@ import avro.schema import confluent_kafka +import hsfs.expectation_suite import humps import numpy as np import pandas as pd @@ -89,7 +90,6 @@ # if great_expectations is not installed, we will default to using native Hopsworks class as return values from hsfs.decorators import typechecked, uses_great_expectations from hsfs.embedding import EmbeddingIndex -from hsfs.expectation_suite import ExpectationSuite from hsfs.ge_validation_result import ValidationResult from hsfs.statistics import Statistics from hsfs.statistics_config import StatisticsConfig @@ -117,7 +117,7 @@ def __init__( embedding_index: Optional[EmbeddingIndex] = None, expectation_suite: Optional[ Union[ - ExpectationSuite, + hsfs.expectation_suite.ExpectationSuite, great_expectations.core.ExpectationSuite, Dict[str, Any], ] @@ -911,7 +911,11 @@ def append_features( def get_expectation_suite( self, ge_type: bool = HAS_GREAT_EXPECTATIONS - ) -> Union[ExpectationSuite, great_expectations.core.ExpectationSuite, None]: + ) -> Union[ + hsfs.expectation_suite.ExpectationSuite, + great_expectations.core.ExpectationSuite, + None, + ]: """Return the expectation suite attached to the feature group if it exists. !!! example @@ -949,12 +953,16 @@ def get_expectation_suite( def save_expectation_suite( self, expectation_suite: Union[ - ExpectationSuite, great_expectations.core.ExpectationSuite + hsfs.expectation_suite.ExpectationSuite, + great_expectations.core.ExpectationSuite, ], run_validation: bool = True, validation_ingestion_policy: Literal["always", "strict"] = "always", overwrite: bool = False, - ) -> Union[ExpectationSuite, great_expectations.core.ExpectationSuite]: + ) -> Union[ + hsfs.expectation_suite.ExpectationSuite, + great_expectations.core.ExpectationSuite, + ]: """Attach an expectation suite to a feature group and saves it for future use. If an expectation suite is already attached, it is replaced. Note that the provided expectation suite is modified inplace to include expectationId fields. @@ -985,18 +993,22 @@ def save_expectation_suite( if HAS_GREAT_EXPECTATIONS and isinstance( expectation_suite, great_expectations.core.ExpectationSuite ): - tmp_expectation_suite = ExpectationSuite.from_ge_type( - ge_expectation_suite=expectation_suite, - run_validation=run_validation, - validation_ingestion_policy=validation_ingestion_policy, - feature_store_id=self._feature_store_id, - feature_group_id=self._id, + tmp_expectation_suite = ( + hsfs.expectation_suite.ExpectationSuite.from_ge_type( + ge_expectation_suite=expectation_suite, + run_validation=run_validation, + validation_ingestion_policy=validation_ingestion_policy, + feature_store_id=self._feature_store_id, + feature_group_id=self._id, + ) ) - elif isinstance(expectation_suite, ExpectationSuite): + elif isinstance(expectation_suite, hsfs.expectation_suite.ExpectationSuite): tmp_expectation_suite = expectation_suite.to_json_dict(decamelize=True) tmp_expectation_suite["feature_group_id"] = self._id tmp_expectation_suite["feature_store_id"] = self._feature_store_id - tmp_expectation_suite = ExpectationSuite(**tmp_expectation_suite) + tmp_expectation_suite = hsfs.expectation_suite.ExpectationSuite( + **tmp_expectation_suite + ) else: raise TypeError( "The provided expectation suite type `{}` is not supported. Use Great Expectation `ExpectationSuite` or HSFS' own `ExpectationSuite` object.".format( @@ -1243,7 +1255,7 @@ def validate( dataframe: Optional[ Union[pd.DataFrame, TypeVar("pyspark.sql.DataFrame")] # noqa: F821 ] = None, - expectation_suite: Optional[ExpectationSuite] = None, + expectation_suite: Optional[hsfs.expectation_suite.ExpectationSuite] = None, save_report: Optional[bool] = False, validation_options: Optional[Dict[str, Any]] = None, ingestion_result: Literal[ @@ -1859,7 +1871,7 @@ def location(self) -> Optional[str]: @property def expectation_suite( self, - ) -> Optional[ExpectationSuite]: + ) -> Optional[hsfs.expectation_suite.ExpectationSuite]: """Expectation Suite configuration object defining the settings for data validation of the feature group.""" return self._expectation_suite @@ -1868,22 +1880,24 @@ def expectation_suite( def expectation_suite( self, expectation_suite: Union[ - ExpectationSuite, + hsfs.expectation_suite.ExpectationSuite, great_expectations.core.ExpectationSuite, Dict[str, Any], None, ], ) -> None: - if isinstance(expectation_suite, ExpectationSuite): + if isinstance(expectation_suite, hsfs.expectation_suite.ExpectationSuite): tmp_expectation_suite = expectation_suite.to_json_dict(decamelize=True) tmp_expectation_suite["feature_group_id"] = self._id tmp_expectation_suite["feature_store_id"] = self._feature_store_id - self._expectation_suite = ExpectationSuite(**tmp_expectation_suite) + self._expectation_suite = hsfs.expectation_suite.ExpectationSuite( + **tmp_expectation_suite + ) elif HAS_GREAT_EXPECTATIONS and isinstance( expectation_suite, great_expectations.core.expectation_suite.ExpectationSuite, ): - self._expectation_suite = ExpectationSuite( + self._expectation_suite = hsfs.expectation_suite.ExpectationSuite( **expectation_suite.to_json_dict(), feature_store_id=self._feature_store_id, feature_group_id=self._id, @@ -1892,7 +1906,9 @@ def expectation_suite( tmp_expectation_suite = expectation_suite.copy() tmp_expectation_suite["feature_store_id"] = self._feature_store_id tmp_expectation_suite["feature_group_id"] = self._id - self._expectation_suite = ExpectationSuite(**tmp_expectation_suite) + self._expectation_suite = hsfs.expectation_suite.ExpectationSuite( + **tmp_expectation_suite + ) elif expectation_suite is None: self._expectation_suite = None else: @@ -2077,7 +2093,7 @@ def __init__( expectation_suite: Optional[ Union[ great_expectations.core.ExpectationSuite, - ExpectationSuite, + hsfs.expectation_suite.ExpectationSuite, Dict[str, Any], ] ] = None, @@ -3493,7 +3509,7 @@ def __init__( event_time: Optional[str] = None, expectation_suite: Optional[ Union[ - ExpectationSuite, + hsfs.expectation_suite.ExpectationSuite, great_expectations.core.ExpectationSuite, Dict[str, Any], ] @@ -4033,7 +4049,10 @@ def __init__( statistics_config: Optional[StatisticsConfig] = None, event_time: Optional[str] = None, expectation_suite: Optional[ - Union[ExpectationSuite, great_expectations.core.ExpectationSuite] + Union[ + hsfs.expectation_suite.ExpectationSuite, + great_expectations.core.ExpectationSuite, + ] ] = None, online_enabled: bool = False, href: Optional[str] = None, diff --git a/python/hsml/client/__init__.py b/python/hsml/client/__init__.py index 3982f0c56..b3475258c 100644 --- a/python/hsml/client/__init__.py +++ b/python/hsml/client/__init__.py @@ -20,7 +20,7 @@ from hsml.client.istio import base as ist_base from hsml.client.istio import external as ist_external from hsml.client.istio import internal as ist_internal -from hsml.connection import CONNECTION_SAAS_HOSTNAME +from hsml.constants import CONNECTION _client_type = None @@ -49,7 +49,7 @@ def init( _client_type = client_type global _saas_connection - _saas_connection = host == CONNECTION_SAAS_HOSTNAME + _saas_connection = host == CONNECTION.SAAS_HOSTNAME global _hopsworks_client if not _hopsworks_client: diff --git a/python/hsml/connection.py b/python/hsml/connection.py index d9d61b9e8..899589a4e 100644 --- a/python/hsml/connection.py +++ b/python/hsml/connection.py @@ -16,14 +16,10 @@ import os -from hsml import client -from hsml.core import model_api, model_registry_api, model_serving_api from hsml.decorators import connected, not_connected from requests.exceptions import ConnectionError -CONNECTION_SAAS_HOSTNAME = "c.app.hopsworks.ai" - HOPSWORKS_PORT_DEFAULT = 443 HOSTNAME_VERIFICATION_DEFAULT = True @@ -100,6 +96,7 @@ def __init__( api_key_file: str = None, api_key_value: str = None, ): + from hsml.core import model_api, model_registry_api, model_serving_api self._host = host self._port = port self._project = project @@ -164,6 +161,8 @@ def connect(self): conn.connect() ``` """ + from hsml import client + from hsml.core import model_api self._connected = True try: # init client @@ -196,6 +195,7 @@ def close(self): Usage is recommended but optional. """ + from hsml import client client.stop() self._model_api = None self._connected = False diff --git a/python/hsml/constants.py b/python/hsml/constants.py index 6ec99ff3c..d7af16967 100644 --- a/python/hsml/constants.py +++ b/python/hsml/constants.py @@ -18,6 +18,10 @@ DEFAULT = dict() # used as default parameter for a class object +class CONNECTION: + SAAS_HOSTNAME = "c.app.hopsworks.ai" + + class MODEL: FRAMEWORK_TENSORFLOW = "TENSORFLOW" FRAMEWORK_TORCH = "TORCH" diff --git a/python/hsml/util.py b/python/hsml/util.py index 96380b6f4..6ef6d9053 100644 --- a/python/hsml/util.py +++ b/python/hsml/util.py @@ -28,16 +28,6 @@ import pandas as pd from hsml import client from hsml.constants import DEFAULT, MODEL, PREDICTOR -from hsml.model import Model as BaseModel -from hsml.predictor import Predictor as BasePredictor -from hsml.python.model import Model as PyModel -from hsml.python.predictor import Predictor as PyPredictor -from hsml.sklearn.model import Model as SkLearnModel -from hsml.sklearn.predictor import Predictor as SkLearnPredictor -from hsml.tensorflow.model import Model as TFModel -from hsml.tensorflow.predictor import Predictor as TFPredictor -from hsml.torch.model import Model as TorchModel -from hsml.torch.predictor import Predictor as TorchPredictor from six import string_types @@ -105,6 +95,11 @@ def default(self, obj): # pylint: disable=E0202 def set_model_class(model): + from hsml.model import Model as BaseModel + from hsml.python.model import Model as PyModel + from hsml.sklearn.model import Model as SkLearnModel + from hsml.tensorflow.model import Model as TFModel + from hsml.torch.model import Model as TorchModel if "href" in model: _ = model.pop("href") if "type" in model: # backwards compatibility @@ -236,6 +231,16 @@ def validate_metrics(metrics): def get_predictor_for_model(model, **kwargs): + from hsml.model import Model as BaseModel + from hsml.predictor import Predictor as BasePredictor + from hsml.python.model import Model as PyModel + from hsml.python.predictor import Predictor as PyPredictor + from hsml.sklearn.model import Model as SkLearnModel + from hsml.sklearn.predictor import Predictor as SkLearnPredictor + from hsml.tensorflow.model import Model as TFModel + from hsml.tensorflow.predictor import Predictor as TFPredictor + from hsml.torch.model import Model as TorchModel + from hsml.torch.predictor import Predictor as TorchPredictor if not isinstance(model, BaseModel): raise ValueError( "model is of type {}, but an instance of {} class is expected".format( @@ -243,15 +248,15 @@ def get_predictor_for_model(model, **kwargs): ) ) - if type(model) == TFModel: + if type(model) is TFModel: return TFPredictor(**kwargs) - if type(model) == TorchModel: + if type(model) is TorchModel: return TorchPredictor(**kwargs) - if type(model) == SkLearnModel: + if type(model) is SkLearnModel: return SkLearnPredictor(**kwargs) - if type(model) == PyModel: + if type(model) is PyModel: return PyPredictor(**kwargs) - if type(model) == BaseModel: + if type(model) is BaseModel: return BasePredictor( # python as default framework and model server model_framework=MODEL.FRAMEWORK_PYTHON, model_server=PREDICTOR.MODEL_SERVER_PYTHON, diff --git a/python/pyproject.toml b/python/pyproject.toml index 6adfea048..9452f5061 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -7,14 +7,22 @@ description = "Hopsworks Python SDK to interact with Hopsworks Platform, Feature keywords = [ "Hopsworks", "Feature Store", + "hsfs", "Spark", "Machine Learning", "MLOps", "DataOps", + "Model Registry", + "hsml", + "Models", + "ML", + "Machine Learning Models", + "TensorFlow", + "PyTorch", ] - authors = [{ name = "Hopsworks AB", email = "robin@hopsworks.ai" }] license = { text = "Apache-2.0" } + classifiers = [ "Development Status :: 5 - Production/Stable", "Topic :: Utilities", @@ -29,19 +37,56 @@ classifiers = [ ] dependencies = [ - "hsfs[python] @ git+https://git@github.com/logicalclocks/feature-store-api@master#subdirectory=python", - "hsml @ git+https://git@github.com/logicalclocks/machine-learning-api@main#subdirectory=python", "pyhumps==1.6.1", "requests", "furl", "boto3", + "pandas<2.2.0", + "numpy<2", "pyjks", "mock", + "avro==1.11.3", + "sqlalchemy", + "PyMySQL[rsa]", + "tzlocal", + "fsspec", + "retrying", + "hopsworks_aiomysql[sa]==0.2.1", + "polars>=0.20.18,<=0.21.0", + "opensearch-py>=1.1.0,<=2.4.2", "tqdm", + "grpcio>=1.49.1,<2.0.0", # ^1.49.1 + "protobuf>=3.19.0,<4.0.0", # ^3.19.0 ] [project.optional-dependencies] -dev = ["ruff", "pytest", "great_expectations"] +python = [ + "pyarrow>=10.0", + "confluent-kafka<=2.3.0", + "fastavro>=1.4.11,<=1.8.4", + "tqdm", +] +great-expectations = ["great_expectations==0.18.12"] +dev-no-opt = [ + "hopsworks[python]", + "pytest==7.4.4", + "pytest-mock==3.12.0", + "ruff", + "pyspark==3.1.1", + "moto[s3]==5.0.0", + "typeguard==4.2.1", +] +dev-pandas1 = [ + "hopsworks[python]", + "pytest==7.4.4", + "pytest-mock==3.12.0", + "ruff", + "pyspark==3.1.1", + "moto[s3]==5.0.0", + "pandas<=1.5.3", + "sqlalchemy<=1.4.48", +] +dev = ["hopsworks[dev-no-opt,great-expectations]"] [build-system] requires = ["setuptools", "wheel"] @@ -49,12 +94,11 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] exclude = ["tests*"] -include = ["../Readme.md", "../LICENSE", "hopsworks", "hopsworks.*"] +include = ["../README.md", "../LICENSE", "hopsworks*", "hsfs*", "hsml*"] [tool.setuptools.dynamic] version = { attr = "hopsworks.version.__version__" } - [project.urls] Documentation = "https://docs.hopsworks.ai/latest" Repository = "https://github.com/logicalclocks/hopsworks-api"