Skip to content

Commit

Permalink
Merge branch 'master' of github.com:logicalclocks/feature-store-api into the-merge
Browse files Browse the repository at this point in the history
  • Loading branch information
aversey committed Jun 27, 2024
2 parents 59e03e8 + 19a45c4 commit 139f7be
Show file tree
Hide file tree
Showing 31 changed files with 857 additions and 327 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/optional-dependency.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# CI workflow: run the unit test suite WITHOUT the optional
# great_expectations dependency installed, to verify the library still
# imports and behaves gracefully when that extra is absent.
name: optional-dependency

on: pull_request

jobs:
  unit_tests_no_great_expectations:
    name: Unit Testing (No Great Expectations)
    runs-on: ubuntu-latest

    steps:
      # Pin the timezone so date/time-sensitive tests are reproducible.
      - name: Set Timezone
        run: sudo timedatectl set-timezone UTC

      - uses: actions/checkout@v4
      # setup.py reads README.md from its own directory, so copy it in
      # before installing the package from python/.
      - name: Copy README
        run: cp README.md python/

      - uses: actions/setup-python@v5
        name: Setup Python
        with:
          python-version: "3.10"
          cache: "pip"
          # Invalidate the pip cache whenever declared dependencies change.
          cache-dependency-path: "python/setup.py"
      # dev-no-opt deliberately excludes great_expectations.
      - run: pip install -e python[python,dev-no-opt]

      - name: Run Pytest suite
        env:
          # Disable usage/telemetry reporting during CI runs.
          ENABLE_HOPSWORKS_USAGE: "false"
        run: pytest python/tests
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimpor

2. Install HOPSWORKS with `docs` extras:

```bash
pip install -e .[python,dev,docs]
```
```bash
pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt
```

3. To build the docs, first run the auto doc script:

Expand Down
137 changes: 137 additions & 0 deletions auto_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,22 @@
"git_provider_properties": keras_autodoc.get_properties(
"hopsworks.git_provider.GitProvider"
),
},
"api/spine_group_api.md": {
"fg": ["hsfs.feature_group.SpineGroup"],
"fg_create": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"],
"fg_get": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"],
"fg_properties": keras_autodoc.get_properties(
"hsfs.feature_group.SpineGroup",
exclude=[
"expectation_suite",
"location",
"online_enabled",
"statistics",
"statistics_config",
"subject",
],
),
"git_provider_methods": keras_autodoc.get_methods(
"hopsworks.git_provider.GitProvider", exclude=["from_response_json", "json"]
),
Expand All @@ -133,6 +149,127 @@
"hopsworks.core.dataset_api.DatasetApi"
),
},
"api/feature_view_api.md": {
"fv": ["hsfs.feature_view.FeatureView"],
"fv_create": ["hsfs.feature_store.FeatureStore.create_feature_view"],
"fv_get": ["hsfs.feature_store.FeatureStore.get_feature_view"],
"fvs_get": ["hsfs.feature_store.FeatureStore.get_feature_views"],
"fv_properties": keras_autodoc.get_properties("hsfs.feature_view.FeatureView"),
"fv_methods": keras_autodoc.get_methods("hsfs.feature_view.FeatureView"),
},
"api/feature_api.md": {
"feature": ["hsfs.feature.Feature"],
"feature_properties": keras_autodoc.get_properties("hsfs.feature.Feature"),
"feature_methods": keras_autodoc.get_methods("hsfs.feature.Feature"),
},
"api/expectation_suite_api.md": {
"expectation_suite": ["hsfs.expectation_suite.ExpectationSuite"],
"expectation_suite_attach": [
"hsfs.feature_group.FeatureGroup.save_expectation_suite"
],
"single_expectation_api": [
"hsfs.expectation_suite.ExpectationSuite.add_expectation",
"hsfs.expectation_suite.ExpectationSuite.replace_expectation",
"hsfs.expectation_suite.ExpectationSuite.remove_expectation",
],
"expectation_suite_properties": keras_autodoc.get_properties(
"hsfs.expectation_suite.ExpectationSuite"
),
"expectation_suite_methods": keras_autodoc.get_methods(
"hsfs.expectation_suite.ExpectationSuite"
),
},
"api/feature_store_api.md": {
"fs": ["hsfs.feature_store.FeatureStore"],
"fs_get": ["hsfs.connection.Connection.get_feature_store"],
"fs_properties": keras_autodoc.get_properties(
"hsfs.feature_store.FeatureStore"
),
"fs_methods": keras_autodoc.get_methods("hsfs.feature_store.FeatureStore"),
},
"api/feature_group_api.md": {
"fg": ["hsfs.feature_group.FeatureGroup"],
"fg_create": [
"hsfs.feature_store.FeatureStore.create_feature_group",
"hsfs.feature_store.FeatureStore.get_or_create_feature_group",
],
"fg_get": ["hsfs.feature_store.FeatureStore.get_feature_group"],
"fg_properties": keras_autodoc.get_properties(
"hsfs.feature_group.FeatureGroup"
),
"fg_methods": keras_autodoc.get_methods("hsfs.feature_group.FeatureGroup"),
},
"api/external_feature_group_api.md": {
"fg": ["hsfs.feature_group.ExternalFeatureGroup"],
"fg_create": ["hsfs.feature_store.FeatureStore.create_external_feature_group"],
"fg_get": ["hsfs.feature_store.FeatureStore.get_external_feature_group"],
"fg_properties": keras_autodoc.get_properties(
"hsfs.feature_group.ExternalFeatureGroup"
),
"fg_methods": keras_autodoc.get_methods(
"hsfs.feature_group.ExternalFeatureGroup"
),
},
"api/storage_connector_api.md": {
"sc_get": [
"hsfs.feature_store.FeatureStore.get_storage_connector",
"hsfs.feature_store.FeatureStore.get_online_storage_connector",
],
"hopsfs_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.HopsFSConnector", exclude=["from_response_json"]
),
"hopsfs_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.HopsFSConnector"
),
"s3_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.S3Connector", exclude=["from_response_json"]
),
"s3_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.S3Connector"
),
"redshift_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.RedshiftConnector", exclude=["from_response_json"]
),
"redshift_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.RedshiftConnector"
),
"adls_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.AdlsConnector", exclude=["from_response_json"]
),
"adls_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.AdlsConnector"
),
"snowflake_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.SnowflakeConnector", exclude=["from_response_json"]
),
"snowflake_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.SnowflakeConnector"
),
"jdbc_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.JdbcConnector", exclude=["from_response_json"]
),
"jdbc_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.JdbcConnector"
),
"gcs_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.GcsConnector", exclude=["from_response_json"]
),
"gcs_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.GcsConnector"
),
"bigquery_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.BigQueryConnector", exclude=["from_response_json"]
),
"bigquery_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.BigQueryConnector"
),
"kafka_methods": keras_autodoc.get_methods(
"hsfs.storage_connector.KafkaConnector", exclude=["from_response_json"]
),
"kafka_properties": keras_autodoc.get_properties(
"hsfs.storage_connector.KafkaConnector"
),
},
"api/kafka_topic.md": {
"kafka_api_handle": ["hopsworks.project.Project.get_kafka_api"],
"kafka_config": ["hopsworks.core.kafka_api.KafkaApi.get_default_config"],
Expand Down
6 changes: 3 additions & 3 deletions docs/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimpor

2. Install HOPSWORKS with `docs` extras:

```bash
pip install -e .[python,dev,docs]
```
```bash
pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt
```

3. To build the docs, first run the auto doc script:

Expand Down
52 changes: 44 additions & 8 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
src="https://img.shields.io/badge/docs-HSFS-orange"
alt="Hopsworks Feature Store Documentation"
/></a>
<a><img
src="https://img.shields.io/badge/python-3.8+-blue"
alt="python"
/></a>
<a href="https://pypi.org/project/hsfs/"><img
src="https://img.shields.io/pypi/v/hsfs?color=blue"
alt="PyPiStatus"
Expand All @@ -21,9 +25,9 @@
src="https://pepy.tech/badge/hsfs/month"
alt="Downloads"
/></a>
<a href="https://github.com/psf/black"><img
src="https://img.shields.io/badge/code%20style-black-000000.svg"
alt="CodeStyle"
<a href=https://github.com/astral-sh/ruff><img
src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json"
alt="Ruff"
/></a>
<a><img
src="https://img.shields.io/pypi/l/hsfs?color=green"
Expand All @@ -41,19 +45,44 @@ The library is environment independent and can be used in two modes:

The library automatically configures itself based on the environment it is run.
However, to connect from an external environment such as Databricks or AWS Sagemaker,
additional connection information, such as host and port, is required. For more information about the setup from external environments, see the setup section.
additional connection information, such as host and port, is required. For more information, check out the [Hopsworks documentation](https://docs.hopsworks.ai/latest/).

## Getting Started On Hopsworks

Instantiate a connection and get the project feature store handler
Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new Api key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:

```bash
# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
pip install hopsworks
# or minimum install with the Feature Store SDK
pip install hsfs[python]
# if using zsh don't forget the quotes
pip install 'hsfs[python]'
```

You can start a notebook and instantiate a connection and get the project feature store handler.

```python
import hopsworks

project = hopsworks.login() # you will be prompted for your api key
fs = project.get_feature_store()
```

or using `hsfs` directly:

```python
import hsfs

connection = hsfs.connection()
connection = hsfs.connection(
    host="c.app.hopsworks.ai",  # host of your Hopsworks instance — TODO confirm comment truncated in source
project="your-project",
api_key_value="your-api-key",
)
fs = connection.get_feature_store()
```

Create a new feature group
Create a new feature group to start inserting feature values.
```python
fg = fs.create_feature_group("rain",
version=1,
Expand Down Expand Up @@ -135,7 +164,7 @@ You can find more examples on how to use the library in our [hops-examples](http

## Usage

Usage data is collected for improving quality of the library. It is turned on by default if the backend
Usage data is collected for improving quality of the library. It is turned on by default if the backend
is "c.app.hopsworks.ai". To turn it off, use one of the following ways:
```python
# use environment variable
Expand All @@ -159,6 +188,13 @@ For general questions about the usage of Hopsworks and the Feature Store please

Please report any issue using [Github issue tracking](https://github.com/logicalclocks/feature-store-api/issues).

Please attach the client environment from the output below in the issue:
```python
import hopsworks
import hsfs
hopsworks.login().get_feature_store()
print(hsfs.get_env())
```

## Contributing

Expand Down
4 changes: 2 additions & 2 deletions python/hsfs/constructor/external_feature_group_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(
self, on_demand_feature_group: Dict[str, Any], alias: str, **kwargs
) -> None:
self._on_demand_feature_group: Union[
"feature_group.ExternalFeatureGroup", "feature_group.SpineGroup"
feature_group.ExternalFeatureGroup, "feature_group.SpineGroup"
]
if not on_demand_feature_group["spine"]:
self._on_demand_feature_group = (
Expand All @@ -48,7 +48,7 @@ def from_response_json(cls, json_dict: Dict[str, Any]) -> ExternalFeatureGroupAl
@property
def on_demand_feature_group(
self,
) -> Union["feature_group.ExternalFeatureGroup", "feature_group.SpineGroup"]:
) -> Union[feature_group.ExternalFeatureGroup, "feature_group.SpineGroup"]:
return self._on_demand_feature_group

@property
Expand Down
15 changes: 15 additions & 0 deletions python/hsfs/core/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Shared flags and messages for optional dependencies of the hsfs core.

Currently covers the optional Great Expectations integration used for
data validation.
"""

import importlib.util


# Data Validation / Great Expectations
# True when the optional `great_expectations` package is importable in the
# current environment; validation code paths are gated on this flag.
HAS_GREAT_EXPECTATIONS: bool = (
    importlib.util.find_spec("great_expectations") is not None
)
# User-facing hint emitted when a data-validation API is used while the
# optional dependency is not installed.
great_expectations_not_installed_message: str = (
    "Great Expectations package not found. "
    "If you want to use data validation with Hopsworks you can install the corresponding extras "
    """`pip install hopsworks[great_expectations]` or `pip install "hopsworks[great_expectations]"` if using zsh. """
    "You can also install great-expectations directly in your environment e.g `pip install great-expectations`. "
    "You will need to restart your kernel if applicable."
)
# Message shown when the single-expectation API is used on a suite that has
# not yet been attached to a Feature Group.
initialise_expectation_suite_for_single_expectation_api_message: str = "Initialize Expectation Suite by attaching to a Feature Group to enable single expectation API"
4 changes: 2 additions & 2 deletions python/hsfs/core/feature_monitoring_config_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,13 +344,13 @@ def get_monitoring_job(

def run_feature_monitoring(
self,
entity: Union["feature_group.FeatureGroup", "feature_view.FeatureView"],
entity: Union[feature_group.FeatureGroup, "feature_view.FeatureView"],
config_name: str,
) -> List[FeatureMonitoringResult]:
"""Main function used by the job to actually perform the monitoring.
Args:
entity: Union["feature_group.FeatureGroup", "feature_view.FeatureView"]
entity: Union[feature_group.FeatureGroup, "feature_view.FeatureView"]
Featuregroup or Featureview object containing the feature to monitor.
config_name: str: name of the monitoring config.
Expand Down
Loading

0 comments on commit 139f7be

Please sign in to comment.