diff --git a/auto_doc.py b/auto_doc.py
index 9b2a6ad00..1fd5b40f8 100644
--- a/auto_doc.py
+++ b/auto_doc.py
@@ -27,6 +27,9 @@
"mr_api": ["hopsworks.project.Project.get_model_registry"],
"ms_api": ["hopsworks.project.Project.get_model_serving"],
},
+ "api/udf.md": {
+ "udf": ["hopsworks.udf"],
+ },
"api/connection.md": {
"connection_create": ["hopsworks.connection.Connection.connection"],
"connection_properties": keras_autodoc.get_properties(
diff --git a/docs/templates/api/udf.md b/docs/templates/api/udf.md
new file mode 100644
index 000000000..47106cb1f
--- /dev/null
+++ b/docs/templates/api/udf.md
@@ -0,0 +1,3 @@
+# UDF API
+
+{{udf}}
\ No newline at end of file
diff --git a/hsfs/.github/pull_request_template.md b/hsfs/.github/pull_request_template.md
new file mode 100644
index 000000000..2a5a1e5ec
--- /dev/null
+++ b/hsfs/.github/pull_request_template.md
@@ -0,0 +1,32 @@
+This PR adds/fixes/changes...
+- please summarize your changes to the code
+- and make sure to include all changes to user-facing APIs
+
+JIRA Issue: -
+
+Priority for Review: -
+
+Related PRs: -
+
+**How Has This Been Tested?**
+
+- [ ] Unit Tests
+- [ ] Integration Tests
+- [ ] Manual Tests on VM
+
+
+**Checklist For The Assigned Reviewer:**
+
+```
+- [ ] Checked if merge conflicts with master exist
+- [ ] Checked if stylechecks for Java and Python pass
+- [ ] Checked if all docstrings were added and/or updated appropriately
+- [ ] Ran spellcheck on docstring
+- [ ] Checked if guides & concepts need to be updated
+- [ ] Checked if naming conventions for parameters and variables were followed
+- [ ] Checked if private methods are properly declared and used
+- [ ] Checked if hard-to-understand areas of code are commented
+- [ ] Checked if tests are effective
+- [ ] Built and deployed changes on dev VM and tested manually
+- [x] (Checked if all type annotations were added and/or updated appropriately)
+```
diff --git a/hsfs/.github/workflows/java-ut.yml b/hsfs/.github/workflows/java-ut.yml
new file mode 100644
index 000000000..f83f62caf
--- /dev/null
+++ b/hsfs/.github/workflows/java-ut.yml
@@ -0,0 +1,62 @@
+name: java
+
+on: pull_request
+
+jobs:
+ unit_tests_utc:
+ name: Java Unit Tests
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Set Timezone
+ run: sudo timedatectl set-timezone UTC
+
+ - name: Checkout
+ uses: actions/checkout@v3
+
+ - name: Set up JDK 8
+ uses: actions/setup-java@v3
+ with:
+ java-version: '8'
+ distribution: 'adopt'
+
+ - name: Cache local Maven repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: Test
+ working-directory: ./java
+ run: mvn clean test
+
+ unit_tests_local:
+ name: Java Unit Tests (Local TZ)
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Set Timezone
+ run: sudo timedatectl set-timezone Europe/Amsterdam
+
+ - name: Checkout
+ uses: actions/checkout@v3
+
+ - name: Set up JDK 8
+ uses: actions/setup-java@v3
+ with:
+ java-version: '8'
+ distribution: 'adopt'
+
+ - name: Cache local Maven repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: Test
+ working-directory: ./java
+ run: mvn clean test
diff --git a/hsfs/.github/workflows/mkdocs-master.yml b/hsfs/.github/workflows/mkdocs-master.yml
new file mode 100644
index 000000000..1c904ad28
--- /dev/null
+++ b/hsfs/.github/workflows/mkdocs-master.yml
@@ -0,0 +1,53 @@
+name: mkdocs-master
+
+on: pull_request
+
+jobs:
+ publish-master:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: set dev version
+ working-directory: ./java
+ run: echo "DEV_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV
+
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
+ - name: install deps
+ working-directory: ./python
+ run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[python,dev]
+
+ - name: generate autodoc
+ run: python3 auto_doc.py
+
+ - name: Cache local Maven repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: Set up JDK 8
+ uses: actions/setup-java@v3
+ with:
+ java-version: "8"
+ distribution: "adopt"
+
+ - name: Build java doc documentation
+ working-directory: ./java
+ run: mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../docs/javadoc
+
+ - name: setup git
+ run: |
+ git config --global user.name Mike
+ git config --global user.email mike@docs.hopsworks.ai
+
+ - name: mike deploy docs
+ run: mike deploy ${{ env.DEV_VERSION }} dev -u
diff --git a/hsfs/.github/workflows/mkdocs-release.yml b/hsfs/.github/workflows/mkdocs-release.yml
new file mode 100644
index 000000000..66ca638ae
--- /dev/null
+++ b/hsfs/.github/workflows/mkdocs-release.yml
@@ -0,0 +1,59 @@
+name: mkdocs-release
+
+on:
+ push:
+ branches: [branch-*]
+
+jobs:
+ publish-release:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: set major/minor/bugfix release version
+ working-directory: ./java
+ run: echo "RELEASE_VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -Ev 'Download|INFO|WARNING')" >> $GITHUB_ENV
+
+ - name: set major/minor release version
+ run: echo "MAJOR_VERSION=$(echo $RELEASE_VERSION | sed 's/^\([0-9]*\.[0-9]*\).*$/\1/')" >> $GITHUB_ENV
+
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
+ - name: install deps
+ working-directory: ./python
+ run: cp ../README.md . && pip3 install -r ../requirements-docs.txt && pip3 install -e .[python,dev]
+
+ - name: generate autodoc
+ run: python3 auto_doc.py
+
+ - name: Cache local Maven repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: Set up JDK 8
+ uses: actions/setup-java@v3
+ with:
+ java-version: "8"
+ distribution: "adopt"
+
+ - name: Build java doc documentation
+ working-directory: ./java
+ run: mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../docs/javadoc
+
+ - name: setup git
+ run: |
+ git config --global user.name Mike
+ git config --global user.email mike@docs.hopsworks.ai
+ - name: mike deploy docs
+ run: |
+ mike deploy ${{ env.RELEASE_VERSION }} ${{ env.MAJOR_VERSION }} -u --push
+ mike alias ${{ env.RELEASE_VERSION }} latest -u --push
diff --git a/hsfs/.github/workflows/optional-dependency.yml b/hsfs/.github/workflows/optional-dependency.yml
new file mode 100644
index 000000000..547b02029
--- /dev/null
+++ b/hsfs/.github/workflows/optional-dependency.yml
@@ -0,0 +1,29 @@
+name: optional-dependency
+
+on: pull_request
+
+jobs:
+ unit_tests_no_great_expectations:
+ name: Unit Testing (No Great Expectations)
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Set Timezone
+ run: sudo timedatectl set-timezone UTC
+
+ - uses: actions/checkout@v4
+ - name: Copy README
+ run: cp README.md python/
+
+ - uses: actions/setup-python@v5
+ name: Setup Python
+ with:
+ python-version: "3.10"
+ cache: "pip"
+ cache-dependency-path: "python/setup.py"
+ - run: pip install -e python[python,dev-no-opt]
+
+ - name: Run Pytest suite
+ env:
+ ENABLE_HOPSWORKS_USAGE: "false"
+ run: pytest python/tests
\ No newline at end of file
diff --git a/hsfs/.github/workflows/python-lint.yml b/hsfs/.github/workflows/python-lint.yml
new file mode 100644
index 000000000..f638b0128
--- /dev/null
+++ b/hsfs/.github/workflows/python-lint.yml
@@ -0,0 +1,222 @@
+name: python
+
+on: pull_request
+
+jobs:
+ lint_stylecheck:
+ name: Lint and Stylecheck
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.11"
+
+ - name: Get all changed files
+ id: get-changed-files
+ uses: tj-actions/changed-files@v44
+ with:
+ files_yaml: |
+ src:
+ - 'python/**/*.py'
+ - '!python/tests/**/*.py'
+ test:
+ - 'python/tests/**/*.py'
+
+ - name: install deps
+ run: pip install ruff==0.4.2
+
+ - name: ruff on python files
+ if: steps.get-changed-files.outputs.src_any_changed == 'true'
+ env:
+ SRC_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.src_all_changed_files }}
+ run: ruff check --output-format=github $SRC_ALL_CHANGED_FILES
+
+ - name: ruff on test files
+ if: steps.get-changed-files.outputs.test_any_changed == 'true'
+ env:
+ TEST_ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.test_all_changed_files }}
+ run: ruff check --output-format=github $TEST_ALL_CHANGED_FILES
+
+ - name: ruff format --check $ALL_CHANGED_FILES
+ env:
+ ALL_CHANGED_FILES: ${{ steps.get-changed-files.outputs.all_changed_files }}
+        run: ruff format --check $ALL_CHANGED_FILES
+
+ unit_tests_ubuntu_utc:
+ name: Unit Testing (Ubuntu)
+ needs: lint_stylecheck
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.8", "3.9", "3.10"]
+
+ steps:
+ - name: Set Timezone
+ run: sudo timedatectl set-timezone UTC
+
+ - uses: actions/checkout@v4
+ - name: Copy README
+ run: cp README.md python/
+
+ - uses: actions/setup-python@v5
+ name: Setup Python
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: "pip"
+ cache-dependency-path: "python/setup.py"
+ - run: pip install -e python[python,dev]
+
+ - name: Display Python version
+ run: python --version
+
+ - name: Run Pytest suite
+ env:
+ ENABLE_HOPSWORKS_USAGE: "false"
+ run: pytest python/tests
+
+ unit_tests_ubuntu_pandas:
+ name: Unit Testing (Ubuntu) (Pandas 1.x)
+ needs: lint_stylecheck
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Set Timezone
+ run: sudo timedatectl set-timezone UTC
+
+ - uses: actions/checkout@v4
+ - name: Copy README
+ run: cp README.md python/
+
+ - uses: actions/setup-python@v4
+ name: Setup Python
+ with:
+ python-version: "3.10"
+ cache: "pip"
+ cache-dependency-path: "python/setup.py"
+ - run: pip install -e python[python,dev-pandas1]
+
+ - name: Display Python version
+ run: python --version
+
+ - name: Run Pytest suite
+ env:
+ ENABLE_HOPSWORKS_USAGE: "false"
+ run: pytest python/tests
+
+ unit_tests_ubuntu_local:
+ name: Unit Testing (Ubuntu) (Local TZ)
+ needs: lint_stylecheck
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Set Timezone
+ run: sudo timedatectl set-timezone Europe/Amsterdam
+
+ - uses: actions/checkout@v4
+ - name: Copy README
+ run: cp README.md python/
+
+ - uses: actions/setup-python@v5
+ name: Setup Python
+ with:
+ python-version: "3.10"
+ cache: "pip"
+ cache-dependency-path: "python/setup.py"
+ - run: pip install -e 'python[python,dev]'
+
+ - name: Display Python version
+ run: python --version
+
+ - name: Run Pytest suite
+ env:
+ ENABLE_HOPSWORKS_USAGE: "false"
+ run: pytest python/tests
+
+ unit_tests_ubuntu_typechecked:
+ name: Typechecked Unit Testing (Ubuntu)
+ needs: lint_stylecheck
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Copy README
+ run: cp README.md python/
+
+ - uses: actions/setup-python@v4
+ name: Setup Python
+ with:
+ python-version: "3.10"
+ cache: "pip"
+ cache-dependency-path: "python/setup.py"
+ - run: pip install -e python[python,dev,docs]
+
+ - name: Run Pytest suite
+ env:
+ ENABLE_HOPSWORKS_USAGE: "false"
+ HOPSWORKS_RUN_WITH_TYPECHECK: "true"
+ run: pytest python/tests
+ continue-on-error: true
+
+ unit_tests_windows_utc:
+ name: Unit Testing (Windows)
+ needs: lint_stylecheck
+ runs-on: windows-latest
+
+ steps:
+ - name: Set Timezone
+ run: tzutil /s "UTC"
+
+ - uses: actions/checkout@v4
+ - name: Copy README
+ run: cp README.md python/
+
+ - uses: actions/setup-python@v5
+ name: Setup Python
+ with:
+ python-version: "3.10"
+ cache: "pip"
+ cache-dependency-path: "python/setup.py"
+ - run: pip install -e python[python,dev]
+
+ - name: Display Python version
+ run: python --version
+
+ - name: Run Pytest suite
+ env:
+ ENABLE_HOPSWORKS_USAGE: "false"
+ run: pytest python/tests
+
+ unit_tests_windows_local:
+ name: Unit Testing (Windows) (Local TZ)
+ needs: lint_stylecheck
+ runs-on: windows-latest
+
+ steps:
+ - name: Set Timezone
+ run: tzutil /s "W. Europe Standard Time"
+
+ - uses: actions/checkout@v4
+ - name: Copy README
+ run: cp README.md python/
+
+ - uses: actions/setup-python@v5
+ name: Setup Python
+ with:
+ python-version: "3.10"
+ cache: "pip"
+ cache-dependency-path: "python/setup.py"
+ - run: pip install -e python[python,dev]
+
+ - name: Display Python version
+ run: python --version
+
+ - name: Display pip freeze
+ run: pip freeze
+
+ - name: Run Pytest suite
+ env:
+ ENABLE_HOPSWORKS_USAGE: "false"
+ run: pytest python/tests
diff --git a/hsfs/.gitignore b/hsfs/.gitignore
new file mode 100644
index 000000000..a8b4c5683
--- /dev/null
+++ b/hsfs/.gitignore
@@ -0,0 +1,145 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+python/README.md
+python/LICENSE
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+.ruff_cache/
+bigquery.json
+metastore_db/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# Mike Javadoc
+docs/javadoc
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Java
+.idea
+.vscode
+*.iml
+target/
+
+# Mac
+.DS_Store
+
+# mkdocs intermediate files
+docs/generated
+
+# Test artifacts
+keyFile.json
+
+# delombok dir
+delombok
+
+# dev scripts dir
+dev_scripts/
+dev_tools/
diff --git a/hsfs/CONTRIBUTING.md b/hsfs/CONTRIBUTING.md
new file mode 100644
index 000000000..0df3de08e
--- /dev/null
+++ b/hsfs/CONTRIBUTING.md
@@ -0,0 +1,220 @@
+## Python development setup
+
+---
+
+- Fork and clone the repository
+
+- Create a new Python environment with your favourite environment manager (e.g. virtualenv or conda) and Python 3.9 (newer versions will cause a library conflict in `auto_doc.py`)
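+
+  A minimal sketch using the built-in `venv` module (the environment name `hsfs-dev` is purely illustrative; `virtualenv` or `conda` work equally well):
+
+  ```bash
+  python3.9 -m venv hsfs-dev
+  source hsfs-dev/bin/activate
+  ```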
+
+- Install repository in editable mode with development dependencies:
+
+ ```bash
+ cd python
+ pip install -e ".[python,dev]"
+ ```
+
+- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Feature Store uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory:
+
+ ```bash
+ cd python
+ pip install --user pre-commit
+ pre-commit install
+ ```
+
+ Afterwards, pre-commit will run whenever you commit.
+
+- To run formatting and code-style separately, you can configure your IDE, such as VSCode, to use `ruff`, or run it via the command line:
+
+ ```bash
+ # linting
+ ruff check python --fix
+ # formatting
+ ruff format python
+ ```
+
+### Python documentation
+
+We follow a few best practices for writing the Python documentation:
+
+1. Use the Google docstring style:
+
+ ```python
+ """[One Line Summary]
+
+ [Extended Summary]
+
+ [!!! example
+ import xyz
+ ]
+
+ # Arguments
+ arg1: Type[, optional]. Description[, defaults to `default`]
+ arg2: Type[, optional]. Description[, defaults to `default`]
+
+ # Returns
+ Type. Description.
+
+ # Raises
+ Exception. Description.
+ """
+ ```
+
+ If Python 3 type annotations are used, they are inserted automatically.
+
+2. Feature store entity engine methods (e.g. FeatureGroupEngine etc.) only require a single-line docstring.
+3. REST API implementations (e.g. FeatureGroupApi etc.) should be fully documented with docstrings, without defaults.
+4. Public APIs such as metadata objects should be fully documented with defaults (see the sketch below).
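+
+    As an illustration of point 4, here is a minimal sketch of a fully documented public method (the method, its arguments and the default value are purely hypothetical, shown only to demonstrate the convention):
+
+    ```python
+    def get_feature_group(name: str, version: int = 1):
+        """Get a feature group metadata object from the feature store.
+
+        # Arguments
+        name: str. Name of the feature group to get.
+        version: int, optional. Version of the feature group to retrieve, defaults to `1`.
+
+        # Returns
+        `FeatureGroup`. The feature group metadata object.
+
+        # Raises
+        `RestAPIError`. If unable to retrieve the feature group from the feature store.
+        """
+    ```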
+
+#### Setup and Build Documentation
+
+We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings.
+
+**Background about `mike`:**
+`mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases.
+
+1. Currently we are using our own version of `keras-autodoc`
+
+ ```bash
+ pip install git+https://github.com/logicalclocks/keras-autodoc
+ ```
+
+2. Install HSFS with the `python` and `dev` extras and the documentation requirements:
+
+ ```bash
+ pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt
+ ```
+
+3. To build the docs, first run the auto doc script:
+
+ ```bash
+ cd ..
+ python auto_doc.py
+ ```
+
+##### Option 1: Build only current version of docs
+
+4. Either build the docs, or serve them dynamically:
+
+ Note: Links and pictures might not resolve properly later on when checking with this build.
+ The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and
+ therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`.
+ Using relative links should not be affected by this, however, building the docs with version
+ (Option 2) is recommended.
+
+ ```bash
+ mkdocs build
+ # or
+ mkdocs serve
+ ```
+
+##### Option 2 (Preferred): Build multi-version doc with `mike`
+
+###### Versioning on docs.hopsworks.ai
+
+On docs.hopsworks.ai we implement the following versioning scheme:
+
+- current master branches (e.g. of hsfs corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version.
+- the latest release: rendered with full current version, e.g. **2.1.5 [latest]** with `latest` alias to indicate that this is the latest stable release.
+- previous stable releases: rendered without alias, e.g. **2.1.4**.
+
+###### Build Instructions
+
+4. For this you can either check out and make a local copy of the `upstream/gh-pages` branch, where `mike` maintains the current state of docs.hopsworks.ai, or just build documentation for the branch you are updating:
+
+ Building _one_ branch:
+
+ Checkout your dev branch with modified docs:
+
+ ```bash
+ git checkout [dev-branch]
+ ```
+
+ Generate API docs if necessary:
+
+ ```bash
+ python auto_doc.py
+ ```
+
+ Build docs with a version and alias
+
+ ```bash
+ mike deploy [version] [alias] --update-alias
+
+ # for example, if you are updating documentation to be merged to master,
+ # which will become the new SNAPSHOT version:
+ mike deploy 2.2.0-SNAPSHOT dev --update-alias
+
+ # if you are updating docs of the latest stable release branch
+ mike deploy [version] latest --update-alias
+
+ # if you are updating docs of a previous stable release branch
+ mike deploy [version]
+ ```
+
+ If no gh-pages branch existed in your local repository, this will have created it.
+
+ **Important**: If no previous docs were built, you will have to choose a version as default to be loaded as index, as follows
+
+ ```bash
+ mike set-default [version-or-alias]
+ ```
+
+ You can now checkout the gh-pages branch and serve:
+
+ ```bash
+ git checkout gh-pages
+ mike serve
+ ```
+
+ You can also list all available versions/aliases:
+
+ ```bash
+ mike list
+ ```
+
+ Delete and reset your local gh-pages branch:
+
+ ```bash
+ mike delete --all
+
+ # or delete single version
+ mike delete [version-or-alias]
+ ```
+
+#### Adding new API documentation
+
+To add new documentation for APIs, you need to add information about the method/class to document to the `auto_doc.py` script:
+
+```python
+PAGES = {
+ "connection.md": [
+ "hsfs.connection.Connection.connection"
+    ],
+ "new_template.md": [
+ "module",
+ "xyz.asd"
+ ]
+}
+```
+
+Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted:
+
+````
+## The XYZ package
+
+{{module}}
+
+Some extra content here.
+
+!!! example
+ ```python
+ import xyz
+ ```
+
+{{xyz.asd}}
+````
+
+Finally, run the `auto_doc.py` script, as described above, to update the documentation.
+
+For information about Markdown syntax and possible Admonitions/Highlighting etc. see
+the [Material for MkDocs theme's reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
diff --git a/hsfs/Dockerfile b/hsfs/Dockerfile
new file mode 100644
index 000000000..38d9025c5
--- /dev/null
+++ b/hsfs/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:22.04
+
+RUN apt-get update && \
+ apt-get install -y python3-pip git && apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install twine build virtualenv \
+ mkdocs==1.5.3 \
+ mkdocs-material==9.5.17 \
+ mike==2.0.0 \
+ git+https://github.com/logicalclocks/keras-autodoc
+
+RUN mkdir -p /.local && chmod -R 777 /.local
diff --git a/hsfs/Jenkinsfile b/hsfs/Jenkinsfile
new file mode 100644
index 000000000..d2014d5cb
--- /dev/null
+++ b/hsfs/Jenkinsfile
@@ -0,0 +1,23 @@
+pipeline {
+ agent {
+ docker {
+ label "local"
+ image "docker.hops.works/hopsworks_twine:0.0.1"
+ }
+ }
+ stages {
+ stage("publish") {
+ environment {
+ PYPI = credentials('977daeb0-e1c8-43a0-b35a-fc37bb9eee9b')
+ }
+ steps {
+ dir("python") {
+ sh "rm -f LICENSE README.md"
+ sh "cp -f ../LICENSE ../README.md ./"
+ sh "python3 -m build"
+ sh "twine upload -u $PYPI_USR -p $PYPI_PSW --skip-existing dist/*"
+ }
+ }
+ }
+ }
+}
diff --git a/hsfs/LICENSE b/hsfs/LICENSE
new file mode 100644
index 000000000..261eeb9e9
--- /dev/null
+++ b/hsfs/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/hsfs/README.md b/hsfs/README.md
new file mode 100644
index 000000000..a13ea2ce5
--- /dev/null
+++ b/hsfs/README.md
@@ -0,0 +1,201 @@
+
+HSFS is the library to interact with the Hopsworks Feature Store. The library makes creating new features, feature groups and training datasets easy.
+
+The library is environment independent and can be used in two modes:
+
+- Spark mode: For data engineering jobs that create and write features into the feature store or generate training datasets. It requires a Spark environment such as the one provided in the Hopsworks platform or Databricks. In Spark mode, HSFS provides bindings both for Python and JVM languages.
+
+- Python mode: For data science jobs to explore the features available in the feature store, generate training datasets and feed them into a training pipeline. Python mode requires just a Python interpreter and can be used in Hopsworks from Python Jobs/Jupyter Kernels, as well as in Amazon SageMaker or Kubeflow.
+
+The library automatically configures itself based on the environment in which it is run.
+However, to connect from an external environment such as Databricks or AWS SageMaker,
+additional connection information, such as host and port, is required. For more information, check out the [Hopsworks documentation](https://docs.hopsworks.ai/latest/).
+
+## Getting Started On Hopsworks
+
+Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new API key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:
+
+```bash
+# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
+pip install hopsworks
+# or minimum install with the Feature Store SDK
+pip install hsfs[python]
+# if using zsh don't forget the quotes
+pip install 'hsfs[python]'
+```
+
+You can start a notebook, instantiate a connection, and get the project's feature store handle.
+
+```python
+import hopsworks
+
+project = hopsworks.login() # you will be prompted for your api key
+fs = project.get_feature_store()
+```
+
+or using `hsfs` directly:
+
+```python
+import hsfs
+
+connection = hsfs.connection(
+ host="c.app.hopsworks.ai", #
+ project="your-project",
+ api_key_value="your-api-key",
+)
+fs = connection.get_feature_store()
+```
+
+Create a new feature group to start inserting feature values.
+```python
+fg = fs.create_feature_group("rain",
+ version=1,
+ description="Rain features",
+ primary_key=['date', 'location_id'],
+ online_enabled=True)
+
+fg.save(dataframe)
+```
+
+Upsert new data into the feature group with `time_travel_format="HUDI"`.
+```python
+fg.insert(upsert_df)
+```
+
+Retrieve the commit timeline metadata of the feature group with `time_travel_format="HUDI"`.
+```python
+fg.commit_details()
+```
+
+"Reading feature group as of specific point in time".
+```python
+fg = fs.get_feature_group("rain", 1)
+fg.read("2020-10-20 07:34:11").show()
+```
+
+Read updates that occurred between specified points in time.
+```python
+fg = fs.get_feature_group("rain", 1)
+fg.read_changes("2020-10-20 07:31:38", "2020-10-20 07:34:11").show()
+```
+
+Join features together:
+```python
+feature_join = (rain_fg.select_all()
+                .join(temperature_fg.select_all(), on=["date", "location_id"])
+                .join(location_fg.select_all()))
+feature_join.show(5)
+```
+
+Join feature groups as of a specific point in time:
+```python
+feature_join = (rain_fg.select_all()
+                .join(temperature_fg.select_all(), on=["date", "location_id"])
+                .join(location_fg.select_all())
+                .as_of("2020-10-31"))
+feature_join.show(5)
+```
+
+Join feature groups as of different points in time:
+```python
+rain_fg_q = rain_fg.select_all().as_of("2020-10-20 07:41:43")
+temperature_fg_q = temperature_fg.select_all().as_of("2020-10-20 07:32:33")
+location_fg_q = location_fg.select_all().as_of("2020-10-20 07:33:08")
+joined_features_q = rain_fg_q.join(temperature_fg_q).join(location_fg_q)
+```
+
+Use the query object to create a training dataset:
+```python
+td = fs.create_training_dataset("rain_dataset",
+ version=1,
+ data_format="tfrecords",
+ description="A test training dataset saved in TfRecords format",
+ splits={'train': 0.7, 'test': 0.2, 'validate': 0.1})
+
+td.save(feature_join)
+```
+
+A short introduction to the Scala API:
+```scala
+import com.logicalclocks.hsfs._
+val connection = HopsworksConnection.builder().build()
+val fs = connection.getFeatureStore();
+val attendances_features_fg = fs.getFeatureGroup("games_features", 1);
+attendances_features_fg.show(1)
+```
+
+You can find more examples on how to use the library in our [hops-examples](https://github.com/logicalclocks/hops-examples) repository.
+
+## Usage
+
+Usage data is collected to improve the quality of the library. It is turned on by default if the backend
+is "c.app.hopsworks.ai". To turn it off, use one of the following ways:
+```python
+# use environment variable
+import os
+os.environ["ENABLE_HOPSWORKS_USAGE"] = "false"
+
+# use `disable_usage_logging`
+import hsfs
+hsfs.disable_usage_logging()
+```
+
+The source code can be found in `python/hsfs/usage.py`.
+
+## Documentation
+
+Documentation is available at [Hopsworks Feature Store Documentation](https://docs.hopsworks.ai/).
+
+## Issues
+
+For general questions about the usage of Hopsworks and the Feature Store please open a topic on [Hopsworks Community](https://community.hopsworks.ai/).
+
+Please report any issue using [Github issue tracking](https://github.com/logicalclocks/feature-store-api/issues).
+
+Please attach the client environment from the output below to the issue:
+```python
+import hopsworks
+import hsfs
+hopsworks.login().get_feature_store()
+print(hsfs.get_env())
+```
+
+## Contributing
+
+If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md).
diff --git a/hsfs/auto_doc.py b/hsfs/auto_doc.py
new file mode 100644
index 000000000..a98af258b
--- /dev/null
+++ b/hsfs/auto_doc.py
@@ -0,0 +1,384 @@
+import os
+import pathlib
+import shutil
+
+import keras_autodoc
+
+PAGES = {
+ "api/connection_api.md": {
+ "connection": ["hsfs.connection.Connection"],
+ "connection_properties": keras_autodoc.get_properties(
+ "hsfs.connection.Connection"
+ ),
+ "connection_methods": keras_autodoc.get_methods("hsfs.connection.Connection"),
+ },
+ "api/spine_group_api.md": {
+ "fg": ["hsfs.feature_group.SpineGroup"],
+ "fg_create": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"],
+ "fg_get": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"],
+ "fg_properties": keras_autodoc.get_properties(
+ "hsfs.feature_group.SpineGroup",
+ exclude=[
+ "expectation_suite",
+ "location",
+ "online_enabled",
+ "statistics",
+ "statistics_config",
+ "subject",
+ ],
+ ),
+ "fg_methods": keras_autodoc.get_methods(
+ "hsfs.feature_group.SpineGroup",
+ exclude=[
+ "append_features",
+ "compute_statistics",
+ "delete_expectation_suite",
+ "from_response_json",
+ "get_all_validation_reports",
+ "get_expectation_suite",
+ "get_latest_validation_report",
+ "get_statistics",
+ "get_validation_history",
+ "save_expectation_suite",
+ "save_validation_report",
+ "update_from_response_json",
+ "update_statistics_config",
+ "validate",
+ ],
+ ),
+ },
+ "api/training_dataset_api.md": {
+ "td": ["hsfs.training_dataset.TrainingDataset"],
+ "td_create": ["hsfs.feature_store.FeatureStore.create_training_dataset"],
+ "td_get": ["hsfs.feature_store.FeatureStore.get_training_dataset"],
+ "td_properties": keras_autodoc.get_properties(
+ "hsfs.training_dataset.TrainingDataset"
+ ),
+ "td_methods": keras_autodoc.get_methods(
+ "hsfs.training_dataset.TrainingDataset"
+ ),
+ },
+ "api/feature_view_api.md": {
+ "fv": ["hsfs.feature_view.FeatureView"],
+ "fv_create": ["hsfs.feature_store.FeatureStore.create_feature_view"],
+ "fv_get": ["hsfs.feature_store.FeatureStore.get_feature_view"],
+ "fvs_get": ["hsfs.feature_store.FeatureStore.get_feature_views"],
+ "fv_properties": keras_autodoc.get_properties("hsfs.feature_view.FeatureView"),
+ "fv_methods": keras_autodoc.get_methods("hsfs.feature_view.FeatureView"),
+ },
+ "api/feature_api.md": {
+ "feature": ["hsfs.feature.Feature"],
+ "feature_properties": keras_autodoc.get_properties("hsfs.feature.Feature"),
+ "feature_methods": keras_autodoc.get_methods("hsfs.feature.Feature"),
+ },
+ "api/expectation_suite_api.md": {
+ "expectation_suite": ["hsfs.expectation_suite.ExpectationSuite"],
+ "expectation_suite_attach": [
+ "hsfs.feature_group.FeatureGroup.save_expectation_suite"
+ ],
+ "single_expectation_api": [
+ "hsfs.expectation_suite.ExpectationSuite.add_expectation",
+ "hsfs.expectation_suite.ExpectationSuite.replace_expectation",
+ "hsfs.expectation_suite.ExpectationSuite.remove_expectation",
+ ],
+ "expectation_suite_properties": keras_autodoc.get_properties(
+ "hsfs.expectation_suite.ExpectationSuite"
+ ),
+ "expectation_suite_methods": keras_autodoc.get_methods(
+ "hsfs.expectation_suite.ExpectationSuite"
+ ),
+ },
+ "api/feature_store_api.md": {
+ "fs": ["hsfs.feature_store.FeatureStore"],
+ "fs_get": ["hsfs.connection.Connection.get_feature_store"],
+ "fs_properties": keras_autodoc.get_properties(
+ "hsfs.feature_store.FeatureStore"
+ ),
+ "fs_methods": keras_autodoc.get_methods("hsfs.feature_store.FeatureStore"),
+ },
+ "api/feature_group_api.md": {
+ "fg": ["hsfs.feature_group.FeatureGroup"],
+ "fg_create": [
+ "hsfs.feature_store.FeatureStore.create_feature_group",
+ "hsfs.feature_store.FeatureStore.get_or_create_feature_group",
+ ],
+ "fg_get": ["hsfs.feature_store.FeatureStore.get_feature_group"],
+ "fg_properties": keras_autodoc.get_properties(
+ "hsfs.feature_group.FeatureGroup"
+ ),
+ "fg_methods": keras_autodoc.get_methods("hsfs.feature_group.FeatureGroup"),
+ },
+ "api/external_feature_group_api.md": {
+ "fg": ["hsfs.feature_group.ExternalFeatureGroup"],
+ "fg_create": ["hsfs.feature_store.FeatureStore.create_external_feature_group"],
+ "fg_get": ["hsfs.feature_store.FeatureStore.get_external_feature_group"],
+ "fg_properties": keras_autodoc.get_properties(
+ "hsfs.feature_group.ExternalFeatureGroup"
+ ),
+ "fg_methods": keras_autodoc.get_methods(
+ "hsfs.feature_group.ExternalFeatureGroup"
+ ),
+ },
+ "api/storage_connector_api.md": {
+ "sc_get": [
+ "hsfs.feature_store.FeatureStore.get_storage_connector",
+ "hsfs.feature_store.FeatureStore.get_online_storage_connector",
+ ],
+ "hopsfs_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.HopsFSConnector", exclude=["from_response_json"]
+ ),
+ "hopsfs_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.HopsFSConnector"
+ ),
+ "s3_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.S3Connector", exclude=["from_response_json"]
+ ),
+ "s3_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.S3Connector"
+ ),
+ "redshift_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.RedshiftConnector", exclude=["from_response_json"]
+ ),
+ "redshift_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.RedshiftConnector"
+ ),
+ "adls_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.AdlsConnector", exclude=["from_response_json"]
+ ),
+ "adls_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.AdlsConnector"
+ ),
+ "snowflake_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.SnowflakeConnector", exclude=["from_response_json"]
+ ),
+ "snowflake_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.SnowflakeConnector"
+ ),
+ "jdbc_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.JdbcConnector", exclude=["from_response_json"]
+ ),
+ "jdbc_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.JdbcConnector"
+ ),
+ "gcs_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.GcsConnector", exclude=["from_response_json"]
+ ),
+ "gcs_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.GcsConnector"
+ ),
+ "bigquery_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.BigQueryConnector", exclude=["from_response_json"]
+ ),
+ "bigquery_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.BigQueryConnector"
+ ),
+ "kafka_methods": keras_autodoc.get_methods(
+ "hsfs.storage_connector.KafkaConnector", exclude=["from_response_json"]
+ ),
+ "kafka_properties": keras_autodoc.get_properties(
+ "hsfs.storage_connector.KafkaConnector"
+ ),
+ },
+ "api/statistics_config_api.md": {
+ "statistics_config": ["hsfs.statistics_config.StatisticsConfig"],
+ "statistics_config_properties": keras_autodoc.get_properties(
+ "hsfs.statistics_config.StatisticsConfig"
+ ),
+ },
+ "api/transformation_functions_api.md": {
+ "transformation_function": [
+ "hsfs.transformation_function.TransformationFunction"
+ ],
+ "transformation_function_properties": keras_autodoc.get_properties(
+ "hsfs.transformation_function.TransformationFunction"
+ ),
+ "transformation_function_methods": keras_autodoc.get_methods(
+ "hsfs.transformation_function.TransformationFunction",
+ exclude=[
+ "from_response_json",
+ "update_from_response_json",
+ "json",
+ "to_dict",
+ ],
+ ),
+ "create_transformation_function": [
+ "hsfs.feature_store.FeatureStore.create_transformation_function"
+ ],
+ "get_transformation_function": [
+ "hsfs.feature_store.FeatureStore.get_transformation_function"
+ ],
+ "get_transformation_functions": [
+ "hsfs.feature_store.FeatureStore.get_transformation_functions"
+ ],
+ },
+ "api/validation_report_api.md": {
+ "validation_report": ["hsfs.validation_report.ValidationReport"],
+ "validation_report_validate": [
+ "hsfs.feature_group.FeatureGroup.validate",
+ "hsfs.feature_group.FeatureGroup.insert",
+ ],
+ "validation_report_get": [
+ "hsfs.feature_group.FeatureGroup.get_latest_validation_report",
+ "hsfs.feature_group.FeatureGroup.get_all_validation_reports",
+ ],
+ "validation_report_properties": keras_autodoc.get_properties(
+ "hsfs.validation_report.ValidationReport"
+ ),
+ "validation_report_methods": keras_autodoc.get_methods(
+ "hsfs.validation_report.ValidationReport"
+ ),
+ },
+ "api/job.md": {
+ "job_configuration": ["hsfs.core.job_configuration.JobConfiguration"],
+ "job": ["hsfs.core.job.Job"],
+ "job_methods": [
+ "hsfs.core.job.Job.get_state",
+ "hsfs.core.job.Job.get_final_state",
+ ],
+ },
+ "api/query_api.md": {
+ "query_methods": keras_autodoc.get_methods(
+ "hsfs.constructor.query.Query",
+ exclude=["json", "to_dict"],
+ ),
+ "query_properties": keras_autodoc.get_properties(
+ "hsfs.constructor.query.Query"
+ ),
+ },
+ "api/links.md": {
+ "links_properties": keras_autodoc.get_properties(
+ "hsfs.core.explicit_provenance.Links"
+ ),
+ "artifact_properties": keras_autodoc.get_properties(
+ "hsfs.core.explicit_provenance.Artifact"
+ ),
+ },
+ "api/statistics_api.md": {
+ "statistics": ["hsfs.statistics.Statistics"],
+ "statistics_properties": keras_autodoc.get_properties(
+ "hsfs.statistics.Statistics"
+ ),
+ },
+ "api/split_statistics_api.md": {
+ "split_statistics": ["hsfs.split_statistics.SplitStatistics"],
+ "split_statistics_properties": keras_autodoc.get_properties(
+ "hsfs.split_statistics.SplitStatistics"
+ ),
+ },
+ "api/feature_descriptive_statistics_api.md": {
+ "feature_descriptive_statistics": [
+ "hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics"
+ ],
+ "feature_descriptive_statistics_properties": keras_autodoc.get_properties(
+ "hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics"
+ ),
+ },
+ "api/feature_monitoring_config_api.md": {
+ "feature_monitoring_config": [
+ "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig"
+ ],
+ "feature_monitoring_config_properties": keras_autodoc.get_properties(
+ "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig"
+ ),
+ "feature_monitoring_config_methods": keras_autodoc.get_methods(
+ "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig",
+ exclude=[
+ "from_response_json",
+ "update_from_response_json",
+ "json",
+ "to_dict",
+ ],
+ ),
+ # from feature group
+ "feature_monitoring_config_creation_fg": [
+ "hsfs.feature_group.FeatureGroup.create_statistics_monitoring",
+ "hsfs.feature_group.FeatureGroup.create_feature_monitoring",
+ ],
+ # from feature view
+ "feature_monitoring_config_creation_fv": [
+ "hsfs.feature_view.FeatureView.create_statistics_monitoring",
+ "hsfs.feature_view.FeatureView.create_feature_monitoring",
+ ],
+ # retrieval
+ "feature_monitoring_config_retrieval_fg": [
+ "hsfs.feature_group.FeatureGroup.get_feature_monitoring_configs",
+ ],
+ "feature_monitoring_config_retrieval_fv": [
+ "hsfs.feature_view.FeatureView.get_feature_monitoring_configs",
+ ],
+ },
+ "api/feature_monitoring_result_api.md": {
+ "feature_monitoring_result": [
+ "hsfs.core.feature_monitoring_result.FeatureMonitoringResult"
+ ],
+ "feature_monitoring_result_retrieval": [
+ "hsfs.core.feature_monitoring_config.FeatureMonitoringConfig.get_history"
+ ],
+ "feature_monitoring_result_properties": keras_autodoc.get_properties(
+ "hsfs.core.feature_monitoring_result.FeatureMonitoringResult"
+ ),
+ },
+ "api/feature_monitoring_window_config_api.md": {
+ "feature_monitoring_window_config": [
+ "hsfs.core.monitoring_window_config.MonitoringWindowConfig"
+ ],
+ "feature_monitoring_window_config_properties": keras_autodoc.get_properties(
+ "hsfs.core.monitoring_window_config.MonitoringWindowConfig"
+ ),
+ },
+ "api/embedding_index_api.md": {
+ "embedding_index": ["hsfs.embedding.EmbeddingIndex"],
+ "embedding_index_properties": keras_autodoc.get_properties(
+ "hsfs.embedding.EmbeddingIndex"
+ ),
+ "embedding_index_methods": keras_autodoc.get_methods(
+ "hsfs.embedding.EmbeddingIndex", exclude=["from_response_json"]
+ ),
+ },
+ "api/embedding_feature_api.md": {
+ "embedding_feature": ["hsfs.embedding.EmbeddingFeature"],
+ "embedding_feature_properties": keras_autodoc.get_properties(
+ "hsfs.embedding.EmbeddingFeature"
+ ),
+ },
+ "api/similarity_function_type_api.md": {
+ "similarity_function_type": ["hsfs.embedding.SimilarityFunctionType"],
+ },
+}
+
+hsfs_dir = pathlib.Path(__file__).resolve().parents[0]
+if "GITHUB_SHA" in os.environ:
+ commit_sha = os.environ["GITHUB_SHA"]
+ project_url = (
+ f"https://github.com/logicalclocks/feature-store-api/tree/{commit_sha}/python"
+ )
+else:
+ branch_name = os.environ.get("GITHUB_BASE_REF", "master")
+ project_url = (
+ f"https://github.com/logicalclocks/feature-store-api/blob/{branch_name}/python"
+ )
+
+
+def generate(dest_dir):
+ doc_generator = keras_autodoc.DocumentationGenerator(
+ PAGES,
+ project_url=project_url,
+ template_dir="./docs/templates",
+ titles_size="###",
+ extra_aliases={
+ "hsfs.core.query.Query": "hsfs.Query",
+ "hsfs.storage_connector.StorageConnector": "hsfs.StorageConnector",
+ "hsfs.statistics_config.StatisticsConfig": "hsfs.StatisticsConfig",
+ "hsfs.training_dataset_feature.TrainingDatasetFeature": "hsfs.TrainingDatasetFeature",
+ "pandas.core.frame.DataFrame": "pandas.DataFrame",
+ },
+ max_signature_line_length=100,
+ )
+ shutil.copyfile(hsfs_dir / "CONTRIBUTING.md", dest_dir / "CONTRIBUTING.md")
+ shutil.copyfile(hsfs_dir / "README.md", dest_dir / "index.md")
+
+ doc_generator.generate(dest_dir / "generated")
+
+
+if __name__ == "__main__":
+ generate(hsfs_dir / "docs")
diff --git a/hsfs/docs/CONTRIBUTING.md b/hsfs/docs/CONTRIBUTING.md
new file mode 100644
index 000000000..0df3de08e
--- /dev/null
+++ b/hsfs/docs/CONTRIBUTING.md
@@ -0,0 +1,220 @@
+## Python development setup
+
+---
+
+- Fork and clone the repository
+
+- Create a new Python environment with your favourite environment manager (e.g. virtualenv or conda) and Python 3.9 (newer versions will cause a library conflict in `auto_doc.py`)
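+
+  A minimal sketch using the built-in `venv` module (the environment name `hsfs-dev` is purely illustrative; `virtualenv` or `conda` work equally well):
+
+  ```bash
+  python3.9 -m venv hsfs-dev
+  source hsfs-dev/bin/activate
+  ```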
+
+- Install repository in editable mode with development dependencies:
+
+ ```bash
+ cd python
+ pip install -e ".[python,dev]"
+ ```
+
+- Install [pre-commit](https://pre-commit.com/) and then activate its hooks. pre-commit is a framework for managing and maintaining multi-language pre-commit hooks. The Feature Store uses pre-commit to ensure code-style and code formatting through [ruff](https://docs.astral.sh/ruff/). Run the following commands from the `python` directory:
+
+ ```bash
+ cd python
+ pip install --user pre-commit
+ pre-commit install
+ ```
+
+ Afterwards, pre-commit will run whenever you commit.
+
+- To run formatting and code-style separately, you can configure your IDE, such as VSCode, to use `ruff`, or run it via the command line:
+
+ ```bash
+ # linting
+ ruff check python --fix
+ # formatting
+ ruff format python
+ ```
+
+### Python documentation
+
+We follow a few best practices for writing the Python documentation:
+
+1. Use the Google docstring style:
+
+ ```python
+ """[One Line Summary]
+
+ [Extended Summary]
+
+ [!!! example
+ import xyz
+ ]
+
+ # Arguments
+ arg1: Type[, optional]. Description[, defaults to `default`]
+ arg2: Type[, optional]. Description[, defaults to `default`]
+
+ # Returns
+ Type. Description.
+
+ # Raises
+ Exception. Description.
+ """
+ ```
+
+ If Python 3 type annotations are used, they are inserted automatically.
+
+2. Feature store entity engine methods (e.g. FeatureGroupEngine etc.) only require a single-line docstring.
+3. REST API implementations (e.g. FeatureGroupApi etc.) should be fully documented with docstrings, without defaults.
+4. Public APIs such as metadata objects should be fully documented with defaults (see the sketch below).
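+
+    As an illustration of point 4, here is a minimal sketch of a fully documented public method (the method, its arguments and the default value are purely hypothetical, shown only to demonstrate the convention):
+
+    ```python
+    def get_feature_group(name: str, version: int = 1):
+        """Get a feature group metadata object from the feature store.
+
+        # Arguments
+        name: str. Name of the feature group to get.
+        version: int, optional. Version of the feature group to retrieve, defaults to `1`.
+
+        # Returns
+        `FeatureGroup`. The feature group metadata object.
+
+        # Raises
+        `RestAPIError`. If unable to retrieve the feature group from the feature store.
+        """
+    ```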
+
+#### Setup and Build Documentation
+
+We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimporter/mike/)) to build the documentation and a plugin called `keras-autodoc` to auto generate Python API documentation from docstrings.
+
+**Background about `mike`:**
+`mike` builds the documentation and commits it as a new directory to the gh-pages branch. Each directory corresponds to one version of the documentation. Additionally, `mike` maintains a json in the root of gh-pages with the mappings of versions/aliases for each of the directories available. With aliases you can define extra names like `dev` or `latest`, to indicate stable and unstable releases.
+
+1. Currently we are using our own version of `keras-autodoc`
+
+ ```bash
+ pip install git+https://github.com/logicalclocks/keras-autodoc
+ ```
+
+2. Install HSFS with the `python` and `dev` extras and the documentation requirements:
+
+ ```bash
+ pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt
+ ```
+
+3. To build the docs, first run the auto doc script:
+
+ ```bash
+ cd ..
+ python auto_doc.py
+ ```
+
+##### Option 1: Build only current version of docs
+
+4. Either build the docs, or serve them dynamically:
+
+ Note: Links and pictures might not resolve properly later on when checking with this build.
+ The reason for that is that the docs are deployed with versioning on docs.hopsworks.ai and
+ therefore another level is added to all paths, e.g. `docs.hopsworks.ai/[version-or-alias]`.
+ Using relative links should not be affected by this, however, building the docs with version
+ (Option 2) is recommended.
+
+ ```bash
+ mkdocs build
+ # or
+ mkdocs serve
+ ```
+
+##### Option 2 (Preferred): Build multi-version doc with `mike`
+
+###### Versioning on docs.hopsworks.ai
+
+On docs.hopsworks.ai we implement the following versioning scheme:
+
+- current master branches (e.g. of hsfs corresponding to master of Hopsworks): rendered as current Hopsworks snapshot version, e.g. **2.2.0-SNAPSHOT [dev]**, where `dev` is an alias to indicate that this is an unstable version.
+- the latest release: rendered with full current version, e.g. **2.1.5 [latest]** with `latest` alias to indicate that this is the latest stable release.
+- previous stable releases: rendered without alias, e.g. **2.1.4**.
+
+###### Build Instructions
+
+4. For this you can either check out and make a local copy of the `upstream/gh-pages` branch, where `mike` maintains the current state of docs.hopsworks.ai, or just build the documentation for the branch you are updating:
+
+ Building _one_ branch:
+
+ Checkout your dev branch with modified docs:
+
+ ```bash
+ git checkout [dev-branch]
+ ```
+
+ Generate API docs if necessary:
+
+ ```bash
+ python auto_doc.py
+ ```
+
+    Build the docs with a version and alias:
+
+ ```bash
+ mike deploy [version] [alias] --update-alias
+
+ # for example, if you are updating documentation to be merged to master,
+ # which will become the new SNAPSHOT version:
+ mike deploy 2.2.0-SNAPSHOT dev --update-alias
+
+ # if you are updating docs of the latest stable release branch
+ mike deploy [version] latest --update-alias
+
+ # if you are updating docs of a previous stable release branch
+ mike deploy [version]
+ ```
+
+    If no gh-pages branch existed in your local repository, this command creates it.
+
+    **Important**: If no previous docs were built, you will have to choose a version to be loaded as the default index, as follows:
+
+ ```bash
+ mike set-default [version-or-alias]
+ ```
+
+    You can now check out the gh-pages branch and serve the docs:
+
+ ```bash
+ git checkout gh-pages
+ mike serve
+ ```
+
+ You can also list all available versions/aliases:
+
+ ```bash
+ mike list
+ ```
+
+ Delete and reset your local gh-pages branch:
+
+ ```bash
+ mike delete --all
+
+ # or delete single version
+ mike delete [version-or-alias]
+ ```
+
+#### Adding new API documentation
+
+To add new documentation for APIs, you need to add the methods/classes to document to the `PAGES` dictionary in the `auto_doc.py` script:
+
+```python
+PAGES = {
+ "connection.md": [
+ "hsfs.connection.Connection.connection"
+    ],
+ "new_template.md": [
+ "module",
+ "xyz.asd"
+ ]
+}
+```
+
+Now you can add a template markdown file to the `docs/templates` directory with the name you specified in the auto-doc script. The `new_template.md` file should contain a tag to identify the place at which the API documentation should be inserted:
+
+````
+## The XYZ package
+
+{{module}}
+
+Some extra content here.
+
+!!! example
+ ```python
+ import xyz
+ ```
+
+{{xyz.asd}}
+````
+
+Finally, run the `auto_doc.py` script, as described above, to update the documentation.
+
+For information about Markdown syntax and possible Admonitions/Highlighting etc. see
+the [Material for MkDocs reference documentation](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/).
diff --git a/hsfs/docs/assets/images/favicon.ico b/hsfs/docs/assets/images/favicon.ico
new file mode 100644
index 000000000..ab7573067
Binary files /dev/null and b/hsfs/docs/assets/images/favicon.ico differ
diff --git a/hsfs/docs/assets/images/hops-logo.png b/hsfs/docs/assets/images/hops-logo.png
new file mode 100644
index 000000000..d3625ae07
Binary files /dev/null and b/hsfs/docs/assets/images/hops-logo.png differ
diff --git a/hsfs/docs/assets/images/hopsworks-logo.png b/hsfs/docs/assets/images/hopsworks-logo.png
new file mode 100644
index 000000000..36f20bb12
Binary files /dev/null and b/hsfs/docs/assets/images/hopsworks-logo.png differ
diff --git a/hsfs/docs/css/custom.css b/hsfs/docs/css/custom.css
new file mode 100644
index 000000000..45f87459a
--- /dev/null
+++ b/hsfs/docs/css/custom.css
@@ -0,0 +1,114 @@
+[data-md-color-scheme="hopsworks"] {
+ --md-primary-fg-color: #1EB382;
+ --md-secondary-fg-color: #188a64;
+ --md-tertiary-fg-color: #0d493550;
+ --md-quaternary-fg-color: #fdfdfd;
+ --border-radius-variable: 5px;
+}
+
+.md-footer__inner:not([hidden]) {
+ display: none
+}
+
+/* Lex did stuff here */
+.svg_topnav{
+ width: 12px;
+ filter: invert(100);
+}
+.svg_topnav:hover{
+ width: 12px;
+ filter: invert(10);
+}
+
+.md-header[data-md-state=shadow] {
+ box-shadow: 0 0 0 0;
+}
+
+.md-tabs__item {
+ min-width: 2.25rem;
+ min-height: 1.5rem;
+}
+
+.md-tabs__item:hover {
+ background-color: var(--md-tertiary-fg-color);
+ transition: background-color 450ms;
+}
+
+/*
+.md-sidebar__scrollwrap{
+ background-color: var(--md-quaternary-fg-color);
+ padding: 15px 5px 5px 5px;
+ border-radius: var(--border-radius-variable);
+}
+*/
+.md-nav__link:focus{
+}
+
+.image_logo_02{
+ width:450px;
+}
+
+/* End of Lex did stuff here */
+
+.md-header__button.md-logo {
+ margin: .1rem;
+ padding: .1rem;
+}
+
+.md-header__button.md-logo img, .md-header__button.md-logo svg {
+ display: block;
+ width: 1.8rem;
+ height: 1.8rem;
+ fill: currentColor;
+}
+
+.md-tabs {
+ width: 100%;
+ overflow: auto;
+ color: var(--md-primary-bg-color);
+ background-color: var(--md-secondary-fg-color);
+ transition: background-color 250ms;
+}
+
+.wrapper {
+ display: grid;
+ grid-template-columns: repeat(4, 1fr);
+ gap: 10px;
+ grid-auto-rows: minmax(100px, auto);
+}
+
+.wrapper * {
+ border: 2px solid green;
+ text-align: center;
+ padding: 70px 0;
+}
+
+.one {
+ grid-column: 1 / 2;
+ grid-row: 1;
+}
+.two {
+ grid-column: 2 / 3;
+ grid-row: 1;
+}
+.three {
+ grid-column: 3 / 4;
+ grid-row: 1;
+}
+.four {
+ grid-column: 4 / 5;
+ grid-row: 1;
+}
+.five {
+ grid-column: 1 / 3;
+ grid-row: 2;
+}
+.six {
+ grid-column: 3 / 5;
+ grid-row: 2;
+}
+
+/* Jupyter Stuff */
+.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt {
+ display: none !important;
+}
diff --git a/hsfs/docs/css/dropdown.css b/hsfs/docs/css/dropdown.css
new file mode 100644
index 000000000..531f7b10d
--- /dev/null
+++ b/hsfs/docs/css/dropdown.css
@@ -0,0 +1,55 @@
+/* Style The Dropdown Button */
+.dropbtn {
+ color: white;
+ border: none;
+ cursor: pointer;
+}
+
+.md-tabs__list {
+ contain: inherit;
+}
+
+.md-tabs {
+ overflow: inherit;
+}
+
+
+/* The container
+
+HSFS is the library to interact with the Hopsworks Feature Store. The library makes creating new features, feature groups and training datasets easy.
+
+The library is environment independent and can be used in two modes:
+
+- Spark mode: For data engineering jobs that create and write features into the feature store or generate training datasets. It requires a Spark environment such as the one provided in the Hopsworks platform or Databricks. In Spark mode, HSFS provides bindings both for Python and JVM languages.
+
+- Python mode: For data science jobs to explore the features available in the feature store, generate training datasets and feed them into a training pipeline. Python mode requires just a Python interpreter and can be used in Hopsworks from Python Jobs/Jupyter Kernels, as well as from Amazon SageMaker or KubeFlow.
+
+The library automatically configures itself based on the environment in which it is run.
+However, to connect from an external environment such as Databricks or AWS SageMaker,
+additional connection information, such as host and port, is required. For more information, check out the [Hopsworks documentation](https://docs.hopsworks.ai/latest/).
+
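+As an illustration, connecting from such an external environment could look roughly like the snippet below; the host name, project and API key are placeholders for your own deployment:
+
+```python
+import hsfs
+
+# Placeholder values: replace with the details of your own Hopsworks deployment
+connection = hsfs.connection(
+    host="my-hopsworks-instance.example.com",
+    port=443,
+    project="your-project",
+    api_key_value="your-api-key",
+)
+fs = connection.get_feature_store()
+```
+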
+## Getting Started On Hopsworks
+
+Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new API key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:
+
+```bash
+# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
+pip install hopsworks
+# or minimum install with the Feature Store SDK
+pip install hsfs[python]
+# if using zsh don't forget the quotes
+pip install 'hsfs[python]'
+```
+
+You can start a notebook, instantiate a connection and get the project's feature store handle.
+
+```python
+import hopsworks
+
+project = hopsworks.login() # you will be prompted for your api key
+fs = project.get_feature_store()
+```
+
+or using `hsfs` directly:
+
+```python
+import hsfs
+
+connection = hsfs.connection(
+    host="c.app.hopsworks.ai",
+ project="your-project",
+ api_key_value="your-api-key",
+)
+fs = connection.get_feature_store()
+```
+
+Create a new feature group to start inserting feature values.
+```python
+fg = fs.create_feature_group("rain",
+ version=1,
+ description="Rain features",
+ primary_key=['date', 'location_id'],
+ online_enabled=True)
+
+fg.save(dataframe)
+```
+
+Upsert new data into the feature group with `time_travel_format="HUDI"`:
+```python
+fg.insert(upsert_df)
+```
+
+Retrieve the commit timeline metadata of the feature group with `time_travel_format="HUDI"`:
+```python
+fg.commit_details()
+```
+
+"Reading feature group as of specific point in time".
+```python
+fg = fs.get_feature_group("rain", 1)
+fg.read("2020-10-20 07:34:11").show()
+```
+
+Read updates that occurred between specified points in time:
+```python
+fg = fs.get_feature_group("rain", 1)
+fg.read_changes("2020-10-20 07:31:38", "2020-10-20 07:34:11").show()
+```
+
+Join features together:
+```python
+feature_join = (rain_fg.select_all()
+    .join(temperature_fg.select_all(), on=["date", "location_id"])
+    .join(location_fg.select_all()))
+feature_join.show(5)
+```
+
+Join feature groups as of a specific point in time:
+```python
+feature_join = (rain_fg.select_all()
+    .join(temperature_fg.select_all(), on=["date", "location_id"])
+    .join(location_fg.select_all())
+    .as_of("2020-10-31"))
+feature_join.show(5)
+```
+
+Join feature groups as of different points in time:
+```python
+rain_fg_q = rain_fg.select_all().as_of("2020-10-20 07:41:43")
+temperature_fg_q = temperature_fg.select_all().as_of("2020-10-20 07:32:33")
+location_fg_q = location_fg.select_all().as_of("2020-10-20 07:33:08")
+joined_features_q = rain_fg_q.join(temperature_fg_q).join(location_fg_q)
+```
+
+Use the query object to create a training dataset:
+```python
+td = fs.create_training_dataset("rain_dataset",
+ version=1,
+ data_format="tfrecords",
+ description="A test training dataset saved in TfRecords format",
+ splits={'train': 0.7, 'test': 0.2, 'validate': 0.1})
+
+td.save(feature_join)
+```
+
+A short introduction to the Scala API:
+```scala
+import com.logicalclocks.hsfs._
+val connection = HopsworksConnection.builder().build()
+val fs = connection.getFeatureStore();
+val attendances_features_fg = fs.getFeatureGroup("games_features", 1);
+attendances_features_fg.show(1)
+```
+
+You can find more examples on how to use the library in our [hops-examples](https://github.com/logicalclocks/hops-examples) repository.
+
+## Usage
+
+Usage data is collected to improve the quality of the library. It is turned on by default if the backend
+is "c.app.hopsworks.ai". To turn it off, use one of the following ways:
+```python
+# use environment variable
+import os
+os.environ["ENABLE_HOPSWORKS_USAGE"] = "false"
+
+# use `disable_usage_logging`
+import hsfs
+hsfs.disable_usage_logging()
+```
+
+The source code can be found in `python/hsfs/usage.py`.
+
+## Documentation
+
+Documentation is available at [Hopsworks Feature Store Documentation](https://docs.hopsworks.ai/).
+
+## Issues
+
+For general questions about the usage of Hopsworks and the Feature Store please open a topic on [Hopsworks Community](https://community.hopsworks.ai/).
+
+Please report any issue using [Github issue tracking](https://github.com/logicalclocks/feature-store-api/issues).
+
+Please attach the client environment from the output below in the issue:
+```python
+import hopsworks
+import hsfs
+hopsworks.login().get_feature_store()
+print(hsfs.get_env())
+```
+
+## Contributing
+
+If you would like to contribute to this library, please see the [Contribution Guidelines](CONTRIBUTING.md).
diff --git a/hsfs/docs/js/dropdown.js b/hsfs/docs/js/dropdown.js
new file mode 100644
index 000000000..2618e0ce7
--- /dev/null
+++ b/hsfs/docs/js/dropdown.js
@@ -0,0 +1,2 @@
+document.getElementsByClassName("md-tabs__link")[7].style.display = "none";
+document.getElementsByClassName("md-tabs__link")[9].style.display = "none";
diff --git a/hsfs/docs/js/inject-api-links.js b/hsfs/docs/js/inject-api-links.js
new file mode 100644
index 000000000..aa5852283
--- /dev/null
+++ b/hsfs/docs/js/inject-api-links.js
@@ -0,0 +1,32 @@
+window.addEventListener("DOMContentLoaded", function () {
+ var windowPathNameSplits = window.location.pathname.split("/");
+ var majorVersionRegex = new RegExp("(\\d+[.]\\d+)")
+ var latestRegex = new RegExp("latest");
+  if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.ai/3.0 - URL contains major version
+ // Version API dropdown
+ document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/generated/api/login/";
+ document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + windowPathNameSplits[1] + "/generated/api/connection_api/";
+ document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + windowPathNameSplits[1] + "/generated/connection_api/";
+  } else { // on docs.hopsworks.ai/feature-store-api/3.0 / docs.hopsworks.ai/hopsworks-api/3.0 / docs.hopsworks.ai/machine-learning-api/3.0
+ if (latestRegex.test(windowPathNameSplits[2]) || latestRegex.test(windowPathNameSplits[1])) {
+ var majorVersion = "latest";
+ } else {
+
+ var apiVersion = windowPathNameSplits[2];
+ var majorVersion = apiVersion.match(majorVersionRegex)[0];
+ }
+ // Version main navigation
+ document.getElementsByClassName("md-tabs__link")[0].href = "https://docs.hopsworks.ai/" + majorVersion;
+ document.getElementsByClassName("md-tabs__link")[1].href = "https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/quickstart.ipynb";
+ document.getElementsByClassName("md-tabs__link")[2].href = "https://docs.hopsworks.ai/" + majorVersion + "/tutorials/";
+ document.getElementsByClassName("md-tabs__link")[3].href = "https://docs.hopsworks.ai/" + majorVersion + "/concepts/hopsworks/";
+ document.getElementsByClassName("md-tabs__link")[4].href = "https://docs.hopsworks.ai/" + majorVersion + "/user_guides/";
+ document.getElementsByClassName("md-tabs__link")[5].href = "https://docs.hopsworks.ai/" + majorVersion + "/setup_installation/aws/getting_started/";
+ document.getElementsByClassName("md-tabs__link")[6].href = "https://docs.hopsworks.ai/" + majorVersion + "/admin/";
+ // Version API dropdown
+ document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/generated/api/login/";
+ document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/generated/api/connection_api/";
+ document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/javadoc";
+ document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + majorVersion + "/generated/connection_api/";
+ }
+});
diff --git a/hsfs/docs/js/version-select.js b/hsfs/docs/js/version-select.js
new file mode 100644
index 000000000..fcac029e3
--- /dev/null
+++ b/hsfs/docs/js/version-select.js
@@ -0,0 +1,64 @@
+window.addEventListener("DOMContentLoaded", function() {
+ // This is a bit hacky. Figure out the base URL from a known CSS file the
+ // template refers to...
+ var ex = new RegExp("/?css/version-select.css$");
+ var sheet = document.querySelector('link[href$="version-select.css"]');
+
+ var ABS_BASE_URL = sheet.href.replace(ex, "");
+ var CURRENT_VERSION = ABS_BASE_URL.split("/").pop();
+
+ function makeSelect(options, selected) {
+ var select = document.createElement("select");
+ select.classList.add("form-control");
+
+ options.forEach(function(i) {
+ var option = new Option(i.text, i.value, undefined,
+ i.value === selected);
+ select.add(option);
+ });
+
+ return select;
+ }
+
+ var xhr = new XMLHttpRequest();
+ xhr.open("GET", ABS_BASE_URL + "/../versions.json");
+ xhr.onload = function() {
+ var versions = JSON.parse(this.responseText);
+
+ var realVersion = versions.find(function(i) {
+ return i.version === CURRENT_VERSION ||
+ i.aliases.includes(CURRENT_VERSION);
+ }).version;
+ var latestVersion = versions.find(function(i) {
+ return i.aliases.includes("latest");
+ }).version;
+ let outdated_banner = document.querySelector('div[data-md-color-scheme="default"][data-md-component="outdated"]');
+ if (realVersion !== latestVersion) {
+ outdated_banner.removeAttribute("hidden");
+ } else {
+ outdated_banner.setAttribute("hidden", "");
+ }
+
+ var select = makeSelect(versions.map(function(i) {
+ var allowedAliases = ["dev", "latest"]
+ if (i.aliases.length > 0) {
+ var aliasString = " [" + i.aliases.filter(function (str) { return allowedAliases.includes(str); }).join(", ") + "]";
+ } else {
+ var aliasString = "";
+ }
+ return {text: i.title + aliasString, value: i.version};
+ }), realVersion);
+ select.addEventListener("change", function(event) {
+ window.location.href = ABS_BASE_URL + "/../" + this.value + "/generated/api/connection_api/";
+ });
+
+ var container = document.createElement("div");
+ container.id = "version-selector";
+ // container.className = "md-nav__item";
+ container.appendChild(select);
+
+ var sidebar = document.querySelector(".md-nav--primary > .md-nav__list");
+ sidebar.parentNode.insertBefore(container, sidebar.nextSibling);
+ };
+ xhr.send();
+});
diff --git a/hsfs/docs/overrides/main.html b/hsfs/docs/overrides/main.html
new file mode 100644
index 000000000..ecb09de07
--- /dev/null
+++ b/hsfs/docs/overrides/main.html
@@ -0,0 +1,8 @@
+{% extends "base.html" %}
+
+{% block outdated %}
+  You're not viewing the latest version of the documentation.
+  <a href="{{ '../' ~ base_url }}">
+    <strong>Click here to go to latest.</strong>
+  </a>
+{% endblock %}
diff --git a/hsfs/docs/templates/api/connection_api.md b/hsfs/docs/templates/api/connection_api.md
new file mode 100644
index 000000000..19e13f3eb
--- /dev/null
+++ b/hsfs/docs/templates/api/connection_api.md
@@ -0,0 +1,11 @@
+# Connection
+
+{{connection}}
+
+## Properties
+
+{{connection_properties}}
+
+## Methods
+
+{{connection_methods}}
diff --git a/hsfs/docs/templates/api/embedding_feature_api.md b/hsfs/docs/templates/api/embedding_feature_api.md
new file mode 100644
index 000000000..c054672d0
--- /dev/null
+++ b/hsfs/docs/templates/api/embedding_feature_api.md
@@ -0,0 +1,7 @@
+# EmbeddingFeature
+
+{{embedding_feature}}
+
+## Properties
+
+{{embedding_feature_properties}}
diff --git a/hsfs/docs/templates/api/embedding_index_api.md b/hsfs/docs/templates/api/embedding_index_api.md
new file mode 100644
index 000000000..d336e0ddb
--- /dev/null
+++ b/hsfs/docs/templates/api/embedding_index_api.md
@@ -0,0 +1,12 @@
+# EmbeddingIndex
+
+{{embedding_index}}
+
+## Properties
+
+{{embedding_index_properties}}
+
+## Methods
+
+{{embedding_index_methods}}
+
diff --git a/hsfs/docs/templates/api/expectation_api.md b/hsfs/docs/templates/api/expectation_api.md
new file mode 100644
index 000000000..7ba4110c1
--- /dev/null
+++ b/hsfs/docs/templates/api/expectation_api.md
@@ -0,0 +1,20 @@
+# Expectation
+
+{{expectation}}
+
+## Properties
+
+{{expectation_properties}}
+
+## Methods
+
+{{expectation_methods}}
+
+## Creation
+{{expectation_create}}
+
+## Retrieval
+
+{{expectation_getall}}
+
+{{expectation_get}}
diff --git a/hsfs/docs/templates/api/expectation_suite_api.md b/hsfs/docs/templates/api/expectation_suite_api.md
new file mode 100644
index 000000000..a07ac5f8a
--- /dev/null
+++ b/hsfs/docs/templates/api/expectation_suite_api.md
@@ -0,0 +1,41 @@
+# Expectation Suite
+
+{{expectation_suite}}
+
+## Creation with Great Expectations
+
+```python3
+import great_expectations as ge
+
+expectation_suite = ge.core.ExpectationSuite(
+ "new_expectation_suite",
+ expectations=[
+ ge.core.ExpectationConfiguration(
+ expectation_type="expect_column_max_to_be_between",
+ kwargs={
+ "column": "feature",
+ "min_value": -1,
+ "max_value": 1
+ }
+ )
+ ]
+)
+```
+
+## Attach to Feature Group
+
+{{expectation_suite_attach}}
+
+## Single Expectation API
+
+An API to edit the expectation list based on Great Expectations API.
+
+{{single_expectation_api}}
+
+## Properties
+
+{{expectation_suite_properties}}
+
+## Methods
+
+{{expectation_suite_methods}}
diff --git a/hsfs/docs/templates/api/external_feature_group_api.md b/hsfs/docs/templates/api/external_feature_group_api.md
new file mode 100644
index 000000000..a982a39e8
--- /dev/null
+++ b/hsfs/docs/templates/api/external_feature_group_api.md
@@ -0,0 +1,19 @@
+# ExternalFeatureGroup
+
+{{fg}}
+
+## Creation
+
+{{fg_create}}
+
+## Retrieval
+
+{{fg_get}}
+
+## Properties
+
+{{fg_properties}}
+
+## Methods
+
+{{fg_methods}}
diff --git a/hsfs/docs/templates/api/feature_api.md b/hsfs/docs/templates/api/feature_api.md
new file mode 100644
index 000000000..8dca5ef54
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_api.md
@@ -0,0 +1,11 @@
+# Feature
+
+{{feature}}
+
+## Properties
+
+{{feature_properties}}
+
+## Methods
+
+{{feature_methods}}
diff --git a/hsfs/docs/templates/api/feature_descriptive_statistics_api.md b/hsfs/docs/templates/api/feature_descriptive_statistics_api.md
new file mode 100644
index 000000000..3be8cccd3
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_descriptive_statistics_api.md
@@ -0,0 +1,7 @@
+# Feature Descriptive Statistics
+
+{{feature_descriptive_statistics}}
+
+## Properties
+
+{{feature_descriptive_statistics_properties}}
diff --git a/hsfs/docs/templates/api/feature_group_api.md b/hsfs/docs/templates/api/feature_group_api.md
new file mode 100644
index 000000000..372865c4b
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_group_api.md
@@ -0,0 +1,19 @@
+# FeatureGroup
+
+{{fg}}
+
+## Creation
+
+{{fg_create}}
+
+## Retrieval
+
+{{fg_get}}
+
+## Properties
+
+{{fg_properties}}
+
+## Methods
+
+{{fg_methods}}
diff --git a/hsfs/docs/templates/api/feature_monitoring_config_api.md b/hsfs/docs/templates/api/feature_monitoring_config_api.md
new file mode 100644
index 000000000..7ca9b46ff
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_monitoring_config_api.md
@@ -0,0 +1,27 @@
+# Feature Monitoring Configuration
+
+{{feature_monitoring_config}}
+
+## Creation from Feature Group
+
+{{feature_monitoring_config_creation_fg}}
+
+## Creation from Feature View
+
+{{feature_monitoring_config_creation_fv}}
+
+## Retrieval from Feature Group
+
+{{feature_monitoring_config_retrieval_fg}}
+
+## Retrieval from Feature View
+
+{{feature_monitoring_config_retrieval_fv}}
+
+## Properties
+
+{{feature_monitoring_config_properties}}
+
+## Methods
+
+{{feature_monitoring_config_methods}}
diff --git a/hsfs/docs/templates/api/feature_monitoring_result_api.md b/hsfs/docs/templates/api/feature_monitoring_result_api.md
new file mode 100644
index 000000000..5bfca1165
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_monitoring_result_api.md
@@ -0,0 +1,11 @@
+# Feature Monitoring Result
+
+{{feature_monitoring_result}}
+
+## Retrieval
+
+{{feature_monitoring_result_retrieval}}
+
+## Properties
+
+{{feature_monitoring_result_properties}}
diff --git a/hsfs/docs/templates/api/feature_monitoring_window_config_api.md b/hsfs/docs/templates/api/feature_monitoring_window_config_api.md
new file mode 100644
index 000000000..53ef23ea2
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_monitoring_window_config_api.md
@@ -0,0 +1,7 @@
+# Feature Monitoring Window Configuration
+
+{{feature_monitoring_window_config}}
+
+## Properties
+
+{{feature_monitoring_window_config_properties}}
diff --git a/hsfs/docs/templates/api/feature_store_api.md b/hsfs/docs/templates/api/feature_store_api.md
new file mode 100644
index 000000000..f859336f6
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_store_api.md
@@ -0,0 +1,15 @@
+# Feature Store
+
+{{fs}}
+
+## Retrieval
+
+{{fs_get}}
+
+## Properties
+
+{{fs_properties}}
+
+## Methods
+
+{{fs_methods}}
diff --git a/hsfs/docs/templates/api/feature_view_api.md b/hsfs/docs/templates/api/feature_view_api.md
new file mode 100644
index 000000000..c0f7df954
--- /dev/null
+++ b/hsfs/docs/templates/api/feature_view_api.md
@@ -0,0 +1,21 @@
+# Feature View
+
+{{fv}}
+
+## Creation
+
+{{fv_create}}
+
+## Retrieval
+
+{{fv_get}}
+
+{{fvs_get}}
+
+## Properties
+
+{{fv_properties}}
+
+## Methods
+
+{{fv_methods}}
diff --git a/hsfs/docs/templates/api/job.md b/hsfs/docs/templates/api/job.md
new file mode 100644
index 000000000..9ad68d976
--- /dev/null
+++ b/hsfs/docs/templates/api/job.md
@@ -0,0 +1,11 @@
+# Job
+
+{{job}}
+
+## Methods
+
+{{job_methods}}
+
+## Job Configuration
+
+{{job_configuration}}
diff --git a/hsfs/docs/templates/api/links.md b/hsfs/docs/templates/api/links.md
new file mode 100644
index 000000000..62cdc7001
--- /dev/null
+++ b/hsfs/docs/templates/api/links.md
@@ -0,0 +1,14 @@
+# Provenance Links
+
+Provenance Links are objects returned by methods such as [get_feature_groups_provenance](../storage_connector_api/#get_feature_groups_provenance), [get_storage_connector_provenance](../feature_group_api/#get_storage_connector_provenance), [get_parent_feature_group](../feature_group_api/#get_parent_feature_groups), [get_generated_feature_groups](../feature_group_api/#get_generated_feature_groups), [get_generated_feature_views](../feature_group_api/#get_generated_feature_views), and [get_models_provenance](../feature_view_api/#get_models_provenance), and they represent sections of the provenance graph, depending on the method invoked.
+
+## Properties
+
+{{links_properties}}
+
+# Artifact
+
+Artifact objects are part of the provenance graph and contain a minimal set of information regarding the entities (feature groups, feature views) they represent.
+The provenance graph contains Artifact objects when the underlying entities have been deleted, are corrupted, or are not accessible to the user.
+
+{{artifact_properties}}
diff --git a/hsfs/docs/templates/api/query_api.md b/hsfs/docs/templates/api/query_api.md
new file mode 100644
index 000000000..7cc664d96
--- /dev/null
+++ b/hsfs/docs/templates/api/query_api.md
@@ -0,0 +1,13 @@
+# Query
+
+Query objects are strictly generated by HSFS APIs called on [Feature Group objects](feature_group_api.md).
+Users will never construct a Query object using the constructor of the class.
+For this reason we do not provide the full documentation of the class here.
+
+## Methods
+
+{{query_methods}}
+
+## Properties
+
+{{query_properties}}
diff --git a/hsfs/docs/templates/api/rule_api.md b/hsfs/docs/templates/api/rule_api.md
new file mode 100644
index 000000000..0801e3954
--- /dev/null
+++ b/hsfs/docs/templates/api/rule_api.md
@@ -0,0 +1,7 @@
+# Rule
+
+{{rule}}
+
+## Properties
+
+{{rule_properties}}
diff --git a/hsfs/docs/templates/api/rule_definition_api.md b/hsfs/docs/templates/api/rule_definition_api.md
new file mode 100644
index 000000000..326b66db0
--- /dev/null
+++ b/hsfs/docs/templates/api/rule_definition_api.md
@@ -0,0 +1,13 @@
+# Rule Definition
+
+{{ruledefinition}}
+
+## Properties
+
+{{ruledefinition_properties}}
+
+## Retrieval
+
+{{ruledefinition_getall}}
+
+{{ruledefinition_get}}
diff --git a/hsfs/docs/templates/api/similarity_function_type_api.md b/hsfs/docs/templates/api/similarity_function_type_api.md
new file mode 100644
index 000000000..bdfbc51c2
--- /dev/null
+++ b/hsfs/docs/templates/api/similarity_function_type_api.md
@@ -0,0 +1,3 @@
+# SimilarityFunctionType
+
+{{similarity_function_type}}
diff --git a/hsfs/docs/templates/api/spine_group_api.md b/hsfs/docs/templates/api/spine_group_api.md
new file mode 100644
index 000000000..a2bdf119c
--- /dev/null
+++ b/hsfs/docs/templates/api/spine_group_api.md
@@ -0,0 +1,19 @@
+# SpineGroup
+
+{{fg}}
+
+## Creation
+
+{{fg_create}}
+
+## Retrieval
+
+{{fg_get}}
+
+## Properties
+
+{{fg_properties}}
+
+## Methods
+
+{{fg_methods}}
diff --git a/hsfs/docs/templates/api/split_statistics_api.md b/hsfs/docs/templates/api/split_statistics_api.md
new file mode 100644
index 000000000..09053ac5d
--- /dev/null
+++ b/hsfs/docs/templates/api/split_statistics_api.md
@@ -0,0 +1,7 @@
+# Split Statistics
+
+{{split_statistics}}
+
+## Properties
+
+{{split_statistics_properties}}
diff --git a/hsfs/docs/templates/api/statistics_api.md b/hsfs/docs/templates/api/statistics_api.md
new file mode 100644
index 000000000..27ed90c9d
--- /dev/null
+++ b/hsfs/docs/templates/api/statistics_api.md
@@ -0,0 +1,7 @@
+# Statistics
+
+{{statistics}}
+
+## Properties
+
+{{statistics_properties}}
diff --git a/hsfs/docs/templates/api/statistics_config_api.md b/hsfs/docs/templates/api/statistics_config_api.md
new file mode 100644
index 000000000..a907d1d32
--- /dev/null
+++ b/hsfs/docs/templates/api/statistics_config_api.md
@@ -0,0 +1,7 @@
+# StatisticsConfig
+
+{{statistics_config}}
+
+## Properties
+
+{{statistics_config_properties}}
diff --git a/hsfs/docs/templates/api/storage_connector_api.md b/hsfs/docs/templates/api/storage_connector_api.md
new file mode 100644
index 000000000..1b390e72a
--- /dev/null
+++ b/hsfs/docs/templates/api/storage_connector_api.md
@@ -0,0 +1,119 @@
+# Storage Connector
+
+## Retrieval
+
+{{sc_get}}
+
+## HopsFS
+
+### Properties
+
+{{hopsfs_properties}}
+
+### Methods
+
+{{hopsfs_methods}}
+
+## JDBC
+
+### Properties
+
+{{jdbc_properties}}
+
+### Methods
+
+{{jdbc_methods}}
+
+## S3
+
+### Properties
+
+{{s3_properties}}
+
+### Methods
+
+{{s3_methods}}
+
+## Redshift
+
+### Properties
+
+{{redshift_properties}}
+
+### Methods
+
+{{redshift_methods}}
+
+## Azure Data Lake Storage
+
+### Properties
+
+{{adls_properties}}
+
+### Methods
+
+{{adls_methods}}
+
+## Snowflake
+
+### Properties
+
+{{snowflake_properties}}
+
+### Methods
+
+{{snowflake_methods}}
+
+## Google Cloud Storage
+This storage connector provides integration with Google Cloud Storage (GCS).
+Once you create a connector in the Feature Store, you can read data from a GCS bucket into a Spark dataframe
+by calling the `read` API.
+
+Authentication to GCP is handled by uploading the `JSON keyfile for service account` to the Hopsworks Project. For more information
+on service accounts and creating a keyfile in GCP, read the [Google Cloud documentation](https://cloud.google.com/docs/authentication/production#create_service_account
+'creating service account keyfile').
+
+The connector also supports the optional encryption method `Customer Supplied Encryption Key` by Google.
+The encryption details are stored as `Secrets` in the Feature Store to keep them secure.
+Read more about encryption in the [Google documentation](https://cloud.google.com/storage/docs/encryption#customer-supplied_encryption_keys).
+
+The storage connector uses the Google `gcs-connector-hadoop` behind the scenes. For more information, check out [Google Cloud Storage Connector for Spark and Hadoop](
+https://github.com/GoogleCloudDataproc/hadoop-connectors/tree/master/gcs#google-cloud-storage-connector-for-spark-and-hadoop 'google-cloud-storage-connector-for-spark-and-hadoop')
+
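+As a rough, illustrative sketch (assuming `fs` is a feature store handle obtained from a connection; the connector name, bucket path and file format are placeholders):
+
+```python
+# Retrieve a previously created GCS connector and read files into a Spark dataframe.
+gcs_connector = fs.get_storage_connector("my_gcs_connector")
+df = gcs_connector.read(data_format="csv", path="gs://my-bucket/path/to/data")
+```
+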
+### Properties
+
+{{gcs_properties}}
+
+### Methods
+
+{{gcs_methods}}
+
+## BigQuery
+The BigQuery storage connector provides integration with Google Cloud BigQuery.
+You can use it to run BigQuery queries and load the results into a Spark dataframe by calling the `read` API.
+
+Authentication to GCP is handled by uploading the `JSON keyfile for service account` to the Hopsworks Project. For more information
+on service accounts and creating a keyfile in GCP, read the [Google Cloud documentation](https://cloud.google.com/docs/authentication/production#create_service_account
+'creating service account keyfile').
+
+The storage connector uses the Google `spark-bigquery-connector` behind the scenes.
+To read more about the Spark connector, such as the Spark options or usage, check [Apache Spark SQL connector for Google BigQuery](https://github.com/GoogleCloudDataproc/spark-bigquery-connector#usage
+'github.com/GoogleCloudDataproc/spark-bigquery-connector').
+
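+As a rough, illustrative sketch (assuming `fs` is a feature store handle obtained from a connection; the connector name and query are placeholders):
+
+```python
+# Retrieve a previously created BigQuery connector and load a query result
+# into a Spark dataframe.
+bq_connector = fs.get_storage_connector("my_bigquery_connector")
+df = bq_connector.read(query="SELECT * FROM my_dataset.my_table LIMIT 100")
+```
+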
+### Properties
+
+{{bigquery_properties}}
+
+### Methods
+
+{{bigquery_methods}}
+
+## Kafka
+
+### Properties
+
+{{kafka_properties}}
+
+### Methods
+
+{{kafka_methods}}
diff --git a/hsfs/docs/templates/api/training_dataset_api.md b/hsfs/docs/templates/api/training_dataset_api.md
new file mode 100644
index 000000000..a53696465
--- /dev/null
+++ b/hsfs/docs/templates/api/training_dataset_api.md
@@ -0,0 +1,19 @@
+# Training Dataset
+
+{{td}}
+
+## Creation
+
+{{td_create}}
+
+## Retrieval
+
+{{td_get}}
+
+## Properties
+
+{{td_properties}}
+
+## Methods
+
+{{td_methods}}
diff --git a/hsfs/docs/templates/api/transformation_functions_api.md b/hsfs/docs/templates/api/transformation_functions_api.md
new file mode 100644
index 000000000..249262a45
--- /dev/null
+++ b/hsfs/docs/templates/api/transformation_functions_api.md
@@ -0,0 +1,20 @@
+# Transformation Function
+
+{{transformation_function}}
+
+## Properties
+
+{{transformation_function_properties}}
+
+## Methods
+
+{{transformation_function_methods}}
+
+## Creation
+{{create_transformation_function}}
+
+## Retrieval
+
+{{get_transformation_function}}
+
+{{get_transformation_functions}}
diff --git a/hsfs/docs/templates/api/validation_api.md b/hsfs/docs/templates/api/validation_api.md
new file mode 100644
index 000000000..8e1512f34
--- /dev/null
+++ b/hsfs/docs/templates/api/validation_api.md
@@ -0,0 +1,18 @@
+# Validation
+
+{{validation_result}}
+
+## Properties
+
+{{validation_result_properties}}
+
+## Methods
+
+{{expectation_methods}}
+
+## Validate a dataframe
+{{validate}}
+
+## Retrieval
+
+{{validation_result_get}}
diff --git a/hsfs/docs/templates/api/validation_report_api.md b/hsfs/docs/templates/api/validation_report_api.md
new file mode 100644
index 000000000..435a87a03
--- /dev/null
+++ b/hsfs/docs/templates/api/validation_report_api.md
@@ -0,0 +1,19 @@
+# Validation Report
+
+{{validation_report}}
+
+## Creation
+
+{{validation_report_validate}}
+
+## Retrieval
+
+{{validation_report_get}}
+
+## Properties
+
+{{validation_report_properties}}
+
+## Methods
+
+{{validation_report_methods}}
diff --git a/hsfs/java/beam/pom.xml b/hsfs/java/beam/pom.xml
new file mode 100644
index 000000000..3b3f902ca
--- /dev/null
+++ b/hsfs/java/beam/pom.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>hsfs-parent</artifactId>
+    <groupId>com.logicalclocks</groupId>
+    <version>4.0.0-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>hsfs-beam</artifactId>
+
+  <properties>
+    <beam.version>2.48.0</beam.version>
+    <kafka.version>3.4.0</kafka.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.logicalclocks</groupId>
+      <artifactId>hsfs</artifactId>
+      <version>${project.version}</version>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.xml.bind</groupId>
+          <artifactId>jaxb-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-core</artifactId>
+      <version>${beam.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-io-kafka</artifactId>
+      <version>${beam.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.kafka</groupId>
+      <artifactId>kafka-clients</artifactId>
+      <version>${kafka.version}</version>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java
new file mode 100644
index 000000000..fd93052a3
--- /dev/null
+++ b/hsfs/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2023. Hopsworks AB
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ * See the License for the specific language governing permissions and limitations under the License.
+ *
+ */
+
+package com.logicalclocks.hsfs.beam;
+
+import com.logicalclocks.hsfs.FeatureStoreBase;
+import com.logicalclocks.hsfs.FeatureStoreException;
+import com.logicalclocks.hsfs.StatisticsConfig;
+import com.logicalclocks.hsfs.StorageConnector;
+import com.logicalclocks.hsfs.TimeTravelFormat;
+import com.logicalclocks.hsfs.TrainingDatasetBase;
+import com.logicalclocks.hsfs.beam.constructor.Query;
+import com.logicalclocks.hsfs.beam.engine.FeatureGroupEngine;
+import com.logicalclocks.hsfs.beam.engine.FeatureViewEngine;
+import com.logicalclocks.hsfs.metadata.StorageConnectorApi;
+import lombok.NonNull;
+
+import java.io.IOException;
+import java.util.List;
+
+public class FeatureStore extends FeatureStoreBase {
+
+ private FeatureGroupEngine featureGroupEngine;
+ private FeatureViewEngine featureViewEngine;
+
+ public FeatureStore() {
+ storageConnectorApi = new StorageConnectorApi();
+ featureGroupEngine = new FeatureGroupEngine();
+ }
+
+ @Override
+ public Object createFeatureGroup() {
+ throw new UnsupportedOperationException("Not supported for Beam");
+ }
+
+ @Override
+ public Object getFeatureGroups(@NonNull String name) throws FeatureStoreException, IOException {
+ throw new UnsupportedOperationException("Not supported for Beam");
+ }
+
+ @Override
+ public Object getOrCreateFeatureGroup(String name, Integer version) throws IOException, FeatureStoreException {
+ throw new UnsupportedOperationException("Not supported for Beam");
+ }
+
+
+ @Override
+  public Object getOrCreateFeatureGroup(String name, Integer version, List<String> primaryKeys,
+ boolean onlineEnabled, String eventTime) throws IOException, FeatureStoreException {
+ throw new UnsupportedOperationException("Not supported for Beam");
+ }
+
+ @Override
+  public Object getOrCreateFeatureGroup(String name, Integer version, List<String> primaryKeys,
+      List<String> partitionKeys, boolean onlineEnabled, String eventTime) throws IOException, FeatureStoreException {
+ throw new UnsupportedOperationException("Not supported for Beam");
+ }
+
+ @Override
+  public Object getOrCreateFeatureGroup(String name, Integer version, String description, List<String> primaryKeys,
+      List<String> partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, TimeTravelFormat timeTravelFormat,
+ StatisticsConfig statisticsConfig, String topicName, String notificationTopicName, String eventTime)
+ throws IOException, FeatureStoreException {
+ throw new UnsupportedOperationException("Not supported for Beam");
+ }
+
+ /**
+ * Get a stream feature group object from the feature store.
+ *
+   * <p>Getting a stream feature group metadata handle enables you to interact with the feature group,
+ * such as read the data or use the `Query`-API to perform joins between feature groups and create feature
+ * views.
+ *
+ *
+ *
+ * @param name the name of the feature group
+ * @return StreamFeatureGroup The stream feature group metadata object.
+ * @throws FeatureStoreException If unable to retrieve feature group from the feature store.
+ * @throws IOException Generic IO exception.
+ */
+ @Override
+ public Object getStreamFeatureGroup(String name) throws FeatureStoreException, IOException {
+ LOGGER.info("VersionWarning: No version provided for getting feature group `" + name + "`, defaulting to `"
+ + DEFAULT_VERSION + "`.");
+ return getStreamFeatureGroup(name, DEFAULT_VERSION);
+ }
+
+ /**
+ * Get a stream feature group object from the feature store.
+ *
+   * <p>Getting a stream feature group metadata handle enables you to interact with the feature group,
+ * such as read the data or use the `Query`-API to perform joins between feature groups and create feature
+ * views.
+ *
+ *