From 99ab7ad28a13698ee1a8ba3c70e8f8ec02024f63 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 27 Jul 2024 07:38:57 +0200 Subject: [PATCH] chore(python-docs): update python doc assets to recent development state (#194) --- .../dev/reference/client/client/index.html | 2 +- .../dev/reference/client/config/index.html | 22 +++- .../client/credential_provider/index.html | 2 +- .../endpoint/api/data_lake_measure/index.html | 2 +- .../endpoint/api/data_stream/index.html | 2 +- .../reference/endpoint/api/version/index.html | 2 +- .../reference/endpoint/endpoint/index.html | 2 +- .../reference/endpoint/exceptions/index.html | 2 +- .../function_zoo/river_function/index.html | 2 +- .../functions/broker/broker/index.html | 2 +- .../broker/broker_handler/index.html | 2 +- .../functions/broker/consumer/index.html | 2 +- .../broker/kafka/kafka_consumer/index.html | 2 +- .../kafka/kafka_message_fetcher/index.html | 2 +- .../broker/kafka/kafka_publisher/index.html | 2 +- .../broker/nats/nats_consumer/index.html | 2 +- .../broker/nats/nats_publisher/index.html | 2 +- .../broker/output_collector/index.html | 2 +- .../functions/broker/publisher/index.html | 2 +- .../functions/function_handler/index.html | 2 +- .../functions/registration/index.html | 2 +- .../functions/streampipes_function/index.html | 2 +- .../utils/async_iter_handler/index.html | 2 +- .../utils/data_stream_context/index.html | 2 +- .../utils/data_stream_generator/index.html | 4 +- .../utils/function_context/index.html | 2 +- .../dev/reference/model/common/index.html | 2 +- .../container/data_lake_measures/index.html | 2 +- .../model/container/data_streams/index.html | 2 +- .../container/resource_container/index.html | 2 +- .../model/container/versions/index.html | 2 +- .../resource/data_lake_measure/index.html | 2 +- .../model/resource/data_series/index.html | 2 +- .../model/resource/data_stream/index.html | 2 +- .../model/resource/exceptions/index.html | 2 +- .../resource/function_definition/index.html | 2 +- .../model/resource/query_result/index.html | 2 +- .../model/resource/resource/index.html | 2 +- .../model/resource/version/index.html | 2 +- docs-python/dev/search/search_index.json | 2 +- docs-python/dev/sitemap.xml | 96 +++++++++--------- docs-python/dev/sitemap.xml.gz | Bin 712 -> 712 bytes 42 files changed, 108 insertions(+), 90 deletions(-) diff --git a/docs-python/dev/reference/client/client/index.html b/docs-python/dev/reference/client/client/index.html index 422ad2f08..c00b4456e 100644 --- a/docs-python/dev/reference/client/client/index.html +++ b/docs-python/dev/reference/client/client/index.html @@ -2810,7 +2810,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/client/config/index.html b/docs-python/dev/reference/client/config/index.html index fcdaeb011..900c98abf 100644 --- a/docs-python/dev/reference/client/config/index.html +++ b/docs-python/dev/reference/client/config/index.html @@ -2290,7 +2290,7 @@

Config

- StreamPipesClientConfig(credential_provider, host_address, https_disabled=False, port=80) + StreamPipesClientConfig(credential_provider, host_address, https_disabled=False, port=80, additional_headers=dict()) dataclass @@ -2381,6 +2381,24 @@

+
+

Specifies additional HTTP headers that should be sent with each request, e.g., proxy headers

+
+

+ + TYPE: + Optional[Dict[str, str]] + + + DEFAULT: + dict() + +

+ + @@ -2425,7 +2443,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/client/credential_provider/index.html b/docs-python/dev/reference/client/credential_provider/index.html index aa1d5af3c..ab645ff77 100644 --- a/docs-python/dev/reference/client/credential_provider/index.html +++ b/docs-python/dev/reference/client/credential_provider/index.html @@ -2803,7 +2803,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/endpoint/api/data_lake_measure/index.html b/docs-python/dev/reference/endpoint/api/data_lake_measure/index.html index 4a305e5f5..166e22446 100644 --- a/docs-python/dev/reference/endpoint/api/data_lake_measure/index.html +++ b/docs-python/dev/reference/endpoint/api/data_lake_measure/index.html @@ -3089,7 +3089,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/endpoint/api/data_stream/index.html b/docs-python/dev/reference/endpoint/api/data_stream/index.html index c62bd9078..820773d4e 100644 --- a/docs-python/dev/reference/endpoint/api/data_stream/index.html +++ b/docs-python/dev/reference/endpoint/api/data_stream/index.html @@ -2676,7 +2676,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/endpoint/api/version/index.html b/docs-python/dev/reference/endpoint/api/version/index.html index 2f85b24ff..22dc323d5 100644 --- a/docs-python/dev/reference/endpoint/api/version/index.html +++ b/docs-python/dev/reference/endpoint/api/version/index.html @@ -2696,7 +2696,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/endpoint/endpoint/index.html b/docs-python/dev/reference/endpoint/endpoint/index.html index 1de3e5e4e..a2045dbb4 100644 --- a/docs-python/dev/reference/endpoint/endpoint/index.html +++ b/docs-python/dev/reference/endpoint/endpoint/index.html @@ -2983,7 +2983,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/endpoint/exceptions/index.html b/docs-python/dev/reference/endpoint/exceptions/index.html index c3c0afc5b..b0900092d 100644 --- a/docs-python/dev/reference/endpoint/exceptions/index.html +++ b/docs-python/dev/reference/endpoint/exceptions/index.html @@ -2367,7 +2367,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/function_zoo/river_function/index.html b/docs-python/dev/reference/function_zoo/river_function/index.html index 05c4f67c9..70a1a0b4a 100644 --- a/docs-python/dev/reference/function_zoo/river_function/index.html +++ b/docs-python/dev/reference/function_zoo/river_function/index.html @@ -3291,7 +3291,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/broker/index.html b/docs-python/dev/reference/functions/broker/broker/index.html index 2f0320bcc..65cb5986b 100644 --- a/docs-python/dev/reference/functions/broker/broker/index.html +++ b/docs-python/dev/reference/functions/broker/broker/index.html @@ -2504,7 +2504,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/broker_handler/index.html b/docs-python/dev/reference/functions/broker/broker_handler/index.html index 0d7be732f..ae9c98de6 100644 --- a/docs-python/dev/reference/functions/broker/broker_handler/index.html +++ b/docs-python/dev/reference/functions/broker/broker_handler/index.html @@ -2620,7 +2620,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/consumer/index.html b/docs-python/dev/reference/functions/broker/consumer/index.html index 610226ca2..594b4adc1 100644 --- a/docs-python/dev/reference/functions/broker/consumer/index.html +++ b/docs-python/dev/reference/functions/broker/consumer/index.html @@ -2570,7 +2570,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/kafka/kafka_consumer/index.html b/docs-python/dev/reference/functions/broker/kafka/kafka_consumer/index.html index 09093f2ff..41defe08d 100644 --- a/docs-python/dev/reference/functions/broker/kafka/kafka_consumer/index.html +++ b/docs-python/dev/reference/functions/broker/kafka/kafka_consumer/index.html @@ -2566,7 +2566,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/kafka/kafka_message_fetcher/index.html b/docs-python/dev/reference/functions/broker/kafka/kafka_message_fetcher/index.html index 4e3e5713d..1108125b1 100644 --- a/docs-python/dev/reference/functions/broker/kafka/kafka_message_fetcher/index.html +++ b/docs-python/dev/reference/functions/broker/kafka/kafka_message_fetcher/index.html @@ -2439,7 +2439,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/kafka/kafka_publisher/index.html b/docs-python/dev/reference/functions/broker/kafka/kafka_publisher/index.html index 57e009715..8cf7da4b8 100644 --- a/docs-python/dev/reference/functions/broker/kafka/kafka_publisher/index.html +++ b/docs-python/dev/reference/functions/broker/kafka/kafka_publisher/index.html @@ -2593,7 +2593,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/nats/nats_consumer/index.html b/docs-python/dev/reference/functions/broker/nats/nats_consumer/index.html index ca9e67ffc..98b6abd68 100644 --- a/docs-python/dev/reference/functions/broker/nats/nats_consumer/index.html +++ b/docs-python/dev/reference/functions/broker/nats/nats_consumer/index.html @@ -2566,7 +2566,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/nats/nats_publisher/index.html b/docs-python/dev/reference/functions/broker/nats/nats_publisher/index.html index 2bf028642..14ffaa5f5 100644 --- a/docs-python/dev/reference/functions/broker/nats/nats_publisher/index.html +++ b/docs-python/dev/reference/functions/broker/nats/nats_publisher/index.html @@ -2593,7 +2593,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/output_collector/index.html b/docs-python/dev/reference/functions/broker/output_collector/index.html index a6964126a..d2d5670f5 100644 --- a/docs-python/dev/reference/functions/broker/output_collector/index.html +++ b/docs-python/dev/reference/functions/broker/output_collector/index.html @@ -2546,7 +2546,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/broker/publisher/index.html b/docs-python/dev/reference/functions/broker/publisher/index.html index 906f99f28..dad1bd28f 100644 --- a/docs-python/dev/reference/functions/broker/publisher/index.html +++ b/docs-python/dev/reference/functions/broker/publisher/index.html @@ -2594,7 +2594,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/function_handler/index.html b/docs-python/dev/reference/functions/function_handler/index.html index 20f610e46..93eb7dba9 100644 --- a/docs-python/dev/reference/functions/function_handler/index.html +++ b/docs-python/dev/reference/functions/function_handler/index.html @@ -2629,7 +2629,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/registration/index.html b/docs-python/dev/reference/functions/registration/index.html index 9ebffe9ce..7ee451d6e 100644 --- a/docs-python/dev/reference/functions/registration/index.html +++ b/docs-python/dev/reference/functions/registration/index.html @@ -2525,7 +2525,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/streampipes_function/index.html b/docs-python/dev/reference/functions/streampipes_function/index.html index 04cb4fdec..e3deb4cb8 100644 --- a/docs-python/dev/reference/functions/streampipes_function/index.html +++ b/docs-python/dev/reference/functions/streampipes_function/index.html @@ -2919,7 +2919,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/utils/async_iter_handler/index.html b/docs-python/dev/reference/functions/utils/async_iter_handler/index.html index 4e2321b91..08647df3d 100644 --- a/docs-python/dev/reference/functions/utils/async_iter_handler/index.html +++ b/docs-python/dev/reference/functions/utils/async_iter_handler/index.html @@ -2551,7 +2551,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/utils/data_stream_context/index.html b/docs-python/dev/reference/functions/utils/data_stream_context/index.html index 953b1ffd9..ebbf832ea 100644 --- a/docs-python/dev/reference/functions/utils/data_stream_context/index.html +++ b/docs-python/dev/reference/functions/utils/data_stream_context/index.html @@ -2488,7 +2488,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/utils/data_stream_generator/index.html b/docs-python/dev/reference/functions/utils/data_stream_generator/index.html index 2c1673ebe..9839f1c11 100644 --- a/docs-python/dev/reference/functions/utils/data_stream_generator/index.html +++ b/docs-python/dev/reference/functions/utils/data_stream_generator/index.html @@ -2475,7 +2475,7 @@

TYPE: - Optional[str] + str DEFAULT: @@ -2533,7 +2533,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/functions/utils/function_context/index.html b/docs-python/dev/reference/functions/utils/function_context/index.html index fe0a8c802..d86e51658 100644 --- a/docs-python/dev/reference/functions/utils/function_context/index.html +++ b/docs-python/dev/reference/functions/utils/function_context/index.html @@ -2516,7 +2516,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/common/index.html b/docs-python/dev/reference/model/common/index.html index 9b7b007e7..ffaf930c3 100644 --- a/docs-python/dev/reference/model/common/index.html +++ b/docs-python/dev/reference/model/common/index.html @@ -3802,7 +3802,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/container/data_lake_measures/index.html b/docs-python/dev/reference/model/container/data_lake_measures/index.html index 20357cd60..375c79b18 100644 --- a/docs-python/dev/reference/model/container/data_lake_measures/index.html +++ b/docs-python/dev/reference/model/container/data_lake_measures/index.html @@ -2701,7 +2701,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/container/data_streams/index.html b/docs-python/dev/reference/model/container/data_streams/index.html index 3a20d15a6..b2be2d7ce 100644 --- a/docs-python/dev/reference/model/container/data_streams/index.html +++ b/docs-python/dev/reference/model/container/data_streams/index.html @@ -2701,7 +2701,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/container/resource_container/index.html b/docs-python/dev/reference/model/container/resource_container/index.html index c762d6621..03d4ac2f1 100644 --- a/docs-python/dev/reference/model/container/resource_container/index.html +++ b/docs-python/dev/reference/model/container/resource_container/index.html @@ -2907,7 +2907,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/container/versions/index.html b/docs-python/dev/reference/model/container/versions/index.html index 8cf5ec437..90bbc4370 100644 --- a/docs-python/dev/reference/model/container/versions/index.html +++ b/docs-python/dev/reference/model/container/versions/index.html @@ -2728,7 +2728,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/data_lake_measure/index.html b/docs-python/dev/reference/model/resource/data_lake_measure/index.html index 888d00789..c05aed346 100644 --- a/docs-python/dev/reference/model/resource/data_lake_measure/index.html +++ b/docs-python/dev/reference/model/resource/data_lake_measure/index.html @@ -2565,7 +2565,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/data_series/index.html b/docs-python/dev/reference/model/resource/data_series/index.html index 82d659c68..6e621989b 100644 --- a/docs-python/dev/reference/model/resource/data_series/index.html +++ b/docs-python/dev/reference/model/resource/data_series/index.html @@ -2748,7 +2748,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/data_stream/index.html b/docs-python/dev/reference/model/resource/data_stream/index.html index 1bb550af7..0250eb0c8 100644 --- a/docs-python/dev/reference/model/resource/data_stream/index.html +++ b/docs-python/dev/reference/model/resource/data_stream/index.html @@ -2560,7 +2560,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/exceptions/index.html b/docs-python/dev/reference/model/resource/exceptions/index.html index f8c079f2d..e9083dfb9 100644 --- a/docs-python/dev/reference/model/resource/exceptions/index.html +++ b/docs-python/dev/reference/model/resource/exceptions/index.html @@ -2339,7 +2339,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/function_definition/index.html b/docs-python/dev/reference/model/resource/function_definition/index.html index 62199bff7..6b65def5f 100644 --- a/docs-python/dev/reference/model/resource/function_definition/index.html +++ b/docs-python/dev/reference/model/resource/function_definition/index.html @@ -2989,7 +2989,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/query_result/index.html b/docs-python/dev/reference/model/resource/query_result/index.html index 496cee0ba..c985fb013 100644 --- a/docs-python/dev/reference/model/resource/query_result/index.html +++ b/docs-python/dev/reference/model/resource/query_result/index.html @@ -2645,7 +2645,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/resource/index.html b/docs-python/dev/reference/model/resource/resource/index.html index b798ac4ce..6cfcf2fbf 100644 --- a/docs-python/dev/reference/model/resource/resource/index.html +++ b/docs-python/dev/reference/model/resource/resource/index.html @@ -2568,7 +2568,7 @@

July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/reference/model/resource/version/index.html b/docs-python/dev/reference/model/resource/version/index.html index 34c8873e3..51f225bff 100644 --- a/docs-python/dev/reference/model/resource/version/index.html +++ b/docs-python/dev/reference/model/resource/version/index.html @@ -2588,7 +2588,7 @@

Last update: - July 13, 2024 + July 27, 2024 diff --git a/docs-python/dev/search/search_index.json b/docs-python/dev/search/search_index.json index 0afebe514..00ed4e11c 100644 --- a/docs-python/dev/search/search_index.json +++ b/docs-python/dev/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"\ud83c\udfe1 Home","text":"StreamPipes is a self-service (Industrial) IoT toolbox to enable non-technical users to connect, analyze and explore IoT data streams. Apache StreamPipes for Python \ud83d\udc0d

Apache StreamPipes meets Python! We are highly motivated to develop a Python library for interacting with StreamPipes. In this way, we would like to unite the power of StreamPipes, which makes it easy to connect to and read from different data sources (especially in the IoT domain), with the amazing universe of data analytics libraries in Python.

"},{"location":"#quickstart","title":"\u26a1\ufe0f Quickstart","text":"

As a quick example, we demonstrate how to set up and configure a StreamPipes client. In addition, we will get the available data lake measures out of StreamPipes.

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nconfig = StreamPipesClientConfig(\n    credential_provider = StreamPipesApiKeyCredentials(\n        username = \"test@streampipes.apache.org\",\n        api_key = \"DEMO-KEY\",\n        ),\n    host_address = \"localhost\",\n    https_disabled = True,\n    port = 80\n)\n\nclient = StreamPipesClient(client_config=config)\n\n# get all available data lake measures\nmeasures = client.dataLakeMeasureApi.all()\n\n# get the number of retrieved measures\nlen(measures)\n
Output:
1\n

# inspect the data lake measures as pandas dataframe\nmeasures.to_pandas()\n

Output:

measure_name timestamp_field ... pipeline_is_running num_event_properties\n0 test s0::timestamp ... False 2\n[1 rows x 6 columns]\n
Alternatively, you can provide your credentials via environment variables. Simply define your credential provider as follows:

from streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nStreamPipesApiKeyCredentials()\n
This requires the following environment variables to be set: SP_API_KEY and SP_USERNAME

username is always the username that is used to log in to StreamPipes.

How to get your StreamPipes API key

The api_key can be generated within the UI as demonstrated below:

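For illustration, a minimal sketch of providing both variables from within Python before creating the credential provider (the values shown are placeholders, not real credentials):

import os\n\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\n# the provider reads SP_USERNAME and SP_API_KEY when no arguments are passed\nos.environ[\"SP_USERNAME\"] = \"test@streampipes.apache.org\"  # placeholder\nos.environ[\"SP_API_KEY\"] = \"DEMO-KEY\"  # placeholder\ncredentials = StreamPipesApiKeyCredentials()\n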
"},{"location":"getting-started/developing/","title":"Developing & Contributing","text":""},{"location":"getting-started/developing/#development-guide","title":"\ud83d\udcd6 Development Guide","text":"

This document describes how to easily set up your local dev environment to work on StreamPipes Python \ud83d\udc0d.

"},{"location":"getting-started/developing/#first-steps","title":"\ud83d\ude80 First Steps","text":"

1) Set up your Python environment

Create a virtual Python environment using a tool of your choice. To manage dependencies, we use Poetry, so please install poetry in your local environment, e.g. via

pip install poetry\n

Once poetry is installed you can simply finalize your Python environment by running:

poetry install --with dev,stubs  # install everything that is required for the development\npoetry install --with docs  # install everything to work with the documentation\npoetry install --with dev,stubs,docs  # install all optional dependencies related to development\n

2) Install pre-commit hook

The pre-commit hook is run before every commit and takes care of code style, linting, type hints, import sorting, etc. It will stop your commit in case the changes do not comply with the expected format. Always make sure to have the most recent version of the pre-commit hook installed, otherwise the CI build might fail. If you are interested, you can have a deeper look at the underlying library: pre-commit.

pre-commit install\n
The definition of the pre-commit hook can be found in .pre-commit-config.yaml.

"},{"location":"getting-started/developing/#conventions","title":"\ud83d\udc4f Conventions","text":"

Below we list some conventions that we have agreed on for creating StreamPipes Python. Please comply with them when you plan to contribute to this project. If you have any other suggestions or would like to discuss them, we would be happy to hear from you on our mailing list dev@streampipes.apache.org or in our discussions on GitHub.

1) Use numpy style for Python docstrings \ud83d\udcc4 Please stick to the numpy style when writing docstrings, as we require this for generating our documentation.

2) Provide tests \u2705 We are aiming for broad test coverage for the Python package and have therefore set a requirement of at least 90% unit test coverage. Please remember to write (unit) tests already during development. If you have problems with writing tests, don't hesitate to ask us for help directly in the PR or even before that via our mailing list (see above).

3) Build a similar API as the Java client provides \ud83d\udd04 Whenever possible, please try to keep the API of the Python library consistent with the Java client or Java SDK. By doing so, we would like to provide a consistent developer experience and the basis for automated testing in the future.

"},{"location":"getting-started/developing/#dependency-management","title":"\ud83d\udce6 Dependency Management","text":"

In case you want to add a new dependency to StreamPipes you can use the following command:

poetry add <dep-name>\n

If the dependency is only required for development purposes or the documentation, please stick to one of the following:

poetry add <dep-name> --group dev\npoetry add <dep-name> --group stubs\npoetry add <dep-name> --group docs\n

In case you want to regenerate the poetry lock file, e.g., because you manually updated the pyproject.toml, the following command should be used:

poetry lock --no-update\n

After that, you should install the current version of the poetry lock file to keep your local environment consistent (see the command above).

"},{"location":"getting-started/developing/#documentation","title":"\ud83d\udcdaDocumentation","text":"

To build our documentation, we use Material for MkDocs. All files can be found within the docs directory. To preview your local version of the documentation, you can use the following command:

make livedoc\n

"},{"location":"getting-started/developing/#roadmap","title":"\ud83d\ude80 Roadmap","text":"

Broadly speaking, we plan to expand and add new functionality to the library, currently focusing on the following:

  • increase coverage of StreamPipes API \ud83d\udd17
  • build a comprehensive function zoo \ud83d\udc18
  • support more messaging brokers \ud83d\udcec
  • possibility to build pipeline elements \ud83d\udd27

In case you want to have a more detailed look at what we are currently planning, take a look at our open issues (more short-term driven).

Of course, contributions are always highly appreciated \ud83d\udd2e

Stay tuned!

"},{"location":"getting-started/developing/#contributing","title":"\ud83d\udc68\u200d\ud83d\udcbb Contributing","text":"

Before opening a pull request, review the Get Involved page. It lists information that is required for contributing to StreamPipes.

When you contribute code, you affirm that the contribution is your original work and that you license the work to the project under the project's open source license. Whether or not you state this explicitly, by submitting any copyrighted material via pull request, email, or other means you agree to license the material under the project's open source license and warrant that you have the legal authority to do so.

"},{"location":"getting-started/first-steps/","title":"First Steps","text":""},{"location":"getting-started/first-steps/#installation","title":"\ud83d\udcda Installation","text":"

The StreamPipes Python library is meant to work with Python 3.8 and above. Installation can be done via pip; the latest development version can also be installed directly from GitHub, as shown below:

pip install streampipes\n\n# if you want to have the current development state you can also execute\npip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python\n# the corresponding documentation can be found here: https://streampipes.apache.org/docs/docs/python/dev/\n
"},{"location":"getting-started/first-steps/#setting-up-streampipes","title":"\u2b06\ufe0f Setting up StreamPipes","text":"

When working with the StreamPipes Python library, a running StreamPipes instance to connect and interact with is indispensable. In case you don't have a running instance at hand, you can easily set one up on your local machine. Note that StreamPipes supports different message brokers (e.g., Kafka, NATS). Below, we demonstrate how you can easily set up StreamPipes for both supported message brokers.

"},{"location":"getting-started/first-steps/#start-streampipes-via-docker-compose","title":"\ud83d\udc33 Start StreamPipes via Docker Compose","text":"

The easiest and therefore recommended way to get StreamPipes started is by using docker compose. This requires Docker to be running. You can check if Docker is ready on your machine by executing:

docker ps\n
If this results in an output similar to the following, Docker is ready to continue.
CONTAINER ID   IMAGE     COMMAND   CREATED   STATUS    PORTS     NAMES\n...            ...       ...       ...       ...       ...       ...\n
Otherwise, you need to start Docker first. Please read the full guide on how to start StreamPipes with docker compose here.

"},{"location":"getting-started/first-steps/#setup-streampipes-with-nats-as-message-broker","title":"Setup StreamPipes with NATS as message broker","text":"

The following shows how you can set up a StreamPipes instance that uses NATS as its messaging layer. In this scenario, we will go with docker-compose.nats.yml. When running locally, we need to add the following port mapping entry to services.nats.ports:

- 4222:4222\n

After this modification is applied, StreamPipes can simply be started with this command:

docker-compose -f docker-compose.nats.yml up -d\n

Once all services are started, you can access StreamPipes via http://localhost.

"},{"location":"getting-started/first-steps/#setup-streampipes-with-kafka-as-message-broker","title":"Setup StreamPipes with Kafka as message broker","text":"

Alternatively, you can use docker-compose.yml to start StreamPipes with Kafka as its messaging layer. When running locally, we have to modify services.kafka.environment and add the ports to services.kafka.ports:

environment:\nKAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,OUTSIDE:PLAINTEXT\nKAFKA_ADVERTISED_LISTENERS: PLAINTEXT://:9092,OUTSIDE://localhost:9094\nKAFKA_LISTENERS: PLAINTEXT://:9092,OUTSIDE://:9094\n...\nports:\n- 9094:9094\n
Then, you need to execute the following command:
docker-compose -f docker-compose.yml up -d\n

Once all services are started, you can access StreamPipes via http://localhost.

In case you want to have more control over your StreamPipes setup, you might take a look at our deployment CLI.

Have fun discovering StreamPipes and our Python library \ud83d\ude80

"},{"location":"getting-started/quickstart/","title":"Quickstart","text":""},{"location":"getting-started/quickstart/#quickstart","title":"\u26a1\ufe0f Quickstart","text":"

As a quick example, we demonstrate how to set up and configure a StreamPipes client. In addition, we will get the available data lake measures out of StreamPipes.

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nconfig = StreamPipesClientConfig(\n    credential_provider = StreamPipesApiKeyCredentials(\n        username = \"test@streampipes.apache.org\",\n        api_key = \"DEMO-KEY\",\n        ),\n    host_address = \"localhost\",\n    https_disabled = True,\n    port = 80\n)\n\nclient = StreamPipesClient(client_config=config)\n\n# get all available data lake measures\nmeasures = client.dataLakeMeasureApi.all()\n\n# get the number of retrieved measures\nlen(measures)\n
Output:
1\n

# inspect the data lake measures as pandas dataframe\nmeasures.to_pandas()\n

Output:

measure_name timestamp_field ... pipeline_is_running num_event_properties\n0 test s0::timestamp ... False 2\n[1 rows x 6 columns]\n
Alternatively, you can provide your credentials via environment variables. Simply define your credential provider as follows:

from streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nStreamPipesApiKeyCredentials()\n
This requires the following environment variables to be set: SP_API_KEY and SP_USERNAME

username is always the username that is used to log in to StreamPipes. The api_key can be generated within the UI as demonstrated below:

"},{"location":"reference/client/client/","title":"Client","text":"

Implementation of the StreamPipes client. The client is designed as the central point of interaction with the StreamPipes API and provides all functionalities to communicate with the API.

"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient","title":"StreamPipesClient(client_config, logging_level=logging.INFO)","text":"

The client to connect to StreamPipes.

This is the central point of contact with StreamPipes and provides all the functionalities to interact with it.

The client provides so-called \"endpoints\", each of which refers to an endpoint of the StreamPipes API, e.g. .dataLakeMeasureApi. An endpoint provides the actual methods to interact with the StreamPipes API.

PARAMETER DESCRIPTION client_config

Configures the client to connect properly to the StreamPipes instance.

TYPE: StreamPipesClientConfig

logging_level

Influences the log messages emitted by the StreamPipesClient

TYPE: Optional[int] DEFAULT: INFO

ATTRIBUTE DESCRIPTION dataLakeMeasureApi

Instance of the data lake measure endpoint

TYPE: DataLakeMeasureEndpoint

dataStreamApi

Instance of the data stream endpoint

TYPE: DataStreamEndpoint

RAISES DESCRIPTION AttributeError:

In case an invalid configuration of the StreamPipesClientConfig is passed

Examples:

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(\n         username=\"test-user\",\n         api_key=\"api-key\"\n     ),\n     host_address=\"localhost\",\n     https_disabled=True\n)\n

The following way of instantiating a client instance is intended to be consistent with the StreamPipes Java client.

client = StreamPipesClient.create(client_config=client_config)\n

If you prefer a more pythonic way, you can simply write:

client = StreamPipesClient(client_config=client_config)\n

To interact with an endpoint:

data_lake_measures = client.dataLakeMeasureApi.all()\n

To inspect returned data as a pandas dataframe:

data_lake_measures.to_pandas()\n#\n#     measure_name timestamp_field  ... pipeline_is_running num_event_properties\n# 0           test   s0::timestamp  ...               False                    2\n# [1 rows x 6 columns]\n

"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.base_api_path","title":"base_api_path: str property","text":"

Constructs the basic API URL from the given client_config.

RETURNS DESCRIPTION base_api_path

basic API path of the connected StreamPipes instance

TYPE: str

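As a usage sketch, assuming a client instance created as in the examples above (the concrete path suffix depends on the client_config and the StreamPipes version, so it is not asserted here):

print(client.base_api_path)\n# prints the base API URL derived from client_config, e.g. starting with 'http://localhost:80/'\n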
"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.http_headers","title":"http_headers: Dict[str, str] property","text":"

Returns the HTTP headers used for all requests.

The HTTP headers are composed of the authentication headers supplied by the credential provider and additional required headers (currently this is only the application header).

RETURNS DESCRIPTION http_headers

Header information for HTTP requests as string key-value pairs.

TYPE: Dict[str, str]

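A short sketch of inspecting the headers, assuming a client instance as created in the examples above (the concrete authentication header names come from the configured credential provider and are not asserted here):

headers = client.http_headers\n# a Dict[str, str] combining the authentication headers and the application header\nfor key in headers:\n    print(key)\n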
"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.create","title":"create(client_config, logging_level=logging.INFO) classmethod","text":"

Returns an instance of the StreamPipesPythonClient.

Provides consistency to the StreamPipes Java client.

PARAMETER DESCRIPTION client_config

Configures the client to connect properly to the StreamPipes instance.

TYPE: StreamPipesClientConfig

logging_level

Influences the log messages emitted by the StreamPipesClient.

TYPE: int DEFAULT: INFO

RETURNS DESCRIPTION StreamPipesClient"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.describe","title":"describe()","text":"

Prints a short description of the connected StreamPipes instance and the available resources to the console.

RETURNS DESCRIPTION None

Examples:

client.describe()\n
Output:
Hi there!\nYou are connected to a StreamPipes instance running at http://localhost:80.\nThe following StreamPipes resources are available with this client:\n6x DataStreams\n1x DataLakeMeasures\n

"},{"location":"reference/client/config/","title":"Config","text":"

Configuration class for the StreamPipes client.

"},{"location":"reference/client/config/#streampipes.client.config.StreamPipesClientConfig","title":"StreamPipesClientConfig(credential_provider, host_address, https_disabled=False, port=80) dataclass","text":"

Configure the StreamPipes client in accordance with the actual StreamPipes instance to connect to.

An instance is provided to the StreamPipesClient to configure it properly.

PARAMETER DESCRIPTION credential_provider

Provides the credentials to authenticate with the StreamPipes API.

TYPE: CredentialProvider

host_address

Host address of the StreamPipes instance to connect to. Should be provided without the protocol/scheme, e.g. as localhost or streampipes.xyz.

TYPE: str

https_disabled

Determines whether https is used to connect to StreamPipes.

TYPE: Optional[bool] DEFAULT: False

port

Specifies the port under which the StreamPipes API is available, e.g., 80 (with http) or 443 (with https)

TYPE: Optional[int] DEFAULT: 80

Examples:

see StreamPipesClient

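In recent development versions (as reflected by this patch), the config additionally accepts an additional_headers parameter; a minimal sketch, assuming such a version is installed (the proxy header shown is hypothetical):

from streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nconfig = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),  # credentials via environment variables\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80,\n    additional_headers={\"X-Forwarded-For\": \"10.0.0.1\"},  # hypothetical proxy header\n)\n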
"},{"location":"reference/client/credential_provider/","title":"Credential provider","text":"

Implementation of credential providers. A credential provider supplies the specified type of credentials in the appropriate HTTP header format. The headers are then used by the client to connect to StreamPipes.

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.CredentialProvider","title":"CredentialProvider","text":"

Bases: ABC

Abstract implementation of a credential provider. Must be inherited by all credential providers.

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.CredentialProvider.make_headers","title":"make_headers(http_headers=None)","text":"

Creates the HTTP headers for the specific credential provider.

Concrete authentication headers must be defined in the implementation of a credential provider.

PARAMETER DESCRIPTION http_headers

Additional HTTP headers by which the generated headers are extended.

TYPE: Optional[Dict[str, str]] DEFAULT: None

RETURNS DESCRIPTION https_headers

Dictionary with header information as string key-value pairs. Contains all pairs given as parameter plus the header pairs for authentication determined by the credential provider.

TYPE: Dict[str, str]

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.StreamPipesApiKeyCredentials","title":"StreamPipesApiKeyCredentials(username=None, api_key=None)","text":"

Bases: CredentialProvider

A credential provider that allows authentication via a StreamPipes API Token.

The required token can be generated via the StreamPipes UI (see the description on our start page).

Both parameters can either be passed as arguments or remain unset. If they are not passed, they are retrieved from environment variables:

  • SP_USERNAME is expected to contain the username
  • SP_API_KEY is expected to contain the API key
PARAMETER DESCRIPTION username

The username to which the API token is granted, e.g., demo-user@streampipes.apache.org. If not passed, the username is retrieved from the environment variable SP_USERNAME.

TYPE: Optional[str] DEFAULT: None

api_key

The StreamPipes API key as it is displayed in the UI. If not passed, the API key is retrieved from the environment variable SP_API_KEY.

TYPE: Optional[str] DEFAULT: None

Examples:

see StreamPipesClient

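A short sketch of both options described above (all values are placeholders):

from streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\n# option 1: pass the credentials explicitly\ncredentials = StreamPipesApiKeyCredentials(\n    username=\"demo-user@streampipes.apache.org\",  # placeholder\n    api_key=\"DEMO-KEY\",  # placeholder\n)\n\n# option 2: omit both parameters and rely on SP_USERNAME and SP_API_KEY\ncredentials = StreamPipesApiKeyCredentials()\n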
"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.StreamPipesApiKeyCredentials.from_env","title":"from_env(username_env, api_key_env) classmethod","text":"

DEPRECATED - use the class constructor instead

Returns an API key provider parameterized via environment variables.

PARAMETER DESCRIPTION username_env

Name of the environment variable that contains the username

TYPE: str

api_key_env

Name of the environment variable that contains the API key

TYPE: str

RETURNS DESCRIPTION StreamPipesApiKeyCredentials RAISES DESCRIPTION KeyError

If one of the environment variables is not defined

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.StreamPipesApiKeyCredentials.make_headers","title":"make_headers(http_headers=None)","text":"

Creates the HTTP headers for the specific credential provider.

Concrete authentication headers must be defined in the implementation of a credential provider.

PARAMETER DESCRIPTION http_headers

Additional HTTP headers by which the generated headers are extended.

TYPE: Optional[Dict[str, str]] DEFAULT: None

RETURNS DESCRIPTION https_headers

Dictionary with header information as string key-value pairs. Contains all pairs given as parameter plus the header pairs for authentication determined by the credential provider.

TYPE: Dict[str, str]

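A small sketch of extending the generated headers (the concrete authentication header names are determined by the credential provider and are not asserted here; the extra header is hypothetical):

credentials = StreamPipesApiKeyCredentials(username=\"demo-user\", api_key=\"DEMO-KEY\")  # placeholders\nheaders = credentials.make_headers(http_headers={\"X-Custom-Header\": \"value\"})  # hypothetical extra header\n# 'headers' now contains the given pair plus the provider's authentication headers\n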
"},{"location":"reference/endpoint/endpoint/","title":"Endpoint","text":"

General implementation for an endpoint. The provided classes and assets are intended to be used for developing endpoints. An endpoint provides all options to communicate with a dedicated part of StreamPipes in a handy way.

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint","title":"APIEndpoint(parent_client)","text":"

Bases: Endpoint

Abstract implementation of an API endpoint.

Serves as template for all endpoints of the StreamPipes API. By design, endpoints are only instantiated within the __init__ method of the StreamPipesClient.

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.all","title":"all()","text":"

Get all resources of this endpoint provided by the StreamPipes API.

Results are provided as an instance of a ResourceContainer that allows you to handle the returned resources in a comfortable and pythonic way.

RETURNS DESCRIPTION container

Container element that bundles the returned resources

TYPE: ResourceContainer

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.build_url","title":"build_url()","text":"

Builds the endpoint's full API URL.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the specified resource from the API endpoint.

PARAMETER DESCRIPTION identifier

The identifier of the resource to be queried.

TYPE: str

RETURNS DESCRIPTION resource

The specified resource as an instance of the corresponding model class.

TYPE: Resource

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.post","title":"post(resource)","text":"

Allows posting a resource to the StreamPipes API.

PARAMETER DESCRIPTION resource

The resource to be posted.

TYPE: Resource

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.Endpoint","title":"Endpoint(parent_client)","text":"

Bases: ABC

Abstract implementation of a StreamPipes endpoint.

Serves as template for all endpoints used for interaction with a StreamPipes instance. By design, endpoints are only instantiated within the __init__ method of the StreamPipesClient.

PARAMETER DESCRIPTION parent_client

This parameter expects the instance of StreamPipesClient the endpoint is attached to.

TYPE: StreamPipesClient

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.MessagingEndpoint","title":"MessagingEndpoint(parent_client)","text":"

Bases: Endpoint

Abstract implementation of a StreamPipes messaging endpoint.

Serves as template for all endpoints used for interacting with the StreamPipes messaging layer directly. Therefore, they need to provide the functionality to talk with the broker system running in StreamPipes. By design, endpoints are only instantiated within the __init__ method of the StreamPipesClient.

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.MessagingEndpoint.broker","title":"broker: Broker property writable","text":"

Defines the broker instance that is used to connect to StreamPipes' messaging layer.

This instance enables the client to authenticate to the broker used in the target StreamPipes instance, to consume messages from and to write messages to the broker.

RAISES DESCRIPTION MessagingEndpointNotConfiguredError

If the endpoint is used before the broker instance is set via configure()

RETURNS DESCRIPTION broker

The broker instance to be used to communicate with StreamPipes' messaging layer.

TYPE: Broker

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.MessagingEndpoint.configure","title":"configure(broker)","text":"

Configures the message endpoint by setting the broker instance to be used.

This configuration step is required before the endpoint can actually be used. The provided broker instance is stored in an internal property.

PARAMETER DESCRIPTION broker

Broker instance that should be used for this endpoint

TYPE: Broker

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/exceptions/","title":"Exceptions","text":"

Custom exceptions dedicated to the endpoints module.

"},{"location":"reference/endpoint/exceptions/#streampipes.endpoint.exceptions.MessagingEndpointNotConfiguredError","title":"MessagingEndpointNotConfiguredError(endpoint_name)","text":"

Bases: Exception

Exception that indicates that an instance of a messaging endpoint has not been configured.

This error occurs when an instance of a messaging endpoint is used before the broker instance to be used is configured by passing it to the configure() method.

PARAMETER DESCRIPTION endpoint_name

The name of the endpoint that caused the error

TYPE: str

"},{"location":"reference/endpoint/api/data_lake_measure/","title":"Data lake measure","text":"

Specific implementation of the StreamPipes API's data lake measure endpoints. This endpoint allows consuming data stored in StreamPipes' data lake.

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint","title":"DataLakeMeasureEndpoint(parent_client)","text":"

Bases: APIEndpoint

Implementation of the DataLakeMeasure endpoint.

This endpoint provides an interface to all data stored in the StreamPipes data lake.

Consequently, it allows querying metadata about available data sets (see all() method). The metadata is returned as an instance of DataLakeMeasures.

In addition, the endpoint provides direct access to the data stored in the data lake by querying a specific data lake measure using the get() method.

Examples:

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(username=\"test-user\", api_key=\"api-key\"),\n    host_address=\"localhost\",\n    port=8082,\n    https_disabled=True\n)\nclient = StreamPipesClient.create(client_config=client_config)\n

# get all existing data lake measures from StreamPipes\ndata_lake_measures = client.dataLakeMeasureApi.all()\n\n# let's take a look how many we got\nlen(data_lake_measures)\n
5\n

# Retrieve a specific data lake measure as a pandas DataFrame\nflow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\").to_pandas()\nflow_rate_pd\n
                         time    density  mass_flow    sensorId  sensor_fault_flags  temperature  volume_flow\n0    2023-02-24T16:19:41.472Z  50.872730   3.309556  flowrate02               False    44.448483     5.793138\n1    2023-02-24T16:19:41.482Z  47.186588   5.608580  flowrate02               False    40.322033     0.058015\n2    2023-02-24T16:19:41.493Z  46.735321   7.692881  flowrate02               False    49.239639    10.283526\n3    2023-02-24T16:19:41.503Z  40.169796   3.632898  flowrate02               False    49.933754     6.893441\n4    2023-02-24T16:19:41.513Z  49.635124   0.711260  flowrate02               False    50.106617     2.999871\n..                        ...        ...        ...         ...                 ...          ...          ...\n995  2023-02-24T16:19:52.927Z  50.057495   1.740114  flowrate02               False    46.558231     1.818237\n996  2023-02-24T16:19:52.94Z   41.038895   7.211723  flowrate02               False    48.048622     2.127493\n997  2023-02-24T16:19:52.952Z  45.837013   7.770180  flowrate02               False    48.188026     7.892062\n998  2023-02-24T16:19:52.965Z  43.389065   4.458602  flowrate02               False    48.280899     5.733892\n999  2023-02-24T16:19:52.977Z  44.056030   2.592060  flowrate02               False    47.505951     4.260697\n

As you can see, the number of rows returned by default is 1000. We can modify this behavior by passing the limit parameter.

flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", limit=10).to_pandas()\nlen(flow_rate_pd)\n
10\n

If we are only interested in the values for density, the columns parameter allows us to select the columns to be returned:

flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", columns='density', limit=3).to_pandas()\nflow_rate_pd\n
                       time    density\n0  2023-02-24T16:19:41.472Z  50.872730\n1  2023-02-24T16:19:41.482Z  47.186588\n2  2023-02-24T16:19:41.493Z  46.735321\n

This is only a subset of the available query parameters, find them at MeasurementGetQueryConfig.

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.all","title":"all()","text":"

Get all resources of this endpoint provided by the StreamPipes API.

Results are provided as an instance of a ResourceContainer that allows you to handle the returned resources in a comfortable and pythonic way.

RETURNS DESCRIPTION container

Container element that bundles the returned resources

TYPE: ResourceContainer

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.build_url","title":"build_url()","text":"

Builds the endpoint's full API URL.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the specified data lake measure from the API.

By default, the maximum number of returned records is 1000. This behaviour can be influenced by passing the parameter limit with a different value (see MeasurementGetQueryConfig).

PARAMETER DESCRIPTION identifier

The identifier of the data lake measure to be queried.

TYPE: str

**kwargs

Keyword arguments can be used to provide additional query parameters. The available query parameters are defined by the MeasurementGetQueryConfig.

TYPE: Optional[Dict[str, Any]] DEFAULT: {}

RETURNS DESCRIPTION measurement

The specified data lake measure

TYPE: QueryResult

Examples:

see directly at DataLakeMeasureEndpoint.

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.post","title":"post(resource)","text":"

Allows posting a resource to the StreamPipes API.

PARAMETER DESCRIPTION resource

The resource to be posted.

TYPE: Resource

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.MeasurementGetQueryConfig","title":"MeasurementGetQueryConfig","text":"

Bases: BaseModel

Config class describing the parameters of the get() method for measurements.

This config class is used to validate the provided query parameters for the GET endpoint of measurements. Additionally, it takes care of the conversion to a proper HTTP query string. Thereby, parameter names are adapted to the naming of the StreamPipes API, for which Pydantic aliases are used.

ATTRIBUTE DESCRIPTION columns

A comma-separated list of column names (e.g., time,value). If provided, the returned data only consists of the given columns. Please be aware that the column time as an index is always included.

TYPE: Optional[List[str]]

end_date

Limits the queried data to only include data that is older than the specified time. In other words, any data that occurred after the end_date will not be included in the query results.

TYPE: Optional[datetime]

limit

Maximum number of records returned (default: 1000). This needs to be at least 1.

TYPE: Optional[int]

offset

Offset to be applied to the returned data. This needs to be at least 0.

TYPE: Optional[int]

order

Ordering of query results. Allowed values: ASC and DESC (default: ASC).

TYPE: Optional[str]

page_no

Page number used for paging operations. This needs to be at least 1.

TYPE: Optional[int]

start_date

Limits the queried data to only include data that is newer than the specified time. In other words, any data that occurred before the start_date will not be included in the query results.

TYPE: Optional[datetime]

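Combining some of these parameters, a sketch of a query, assuming a client instance and the flow-rate measure from the examples above:

result = client.dataLakeMeasureApi.get(\n    identifier=\"flow-rate\",\n    columns=\"density,mass_flow\",\n    order=\"DESC\",\n    limit=50,\n)\nresult.to_pandas()\n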
"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.MeasurementGetQueryConfig.Config","title":"Config","text":"

Pydantic Config class

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.MeasurementGetQueryConfig.build_query_string","title":"build_query_string()","text":"

Builds an HTTP query string for the config.

This method returns an HTTP query string for the invoking config. It follows the structure ?param1=value1&param2=value2.... This query string is not an entire URL; instead, it needs to be appended to an API path.

RETURNS DESCRIPTION query_param_string

HTTP query params string (?param1=value1&param2=value2...)

TYPE: str

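As a sketch of the resulting string (the import path follows this page's module reference, streampipes.endpoint.api.data_lake_measure):

from streampipes.endpoint.api.data_lake_measure import MeasurementGetQueryConfig\n\nconfig = MeasurementGetQueryConfig(limit=10, order=\"DESC\")\nconfig.build_query_string()\n# returns a string of the form '?limit=10&order=DESC'; the actual parameter names follow the StreamPipes API aliases\n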
"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.StreamPipesQueryValidationError","title":"StreamPipesQueryValidationError","text":"

Bases: Exception

A custom exception to be raised when the validation of query parameters causes an error.

"},{"location":"reference/endpoint/api/data_stream/","title":"Data stream","text":"

Specific implementation of the StreamPipes API's data stream endpoints.

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint","title":"DataStreamEndpoint(parent_client)","text":"

Bases: APIEndpoint

Implementation of the DataStream endpoint.

Consequently, it allows querying metadata about available data streams (see all() method). The metadata is returned as an instance of DataStreams.

Examples:

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nclient_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(username=\"test-user\", api_key=\"api-key\"),\n    host_address=\"localhost\",\n    port=8082,\n    https_disabled=True\n)\nclient = StreamPipesClient.create(client_config=client_config)\n

# let's get all existing data streams in StreamPipes\ndata_streams = client.dataStreamApi.all()\nlen(data_streams)\n
2\n

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.all","title":"all()","text":"

Get all resources of this endpoint provided by the StreamPipes API.

Results are provided as an instance of a ResourceContainer that allows you to handle the returned resources in a comfortable and pythonic way.

RETURNS DESCRIPTION container

Container element that bundles the returned resources

TYPE: ResourceContainer

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.build_url","title":"build_url()","text":"

Builds the endpoint's full API URL.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the specified resource from the API endpoint.

PARAMETER DESCRIPTION identifier

The identifier of the resource to be queried.

TYPE: str

RETURNS DESCRIPTION resource

The specified resource as an instance of the corresponding model class.

TYPE: Resource

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.post","title":"post(resource)","text":"

Allows posting a resource to the StreamPipes API.

PARAMETER DESCRIPTION resource

The resource to be posted.

TYPE: Resource

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/api/version/","title":"Version","text":"

Specific implementation of the StreamPipes API's version endpoint.

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint","title":"VersionEndpoint(parent_client)","text":"

Bases: APIEndpoint

Implementation of the Versions endpoint.

This endpoint provides metadata about the StreamPipes version of the connected instance. It only allows applying the get() method with an empty string as the identifier.

PARAMETER DESCRIPTION parent_client

The instance of StreamPipesClient the endpoint is attached to.

TYPE: StreamPipesClient

Examples:

>>> from streampipes.client import StreamPipesClient\n>>> from streampipes.client.config import StreamPipesClientConfig\n>>> from streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
>>> client_config = StreamPipesClientConfig(\n...     credential_provider=StreamPipesApiKeyCredentials(username=\"test-user\", api_key=\"api-key\"),\n...     host_address=\"localhost\",\n...     port=8082,\n...     https_disabled=True\n... )\n
>>> client = StreamPipesClient.create(client_config=client_config)\n
>>> client.versionApi.get(identifier=\"\").to_dict(use_source_names=False)\n{'backend_version': '0.92.0-SNAPSHOT'}\n
"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.all","title":"all()","text":"

Usually, this method returns information about all resources provided by this endpoint. However, this endpoint does not support this kind of operation.

RAISES DESCRIPTION NotImplementedError

This endpoint does not return multiple entries; therefore, this method is not available.

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.build_url","title":"build_url()","text":"

Builds the endpoint's URL of the API path.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the resource from the API endpoint.

For this endpoint only one resource is available.

PARAMETER DESCRIPTION identifier

Not supported by this endpoint; it is set to an empty string.

TYPE: str

RAISES DESCRIPTION ValueError

Non-empty identifier is not supported by this endpoint. Please set identifier to an empty string or None.

RETURNS DESCRIPTION versions

The specified resource as an instance of the corresponding model class (Version).

TYPE: Version

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.post","title":"post(resource)","text":"

Usually, this method allows creating resources via this endpoint. Since the data represented by this endpoint is immutable, it does not support this kind of operation.

RAISES DESCRIPTION NotImplementedError

This endpoint does not allow POST requests; therefore, this method is not available.

"},{"location":"reference/function_zoo/river_function/","title":"River function","text":""},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML","title":"OnlineML(client, stream_ids, model, prediction_type=RuntimeType.STRING.value, supervised=False, target_label=None, on_start=lambda self, context: None, on_event=lambda self, event, streamId: None, on_stop=lambda self: None)","text":"

Wrapper class that enables easy usage of online machine learning models from the River library.

It creates a StreamPipesFunction that trains a model with the incoming events of a data stream, and an output data stream that publishes the predictions to StreamPipes.

PARAMETER DESCRIPTION client

The client for the StreamPipes API.

TYPE: StreamPipesClient

stream_ids

The ids of the data streams used to train the model.

TYPE: List[str]

model

The model to train. It is meant to be a River model/pipeline, but can be any model that provides 'learn_one' and 'predict_one' methods.

TYPE: Any

prediction_type

The data type of the prediction. It is only needed if you want to continue working with the prediction in StreamPipes.

TYPE: str DEFAULT: RuntimeType.STRING.value

supervised

Defines whether the model is supervised or unsupervised.

TYPE: bool DEFAULT: False

target_label

Defines the name of the target attribute if the model is supervised.

TYPE: Optional[str] DEFAULT: None

on_start

A function to be called when this StreamPipesFunction gets started.

TYPE: Callable[[Any, FunctionContext], None] DEFAULT: lambda self, context: None

on_event

A function to be called when this StreamPipesFunction receives an event.

TYPE: Callable[[Any, Dict[str, Any], str], None] DEFAULT: lambda self, event, streamId: None

on_stop

A function to be called when this StreamPipesFunction gets stopped.

TYPE: Callable[[Any], None] DEFAULT: lambda self: None
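A minimal sketch of the intended usage, assuming an existing client and a placeholder stream id; River's cluster.KMeans is used here because it provides the required 'learn_one' and 'predict_one' methods:

from river import cluster\nfrom streampipes.function_zoo.river_function import OnlineML\n\nk_means = cluster.KMeans(n_clusters=2)\nonline_ml = OnlineML(client=client, stream_ids=[\"sample-stream-id\"], model=k_means)\nonline_ml.start()\n\n# later: keep predicting, but stop updating the model\nonline_ml.set_learning(False)\n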

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML.set_learning","title":"set_learning(learning)","text":"

Start or stop the training of the model.

PARAMETER DESCRIPTION learning

Defines whether the training should be continued

TYPE: bool

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML.start","title":"start()","text":"

Registers the function and starts the training.

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML.stop","title":"stop()","text":"

Stops the function and permanently ends the training.

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction","title":"RiverFunction(function_definition, model, supervised, target_label, on_start, on_event, on_stop)","text":"

Bases: StreamPipesFunction

Implementation of a StreamPipesFunction that enables easy usage of online machine learning models from the River library.

The function trains the model with the incoming events and publishes the prediction to an output data stream.

PARAMETER DESCRIPTION function_definition

The function definition which contains the output stream.

TYPE: FunctionDefinition

model

The model to train. It is meant to be a River model/pipeline, but can be any model that provides 'learn_one' and 'predict_one' methods.

TYPE: Any

supervised

Defines whether the model is supervised or unsupervised.

TYPE: bool

target_label

Defines the name of the target attribute if the model is supervised.

TYPE: Optional[str]

on_start

A function to be called when this StreamPipesFunction gets started.

TYPE: Callable[[Any, FunctionContext], None]

on_event

A function to be called when this StreamPipesFunction receives an event.

TYPE: Callable[[Any, Dict[str, Any], str], None]

on_stop

A function to be called when this StreamPipesFunction gets stopped.

TYPE: Callable[[Any], None]

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.add_output","title":"add_output(stream_id, event)","text":"

Send an event via an output data stream to StreamPipes.

PARAMETER DESCRIPTION stream_id

The id of the output data stream

TYPE: str

event

The event which should be sent

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.getFunctionId","title":"getFunctionId()","text":"

Returns the id of the function.

RETURNS DESCRIPTION function_id

Identification object of the StreamPipes function

TYPE: FunctionId

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.onEvent","title":"onEvent(event, streamId)","text":"

Trains the model with the incoming events and sends the prediction back to StreamPipes.

PARAMETER DESCRIPTION event

The incoming event that serves as input for the function

TYPE: Dict[str, Any]

streamId

Identifier of the corresponding data stream

TYPE: str

RETURNS DESCRIPTION None"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.onServiceStarted","title":"onServiceStarted(context)","text":"

Executes the on_start method of the function.

PARAMETER DESCRIPTION context

The function's context

TYPE: FunctionContext

RETURNS DESCRIPTION None"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.onServiceStopped","title":"onServiceStopped()","text":"

Executes the on_stop function.

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.requiredStreamIds","title":"requiredStreamIds()","text":"

Get the ids of the streams needed by the function.

RETURNS DESCRIPTION stream_ids

List of the stream ids

TYPE: List[str]

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.stop","title":"stop()","text":"

Stops the function and disconnects from the output streams.

"},{"location":"reference/functions/function_handler/","title":"Function handler","text":""},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler","title":"FunctionHandler(registration, client)","text":"

The function handler manages the StreamPipes Functions.

It controls the connection to the brokers, starts the functions, manages the broadcast of the live data and is able to stop the connection to the brokers and functions.

PARAMETER DESCRIPTION registration

The registration that contains the StreamPipesFunctions.

TYPE: Registration

client

The client to interact with the API.

TYPE: StreamPipesClient

ATTRIBUTE DESCRIPTION stream_contexts

Map of all data stream contexts

TYPE: Dict[str, DataStreamContext]

brokers

List of all registered brokers

TYPE: List[Broker]
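A short sketch of the typical wiring, assuming an existing client and a StreamPipesFunction subclass MyFunction (hypothetical):

from streampipes.functions.function_handler import FunctionHandler\nfrom streampipes.functions.registration import Registration\n\nregistration = Registration()\nregistration.register(MyFunction())\nfunction_handler = FunctionHandler(registration, client)\nfunction_handler.initializeFunctions()  # starts the event loop and the functions\n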

"},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler.disconnect","title":"disconnect()","text":"

Disconnects from the brokers and stops all functions.

RETURNS DESCRIPTION None"},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler.force_stop_functions","title":"force_stop_functions()","text":"

Stops the StreamPipesFunctions if the event loop was stopped without stopping the functions first.

RETURNS DESCRIPTION None WARNS DESCRIPTION UserWarning

If there is a running event loop; in that case the functions should be stopped by disconnecting from the broker instead.

"},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler.initializeFunctions","title":"initializeFunctions()","text":"

Creates the context for every data stream and starts the event loop to manage the StreamPipes Functions.

RETURNS DESCRIPTION None"},{"location":"reference/functions/registration/","title":"Registration","text":""},{"location":"reference/functions/registration/#streampipes.functions.registration.Registration","title":"Registration()","text":"

Manages the existing StreamPipesFunctions and registers them.

ATTRIBUTE DESCRIPTION functions

List of all registered StreamPipesFunction

TYPE: List[StreamPipesFunction]

"},{"location":"reference/functions/registration/#streampipes.functions.registration.Registration.getFunctions","title":"getFunctions()","text":"

Get all registered functions.

This method exists to be consistent with the Java client.

RETURNS DESCRIPTION functions

List of all registered functions.

TYPE: List[StreamPipesFunction]

"},{"location":"reference/functions/registration/#streampipes.functions.registration.Registration.register","title":"register(streampipes_function)","text":"

Registers a new function.

PARAMETER DESCRIPTION streampipes_function

The function to register.

TYPE: StreamPipesFunction

RETURNS DESCRIPTION self

The updated Registration instance

TYPE: Registration
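Since register() returns the updated Registration, registrations can be chained; a sketch with two hypothetical function instances:

registration = Registration()\nregistration.register(function_a).register(function_b)\n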

"},{"location":"reference/functions/streampipes_function/","title":"Streampipes function","text":""},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction","title":"StreamPipesFunction(function_definition=None)","text":"

Bases: ABC

Abstract implementation of a StreamPipesFunction.

A StreamPipesFunction allows users to get the data of StreamPipes data streams easily. It makes it possible to work with the live data in Python and enables the use of powerful data analytics libraries.

PARAMETER DESCRIPTION function_definition

The definition of the function that contains metadata about the connected function

TYPE: Optional[FunctionDefinition] DEFAULT: None

ATTRIBUTE DESCRIPTION output_collectors

List of all output collectors which are created based on the provided function definitions.

TYPE: Dict[str, OutputCollector]
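A minimal sketch of a concrete function implementing the three abstract hooks listed below; the class name is illustrative only:

from typing import Any, Dict\n\nfrom streampipes.functions.streampipes_function import StreamPipesFunction\nfrom streampipes.functions.utils.function_context import FunctionContext\n\nclass ExampleFunction(StreamPipesFunction):\n    def onServiceStarted(self, context: FunctionContext) -> None:\n        print(\"function started\")\n\n    def onEvent(self, event: Dict[str, Any], streamId: str) -> None:\n        print(event)  # work with the live data here\n\n    def onServiceStopped(self) -> None:\n        print(\"function stopped\")\n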

"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.add_output","title":"add_output(stream_id, event)","text":"

Send an event via an output data stream to StreamPipes.

PARAMETER DESCRIPTION stream_id

The id of the output data stream

TYPE: str

event

The event which should be sent

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.getFunctionId","title":"getFunctionId()","text":"

Returns the id of the function.

RETURNS DESCRIPTION function_id

Identification object of the StreamPipes function

TYPE: FunctionId

"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.onEvent","title":"onEvent(event, streamId) abstractmethod","text":"

Is called for every event of a data stream.

PARAMETER DESCRIPTION event

The received event from the data stream.

TYPE: Dict[str, Any]

streamId

The id of the data stream which the event belongs to.

TYPE: str

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.onServiceStarted","title":"onServiceStarted(context) abstractmethod","text":"

Is called when the function is started.

PARAMETER DESCRIPTION context

The context in which the function is started.

TYPE: FunctionContext

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.onServiceStopped","title":"onServiceStopped() abstractmethod","text":"

Is called when the function is stopped.

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.requiredStreamIds","title":"requiredStreamIds()","text":"

Get the ids of the streams needed by the function.

RETURNS DESCRIPTION stream_ids

List of the stream ids

TYPE: List[str]

"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.stop","title":"stop()","text":"

Stops the function and disconnects from the output streams.

"},{"location":"reference/functions/broker/broker/","title":"Broker","text":""},{"location":"reference/functions/broker/broker/#streampipes.functions.broker.broker.Broker","title":"Broker","text":"

Bases: ABC

Abstract implementation of a broker for consumer and publisher.

It contains the basic logic to connect to a data stream.

"},{"location":"reference/functions/broker/broker/#streampipes.functions.broker.broker.Broker.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/broker/#streampipes.functions.broker.broker.Broker.disconnect","title":"disconnect() abstractmethod async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/broker_handler/","title":"Broker handler","text":""},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.SupportedBroker","title":"SupportedBroker","text":"

Bases: Enum

Enum for the supported brokers.

"},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.UnsupportedBrokerError","title":"UnsupportedBrokerError(broker_name)","text":"

Bases: Exception

Exception raised if a broker isn't implemented yet.

"},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.get_broker","title":"get_broker(data_stream, is_publisher=False)","text":"

Derive the broker for the given data stream.

PARAMETER DESCRIPTION data_stream

Data stream instance from which the broker is inferred

TYPE: DataStream

RETURNS DESCRIPTION broker

The corresponding broker instance derived from data stream.

TYPE: Broker

RAISES DESCRIPTION UnsupportedBrokerError

Is raised when the given data stream belongs to a broker that is currently not supported by StreamPipes Python.

"},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.get_broker_description","title":"get_broker_description(data_stream)","text":"

Derive the description of the broker for the given data stream.

PARAMETER DESCRIPTION data_stream

Data stream instance from which the broker is inferred

TYPE: DataStream

RETURNS DESCRIPTION broker

The corresponding broker description derived from data stream.

TYPE: SupportedBroker

RAISES DESCRIPTION UnsupportedBrokerError

Is raised when the given data stream belongs to a broker that is currently not supported by StreamPipes Python.

"},{"location":"reference/functions/broker/consumer/","title":"Consumer","text":""},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer","title":"Consumer","text":"

Bases: Broker

Abstract implementation of a consumer for a broker.

A consumer allows subscribing to a data stream.

"},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes and creates a subscription.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer.disconnect","title":"disconnect() abstractmethod async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer.get_message","title":"get_message() abstractmethod","text":"

Get the published messages of the subscription.

RETURNS DESCRIPTION iterator

An async iterator for the messages.

TYPE: AsyncIterator

"},{"location":"reference/functions/broker/output_collector/","title":"Output collector","text":""},{"location":"reference/functions/broker/output_collector/#streampipes.functions.broker.output_collector.OutputCollector","title":"OutputCollector(data_stream)","text":"

Collector for output events. The events are published to an output data stream. Therefore, the output collector establishes a connection to the broker.

PARAMETER DESCRIPTION data_stream

The output data stream that will receive the events.

TYPE: DataStream

ATTRIBUTE DESCRIPTION publisher

The publisher instance that sends the data to StreamPipes

TYPE: Publisher

"},{"location":"reference/functions/broker/output_collector/#streampipes.functions.broker.output_collector.OutputCollector.collect","title":"collect(event)","text":"

Publishes an event to the output stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/output_collector/#streampipes.functions.broker.output_collector.OutputCollector.disconnect","title":"disconnect()","text":"

Disconnects the broker of the output collector.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/publisher/","title":"Publisher","text":""},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher","title":"Publisher","text":"

Bases: Broker

Abstract implementation of a publisher for a broker.

A publisher allows publishing events to a data stream.

"},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher.disconnect","title":"disconnect() abstractmethod async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher.publish_event","title":"publish_event(event) abstractmethod async","text":"

Publish an event to a connected data stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_consumer/","title":"Kafka consumer","text":""},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer","title":"KafkaConsumer","text":"

Bases: Consumer

Implementation of a consumer for Kafka

"},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes and creates a subscription.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer.get_message","title":"get_message()","text":"

Get the published messages of the subscription.

RETURNS DESCRIPTION iterator

An async iterator for the messages.

TYPE: AsyncIterator

"},{"location":"reference/functions/broker/kafka/kafka_message_fetcher/","title":"Kafka message fetcher","text":""},{"location":"reference/functions/broker/kafka/kafka_message_fetcher/#streampipes.functions.broker.kafka.kafka_message_fetcher.KafkaMessage","title":"KafkaMessage(data)","text":"

An internal representation of a Kafka message

PARAMETER DESCRIPTION data

The received Kafka message as byte array

"},{"location":"reference/functions/broker/kafka/kafka_message_fetcher/#streampipes.functions.broker.kafka.kafka_message_fetcher.KafkaMessageFetcher","title":"KafkaMessageFetcher(consumer)","text":"

Fetches the next message from Kafka

PARAMETER DESCRIPTION consumer

The Kafka consumer

TYPE: Consumer

"},{"location":"reference/functions/broker/kafka/kafka_publisher/","title":"Kafka publisher","text":""},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher","title":"KafkaPublisher","text":"

Bases: Publisher

Implementation of a publisher for Kafka

"},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher.publish_event","title":"publish_event(event) async","text":"

Publish an event to a connected data stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_consumer/","title":"Nats consumer","text":""},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer","title":"NatsConsumer","text":"

Bases: Consumer

Implementation of a consumer for NATS

"},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes and creates a subscription.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer.get_message","title":"get_message()","text":"

Get the published messages of the subscription.

RETURNS DESCRIPTION message_iterator

An async iterator for the messages.

TYPE: AsyncIterator

"},{"location":"reference/functions/broker/nats/nats_publisher/","title":"Nats publisher","text":""},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher","title":"NatsPublisher","text":"

Bases: Publisher

Implementation of a publisher for NATS

"},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher.publish_event","title":"publish_event(event) async","text":"

Publish an event to a connected data stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/utils/async_iter_handler/","title":"Async iter handler","text":""},{"location":"reference/functions/utils/async_iter_handler/#streampipes.functions.utils.async_iter_handler.AsyncIterHandler","title":"AsyncIterHandler","text":"

Handles asynchronous iterators to retrieve the messages one after another, in parallel across streams.

"},{"location":"reference/functions/utils/async_iter_handler/#streampipes.functions.utils.async_iter_handler.AsyncIterHandler.anext","title":"anext(stream_id, message) async staticmethod","text":"

Gets the next message from an AsyncIterator.

PARAMETER DESCRIPTION stream_id

The id of the data stream which the message belongs to.

TYPE: str

message

An asynchronous iterator that contains the messages.

TYPE: AsyncIterator

RETURNS DESCRIPTION result

Tuple of the stream id and the next message, or (\"stop\", None) if no message is left.

TYPE: Tuple[str, Optional[Any]]

"},{"location":"reference/functions/utils/async_iter_handler/#streampipes.functions.utils.async_iter_handler.AsyncIterHandler.combine_async_messages","title":"combine_async_messages(messages) async staticmethod","text":"

Continuously gets the next published message from multiple AsyncIterators in parallel.

PARAMETER DESCRIPTION messages

A dictionary with an asynchronous iterator for every stream id.

TYPE: Dict[str, AsyncIterator]

YIELDS DESCRIPTION message

Tuple of the stream id and the next published message.

TYPE: Tuple[str, Any]

"},{"location":"reference/functions/utils/data_stream_context/","title":"Data stream context","text":""},{"location":"reference/functions/utils/data_stream_context/#streampipes.functions.utils.data_stream_context.DataStreamContext","title":"DataStreamContext(functions, schema, broker)","text":"

Container for the context of a data stream.

PARAMETER DESCRIPTION functions

StreamPipes Functions which require the data of this data stream.

TYPE: List[StreamPipesFunction]

schema

The schema of this data stream.

TYPE: DataStream

broker

The consumer to connect to this data stream.

TYPE: Consumer

"},{"location":"reference/functions/utils/data_stream_context/#streampipes.functions.utils.data_stream_context.DataStreamContext.add_function","title":"add_function(function)","text":"

Adds a new StreamPipes Function.

PARAMETER DESCRIPTION function

StreamPipesFunction which requires this data stream.

TYPE: StreamPipesFunction

RETURNS DESCRIPTION None"},{"location":"reference/functions/utils/data_stream_generator/","title":"Data stream generator","text":""},{"location":"reference/functions/utils/data_stream_generator/#streampipes.functions.utils.data_stream_generator.RuntimeType","title":"RuntimeType","text":"

Bases: Enum

Runtime type names for the attributes of a data stream.

ATTRIBUTE DESCRIPTION STRING

BOOLEAN

DOUBLE

FLOAT

INTEGER

LONG

"},{"location":"reference/functions/utils/data_stream_generator/#streampipes.functions.utils.data_stream_generator.create_data_stream","title":"create_data_stream(name, attributes, stream_id=None, broker=SupportedBroker.NATS)","text":"

Creates a data stream.

PARAMETER DESCRIPTION name

Name of the data stream to be shown at the UI.

TYPE: str

attributes

Name and types of the attributes.

TYPE: Dict[str, str]

stream_id

The id of this data stream.

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION data_stream

The created data stream

TYPE: DataStream
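A short sketch that combines create_data_stream with the RuntimeType values listed above; the stream and attribute names are placeholders:

from streampipes.functions.utils.data_stream_generator import RuntimeType, create_data_stream\n\ndata_stream = create_data_stream(\n    name=\"sample-stream\",\n    attributes={\"temperature\": RuntimeType.FLOAT.value, \"running\": RuntimeType.BOOLEAN.value},\n)\n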

"},{"location":"reference/functions/utils/function_context/","title":"Function context","text":""},{"location":"reference/functions/utils/function_context/#streampipes.functions.utils.function_context.FunctionContext","title":"FunctionContext(function_id, schema, client, streams)","text":"

Container for the context of a StreamPipesFunction.

PARAMETER DESCRIPTION function_id

The id of this function.

TYPE: str

schema

A dictionary which contains the schema of a data stream for each stream id.

TYPE: Dict[str, DataStream]

client

The client to interact with the API.

TYPE: StreamPipesClient

streams

The ids of the streams needed by this function.

TYPE: List[str]

"},{"location":"reference/functions/utils/function_context/#streampipes.functions.utils.function_context.FunctionContext.add_data_stream_schema","title":"add_data_stream_schema(stream_id, data_stream)","text":"

Adds a new data stream for a new stream id.

PARAMETER DESCRIPTION stream_id

The id of the data stream.

TYPE: str

data_stream

The schema of the data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/model/common/","title":"Common","text":"

Classes of the StreamPipes data model that are commonly shared.

"},{"location":"reference/model/common/#streampipes.model.common.ApplicationLink","title":"ApplicationLink","text":"

Bases: BasicModel

Data model of an ApplicationLink in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.ApplicationLink.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.BaseElement","title":"BaseElement","text":"

Bases: BasicModel

Structure of a basic element in the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.BaseElement.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.BasicModel","title":"BasicModel","text":"

Bases: BaseModel

Basic model class used for the whole Python StreamPipes data model.

"},{"location":"reference/model/common/#streampipes.model.common.BasicModel.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.EventGrounding","title":"EventGrounding","text":"

Bases: BasicModel

Data model of an EventGrounding in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.EventGrounding.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.EventProperty","title":"EventProperty","text":"

Bases: BasicModel

Data model of an EventProperty in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.EventProperty.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.EventSchema","title":"EventSchema","text":"

Bases: BasicModel

Data model of an EventSchema in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.EventSchema.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementCapability","title":"MeasurementCapability","text":"

Bases: BasicModel

Data model of a MeasurementCapability in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementCapability.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementObject","title":"MeasurementObject","text":"

Bases: BasicModel

Data model of a MeasurementObject in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementObject.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.TopicDefinition","title":"TopicDefinition","text":"

Bases: BasicModel

Data model of a TopicDefinition in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.TopicDefinition.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.TransportFormat","title":"TransportFormat","text":"

Bases: BasicModel

Data model of a TransportFormat in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.TransportFormat.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.TransportProtocol","title":"TransportProtocol","text":"

Bases: BasicModel

Data model of a TransportProtocol in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.TransportProtocol.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.ValueSpecification","title":"ValueSpecification","text":"

Bases: BasicModel

Data model of a ValueSpecification in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.ValueSpecification.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.random_letters","title":"random_letters(n)","text":"

Generates a string consisting of random letters.

PARAMETER DESCRIPTION n

number of letters

TYPE: int

RETURNS DESCRIPTION rand_str

String consisting of n random letters

TYPE: str
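A small doctest-style sketch; the output is random, so the shown value is only illustrative:

>>> from streampipes.model.common import random_letters\n>>> random_letters(5)\n'ajkfe'\n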

"},{"location":"reference/model/container/data_lake_measures/","title":"Data lake measures","text":"

Implementation of a resource container for the data lake measures endpoint.

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures","title":"DataLakeMeasures(resources)","text":"

Bases: ResourceContainer

Implementation of the resource container for the data lake measures endpoint.

This resource container is a collection of data lake measures returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried DataLakeMeasure. Furthermore, the resource container makes them accessible in a pythonic manner.

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.to_pandas","title":"to_pandas()","text":"

Returns the resource container represented as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/container/data_streams/","title":"Data streams","text":"

Implementation of a resource container for the data streams endpoint.

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams","title":"DataStreams(resources)","text":"

Bases: ResourceContainer

Implementation of the resource container for the data stream endpoint.

This resource container is a collection of data streams returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried DataStream. Furthermore, the resource container makes them accessible in a pythonic manner.

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.to_pandas","title":"to_pandas()","text":"

Returns the resource container represented as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/container/resource_container/","title":"Resource container","text":"

General and abstract implementation for a resource container.

A resource container is a collection of resources returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried resources. Furthermore, the resource container makes them accessible in a pythonic manner.

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer","title":"ResourceContainer(resources)","text":"

Bases: ABC

General and abstract implementation for a resource container.

A resource container is a collection of resources returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried resources. Furthermore, the resource container makes them accessible in a pythonic manner.

PARAMETER DESCRIPTION resources

A list of resources to be contained in the ResourceContainer.

TYPE: List[Resource]

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.to_pandas","title":"to_pandas()","text":"

Returns the resource container represented as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame
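A sketch of the container accessors side by side, assuming a container fetched from the data stream endpoint:

container = client.dataStreamApi.all()\ncontainer.to_json()  # StreamPipes JSON representation\ncontainer.to_dicts(use_source_names=True)  # keys named as in the backend\ncontainer.to_pandas()  # one row per contained resource\n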

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.StreamPipesDataModelError","title":"StreamPipesDataModelError(validation_error)","text":"

Bases: Exception

A custom exception to be raised when a validation error occurs during the parsing of StreamPipes API responses.

PARAMETER DESCRIPTION validation_error

The validation error thrown by Pydantic during parsing.

TYPE: ValidationError

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.StreamPipesResourceContainerJSONError","title":"StreamPipesResourceContainerJSONError(container_name, json_string)","text":"

Bases: Exception

A custom exception to be raised when the returned JSON string does not match the structure of a resource container.

PARAMETER DESCRIPTION container_name

The class name of the resource container where the invalid data structure was detected.

TYPE: str

json_string

The JSON string that could not be parsed.

TYPE: str

"},{"location":"reference/model/container/versions/","title":"Versions","text":"

Implementation of a resource container for the versions endpoint.

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions","title":"Versions(resources)","text":"

Bases: ResourceContainer

Implementation of the resource container for the versions endpoint.

This resource container is a collection of versions returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried Version. Furthermore, the resource container makes them accessible in a pythonic manner.

PARAMETER DESCRIPTION resources

A list of resources (Version) to be contained in the ResourceContainer.

TYPE: List[Resource]

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.to_pandas","title":"to_pandas()","text":"

Returns the resource container represented as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/resource/data_lake_measure/","title":"Data lake measure","text":""},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure","title":"DataLakeMeasure","text":"

Bases: Resource

Implementation of a resource for data lake measures.

This resource defines the data model used by the resource container (model.container.DataLakeMeasures). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse and validate the API response and to easily switch between the Python representation (both serialized and deserialized) and the Java representation (serialized only).

"},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data lake measure to be used when creating a pandas Dataframe.

It excludes the following fields: element_id, event_schema, schema_version. Instead of the whole event schema, the number of contained event properties is returned under the column name num_event_properties.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys are in Python representation or named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_series/","title":"Data series","text":""},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries","title":"DataSeries","text":"

Bases: Resource

Implementation of a resource for data series. This resource defines the data model used by its resource container (model.container.DataLakeMeasures). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse and validate the API response and to easily switch between the Python representation (both serialized and deserialized) and the Java representation (serialized only).

Notes
This class will only exist temporarily in its current appearance since\nthere are some inconsistencies in the StreamPipes API.\n
"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data lake series to be used when creating a pandas Dataframe.

It contains only the \"header rows\" (the column names) and \"rows\" that contain the actual data.

RETURNS DESCRIPTION pandas_repr

Dictionary with the keys headers and rows

TYPE: dict[str, Any]

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.from_json","title":"from_json(json_string) classmethod","text":"

Creates an instance of DataSeries from a given JSON string.

This method is used by the resource container to parse the JSON response of the StreamPipes API. Currently, it only supports data lake series that consist of exactly one series of data.

PARAMETER DESCRIPTION json_string

The JSON string from which the data lake series should be created.

TYPE: str

RETURNS DESCRIPTION DataSeries

Instance of DataSeries that is created based on the given JSON string.

RAISES DESCRIPTION StreamPipesUnsupportedDataLakeSeries

If the data lake series returned by the StreamPipes API cannot be parsed with the current version of the Python client.

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys are in Python representation or named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.to_pandas","title":"to_pandas()","text":"

Returns the data lake series represented as a pandas DataFrame.

RETURNS DESCRIPTION pd

The data lake series in the form of a pandas DataFrame

TYPE: DataFrame
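A sketch of how a DataSeries typically ends up in a DataFrame, assuming it was retrieved via the data lake measure endpoint; the measure name is a placeholder:

data_series = client.dataLakeMeasureApi.get(identifier=\"sample-measure\")\ndf = data_series.to_pandas()\n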

"},{"location":"reference/model/resource/data_stream/","title":"Data stream","text":""},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream","title":"DataStream(**kwargs)","text":"

Bases: Resource

Implementation of a resource for data streams.

This resource defines the data model used by the resource container (model.container.DataStreams). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse and validate the API response and to easily switch between the Python representation (both serialized and deserialized) and the Java representation (serialized only).

"},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data stream to be used when creating a pandas Dataframe.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys are in Python representation or named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/exceptions/","title":"Exceptions","text":""},{"location":"reference/model/resource/exceptions/#streampipes.model.resource.exceptions.StreamPipesUnsupportedDataSeries","title":"StreamPipesUnsupportedDataSeries(reason=None)","text":"

Bases: Exception

Exception to be raised when the returned data lake series cannot be parsed with the current implementation of the resource.

"},{"location":"reference/model/resource/function_definition/","title":"Function definition","text":""},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition","title":"FunctionDefinition","text":"

Bases: Resource

Configuration for a StreamPipes Function.

This class maps to the FunctionDefinition class in the StreamPipes model. It contains all metadata that are required to register a function at the StreamPipes backend.

ATTRIBUTE DESCRIPTION consumed_streams

List of data streams the function is consuming from

TYPE: List[str]

function_id

identifier object of a StreamPipes function

TYPE: FunctionId

output_data_streams

Map of all output data streams added to the function definition

TYPE: Dict[str, DataStream]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.add_output_data_stream","title":"add_output_data_stream(data_stream)","text":"

Adds an output data stream to the function, which makes it possible to write data back to StreamPipes.

PARAMETER DESCRIPTION data_stream

The schema of the output data stream.

TYPE: DataStream

RETURNS DESCRIPTION self

Instance of the function definition that is extended by the provided DataStream

TYPE: FunctionDefinition
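
A minimal sketch, reusing the illustrative stream id from the tutorials and assuming the default broker chosen by create_data_stream matches your installation (otherwise pass the broker argument explicitly, as the ONNX tutorial below does):

from streampipes.functions.utils.data_stream_generator import create_data_stream, RuntimeType\nfrom streampipes.model.resource import FunctionDefinition\n\n# Schema of the stream the function will write back to StreamPipes\noutput_stream = create_data_stream(\n    name=\"predictions\",\n    attributes={\"is_anomaly\": RuntimeType.BOOLEAN.value},\n)\nfunction_definition = FunctionDefinition(\n    consumed_streams=[\"urn:streampipes.apache.org:eventstream:uPDKLI\"],\n).add_output_data_stream(output_stream)\n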

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a function definition to be used when creating a pandas Dataframe.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.get_output_data_streams","title":"get_output_data_streams()","text":"

Get the output data streams of the function.

RETURNS DESCRIPTION output_streams

Dictionary with every known stream id and the related output stream.

TYPE: Dict[str, DataStream]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.get_output_stream_ids","title":"get_output_stream_ids()","text":"

Get the stream ids of the output data streams.

RETURNS DESCRIPTION output_stream_ids

List of all stream ids

TYPE: List[str]
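
Continuing the sketch under add_output_data_stream above, both accessors can be used to inspect what has been registered:

function_definition.get_output_stream_ids()    # list containing the id of the stream added above\nfunction_definition.get_output_data_streams()  # dictionary mapping that id to its DataStream\n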

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys use the Python representation or are named exactly as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionId","title":"FunctionId","text":"

Bases: BasicModel

Identification object for a StreamPipes function.

Maps to the FunctionId class defined in the StreamPipes model.

ATTRIBUTE DESCRIPTION id

unique identifier of the function instance

TYPE: str

version

version of the corresponding function

TYPE: int

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionId.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/query_result/","title":"Query result","text":""},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult","title":"QueryResult","text":"

Bases: Resource

Implementation of a resource for a query result. This resource defines the data model used by its resource container (model.container.DataLakeMeasures). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse, validate the API response and to easily switch between the Python representation (both serialized and deserialized) and Java representation (serialized only).

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data lake series to be used when creating a pandas Dataframe.

It contains only the \"header rows\" (the column names) and \"rows\" that contain the actual data.

RETURNS DESCRIPTION dict

Dictionary with the keys headers and rows

RAISES DESCRIPTION StreamPipesUnsupportedDataSeries

If the query result returned by the StreamPipes API cannot be converted to the pandas representation

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys use the Python representation or are named exactly as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.to_pandas","title":"to_pandas()","text":"

Returns the data lake series represented as a pandas DataFrame.

RETURNS DESCRIPTION df

Pandas DataFrame containing the query result

TYPE: DataFrame

"},{"location":"reference/model/resource/resource/","title":"Resource","text":"

General and abstract implementation for a resource.

A resource defines the data model that is used by a resource container (model.container.resourceContainer).

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource","title":"Resource","text":"

Bases: ABC, BasicModel

General and abstract implementation for a resource.

A resource defines the data model used by a resource container (model.container.resourceContainer). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse, validate the API response and to easily switch between the Python representation (both serialized and deserialized) and Java representation (serialized only).

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource.convert_to_pandas_representation","title":"convert_to_pandas_representation() abstractmethod","text":"

Returns a dictionary representation to be used when creating a pandas Dataframe.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]
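
For illustration only, a hypothetical subclass (not an actual StreamPipes resource) would implement the method roughly like this:

from typing import Any, Dict\nfrom streampipes.model.resource.resource import Resource\n\nclass MyResource(Resource):\n    name: str = \"demo\"\n\n    def convert_to_pandas_representation(self) -> Dict[str, Any]:\n        # Return a flat dictionary that the resource container turns into one data frame row\n        return self.to_dict(use_source_names=False)\n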

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys use the Python representation or are named exactly as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/version/","title":"Version","text":""},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version","title":"Version","text":"

Bases: Resource

Metadata about the version of the connected StreamPipes server.

ATTRIBUTE DESCRIPTION backend_version

version of the StreamPipes backend the client is connected to

TYPE: str

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of the version metadata to be used when creating a pandas Dataframe.

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys use the Python representation or are named exactly as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.validate_backend_version","title":"validate_backend_version(backend_version)","text":"

Validates the backend version of the StreamPipes backend. Sets 'development' if none is returned, since this is the behavior of a StreamPipes backend running in development mode.
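
A small sketch of this behavior, assuming the resource is constructed directly (normally the client does this for you):

from streampipes.model.resource.version import Version\n\nVersion(backend_version=None).backend_version      # 'development'\nVersion(backend_version=\"0.95.0\").backend_version  # '0.95.0'\n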

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/","title":"Introduction to StreamPipes Python","text":"In\u00a0[\u00a0]: Copied!
%pip install streampipes\n
%pip install streampipes

If you want the current development state, you can also execute:

In\u00a0[\u00a0]: Copied!
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python\n
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python

The corresponding documentation can be found here.

In\u00a0[\u00a0]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(\n        username=\"test@streampipes.apache.org\",\n        api_key=\"API-KEY\",\n    ),\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80\n)\n
config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials( username=\"test@streampipes.apache.org\", api_key=\"API-KEY\", ), host_address=\"localhost\", https_disabled=True, port=80 )

Please be aware that connecting to StreamPipes via an https connection is currently not supported by the Python client.

Providing secrets like the api_key as plaintext in the source code is an anti-pattern. This is why the StreamPipes client also supports passing the required secrets as environment variables. To do so, initialize the credential provider as follows:

In\u00a0[\u00a0]: Copied!
StreamPipesApiKeyCredentials()\n
StreamPipesApiKeyCredentials()

To ensure that the above code works, you must set the environment variables as expected. This can be done as follows:

In\u00a0[\u00a0]: Copied!
import os\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\"

Having the config ready, we can now initialize the actual client.

In\u00a0[\u00a0]: Copied!
client = StreamPipesClient(client_config=config)\n
client = StreamPipesClient(client_config=config)

That's already it. You can check if everything works out by using the following command:

In\u00a0[6]: Copied!
client.describe()\n
client.describe()
2023-02-24 17:05:49,398 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n2023-02-24 17:05:49,457 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n\nHi there!\nYou are connected to a StreamPipes instance running at http://localhost:80.\nThe following StreamPipes resources are available with this client:\n1x DataLakeMeasures\n1x DataStreams\n

This prints a short textual description of the connected StreamPipes instance to the console.

The created client instance serves as the central point of interaction with StreamPipes. You can invoke a variety of commands directly on this object.

Are you curious how you can actually get data out of StreamPipes and make use of it with Python? Then check out the next tutorial on extracting Data from the StreamPipes data lake.

Thanks for reading this introductory tutorial. We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#Introduction-to-StreamPipes-Python","title":"Introduction to StreamPipes Python\u00b6","text":""},{"location":"tutorials/1-introduction-to-streampipes-python-client/#Why-there-is-an-extra-Python-library-for-StreamPipes?","title":"Why there is an extra Python library for StreamPipes?\u00b6","text":"

Apache StreamPipes aims to enable non-technical users to connect and analyze IoT data streams. To achieve this, it provides an easy-to-use and convenient user interface that allows one to connect to an IoT data source and create some visual graphs within a few minutes. While this is the primary use case for Apache StreamPipes, it also offers significant value to those interested in data analysis or data science with IoT data, without the need to handle the complexities of extracting data from devices in a suitable format. In this scenario, StreamPipes helps you connect to your data source and extract the data for you. You can then make the data available outside StreamPipes by writing it into an external system, such as a database, Kafka, etc. While this requires another component, you can also extract your data directly from StreamPipes programmatically using the StreamPipes API. For convenience, we also provide a StreamPipes client, available for both Java and Python. Specifically with StreamPipes Python, we want to address the amazing data analytics and data science community in Python and benefit from the great universe of Python libraries out there.

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#How-to-install-StreamPipes-Python?","title":"How to install StreamPipes Python?\u00b6","text":"

Simply use the following pip command:

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#How-to-prepare-the-tutorials","title":"How to prepare the tutorials\u00b6","text":"

In case you want to reproduce the first two tutorials exactly on your end, you need to create a simple pipeline in StreamPipes as demonstrated below.

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#How-to-configure-the-Python-client","title":"How to configure the Python client\u00b6","text":"

In order to access the resources available in StreamPipes, one must be able to authenticate against the backend. For this purpose, the client so far only supports authentication via an API token that can be generated via the StreamPipes UI, as you can see below.

Having generated the API token, one can directly start initializing a client instance as follows:

"},{"location":"tutorials/2-extracting-data-from-the-streampipes-data-lake/","title":"Extracting Data from the StreamPipes data lake","text":"In\u00a0[1]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
# if you want all necessary dependencies required for this tutorial to be installed,\n# you can simply execute the following command\n%pip install matplotlib streampipes\n
# if you want all necessary dependencies required for this tutorial to be installed, # you can simply execute the following command %pip install matplotlib streampipes In\u00a0[2]: Copied!
import os\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\" In\u00a0[3]: Copied!
config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80\n)\n
config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials(), host_address=\"localhost\", https_disabled=True, port=80 ) In\u00a0[4]: Copied!
client = StreamPipesClient(client_config=config)\n
client = StreamPipesClient(client_config=config)
2023-02-24 17:34:25,860 - streampipes.client.client - [INFO] - [client.py:128] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n

As a first step, we want to get an overview of all data available in the data lake. The data is stored as so-called measures, each of which refers to a data stream stored in the data lake. For this purpose, we use the all() method of the dataLakeMeasure endpoint.

In\u00a0[5]: Copied!
data_lake_measures = client.dataLakeMeasureApi.all()\n
data_lake_measures = client.dataLakeMeasureApi.all()
2023-02-24 17:34:25,929 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n

So let's see how many measures are available:

In\u00a0[6]: Copied!
len(data_lake_measures)\n
len(data_lake_measures) Out[6]:
2

All resources of the StreamPipes Python client support standard Python expressions. If you come across a case where they do not, please let us know.
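
For example, besides len() and indexing, you can loop over the container; a small sketch:

for measure in data_lake_measures:\n    print(measure.measure_name)\n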

In\u00a0[7]: Copied!
data_lake_measures[-1]\n
data_lake_measures[-1] Out[7]:
DataLakeMeasure(element_id='3cb6b5e6f107452483d1fd2ccf4bf9f9', measure_name='test', timestamp_field='s0::timestamp', event_schema=EventSchema(event_properties=[EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:EiFnkL', label='Density', description='Denotes the current density of the fluid', runtime_name='density', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=5, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:ghSkQI', label='Mass Flow', description='Denotes the current mass flow in the sensor', runtime_name='mass_flow', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=2, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:cQAUry', label='Sensor ID', description='The ID of the sensor', runtime_name='sensorId', required=False, domain_properties=['https://streampipes.org/vocabulary/examples/watertank/v1/hasSensorId'], property_scope='DIMENSION_PROPERTY', index=1, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#string', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:pbPMyL', label='Sensor Fault Flags', description='Any fault flags of the sensors', runtime_name='sensor_fault_flags', required=False, domain_properties=['http://schema.org/Boolean'], property_scope='MEASUREMENT_PROPERTY', index=6, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#boolean', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:Qmayhw', label='Temperature', description='Denotes the current temperature in degrees celsius', runtime_name='temperature', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=4, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit='http://qudt.org/vocab/unit#DegreeCelsius', value_specification=ValueSpecification(class_name='org.apache.streampipes.model.schema.QuantitativeValue', element_id=None, min_value=0, max_value=100, step=0.1)), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:YQYhjd', label='Volume Flow', description='Denotes the current volume flow', runtime_name='volume_flow', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=3, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit=None, value_specification=None)]), pipeline_id=None, pipeline_name=None, pipeline_is_running=False, schema_version='1.1')

To get a more comprehensive overview, you can take a look at the pandas representation:

In\u00a0[8]: Copied!
display(data_lake_measures.to_pandas())\n
display(data_lake_measures.to_pandas()) measure_name timestamp_field pipeline_id pipeline_name pipeline_is_running num_event_properties 0 flow-rate s0::timestamp None None False 6 1 test s0::timestamp None None False 6

So far, we have only retrieved metadata about the available data lake measures. In the following, we will access the actual data of the measure flow-rate.

For this purpose, we will use the get() method of the dataLakeMeasure endpoint.

In\u00a0[9]: Copied!
flow_rate_measure = client.dataLakeMeasureApi.get(identifier=\"flow-rate\")\n
flow_rate_measure = client.dataLakeMeasureApi.get(identifier=\"flow-rate\")
2023-02-24 17:34:26,020 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n

For further processing, the easiest way is to turn the data measure into a pandas DataFrame.

In\u00a0[10]: Copied!
flow_rate_pd = flow_rate_measure.to_pandas()\n
flow_rate_pd = flow_rate_measure.to_pandas()

Let's see how many data points we got...

In\u00a0[11]: Copied!
len(flow_rate_pd)\n
len(flow_rate_pd) Out[11]:
1000

... and get a first overview

In\u00a0[12]: Copied!
flow_rate_pd.describe()\n
flow_rate_pd.describe() Out[12]: density mass_flow temperature volume_flow count 1000.000000 1000.000000 1000.000000 1000.000000 mean 45.560337 5.457014 45.480231 5.659558 std 3.201544 3.184959 3.132878 3.122437 min 40.007698 0.004867 40.000992 0.039422 25% 42.819497 2.654101 42.754623 3.021625 50% 45.679264 5.382355 45.435944 5.572553 75% 48.206881 8.183144 48.248473 8.338209 max 50.998310 10.986015 50.964909 10.998676

As a final step, we want to create a plot of both attributes.

In\u00a0[13]: Copied!
import matplotlib.pyplot as plt\nflow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"])\nplt.show()\n
import matplotlib.pyplot as plt flow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"]) plt.show()

For data lake measurements, the get() method is even more powerful than simply returning all the data for a given data lake measurement. We will look at a selection of its parameters below; the full list of supported parameters can be found in the docs. Let's start by referring to the graph we created above, where we used only two columns of our data lake measurement. If we already know this, we can directly restrict the queried data to a subset of columns by using the columns parameter, which takes the column names as a comma-separated string:

In\u00a0[14]: Copied!
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", columns=\"mass_flow,temperature\").to_pandas()\nflow_rate_pd\n
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", columns=\"mass_flow,temperature\").to_pandas() flow_rate_pd
2023-02-24 17:34:26,492 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n
Out[14]: timestamp mass_flow temperature 0 2023-02-24T16:19:41.472Z 3.309556 44.448483 1 2023-02-24T16:19:41.482Z 5.608580 40.322033 2 2023-02-24T16:19:41.493Z 7.692881 49.239639 3 2023-02-24T16:19:41.503Z 3.632898 49.933754 4 2023-02-24T16:19:41.513Z 0.711260 50.106617 ... ... ... ... 995 2023-02-24T16:19:52.927Z 1.740114 46.558231 996 2023-02-24T16:19:52.94Z 7.211723 48.048622 997 2023-02-24T16:19:52.952Z 7.770180 48.188026 998 2023-02-24T16:19:52.965Z 4.458602 48.280899 999 2023-02-24T16:19:52.977Z 2.592060 47.505951

1000 rows \u00d7 3 columns

By default, the client returns only the first one thousand records of a Data Lake measurement. This can be changed by passing a concrete value for the limit parameter:

In\u00a0[15]: Copied!
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", limit=10000).to_pandas()\nlen(flow_rate_pd)\n
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", limit=10000).to_pandas() len(flow_rate_pd)
2023-02-24 17:34:26,736 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n
Out[15]:
9528

If you want your data to be selected by time of occurrence rather than quantity, you can specify your time window by passing the start_date and end_date parameters:

In\u00a0[16]: Copied!
from datetime import datetime\nflow_rate_pd = client.dataLakeMeasureApi.get(\n    identifier=\"flow-rate\",\n    start_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=0),\n    end_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=1),\n    ).to_pandas()\nflow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"])\nplt.show()\n
from datetime import datetime flow_rate_pd = client.dataLakeMeasureApi.get( identifier=\"flow-rate\", start_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=0), end_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=1), ).to_pandas() flow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"]) plt.show()
2023-02-24 17:34:26,899 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n

... from this point on, we leave all future processing of the data up to your creativity. Keep in mind: the general syntax used in this tutorial (all(), to_pandas(), get()) applies to all endpoints and associated resources of the StreamPipes Python client.
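
For instance, listing all data streams instead of data lake measures works with exactly the same pattern:

client.dataStreamApi.all().to_pandas()\n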

If you go further and create exciting stuff with data extracted from StreamPipes, please let us know. We are thrilled to see what you as a community will build with the provided client. Furthermore, don't hesitate to discuss feature requests to extend the current functionality with us.

For now, that's all about the StreamPipes client. Read the next tutorial (Getting live data from the StreamPipes data stream) if you are interested in making use of the powerful StreamPipes functions to interact with StreamPipes in an event-based manner.

How do you like this tutorial? We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/2-extracting-data-from-the-streampipes-data-lake/#Extracting-Data-from-the-StreamPipes-data-lake","title":"Extracting Data from the StreamPipes data lake\u00b6","text":"

In the first tutorial (Introduction to the StreamPipes Python client) we took the first steps with the StreamPipes Python client and learned how to set everything up. Now we are ready to get started and want to retrieve some data out of StreamPipes. In this tutorial, we'll focus on the StreamPipes Data Lake, the component where StreamPipes stores data internally. To get started, we'll use the client instance created in the first tutorial.

"},{"location":"tutorials/3-getting-live-data-from-the-streampipes-data-stream/","title":"Getting live data from the StreamPipes data stream","text":"

Note: As of now, support for StreamPipes functions has mainly been developed using NATS as the messaging protocol. Consequently, this setup is the most thoroughly tested and should work flawlessly. Visit our first-steps page to see how to start StreamPipes accordingly. Nevertheless, you can also use the other brokers that are currently supported in StreamPipes Python. In case you observe any problems, please reach out to us and file an issue on GitHub.

In\u00a0[1]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
# You can install all required libraries for this tutorial with the following command\n%pip install matplotlib ipython streampipes\n
# You can install all required libraries for this tutorial with the following command %pip install matplotlib ipython streampipes In\u00a0[2]: Copied!
import os\n\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n\n# Use this if you work locally:\nos.environ[\"BROKER-HOST\"] = \"localhost\"  \nos.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker. If Kafka is not running on localhost, KAFKA_ADVERTISED_LISTENERS should be adjusted to the external address\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\" # Use this if you work locally: os.environ[\"BROKER-HOST\"] = \"localhost\" os.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker. If Kafka is not running on localhost, KAFKA_ADVERTISED_LISTENERS should be adjusted to the external address In\u00a0[3]: Copied!
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),\n    host_address=\"localhost\",\n    port=80,\n    https_disabled=True,\n)\nclient = StreamPipesClient(client_config=client_config)\n
client_config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials(), host_address=\"localhost\", port=80, https_disabled=True, ) client = StreamPipesClient(client_config=client_config)
2022-12-14 10:43:37,664 - streampipes.client.client - [INFO] - [client.py:127] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n

Now we can have a look at the available data streams. We can choose one or more streams to receive data from and copy their element_id.

In\u00a0[4]: Copied!
client.dataStreamApi.all().to_pandas()\n
client.dataStreamApi.all().to_pandas()
2022-12-14 10:43:39,944 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:153] [_make_request] - Successfully retrieved all resources.\n
Out[4]: element_id name description icon_url app_id includes_assets includes_locales internally_managed measurement_object index ... uri dom num_transport_protocols num_measurement_capability num_application_links num_included_assets num_connected_to num_category num_event_properties num_included_locales 0 urn:streampipes.apache.org:eventstream:HHoidJ Test2 None None False False True None 0 ... urn:streampipes.apache.org:eventstream:HHoidJ None 1 0 0 0 0 0 7 0 1 urn:streampipes.apache.org:eventstream:uPDKLI Test None None False False True None 0 ... urn:streampipes.apache.org:eventstream:uPDKLI None 1 0 0 0 0 0 7 0

2 rows \u00d7 21 columns

Next we can create a StreamPipesFunction. For this, we need to implement the following three methods:

  • onServiceStarted is called when the function gets started. There you can use the given meta information of the FunctionContext to initialize the function.
  • onEvent is called whenever a new event arrives. The event contains the live data, and you can use the streamId to identify a stream if the function is connected to multiple data streams.
  • onServiceStopped is called when the function gets stopped.

For this tutorial we just create a function that saves every new event in a pandas DataFrame and plots the first column of the DataFrame when the function gets stopped.

(If you want to use the same structure as in Java, you can override the getFunctionId and requiredStreamIds methods instead of using the FunctionDefinition.)

In\u00a0[5]: Copied!
from typing import Dict, Any\nimport pandas as pd\nfrom datetime import datetime\nimport matplotlib.pyplot as plt\nfrom streampipes.functions.function_handler import FunctionHandler\nfrom streampipes.functions.registration import Registration\nfrom streampipes.functions.streampipes_function import StreamPipesFunction\nfrom streampipes.functions.utils.function_context import FunctionContext\nfrom streampipes.model.resource.function_definition import FunctionDefinition, FunctionId\n\nclass ExampleFunction(StreamPipesFunction):\n    def __init__(self, function_definition: FunctionDefinition) -> None:\n        super().__init__(function_definition)\n        # Create the Dataframe to save the live data\n        self.df = pd.DataFrame()\n\n    def onServiceStarted(self, context: FunctionContext):\n        # Get the name of the timestamp field\n        for event_property in context.schema[context.streams[0]].event_schema.event_properties:\n            if event_property.property_scope == \"HEADER_PROPERTY\":\n                self.timestamp = event_property.runtime_name\n\n    def onEvent(self, event: Dict[str, Any], streamId: str):\n        # Convert the unix timestamp to datetime\n        event[self.timestamp] = datetime.fromtimestamp(event[self.timestamp] / 1000)\n        # Add every value of the event to the DataFrame\n        self.df = pd.concat(\n            [self.df, pd.DataFrame({key: [event[key]] for key in event.keys()}).set_index(self.timestamp)]\n        )\n\n    def onServiceStopped(self):\n        # Plot the first column of the Dataframe\n        plt.figure(figsize=(10, 5))\n        plt.xlabel(self.timestamp)\n        plt.ylabel(self.df.columns[0])\n        plt.plot(self.df.iloc[:, 0])\n        plt.show()\n
from typing import Dict, Any import pandas as pd from datetime import datetime import matplotlib.pyplot as plt from streampipes.functions.function_handler import FunctionHandler from streampipes.functions.registration import Registration from streampipes.functions.streampipes_function import StreamPipesFunction from streampipes.functions.utils.function_context import FunctionContext from streampipes.model.resource.function_definition import FunctionDefinition, FunctionId class ExampleFunction(StreamPipesFunction): def __init__(self, function_definition: FunctionDefinition) -> None: super().__init__(function_definition) # Create the Dataframe to save the live data self.df = pd.DataFrame() def onServiceStarted(self, context: FunctionContext): # Get the name of the timestamp field for event_property in context.schema[context.streams[0]].event_schema.event_properties: if event_property.property_scope == \"HEADER_PROPERTY\": self.timestamp = event_property.runtime_name def onEvent(self, event: Dict[str, Any], streamId: str): # Convert the unix timestamp to datetime event[self.timestamp] = datetime.fromtimestamp(event[self.timestamp] / 1000) # Add every value of the event to the DataFrame self.df = pd.concat( [self.df, pd.DataFrame({key: [event[key]] for key in event.keys()}).set_index(self.timestamp)] ) def onServiceStopped(self): # Plot the first column of the Dataframe plt.figure(figsize=(10, 5)) plt.xlabel(self.timestamp) plt.ylabel(self.df.columns[0]) plt.plot(self.df.iloc[:, 0]) plt.show()

Now we can start the function. First we create an instance of the ExampleFunction and insert the element_id of the stream whose data we want to consume. Then we have to register this function, and we can start all functions by initializing the FunctionHandler. (It's also possible to register multiple functions with .register(...).register(...).)

In\u00a0[6]: Copied!
example_function = ExampleFunction(\n    FunctionDefinition(\n        function_id=FunctionId(id=\"example-function\"),\n        consumed_streams=[\"urn:streampipes.apache.org:eventstream:uPDKLI\"]\n    )\n)\n\nregistration = Registration()\nregistration.register(example_function)\n\nfunction_handler = FunctionHandler(registration, client)\nfunction_handler.initializeFunctions()\n
example_function = ExampleFunction( FunctionDefinition( function_id=FunctionId(id=\"example-function\"), consumed_streams=[\"urn:streampipes.apache.org:eventstream:uPDKLI\"] ) ) registration = Registration() registration.register(example_function) function_handler = FunctionHandler(registration, client) function_handler.initializeFunctions()
2022-12-14 10:43:42,810 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:153] [_make_request] - Successfully retrieved all resources.\n2022-12-14 10:43:42,812 - streampipes.functions.function_handler - [INFO] - [function_handler.py:82] [initializeFunctions] - Using NatsBroker for <__main__.ExampleFunction object at 0x000001F2EF298D90>\n

The DataFrame of the ExampleFunction gets updated whenever a new event arrives. Let's test this by executing the cell below.

In\u00a0[7]: Copied!
import asyncio\nfrom asyncio.exceptions import CancelledError\nfrom IPython.display import clear_output\n\nwhile True:\n    clear_output(wait=True)\n    display(example_function.df)\n    try:\n        await asyncio.sleep(1)\n    except CancelledError:\n        break\n
import asyncio from asyncio.exceptions import CancelledError from IPython.display import clear_output while True: clear_output(wait=True) display(example_function.df) try: await asyncio.sleep(1) except CancelledError: break mass_flow density volume_flow sensor_fault_flags temperature sensorId timestamp 2022-12-14 10:43:43.357 10.955496 47.546290 1.001985 False 44.993413 flowrate02 2022-12-14 10:43:44.371 6.499040 44.392069 2.034402 False 41.232352 flowrate02 2022-12-14 10:43:45.382 10.168300 41.192146 9.724287 False 46.812779 flowrate02 2022-12-14 10:43:46.395 10.849059 50.086308 5.832691 False 45.860412 flowrate02 2022-12-14 10:43:47.410 3.081855 47.254246 8.860531 False 50.505801 flowrate02 ... ... ... ... ... ... ... 2022-12-14 10:44:43.920 1.803572 41.978894 10.294002 False 47.820239 flowrate02 2022-12-14 10:44:44.932 1.967062 42.212883 3.237440 False 49.047258 flowrate02 2022-12-14 10:44:45.934 4.457819 47.561256 0.315024 False 40.223413 flowrate02 2022-12-14 10:44:46.949 8.745343 46.346891 7.439090 False 41.982529 flowrate02 2022-12-14 10:44:47.950 5.828744 47.679720 6.307405 False 42.100354 flowrate02

65 rows \u00d7 6 columns

The while loop just displays the DataFrame every second until the cell is stopped. We could achieve the same result manually by executing example_function.df repeatedly.

You can stop the functions whenever you want by executing the command below.

In\u00a0[\u00a0]: Copied!
function_handler.disconnect()\n
function_handler.disconnect()
2022-12-14 10:44:53,309 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:67] [disconnect] - Stopped connection to stream: urn:streampipes.apache.org:eventstream:uPDKLI\n

That's enough for this tutorial. Now you can try to write your own StreamPipesFunction. All you need to do is create a new class, implement the required methods, and register the function.

Want to see more exciting use cases you can achieve with StreamPipes functions in Python? Then don't hesitate and jump to our next tutorial on applying online machine learning algorithms to StreamPipes data streams with River.

How do you like this tutorial? We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/3-getting-live-data-from-the-streampipes-data-stream/#Getting-live-data-from-the-StreamPipes-data-stream","title":"Getting live data from the StreamPipes data stream\u00b6","text":"

In the last tutorial (Extracting Data from the StreamPipes data lake) we learned how to extract the stored data from a StreamPipes data lake. This tutorial is about the StreamPipes data stream and shows how to get the live data from StreamPipes into Python. Therefore, we first create the client instance as before.

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/","title":"Using Online Machine Learning on a StreamPipes data stream","text":"In\u00a0[1]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
# you can install all required dependencies for this tutorial by executing the following command\n%pip install river streampipes\n
# you can install all required dependencies for this tutorial by executing the following command %pip install river streampipes In\u00a0[2]: Copied!
import os\n\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n\n# Use this if you work locally:\nos.environ[\"BROKER-HOST\"] = \"localhost\"  \nos.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\" # Use this if you work locally: os.environ[\"BROKER-HOST\"] = \"localhost\" os.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker In\u00a0[3]: Copied!
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),\n    host_address=\"localhost\",\n    port=80,\n    https_disabled=True,\n)\nclient = StreamPipesClient(client_config=client_config)\n
client_config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials(), host_address=\"localhost\", port=80, https_disabled=True, ) client = StreamPipesClient(client_config=client_config)
2023-01-27 16:04:24,784 - streampipes.client.client - [INFO] - [client.py:128] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n
In\u00a0[4]: Copied!
client.dataStreamApi.all().to_pandas()\n
client.dataStreamApi.all().to_pandas()
2023-01-27 16:04:28,212 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:163] [_make_request] - Successfully retrieved all resources.\n
Out[4]: element_id name description icon_url app_id includes_assets includes_locales internally_managed measurement_object index ... dom rev num_transport_protocols num_measurement_capability num_application_links num_included_assets num_connected_to num_category num_event_properties num_included_locales 0 sp:spdatastream:xboBFK Test None None False False True None 0 ... None 5-558c861debc745e1ebae29a266a8bdb9 1 0 0 0 0 0 7 0 1 urn:streampipes.apache.org:eventstream:Wgyrse Test File None None False False True None 0 ... None 4-66548b6b84287011b7cec0876ef82baf 1 0 0 0 0 0 2 0

2 rows \u00d7 22 columns

In\u00a0[5]: Copied!
from river import cluster, compose, preprocessing\nfrom streampipes.function_zoo.river_function import OnlineML\nfrom streampipes.functions.utils.data_stream_generator import RuntimeType\n\nk_means = compose.Pipeline(\n    (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")),\n    (\"scale\", preprocessing.StandardScaler()),\n    (\"k_means\", cluster.KMeans(n_clusters=2)),\n)\n\nclustering = OnlineML(\n    client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=k_means, prediction_type=RuntimeType.INTEGER.value\n)\nclustering.start()\n
from river import cluster, compose, preprocessing from streampipes.function_zoo.river_function import OnlineML from streampipes.functions.utils.data_stream_generator import RuntimeType k_means = compose.Pipeline( (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")), (\"scale\", preprocessing.StandardScaler()), (\"k_means\", cluster.KMeans(n_clusters=2)), ) clustering = OnlineML( client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=k_means, prediction_type=RuntimeType.INTEGER.value ) clustering.start()
2023-01-27 16:04:35,599 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:163] [_make_request] - Successfully retrieved all resources.\n2023-01-27 16:04:35,599 - streampipes.functions.function_handler - [INFO] - [function_handler.py:64] [initializeFunctions] - Create output data stream \"sp:spdatastream:cwKPoo\" for the function \"65cf8b86-bcdf-433e-a1c7-3e920eab55d0\"\n2023-01-27 16:04:37,766 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:163] [_make_request] - Successfully retrieved all resources.\n2023-01-27 16:04:37,767 - streampipes.functions.function_handler - [INFO] - [function_handler.py:78] [initializeFunctions] - Using NatsBroker for RiverFunction\n
2023-01-27 16:04:37,791 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:48] [_makeConnection] - Connected to NATS at localhost:4222\n2023-01-27 16:04:37,791 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:48] [_makeConnection] - Connected to NATS at localhost:4222\n2023-01-27 16:04:37,792 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:58] [createSubscription] - Subscribed to stream: sp:spdatastream:xboBFK\n
In\u00a0[6]: Copied!
clustering.set_learning(False)\n
clustering.set_learning(False) In\u00a0[\u00a0]: Copied!
clustering.stop()\n
clustering.stop()
2023-01-27 16:04:57,303 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:82] [disconnect] - Stopped connection to stream: sp:spdatastream:xboBFK\n2023-01-27 16:04:57,304 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:82] [disconnect] - Stopped connection to stream: sp:spdatastream:cwKPoo\n
In\u00a0[\u00a0]: Copied!
import pickle\nfrom river import compose, tree\nfrom streampipes.function_zoo.river_function import OnlineML\nfrom streampipes.functions.utils.data_stream_generator import RuntimeType\n\nhoeffding_tree = compose.Pipeline(\n    (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")),\n    (\"hoeffding_tree\", tree.HoeffdingTreeRegressor(grace_period=5)),\n)\n\n\ndef draw_tree(self, event, streamId):\n    \"\"\"Draw the tree and save the image.\"\"\"\n    if self.learning:\n        if self.model[1].n_nodes != None:\n            self.model[1].draw().render(\"hoeffding_tree\", format=\"png\", cleanup=True)\n\n\ndef save_model(self):\n    \"\"\"Save the trained model.\"\"\"\n    with open(\"hoeffding_tree.pkl\", \"wb\") as f:\n        pickle.dump(self.model, f)\n\n\nregressor = OnlineML(\n    client=client,\n    stream_ids=[\"sp:spdatastream:xboBFK\"],\n    model=hoeffding_tree,\n    prediction_type=RuntimeType.FLOAT.value,\n    supervised=True,\n    target_label=\"temperature\",\n    on_event=draw_tree,\n    on_stop=save_model,\n)\nregressor.start()\n
import pickle from river import compose, tree from streampipes.function_zoo.river_function import OnlineML from streampipes.functions.utils.data_stream_generator import RuntimeType hoeffding_tree = compose.Pipeline( (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")), (\"hoeffding_tree\", tree.HoeffdingTreeRegressor(grace_period=5)), ) def draw_tree(self, event, streamId): \"\"\"Draw the tree and save the image.\"\"\" if self.learning: if self.model[1].n_nodes != None: self.model[1].draw().render(\"hoeffding_tree\", format=\"png\", cleanup=True) def save_model(self): \"\"\"Save the trained model.\"\"\" with open(\"hoeffding_tree.pkl\", \"wb\") as f: pickle.dump(self.model, f) regressor = OnlineML( client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=hoeffding_tree, prediction_type=RuntimeType.FLOAT.value, supervised=True, target_label=\"temperature\", on_event=draw_tree, on_stop=save_model, ) regressor.start() In\u00a0[9]: Copied!
regressor.set_learning(False)\n
regressor.set_learning(False) In\u00a0[\u00a0]: Copied!
regressor.stop()\n
regressor.stop() In\u00a0[\u00a0]: Copied!
import pickle\nfrom river import compose, tree\nfrom streampipes.function_zoo.river_function import OnlineML\nfrom streampipes.functions.utils.data_stream_generator import RuntimeType\n\ndecision_tree = compose.Pipeline(\n    (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")),\n    (\"decision_tree\", tree.ExtremelyFastDecisionTreeClassifier(grace_period=5)),\n)\n\n\ndef draw_tree(self, event, streamId):\n    \"\"\"Draw the tree and save the image.\"\"\"\n    if self.learning:\n        if self.model[1].n_nodes != None:\n            self.model[1].draw().render(\"decicion_tree\", format=\"png\", cleanup=True)\n\n\ndef save_model(self):\n    \"\"\"Save the trained model.\"\"\"\n    with open(\"decision_tree.pkl\", \"wb\") as f:\n        pickle.dump(self.model, f)\n\n\nclassifier = OnlineML(\n    client=client,\n    stream_ids=[\"sp:spdatastream:xboBFK\"],\n    model=decision_tree,\n    prediction_type=RuntimeType.BOOLEAN.value,\n    supervised=True,\n    target_label=\"sensor_fault_flags\",\n    on_event=draw_tree,\n    on_stop=save_model,\n)\nclassifier.start()\n
import pickle from river import compose, tree from streampipes.function_zoo.river_function import OnlineML from streampipes.functions.utils.data_stream_generator import RuntimeType decision_tree = compose.Pipeline( (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")), (\"decision_tree\", tree.ExtremelyFastDecisionTreeClassifier(grace_period=5)), ) def draw_tree(self, event, streamId): \"\"\"Draw the tree and save the image.\"\"\" if self.learning: if self.model[1].n_nodes != None: self.model[1].draw().render(\"decicion_tree\", format=\"png\", cleanup=True) def save_model(self): \"\"\"Save the trained model.\"\"\" with open(\"decision_tree.pkl\", \"wb\") as f: pickle.dump(self.model, f) classifier = OnlineML( client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=decision_tree, prediction_type=RuntimeType.BOOLEAN.value, supervised=True, target_label=\"sensor_fault_flags\", on_event=draw_tree, on_stop=save_model, ) classifier.start() In\u00a0[12]: Copied!
classifier.set_learning(False)\n
classifier.set_learning(False) In\u00a0[\u00a0]: Copied!
classifier.stop()\n
classifier.stop()

That's already it! Isn't it truly easy to apply Online ML with StreamPipes and River? Please go ahead and apply it to your own use cases. We would be happy to hear about them!

Want to see more exciting use cases you can achieve with StreamPipes functions in Python? Then don\u2019t hesitate and jump to our next tutorial on using interoperable machine learning algorithm models with StreamPipes Python and ONNX.

How do you like this tutorial? We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#Using-Online-Machine-Learning-on-a-StreamPipes-data-stream","title":"Using Online Machine Learning on a StreamPipes data stream\u00b6","text":"

The last tutorial (Getting live data from the StreamPipes data stream) showed how we can connect to a data stream, and it would be possible to use Online Machine Learning with this approach and train a model with the incoming events in the onEvent method. However, the StreamPipes client also provides an easier way to do this with the use of the River library for Online Machine Learning. We will have a look at this now.

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#How-to-use-Online-Machine-Learning-with-StreamPipes","title":"How to use Online Machine Learning with StreamPipes\u00b6","text":"

After we have configured the client as usual, we can start with the new part. The approach is straightforward, and you can start with the ML part in just 3 steps:

  1. Create a pipeline with River and insert the preprocessing steps and model of your choice.
  2. Configure the OnlineML wrapper to fit to your model and insert the client and required data stream ids.
  3. Start the wrapper and let the learning begin.

A StreamPipesFunction is then started, which trains the model for each new event. It also creates an output data stream which will send the predictions of the model back to StreamPipes. This output stream can be seen when creating a new pipeline and can be used like every other data source. So you can use it in a pipeline and save the predictions in a Data Lake. You can also stop and start the training with the set_learning method. To stop the whole function, use the stop method, and if you want to delete the output stream entirely, you can go to the Pipeline Element Installer in StreamPipes and uninstall it.

Now let's take a look at some examples. If you want to execute the examples below you have to create an adapter for the Machine Data Simulator, select the flowrate sensor and insert the stream id of this stream.

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#KMeans","title":"KMeans\u00b6","text":""},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#HoeffdingTreeRegressor","title":"HoeffdingTreeRegressor\u00b6","text":""},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#DecisionTreeClassifier","title":"DecisionTreeClassifier\u00b6","text":""},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/","title":"Applying Interoperable Machine Learning in StreamPipes","text":"In\u00a0[\u00a0]: Copied!
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python\n%pip install scikit-learn==1.4.0 skl2onnx==1.16.0 onnxruntime==1.17.1 matplotlib==3.8.3\n
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python %pip install scikit-learn==1.4.0 skl2onnx==1.16.0 onnxruntime==1.17.1 matplotlib==3.8.3 In\u00a0[8]: Copied!
import os\nfrom streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nos.environ[\"BROKER-HOST\"] = \"localhost\"\nos.environ[\"KAFKA-PORT\"] = \"9094\"  # When using Kafka as message broker\n\nconfig = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(\n        username=\"admin@streampipes.apache.org\",\n        api_key=\"TOKEN\",\n    ),\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80\n)\n\nclient = StreamPipesClient(client_config=config)\n
import os from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials os.environ[\"BROKER-HOST\"] = \"localhost\" os.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials( username=\"admin@streampipes.apache.org\", api_key=\"TOKEN\", ), host_address=\"localhost\", https_disabled=True, port=80 ) client = StreamPipesClient(client_config=config)
2024-03-26 10:21:38,538 - streampipes.client.client - [INFO] - [client.py:198] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n2024-03-26 10:21:38,632 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.\n2024-03-26 10:21:38,634 - streampipes.client.client - [INFO] - [client.py:171] [_get_server_version] - The StreamPipes version was successfully retrieved from the backend: 0.95.0. By means of that, authentication via the provided credentials is also tested successfully.\n

The main objective of this tutorial is to demonstrate how to make predictions with an existing and pre-trained ML model using a StreamPipes function and ONNX. Therefore, you can skip the following sections on use case and model training if you already have an existing ONNX model and are only interested in applying it using StreamPipes.

In\u00a0[9]: Copied!
flowrate_df = client.dataLakeMeasureApi.get(\"flow-rate\").to_pandas()\nX = flowrate_df[\"volume_flow\"].values.reshape(-1, 1).astype(\"float32\")\n
flowrate_df = client.dataLakeMeasureApi.get(\"flow-rate\").to_pandas() X = flowrate_df[\"volume_flow\"].values.reshape(-1, 1).astype(\"float32\")
2024-03-26 10:21:48,582 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.\n

Let's fit the model to the data:

In\u00a0[14]: Copied!
from sklearn.ensemble import IsolationForest\n\nmodel = IsolationForest(contamination=0.01)\nmodel.fit(X)\n
from sklearn.ensemble import IsolationForest model = IsolationForest(contamination=0.01) model.fit(X) Out[14]:
IsolationForest(contamination=0.01)

The contamination parameter models the proportion of outliers in the data. See the scikit-learn documentation for more information.

Before we convert the model to an ONNX representation, let's do a quick visual analysis of the model results:

In\u00a0[15]: Copied!
import matplotlib.pyplot as plt\n\nflowrate_df[\"anomaly\"] = model.predict(X)\n\nfig, ax = plt.subplots(figsize=(10, 6))\nanomalies = flowrate_df.loc[flowrate_df[\"anomaly\"] == -1, [\"volume_flow\"]]\nax.plot(flowrate_df.index, flowrate_df['volume_flow'], color='black', label='volume_flow')\nax.scatter(anomalies.index, anomalies['volume_flow'], color='red', label='Anomaly')\nplt.legend()\nplt.show()\n
import matplotlib.pyplot as plt flowrate_df[\"anomaly\"] = model.predict(X) fig, ax = plt.subplots(figsize=(10, 6)) anomalies = flowrate_df.loc[flowrate_df[\"anomaly\"] == -1, [\"volume_flow\"]] ax.plot(flowrate_df.index, flowrate_df['volume_flow'], color='black', label='volume_flow') ax.scatter(anomalies.index, anomalies['volume_flow'], color='red', label='Anomaly') plt.legend() plt.show()

Okay, that looks quite reasonable, so let's convert the model to an ONNX representation so that we can make use of it later.

In\u00a0[16]: Copied!
from onnxconverter_common import FloatTensorType\nfrom skl2onnx import to_onnx\n\nmodel_onnx = to_onnx(\n    model,\n    initial_types=[('input', FloatTensorType([None, X.shape[1]]))],\n    target_opset={'ai.onnx.ml': 3, 'ai.onnx': 15, '': 15}\n)\n\nwith open(\"isolation_forest.onnx\", \"wb\") as f:\n    f.write(model_onnx.SerializeToString())\n
from onnxconverter_common import FloatTensorType from skl2onnx import to_onnx model_onnx = to_onnx( model, initial_types=[('input', FloatTensorType([None, X.shape[1]]))], target_opset={'ai.onnx.ml': 3, 'ai.onnx': 15, '': 15} ) with open(\"isolation_forest.onnx\", \"wb\") as f: f.write(model_onnx.SerializeToString()) In\u00a0[23]: Copied!
import numpy as np\nimport onnxruntime as rt\n\nfrom streampipes.functions.broker.broker_handler import get_broker_description\nfrom streampipes.functions.streampipes_function import StreamPipesFunction\nfrom streampipes.functions.utils.data_stream_generator import create_data_stream, RuntimeType\nfrom streampipes.functions.utils.function_context import FunctionContext\nfrom streampipes.model.resource import FunctionDefinition, DataStream\n\nfrom typing import Dict, Any, List\n\n\nclass ONNXFunction(StreamPipesFunction):\n\n    def __init__(self, feature_names: list[str], input_stream: DataStream):\n        output_stream = create_data_stream(\n            name=\"flowrate-prediction\",\n            attributes={\n                \"is_anomaly\": RuntimeType.BOOLEAN.value\n            },\n            broker=get_broker_description(input_stream)\n        )\n\n        function_definition = FunctionDefinition(\n            consumed_streams=[input_stream.element_id]\n        ).add_output_data_stream(output_stream)\n\n        self.feature_names = feature_names\n        self.input_name = None\n        self.output_name = None\n        self.session = None\n\n        super().__init__(function_definition=function_definition)\n\n    def onServiceStarted(self, context: FunctionContext) -> None:\n        self.session = rt.InferenceSession(\n            path_or_bytes=\"isolation_forest.onnx\",\n            providers=rt.get_available_providers(),\n        )\n        self.input_name = self.session.get_inputs()[0].name\n        self.output_name = self.session.get_outputs()[0].name\n\n    def onEvent(self, event: Dict[str, Any], streamId: str) -> None:\n        feature_vector = []\n        for feature in self.feature_names:\n            feature_vector.append(event[feature])\n\n        prediction = self.session.run(\n            [self.output_name],\n            {self.input_name: np.expand_dims(np.array(feature_vector), axis=0).astype(\"float32\")}\n        )[0]\n\n        output = {\n            \"is_anomaly\": int(prediction[0]) == -1\n        }\n\n        self.add_output(\n            stream_id=self.function_definition.get_output_stream_ids()[0],\n            event=output\n        )\n\n    def onServiceStopped(self) -> None:\n        pass\n
import numpy as np import onnxruntime as rt from streampipes.functions.broker.broker_handler import get_broker_description from streampipes.functions.streampipes_function import StreamPipesFunction from streampipes.functions.utils.data_stream_generator import create_data_stream, RuntimeType from streampipes.functions.utils.function_context import FunctionContext from streampipes.model.resource import FunctionDefinition, DataStream from typing import Dict, Any, List class ONNXFunction(StreamPipesFunction): def __init__(self, feature_names: list[str], input_stream: DataStream): output_stream = create_data_stream( name=\"flowrate-prediction\", attributes={ \"is_anomaly\": RuntimeType.BOOLEAN.value }, broker=get_broker_description(input_stream) ) function_definition = FunctionDefinition( consumed_streams=[input_stream.element_id] ).add_output_data_stream(output_stream) self.feature_names = feature_names self.input_name = None self.output_name = None self.session = None super().__init__(function_definition=function_definition) def onServiceStarted(self, context: FunctionContext) -> None: self.session = rt.InferenceSession( path_or_bytes=\"isolation_forest.onnx\", providers=rt.get_available_providers(), ) self.input_name = self.session.get_inputs()[0].name self.output_name = self.session.get_outputs()[0].name def onEvent(self, event: Dict[str, Any], streamId: str) -> None: feature_vector = [] for feature in self.feature_names: feature_vector.append(event[feature]) prediction = self.session.run( [self.output_name], {self.input_name: np.expand_dims(np.array(feature_vector), axis=0).astype(\"float32\")} )[0] output = { \"is_anomaly\": int(prediction[0]) == -1 } self.add_output( stream_id=self.function_definition.get_output_stream_ids()[0], event=output ) def onServiceStopped(self) -> None: pass

Let's dive a little deeper into the different parts of the function:

  • __init__: First, we need to take care of the data stream that is required to send the predictions from our function to StreamPipes. Thus, we create a dedicated output data stream and provide it with the attributes our events will consist of (a timestamp attribute is always added automatically). This output data stream needs to be registered with the function definition, which is then passed to the parent class. Lastly, we define some instance variables that are mainly required for the ONNX runtime.

  • onServiceStarted: Here we prepare the ONNX runtime session by creating an InferenceSession and retrieving the corresponding configuration parameters.

  • onEvent: Following the parameter names specified by self.feature_names, we extract all feature values from the current event. Subsequently, the corresponding feature vector is transmitted to the ONNX runtime session. The resulting prediction is then converted into our output event, where a value of -1 signifies an anomaly. Finally, the generated output event is forwarded to StreamPipes.

Having the function code in place, we can start the function with the following:

In\u00a0[25]: Copied!
from streampipes.functions.registration import Registration\nfrom streampipes.functions.function_handler import FunctionHandler\n\nstream = [\n    stream\n    for stream\n    in client.dataStreamApi.all()\n    if stream.name == \"flow-rate\"\n][0]\n\nfunction = ONNXFunction(\n    feature_names=[\"volume_flow\"],\n    input_stream=stream\n)\n\nregistration = Registration()\nregistration.register(function)\nfunction_handler = FunctionHandler(registration, client)\nfunction_handler.initializeFunctions()\n
from streampipes.functions.registration import Registration from streampipes.functions.function_handler import FunctionHandler stream = [ stream for stream in client.dataStreamApi.all() if stream.name == \"flow-rate\" ][0] function = ONNXFunction( feature_names=[\"volume_flow\"], input_stream=stream ) registration = Registration() registration.register(function) function_handler = FunctionHandler(registration, client) function_handler.initializeFunctions()
2024-03-26 12:39:50,443 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.\n2024-03-26 12:39:50,502 - streampipes.functions.function_handler - [INFO] - [function_handler.py:76] [initializeFunctions] - The data stream could not be created.\n2024-03-26 12:39:50,503 - streampipes.functions.function_handler - [INFO] - [function_handler.py:78] [initializeFunctions] - This is due to the fact that this data stream already exists. Continuing with the existing data stream.\n2024-03-26 12:39:50,503 - streampipes.functions.function_handler - [INFO] - [function_handler.py:84] [initializeFunctions] - Using output data stream 'sp:spdatastream:flowrate-prediction' for function '7c06fa31-9534-4f91-9c50-b7a3607ec3dc'\n2024-03-26 12:39:50,548 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.\n2024-03-26 12:39:50,549 - streampipes.functions.function_handler - [INFO] - [function_handler.py:100] [initializeFunctions] - Using KafkaConsumer for ONNXFunction\n

We can now access the live values of the prediction in the StreamPipes UI, e.g., in the pipeline editor.

That's already it. We hope this tutorial serves as an illustration of how ML models can be utilized in StreamPipes with the help of ONNX.

How do you like this tutorial? We would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Applying-Interoperable-Machine-Learning-in-StreamPipes","title":"Applying Interoperable Machine Learning in StreamPipes\u00b6","text":"

The last tutorial (Using Online Machine Learning on a StreamPipes data stream) demonstrated how patterns in streaming data can be learned online. In contrast, this tutorial demonstrates how one can apply a pre-trained machine learning (ML) model to a StreamPipes data stream, making use of ONNX. We will show how StreamPipes can be used for both extracting historical data for training purposes and running model inference on live data with a pre-trained model.

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Preparation","title":"Preparation\u00b6","text":"

The following lines configure the client and establish a connection to the StreamPipes instance. If you're not familiar with it or anything is unclear, please have a look at our first tutorial.

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Machine-Learning-Use-Case","title":"Machine Learning Use Case\u00b6","text":"

In this tutorial, we will use data generated by the Machine Data Simulator adapter. More specifically, we will focus on the flowrate data, which consists of various sensor values coming from a water pipe system. Our goal is to keep an eye on the parameter volume_flow, which represents the current volume flow in cubic meters per second. For this parameter, we want to detect anomalies that could indicate problems such as leaks, blockages, etc.

To get the data concerned, we simply need to create an instance of the machine data simulator and persist the data in the data lake:

If you choose to perform the model training step yourself, you will need to wait approximately 15 minutes for enough data to be available for model training. If you want to speed this up, you can configure a lower wait time when creating the adapter. Please be aware that this also influences the inference scenario.

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Model-Training-with-Historic-Data","title":"Model Training with Historic Data\u00b6","text":"

As mentioned above, the aim of our model is to detect anomalies of the volume_flow parameter. For this task, we will use Isolation Forests. Please note that the focus of this tutorial is not on training the model, so please bear with us: the training is deliberately simplified and lacks important preparation steps such as standardization.

As a first step, let's query the flowrate data from the StreamPipes data lake and extract the values of volume_flow as a feature:

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Model-Inference-with-Live-Data","title":"Model Inference with Live Data\u00b6","text":"

Utilizing a pre-trained model within StreamPipes becomes seamless with the ONNX interoperability standard, enabling effortless application of your existing model on live data streams.

Interacting with live data from StreamPipes is facilitated through StreamPipes functions. Below, we'll create a Python StreamPipes function that leverages an ONNX model to generate predictions for each incoming event, making the results accessible as a data stream within StreamPipes for subsequent steps.

So let's create an ONNXFunction that is capable of applying a model in ONNX representation to a StreamPipes data stream. If you'd like to read more details about how functions are defined, refer to our third tutorial.

"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"\ud83c\udfe1 Home","text":"StreamPipes is a self-service (Industrial) IoT toolbox to enable non-technical users to connect, analyze and explore IoT data streams. Apache StreamPipes for Python \ud83d\udc0d

Apache StreamPipes meets Python! We are working with high motivation on a Python library to interact with StreamPipes. In this way, we would like to unite the power of StreamPipes, which makes it easy to connect to and read from different data sources (especially in the IoT domain), with the amazing universe of data analytics libraries in Python.

"},{"location":"#quickstart","title":"\u26a1\ufe0f Quickstart","text":"

As a quick example, we demonstrate how to set up and configure a StreamPipes client. In addition, we will get the available data lake measures out of StreamPipes.

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nconfig = StreamPipesClientConfig(\n    credential_provider = StreamPipesApiKeyCredentials(\n        username = \"test@streampipes.apache.org\",\n        api_key = \"DEMO-KEY\",\n        ),\n    host_address = \"localhost\",\n    https_disabled = True,\n    port = 80\n)\n\nclient = StreamPipesClient(client_config=config)\n\n# get all available data lake measures\nmeasures = client.dataLakeMeasureApi.all()\n\n# get the number of retrieved measures\nlen(measures)\n
Output:
1\n

# inspect the data lake measures as pandas dataframe\nmeasures.to_pandas()\n

Output:

measure_name timestamp_field ... pipeline_is_running num_event_properties\n0 test s0::timestamp ... False 2\n[1 rows x 6 columns]\n
Alternatively, you can provide your credentials via environment variables. Simply define your credential provider as follows:

from streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nStreamPipesApiKeyCredentials()\n
This requires setting the following environment variables: SP_API_KEY and SP_USERNAME.

username is always the username that is used to log in to StreamPipes.
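
For example, the environment variables can also be set from within Python before instantiating the provider (a minimal sketch; the values are placeholders):

import os\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"TOKEN\"\n\ncredentials = StreamPipesApiKeyCredentials()\n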

How to get your StreamPipes API key

The api_key can be generated within the UI as demonstrated below:

"},{"location":"getting-started/developing/","title":"Developing & Contributing","text":""},{"location":"getting-started/developing/#development-guide","title":"\ud83d\udcd6 Development Guide","text":"

This document describes how to easily set up your local dev environment to work on StreamPipes Python \ud83d\udc0d.

"},{"location":"getting-started/developing/#first-steps","title":"\ud83d\ude80 First Steps","text":"

1) Set up your Python environment

Create a virtual Python environment using a tool of your choice. To manage dependencies, we use Poetry, so please install poetry in your local environment, e.g. via

pip install poetry\n

Once poetry is installed, you can simply finalize your Python environment by running:

poetry install --with dev,stubs  # install everything that is required for the development\npoetry install --with docs  # install everything to work with the documentation\npoetry install --with dev,stubs,docs  # install all optional dependencies related to development\n

2) Install pre-commit hook

The pre-commit hook is run before every commit and takes care of code style, linting, type hints, import sorting, etc. It will block your commit if the changes do not comply with the expected format. Always make sure to have the most recent version of the pre-commit hook installed, otherwise the CI build might fail. If you are interested, you can have a deeper look at the underlying library: pre-commit.

pre-commit install\n
The definition of the pre-commit hook can be found in .pre-commit-config.yaml.

"},{"location":"getting-started/developing/#conventions","title":"\ud83d\udc4f Conventions","text":"

Below we list some conventions that we have agreed on for creating StreamPipes Python. Please comply with them when you plan to contribute to this project. If you have any other suggestions or would like to discuss them, we would be happy to hear from you on our mailing list dev@streampipes.apache.org or in our discussions on GitHub.

1) Use numpy style for Python docstrings \ud83d\udcc4 Please stick to the numpy style when writing docstrings, as we require this for generating our documentation.
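
As a small illustration (a generic example, not taken from the code base), a numpy-style docstring looks like this:

def add(a: int, b: int) -> int:\n    \"\"\"Adds two integers.\n\n    Parameters\n    ----------\n    a: int\n        The first summand.\n    b: int\n        The second summand.\n\n    Returns\n    -------\n    int\n        The sum of both parameters.\n    \"\"\"\n    return a + b\n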

2) Provide tests \u2705 We are aiming for broad test coverage for the Python package and have therefore set a requirement of at least 90% unit test coverage. Thus, please remember to write (unit) tests already during development. If you have problems with writing tests, don't hesitate to ask us for help directly in the PR or even before that via our mailing list (see above).

3) Build a similar API as the Java client provides \ud83d\udd04 Whenever possible, please try to keep the API of the Python library consistent with the Java client or Java SDK. By doing so, we would like to provide a consistent developer experience and the basis for automated testing in the future.

"},{"location":"getting-started/developing/#dependency-management","title":"\ud83d\udce6 Dependency Management","text":"

In case you want to add a new dependency to StreamPipes, you can use the following command:

poetry add <dep-name>\n

If the dependency is only required for development purposes or the documentation, please stick to one of the following:

poetry add <dep-name> --group dev\npoetry add <dep-name> --group stubs\npoetry add <dep-name> --group docs\n

If you want to regenerate the poetry lock file, e.g., after manually updating pyproject.toml, the following command should be used:

poetry lock --no-update\n

After that, you should install the current version of the poetry lock file to keep your local environment consistent (see the command above).

"},{"location":"getting-started/developing/#documentation","title":"\ud83d\udcdaDocumentation","text":"

To build our documentation, we use Material for MkDocs. All files can be found within the docs directory. To preview your local version of the documentation, you can use the following command:

make livedoc\n

"},{"location":"getting-started/developing/#roadmap","title":"\ud83d\ude80 Roadmap","text":"

Broadly speaking, we plan to expand the library and add new functionality, focusing on the following:

  • increase coverage of StreamPipes API \ud83d\udd17
  • build a comprehensive function zoo \ud83d\udc18
  • support more messaging brokers \ud83d\udcec
  • possibility to build pipeline elements \ud83d\udd27

In case you want to have a more detailed look at what we are currently planning, have a look at our open issues (more short-term driven).

Of course, contributions are always highly appreciated \ud83d\udd2e

Stay tuned!

"},{"location":"getting-started/developing/#contributing","title":"\ud83d\udc68\u200d\ud83d\udcbb Contributing","text":"

Before opening a pull request, review the Get Involved page. It lists information that is required for contributing to StreamPipes.

When you contribute code, you affirm that the contribution is your original work and that you license the work to the project under the project's open source license. Whether or not you state this explicitly, by submitting any copyrighted material via pull request, email, or other means you agree to license the material under the project's open source license and warrant that you have the legal authority to do so.

"},{"location":"getting-started/first-steps/","title":"First Steps","text":""},{"location":"getting-started/first-steps/#installation","title":"\ud83d\udcda Installation","text":"

The StreamPipes Python library is meant to work with Python 3.8 and above. Installation can be done via pip; you can install either the latest release or the current development version from GitHub, as shown below:

pip install streampipes\n\n# if you want to have the current development state you can also execute\npip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python\n# the corresponding documentation can be found here: https://streampipes.apache.org/docs/docs/python/dev/\n
"},{"location":"getting-started/first-steps/#setting-up-streampipes","title":"\u2b06\ufe0f Setting up StreamPipes","text":"

When working with the StreamPipes Python library, you inevitably need a running StreamPipes instance to connect to and interact with. In case you don't have a running instance at hand, you can easily set one up on your local machine. Hereby, you need to consider that StreamPipes supports different message brokers (e.g., Kafka, NATS). We will demonstrate below how you can easily set up StreamPipes for both supported message brokers.

"},{"location":"getting-started/first-steps/#start-streampipes-via-docker-compose","title":"\ud83d\udc33 Start StreamPipes via Docker Compose","text":"

The easiest and therefore recommended way to get StreamPipes started is by using docker compose. Therefore, you need Docker running. You can check if Docker is ready on your machine by executing:

docker ps\n
If this results in an output similar to the following, Docker is ready to continue.
CONTAINER ID   IMAGE     COMMAND   CREATED   STATUS    PORTS     NAMES\n...            ...       ...       ...       ...       ...       ...\n
Otherwise, you need to start Docker first. Please read the full guide on how to start StreamPipes with docker compose here.

"},{"location":"getting-started/first-steps/#setup-streampipes-with-nats-as-message-broker","title":"Setup StreamPipes with NATS as message broker","text":"

The following shows how you can set up a StreamPipes instance that uses NATS as its messaging layer, so in this scenario we will go with docker-compose.nats.yml. When running locally, we need to add the following port mapping entry to services.nats.ports:

- 4222:4222\n

After this modification is applied, StreamPipes can simply be started with this command:

docker-compose -f docker-compose.nats.yml up -d\n

Once all services are started, you can access StreamPipes via http://localhost.

"},{"location":"getting-started/first-steps/#setup-streampipes-with-kafka-as-message-broker","title":"Setup StreamPipes with Kafka as message broker","text":"

Alternatively, you can use docker-compose.yml to start StreamPipes with Kafka as its messaging layer. When running locally, we have to modify services.kafka.environment and add the ports to services.kafka.ports:

environment:\nKAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,OUTSIDE:PLAINTEXT\nKAFKA_ADVERTISED_LISTENERS: PLAINTEXT://:9092,OUTSIDE://localhost:9094\nKAFKA_LISTENERS: PLAINTEXT://:9092,OUTSIDE://:9094\n...\nports:\n- 9094:9094\n
Then, you need to execute the following command:
docker-compose -f docker-compose.yml up -d\n

Once all services are started, you can access StreamPipes via http://localhost.

In case you want to have more control over your StreamPipes setup, you might take a look at our deployment CLI.

Have fun discovering StreamPipes and our Python library \ud83d\ude80

"},{"location":"getting-started/quickstart/","title":"Quickstart","text":""},{"location":"getting-started/quickstart/#quickstart","title":"\u26a1\ufe0f Quickstart","text":"

As a quick example, we demonstrate how to set up and configure a StreamPipes client. In addition, we will get the available data lake measures out of StreamPipes.

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nconfig = StreamPipesClientConfig(\n    credential_provider = StreamPipesApiKeyCredentials(\n        username = \"test@streampipes.apache.org\",\n        api_key = \"DEMO-KEY\",\n        ),\n    host_address = \"localhost\",\n    https_disabled = True,\n    port = 80\n)\n\nclient = StreamPipesClient(client_config=config)\n\n# get all available data lake measures\nmeasures = client.dataLakeMeasureApi.all()\n\n# get the number of retrieved measures\nlen(measures)\n
Output:
1\n

# inspect the data lake measures as pandas dataframe\nmeasures.to_pandas()\n

Output:

measure_name timestamp_field ... pipeline_is_running num_event_properties\n0 test s0::timestamp ... False 2\n[1 rows x 6 columns]\n
Alternatively, you can provide your credentials via environment variables. Simply define your credential provider as follows:

from streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nStreamPipesApiKeyCredentials()\n
This requires setting the following environment variables: SP_API_KEY and SP_USERNAME.

username is always the username that is used to log in to StreamPipes. The api_key can be generated within the UI as demonstrated below:

"},{"location":"reference/client/client/","title":"Client","text":"

Implementation of the StreamPipes client. The client is designed as the central point of interaction with the StreamPipes API and provides all functionalities to communicate with the API.

"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient","title":"StreamPipesClient(client_config, logging_level=logging.INFO)","text":"

The client to connect to StreamPipes.

This is the central point of contact with StreamPipes and provides all the functionalities to interact with it.

The client provides so-called \"endpoints\", each of which refers to an endpoint of the StreamPipes API, e.g. .dataLakeMeasureApi. An endpoint provides the actual methods to interact with the StreamPipes API.

PARAMETER DESCRIPTION client_config

Configures the client to connect properly to the StreamPipes instance.

TYPE: StreamPipesClientConfig

logging_level

Influences the log messages emitted by the StreamPipesClient

TYPE: Optional[int] DEFAULT: INFO

ATTRIBUTE DESCRIPTION dataLakeMeasureApi

Instance of the data lake measure endpoint

TYPE: DataLakeMeasureEndpoint

dataStreamApi

Instance of the data stream endpoint

TYPE: DataStreamEndpoint

RAISES DESCRIPTION AttributeError:

In case an invalid configuration of the StreamPipesClientConfig is passed

Examples:

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(\n         username=\"test-user\",\n         api_key=\"api-key\"\n     ),\n     host_address=\"localhost\",\n     https_disabled=True\n)\n

The following way of instantiating a client instance is intended to be consistent with the StreamPipes Java client.

client = StreamPipesClient.create(client_config=client_config)\n

If you prefer a more pythonic way, you can simply write:

client = StreamPipesClient(client_config=client_config)\n

To interact with an endpoint:

data_lake_measures = client.dataLakeMeasureApi.all()\n

To inspect returned data as a pandas dataframe:

data_lake_measures.to_pandas()\n#\n#     measure_name timestamp_field  ... pipeline_is_running num_event_properties\n# 0           test   s0::timestamp  ...               False                    2\n# [1 rows x 6 columns]\n

"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.base_api_path","title":"base_api_path: str property","text":"

Constructs the basic API URL from the given client_config.

RETURNS DESCRIPTION base_api_path

basic API path of the connected StreamPipes instance

TYPE: str
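
For instance, with the client configuration used throughout the examples (localhost, port 80, https disabled), inspecting the property could look like this (a sketch; the exact string depends on your configuration):

client.base_api_path\n# e.g., 'http://localhost:80/streampipes-backend/'\n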

"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.http_headers","title":"http_headers: Dict[str, str] property","text":"

Returns the HTTP headers used for all requests.

The HTTP headers are composed of the authentication headers supplied by the credential provider and additional required headers (currently this is only the application header).

RETURNS DESCRIPTION http_headers

Header information for HTTP requests as string key-value pairs.

TYPE: Dict[str, str]

"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.create","title":"create(client_config, logging_level=logging.INFO) classmethod","text":"

Returns an instance of the StreamPipesPythonClient.

Provides consistency with the StreamPipes Java client.

PARAMETER DESCRIPTION client_config

Configures the client to connect properly to the StreamPipes instance.

TYPE: StreamPipesClientConfig

logging_level

Influences the log messages emitted by the StreamPipesClient.

TYPE: int DEFAULT: INFO

RETURNS DESCRIPTION StreamPipesClient"},{"location":"reference/client/client/#streampipes.client.client.StreamPipesClient.describe","title":"describe()","text":"

Prints a short description of the connected StreamPipes instance and the available resources to the console.

RETURNS DESCRIPTION None

Examples:

client.describe()\n
Output:
Hi there!\nYou are connected to a StreamPipes instance running at http://localhost:80.\nThe following StreamPipes resources are available with this client:\n6x DataStreams\n1x DataLakeMeasures\n

"},{"location":"reference/client/config/","title":"Config","text":"

Configuration class for the StreamPipes client.

"},{"location":"reference/client/config/#streampipes.client.config.StreamPipesClientConfig","title":"StreamPipesClientConfig(credential_provider, host_address, https_disabled=False, port=80, additional_headers=dict()) dataclass","text":"

Configure the StreamPipes client in accordance with the actual StreamPipes instance to connect to.

An instance is provided to the StreamPipesClient to configure it properly.

PARAMETER DESCRIPTION credential_provider

Provides the credentials to authenticate with the StreamPipes API.

TYPE: CredentialProvider

host_address

Host address of the StreamPipes instance to connect to. Should be provided without the protocol/scheme, e.g. as localhost or streampipes.xyz.

TYPE: str

https_disabled

Determines whether https is used to connect to StreamPipes.

TYPE: Optional[bool] DEFAULT: False

port

Specifies the port under which the StreamPipes API is available, e.g., 80 (with http) or 443 (with https)

TYPE: Optional[int] DEFAULT: 80

additional_headers

Specifies additional HTTP headers that should be sent with each request, e.g., proxy headers

TYPE: Optional[Dict[str, str]] DEFAULT: dict()
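
A configuration with such extra headers could look like the following sketch (the header name and value are hypothetical placeholders):

config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80,\n    additional_headers={\"X-Forwarded-For\": \"203.0.113.10\"},  # hypothetical proxy header\n)\n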

Examples:

see StreamPipesClient

"},{"location":"reference/client/credential_provider/","title":"Credential provider","text":"

Implementation of credential providers. A credential provider supplies the specified sort of credentials in the appropriate HTTP header format. The headers are then used by the client to connect to StreamPipes.

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.CredentialProvider","title":"CredentialProvider","text":"

Bases: ABC

Abstract implementation of a credential provider. Must be inherited by all credential providers.

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.CredentialProvider.make_headers","title":"make_headers(http_headers=None)","text":"

Creates the HTTP headers for the specific credential provider.

Concrete authentication headers must be defined in the implementation of a credential provider.

PARAMETER DESCRIPTION http_headers

Additional HTTP headers the generated headers are extended by.

TYPE: Optional[Dict[str, str]] DEFAULT: None

RETURNS DESCRIPTION https_headers

Dictionary with header information as string key-value pairs. Contains all pairs given as parameter plus the header pairs for authentication determined by the credential provider.

TYPE: Dict[str, str]

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.StreamPipesApiKeyCredentials","title":"StreamPipesApiKeyCredentials(username=None, api_key=None)","text":"

Bases: CredentialProvider

A credential provider that allows authentication via a StreamPipes API Token.

The required token can be generated via the StreamPipes UI (see the description on our start page).

Both parameters can either be passed as arguments or remain unset. If they are not passed, they are retrieved from environment variables:

  • SP_USERNAME is expected to contain the username
  • SP_API_KEY is expected to contain the API key
PARAMETER DESCRIPTION username

The username to which the API token is granted, e.g., demo-user@streampipes.apache.org. If not passed, the username is retrieved from the environment variable SP_USERNAME.

TYPE: Optional[str] DEFAULT: None

api_key

The StreamPipes API key as it is displayed in the UI. If not passed, the API key is retrieved from the environment variable SP_API_KEY.

TYPE: Optional[str] DEFAULT: None

Examples:

see StreamPipesClient

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.StreamPipesApiKeyCredentials.from_env","title":"from_env(username_env, api_key_env) classmethod","text":"

DEPRECATED - use the class constructor instead

Returns an API key provider parameterized via environment variables.

PARAMETER DESCRIPTION username_env

Name of the environment variable that contains the username

TYPE: str

api_key_env

Name of the environment variable that contains the API key

TYPE: str

RETURNS DESCRIPTION StreamPipesApiKeyCredentials RAISES DESCRIPTION KeyError

If one of the environment variables is not defined

"},{"location":"reference/client/credential_provider/#streampipes.client.credential_provider.StreamPipesApiKeyCredentials.make_headers","title":"make_headers(http_headers=None)","text":"

Creates the HTTP headers for the specific credential provider.

Concrete authentication headers must be defined in the implementation of a credential provider.

PARAMETER DESCRIPTION http_headers

Additional HTTP headers the generated headers are extended by.

TYPE: Optional[Dict[str, str]] DEFAULT: None

RETURNS DESCRIPTION https_headers

Dictionary with header information as string key-value pairs. Contains all pairs given as parameter plus the header pairs for authentication determined by the credential provider.

TYPE: Dict[str, str]
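
Usage could look like this (a minimal sketch; the concrete authentication header names are determined by the credential provider):

credentials = StreamPipesApiKeyCredentials(username=\"test-user\", api_key=\"api-key\")\nheaders = credentials.make_headers()\n\n# extend the generated headers by additional ones\nheaders_extended = credentials.make_headers(http_headers={\"Connection\": \"keep-alive\"})\n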

"},{"location":"reference/endpoint/endpoint/","title":"Endpoint","text":"

General implementation for an endpoint. The provided classes and assets are intended to be used for developing endpoints. An endpoint provides all options to communicate with a dedicated part of StreamPipes in a handy way.

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint","title":"APIEndpoint(parent_client)","text":"

Bases: Endpoint

Abstract implementation of an API endpoint.

Serves as template for all endpoints of the StreamPipes API. By design, endpoints are only instantiated within the __init__ method of the StreamPipesClient.

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.all","title":"all()","text":"

Get all resources of this endpoint provided by the StreamPipes API.

Results are provided as an instance of a ResourceContainer that allows handling the returned resources in a comfortable and pythonic way.

RETURNS DESCRIPTION container

Container element that bundles the returned resources

TYPE: ResourceContainer

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.build_url","title":"build_url()","text":"

Builds the URL of the endpoint's API path.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the specified resource from the API endpoint.

PARAMETER DESCRIPTION identifier

The identifier of the resource to be queried.

TYPE: str

RETURNS DESCRIPTION resource

The specified resource as an instance of the corresponding model class.

TYPE: Resource

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.APIEndpoint.post","title":"post(resource)","text":"

Allows posting a resource to the StreamPipes API.

PARAMETER DESCRIPTION resource

The resource to be posted.

TYPE: Resource

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.Endpoint","title":"Endpoint(parent_client)","text":"

Bases: ABC

Abstract implementation of a StreamPipes endpoint.

Serves as template for all endpoints used for interaction with a StreamPipes instance. By design, endpoints are only instantiated within the __init__ method of the StreamPipesClient.

PARAMETER DESCRIPTION parent_client

This parameter expects the instance of StreamPipesClient the endpoint is attached to.

TYPE: StreamPipesClient

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.MessagingEndpoint","title":"MessagingEndpoint(parent_client)","text":"

Bases: Endpoint

Abstract implementation of a StreamPipes messaging endpoint.

Serves as template for all endpoints used for interacting with the StreamPipes messaging layer directly. Therefore, they need to provide the functionality to talk to the broker system running in StreamPipes. By design, endpoints are only instantiated within the __init__ method of the StreamPipesClient.

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.MessagingEndpoint.broker","title":"broker: Broker property writable","text":"

Defines the broker instance that is used to connect to StreamPipes' messaging layer.

This instance enables the client to authenticate to the broker used in the target StreamPipes instance, to consume messages from and to write messages to the broker.

RAISES DESCRIPTION MessagingEndpointNotConfiguredError

If the endpoint is used before the broker instance is set via configure()

RETURNS DESCRIPTION broker

The broker instance to be used to communicate with StreamPipes' messaging layer.

TYPE: Broker

"},{"location":"reference/endpoint/endpoint/#streampipes.endpoint.endpoint.MessagingEndpoint.configure","title":"configure(broker)","text":"

Configures the message endpoint by setting the broker instance to be used.

This configuration step is required before the endpoint can actually be used. The passed broker instance is assigned to an internal property.

PARAMETER DESCRIPTION broker

Broker instance that should be used for this endpoint

TYPE: Broker

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/exceptions/","title":"Exceptions","text":"

Custom exceptions dedicated to the endpoints module.

"},{"location":"reference/endpoint/exceptions/#streampipes.endpoint.exceptions.MessagingEndpointNotConfiguredError","title":"MessagingEndpointNotConfiguredError(endpoint_name)","text":"

Bases: Exception

Exception that indicates that an instance of a messaging endpoint has not been configured.

This error occurs when an instance of a messaging endpoint is used before the broker instance to be used is configured by passing it to the configure() method.

PARAMETER DESCRIPTION endpoint_name

The name of the endpoint that caused the error

TYPE: str

"},{"location":"reference/endpoint/api/data_lake_measure/","title":"Data lake measure","text":"

Specific implementation of the StreamPipes API's data lake measure endpoints. This endpoint allows consuming data stored in StreamPipes' data lake.

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint","title":"DataLakeMeasureEndpoint(parent_client)","text":"

Bases: APIEndpoint

Implementation of the DataLakeMeasure endpoint.

This endpoint provides an interface to all data stored in the StreamPipes data lake.

Consequently, it allows querying metadata about available data sets (see all() method). The metadata is returned as an instance of DataLakeMeasures.

In addition, the endpoint provides direct access to the data stored in the data lake by querying a specific data lake measure using the get() method.

Examples:

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(username=\"test-user\", api_key=\"api-key\"),\n    host_address=\"localhost\",\n    port=8082,\n    https_disabled=True\n)\nclient = StreamPipesClient.create(client_config=client_config)\n

# get all existing data lake measures from StreamPipes\ndata_lake_measures = client.dataLakeMeasureApi.all()\n\n# let's take a look how many we got\nlen(data_lake_measures)\n
5\n

# Retrieve a specific data lake measure as a pandas DataFrame\nflow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\").to_pandas()\nflow_rate_pd\n
                         time    density  mass_flow    sensorId  sensor_fault_flags  temperature  volume_flow\n0    2023-02-24T16:19:41.472Z  50.872730   3.309556  flowrate02               False    44.448483     5.793138\n1    2023-02-24T16:19:41.482Z  47.186588   5.608580  flowrate02               False    40.322033     0.058015\n2    2023-02-24T16:19:41.493Z  46.735321   7.692881  flowrate02               False    49.239639    10.283526\n3    2023-02-24T16:19:41.503Z  40.169796   3.632898  flowrate02               False    49.933754     6.893441\n4    2023-02-24T16:19:41.513Z  49.635124   0.711260  flowrate02               False    50.106617     2.999871\n..                        ...        ...        ...         ...                 ...          ...          ...\n995  2023-02-24T16:19:52.927Z  50.057495   1.740114  flowrate02               False    46.558231     1.818237\n996  2023-02-24T16:19:52.94Z   41.038895   7.211723  flowrate02               False    48.048622     2.127493\n997  2023-02-24T16:19:52.952Z  45.837013   7.770180  flowrate02               False    48.188026     7.892062\n998  2023-02-24T16:19:52.965Z  43.389065   4.458602  flowrate02               False    48.280899     5.733892\n999  2023-02-24T16:19:52.977Z  44.056030   2.592060  flowrate02               False    47.505951     4.260697\n

As you can see, the number of returned rows defaults to 1000. We can modify this behavior by passing the limit parameter.

flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", limit=10).to_pandas()\nlen(flow_rate_pd)\n
10\n

If we are only interested in the values for density, columns allows us to select the columns to be returned:

flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", columns='density', limit=3).to_pandas()\nflow_rate_pd\n
                       time    density\n0  2023-02-24T16:19:41.472Z  50.872730\n1  2023-02-24T16:19:41.482Z  47.186588\n2  2023-02-24T16:19:41.493Z  46.735321\n

This is only a subset of the available query parameters, find them at MeasurementGetQueryConfig.

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.all","title":"all()","text":"

Get all resources of this endpoint provided by the StreamPipes API.

Results are provided as an instance of a ResourceContainer that allows handling the returned resources in a comfortable and pythonic way.

RETURNS DESCRIPTION container

Container element that bundles the returned resources

TYPE: ResourceContainer

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.build_url","title":"build_url()","text":"

Builds the URL of the endpoint's API path.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the specified data lake measure from the API.

By default, the maximum number of returned records is 1000. This behaviour can be influenced by passing the parameter limit with a different value (see MeasurementGetQueryConfig).

PARAMETER DESCRIPTION identifier

The identifier of the data lake measure to be queried.

TYPE: str

**kwargs

Keyword arguments can be used to provide additional query parameters. The available query parameters are defined by the MeasurementGetQueryConfig.

TYPE: Optional[Dict[str, Any]] DEFAULT: {}

RETURNS DESCRIPTION measurement

The specified data lake measure

TYPE: QueryResult

Examples:

see directly at DataLakeMeasureEndpoint.

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.DataLakeMeasureEndpoint.post","title":"post(resource)","text":"

Allows posting a resource to the StreamPipes API.

PARAMETER DESCRIPTION resource

The resource to be posted.

TYPE: Resource

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.MeasurementGetQueryConfig","title":"MeasurementGetQueryConfig","text":"

Bases: BaseModel

Config class describing the parameters of the get() method for measurements.

This config class is used to validate the provided query parameters for the GET endpoint of measurements. Additionally, it takes care of the conversion to a proper HTTP query string. Thereby, parameter names are adapted to the naming of the StreamPipes API, for which Pydantic aliases are used.

ATTRIBUTE DESCRIPTION columns

A comma separated list of column names (e.g., time,value). If provided, the returned data only consists of the given columns. Please be aware that the column time is always included as the index.

TYPE: Optional[List[str]]

end_date

Limits the queried data to only include data that is older than the specified time. In other words, any data that occurred after the end_date will not be included in the query results.

TYPE: Optional[datetime]

limit

Maximum number of records returned (default: 1000). This needs to be at least 1.

TYPE: Optional[int]

offset

Offset to be applied to the returned data. This needs to be at least 0.

TYPE: Optional[int]

order

Ordering of query results. Allowed values: ASC and DESC (default: ASC).

TYPE: Optional[str]

page_no

Page number used for paging operations. This needs to be at least 1.

TYPE: Optional[int]

start_date

Limits the queried data to only include data that is newer than the specified time. In other words, any data that occurred before the start_date will not be included in the query results.

TYPE: Optional[datetime]
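
Putting several of these parameters together, a query could look like the following sketch (dates and values are placeholders; see also the examples at DataLakeMeasureEndpoint):

from datetime import datetime\n\nflow_rate_pd = client.dataLakeMeasureApi.get(\n    identifier=\"flow-rate\",\n    columns=\"density,volume_flow\",\n    start_date=datetime(2023, 2, 24, 16, 19, 41),\n    end_date=datetime(2023, 2, 24, 16, 20, 0),\n    order=\"DESC\",\n    limit=100,\n).to_pandas()\n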

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.MeasurementGetQueryConfig.Config","title":"Config","text":"

Pydantic Config class

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.MeasurementGetQueryConfig.build_query_string","title":"build_query_string()","text":"

Builds an HTTP query string for the config.

This method returns an HTTP query string for the invoking config. It follows the structure ?param1=value1&param2=value2.... This query string is not an entire URL; instead, it needs to be appended to an API path.

RETURNS DESCRIPTION query_param_string

HTTP query params string (?param1=value1&param2=value2...)

TYPE: str
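
As a hypothetical illustration of the described structure (not an exact output of the implementation):

from streampipes.endpoint.api.data_lake_measure import MeasurementGetQueryConfig\n\nconfig = MeasurementGetQueryConfig(limit=100, order=\"DESC\")\nconfig.build_query_string()\n# e.g., '?limit=100&order=DESC'\n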

"},{"location":"reference/endpoint/api/data_lake_measure/#streampipes.endpoint.api.data_lake_measure.StreamPipesQueryValidationError","title":"StreamPipesQueryValidationError","text":"

Bases: Exception

A custom exception to be raised when the validation of a query parameter causes an error.

"},{"location":"reference/endpoint/api/data_stream/","title":"Data stream","text":"

Specific implementation of the StreamPipes API's data stream endpoints.

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint","title":"DataStreamEndpoint(parent_client)","text":"

Bases: APIEndpoint

Implementation of the DataStream endpoint.

It allows querying metadata about available data streams (see all() method). The metadata is returned as an instance of DataStreams.

Examples:

from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nclient_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(username=\"test-user\", api_key=\"api-key\"),\n    host_address=\"localhost\",\n    port=8082,\n    https_disabled=True\n)\nclient = StreamPipesClient.create(client_config=client_config)\n

# let's get all existing data streams in StreamPipes\ndata_streams = client.dataStreamApi.all()\nlen(data_streams)\n
2\n

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.all","title":"all()","text":"

Get all resources of this endpoint provided by the StreamPipes API.

Results are provided as an instance of a ResourceContainer that allows handling the returned resources in a comfortable and pythonic way.

RETURNS DESCRIPTION container

Container element that bundles the returned resources

TYPE: ResourceContainer

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.build_url","title":"build_url()","text":"

Builds the URL of the endpoint's API path.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the specified resource from the API endpoint.

PARAMETER DESCRIPTION identifier

The identifier of the resource to be queried.

TYPE: str

RETURNS DESCRIPTION resource

The specified resource as an instance of the corresponding model class.

TYPE: Resource

"},{"location":"reference/endpoint/api/data_stream/#streampipes.endpoint.api.data_stream.DataStreamEndpoint.post","title":"post(resource)","text":"

Allows posting a resource to the StreamPipes API.

PARAMETER DESCRIPTION resource

The resource to be posted.

TYPE: Resource

RETURNS DESCRIPTION None"},{"location":"reference/endpoint/api/version/","title":"Version","text":"

Specific implementation of the StreamPipes API's version endpoint.

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint","title":"VersionEndpoint(parent_client)","text":"

Bases: APIEndpoint

Implementation of the Versions endpoint.

This endpoint provides metadata about the StreamPipes version of the connected instance. It only allows applying the get() method with an empty string as identifier.

PARAMETER DESCRIPTION parent_client

The instance of StreamPipesClient the endpoint is attached to.

TYPE: StreamPipesClient

Examples:

>>> from streampipes.client import StreamPipesClient\n>>> from streampipes.client.config import StreamPipesClientConfig\n>>> from streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
>>> client_config = StreamPipesClientConfig(\n...     credential_provider=StreamPipesApiKeyCredentials(username=\"test-user\", api_key=\"api-key\"),\n...     host_address=\"localhost\",\n...     port=8082,\n...     https_disabled=True\n... )\n
>>> client = StreamPipesClient.create(client_config=client_config)\n
>>> client.versionApi.get(identifier=\"\").to_dict(use_source_names=False)\n{'backend_version': '0.92.0-SNAPSHOT'}\n
"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.all","title":"all()","text":"

Usually, this method returns information about all resources provided by this endpoint. However, this endpoint does not support this kind of operation.

RAISES DESCRIPTION NotImplementedError

This endpoint does not return multiple entries, therefore this method is not available.

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.build_url","title":"build_url()","text":"

Builds the URL of the endpoint's API path.

RETURNS DESCRIPTION url

The URL of the endpoint

TYPE: str

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.get","title":"get(identifier, **kwargs)","text":"

Queries the resource from the API endpoint.

For this endpoint only one resource is available.

PARAMETER DESCRIPTION identifier

Not supported by this endpoint; must be set to an empty string.

TYPE: str

RAISES DESCRIPTION ValueError

Non-empty identifier is not supported by this endpoint. Please set identifier to an empty string or None.

RETURNS DESCRIPTION versions

The specified resource as an instance of the corresponding model class (Version).

TYPE: Version

"},{"location":"reference/endpoint/api/version/#streampipes.endpoint.api.version.VersionEndpoint.post","title":"post(resource)","text":"

Usually, this method allows creating resources via this endpoint. Since the data represented by this endpoint is immutable, it does not support this kind of operation.

RAISES DESCRIPTION NotImplementedError

This endpoint does not allow for POST requests, therefore this method is not available.

"},{"location":"reference/function_zoo/river_function/","title":"River function","text":""},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML","title":"OnlineML(client, stream_ids, model, prediction_type=RuntimeType.STRING.value, supervised=False, target_label=None, on_start=lambda self, context: None, on_event=lambda self, event, streamId: None, on_stop=lambda self: None)","text":"

Wrapper class that enables easy usage of online machine learning models from the River library.

It creates a StreamPipesFunction to train a model with the incoming events of a data stream and creates an output data stream that publishes the prediction to StreamPipes.

PARAMETER DESCRIPTION client

The client for the StreamPipes API.

TYPE: StreamPipesClient

stream_ids

The ids of the data streams used to train the model.

TYPE: List[str]

model

The model to train. It is meant to be a River model/pipeline, but can be any model with a learn_one and a predict_one method.

TYPE: Any

prediction_type

The data type of the prediction. It is only needed if you continue to work with the prediction in StreamPipes.

TYPE: str DEFAULT: RuntimeType.STRING.value

supervised

Defines whether the model is supervised or unsupervised.

TYPE: bool DEFAULT: False

target_label

Defines the name of the target attribute if the model is supervised.

TYPE: Optional[str] DEFAULT: None

on_start

A function to be called when this StreamPipesFunction gets started.

TYPE: Callable[[Any, FunctionContext], None] DEFAULT: lambda self, context: None

on_event

A function to be called when this StreamPipesFunction receives an event.

TYPE: Callable[[Any, Dict[str, Any], str], None] DEFAULT: lambda self, event, streamId: None

on_stop

A function to be called when this StreamPipesFunction gets stopped.

TYPE: Callable[[Any], None] DEFAULT: lambda self: None
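
A minimal usage sketch, hedged: it assumes a running StreamPipes instance reachable through an already initialized client, an installed river package, and a hypothetical stream id.

>>> from river import cluster
>>> from streampipes.function_zoo.river_function import OnlineML
>>> from streampipes.functions.utils.data_stream_generator import RuntimeType
>>> k_means = OnlineML(
...     client=client,  # an initialized StreamPipesClient
...     stream_ids=["example-stream-id"],  # hypothetical stream id
...     model=cluster.KMeans(n_clusters=2),
...     prediction_type=RuntimeType.INTEGER.value,
... )
>>> k_means.start()
>>> k_means.set_learning(False)  # keep predicting, but stop training
>>> k_means.stop()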

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML.set_learning","title":"set_learning(learning)","text":"

Start or stop the training of the model.

PARAMETER DESCRIPTION learning

Defines whether the training should be continued.

TYPE: bool

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML.start","title":"start()","text":"

Registers the function and starts the training.

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.OnlineML.stop","title":"stop()","text":"

Stops the function and permanently ends the training.

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction","title":"RiverFunction(function_definition, model, supervised, target_label, on_start, on_event, on_stop)","text":"

Bases: StreamPipesFunction

Implementation of a StreamPipesFunction that enables easy use of online machine learning models from the River library.

The function trains the model with the incoming events and publishes the prediction to an output data stream.

PARAMETER DESCRIPTION function_definition

The function definition which contains the output stream.

TYPE: FunctionDefinition

model

The model to train. It is meant to be a River model/pipeline, but can be any model with 'learn_one' and 'predict_one' methods.

TYPE: Any

supervised

Defines whether the model is supervised or unsupervised.

TYPE: bool

target_label

Defines the name of the target attribute if the model is supervised.

TYPE: Optional[str]

on_start

A function to be called when this StreamPipesFunction gets started.

TYPE: Callable[[Any, FunctionContext], None]

on_event

A function to be called when this StreamPipesFunction receives an event.

TYPE: Callable[[Any, Dict[str, Any], str], None]

on_stop

A function to be called when this StreamPipesFunction gets stopped.

TYPE: Callable[[Any], None]

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.add_output","title":"add_output(stream_id, event)","text":"

Send an event via an output data stream to StreamPipes.

PARAMETER DESCRIPTION stream_id

The id of the output data stream

TYPE: str

event

The event which should be sent

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.getFunctionId","title":"getFunctionId()","text":"

Returns the id of the function.

RETURNS DESCRIPTION function_id

Identification object of the StreamPipes function

TYPE: FunctionId

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.onEvent","title":"onEvent(event, streamId)","text":"

Trains the model with the incoming events and sends the prediction back to StreamPipes.

PARAMETER DESCRIPTION event

The incoming event that serves as input for the function

TYPE: Dict[str, Any]

streamId

Identifier of the corresponding data stream

TYPE: str

RETURNS DESCRIPTION None"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.onServiceStarted","title":"onServiceStarted(context)","text":"

Executes the on_start method of the function.

PARAMETER DESCRIPTION context

The function's context

TYPE: FunctionContext

RETURNS DESCRIPTION None"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.onServiceStopped","title":"onServiceStopped()","text":"

Executes the on_stop function.

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.requiredStreamIds","title":"requiredStreamIds()","text":"

Get the ids of the streams needed by the function.

RETURNS DESCRIPTION stream_ids

List of the stream ids

TYPE: List[str]

"},{"location":"reference/function_zoo/river_function/#streampipes.function_zoo.river_function.RiverFunction.stop","title":"stop()","text":"

Stops the function and disconnects from the output streams.

"},{"location":"reference/functions/function_handler/","title":"Function handler","text":""},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler","title":"FunctionHandler(registration, client)","text":"

The function handler manages the StreamPipes Functions.

It controls the connection to the brokers, starts the functions, manages the broadcast of the live data and is able to stop the connection to the brokers and functions.

PARAMETER DESCRIPTION registration

The registration that contains the StreamPipesFunctions.

TYPE: Registration

client

The client to interact with the API.

TYPE: StreamPipesClient

ATTRIBUTE DESCRIPTION stream_contexts

Map of all data stream contexts

TYPE: Dict[str, DataStreamContext]

brokers

List of all registered brokers

TYPE: List[Broker]
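
A short wiring sketch, hedged: it assumes client is an initialized StreamPipesClient and ExampleFunction is a user-defined StreamPipesFunction subclass (see the sketch in the StreamPipesFunction section below).

>>> from streampipes.functions.function_handler import FunctionHandler
>>> from streampipes.functions.registration import Registration
>>> registration = Registration()
>>> registration.register(ExampleFunction())  # hypothetical user-defined function
>>> function_handler = FunctionHandler(registration, client)
>>> function_handler.initializeFunctions()
>>> # ... later, stop the functions and disconnect from the brokers
>>> function_handler.disconnect()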

"},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler.disconnect","title":"disconnect()","text":"

Disconnects from the brokers and stops all functions.

RETURNS DESCRIPTION None"},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler.force_stop_functions","title":"force_stop_functions()","text":"

Stops the StreamPipesFunctions if the event loop was stopped without the functions being stopped first.

RETURNS DESCRIPTION None WARNS DESCRIPTION UserWarning

If there is a running event loop and the functions should be stopped by disconnecting from the broker.

"},{"location":"reference/functions/function_handler/#streampipes.functions.function_handler.FunctionHandler.initializeFunctions","title":"initializeFunctions()","text":"

Creates the context for every data stream and starts the event loop to manage the StreamPipes Functions.

RETURNS DESCRIPTION None"},{"location":"reference/functions/registration/","title":"Registration","text":""},{"location":"reference/functions/registration/#streampipes.functions.registration.Registration","title":"Registration()","text":"

Manages the existing StreamPipesFunctions and registers them.

ATTRIBUTE DESCRIPTION functions

List of all registered StreamPipesFunction

TYPE: List[StreamPipesFunction]

"},{"location":"reference/functions/registration/#streampipes.functions.registration.Registration.getFunctions","title":"getFunctions()","text":"

Get all registered functions.

This method exists to be consistent with the Java client.

RETURNS DESCRIPTION functions

List of all registered functions.

TYPE: List[StreamPipesFunction]

"},{"location":"reference/functions/registration/#streampipes.functions.registration.Registration.register","title":"register(streampipes_function)","text":"

Registers a new function.

PARAMETER DESCRIPTION streampipes_function

The function to register.

TYPE: StreamPipesFunction

RETURNS DESCRIPTION self

The updated Registration instance

TYPE: Registration

"},{"location":"reference/functions/streampipes_function/","title":"Streampipes function","text":""},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction","title":"StreamPipesFunction(function_definition=None)","text":"

Bases: ABC

Abstract implementation of a StreamPipesFunction.

A StreamPipesFunction allows users to easily access the data of StreamPipes data streams. It makes it possible to work with the live data in Python and enables the use of powerful data analytics libraries.

PARAMETER DESCRIPTION function_definition

The definition of the function that contains metadata about the connected function

TYPE: Optional[FunctionDefinition] DEFAULT: None

ATTRIBUTE DESCRIPTION output_collectors

List of all output collectors which are created based on the provided function definitions.

TYPE: Dict[str, OutputCollector]
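
A minimal subclass sketch; the printed output is illustrative only.

>>> from typing import Any, Dict
>>> from streampipes.functions.streampipes_function import StreamPipesFunction
>>> from streampipes.functions.utils.function_context import FunctionContext
>>> class ExampleFunction(StreamPipesFunction):
...     def onServiceStarted(self, context: FunctionContext) -> None:
...         print("function started")
...     def onEvent(self, event: Dict[str, Any], streamId: str) -> None:
...         print(event)  # work with the live event here
...     def onServiceStopped(self) -> None:
...         print("function stopped")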

"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.add_output","title":"add_output(stream_id, event)","text":"

Send an event via an output data stream to StreamPipes.

PARAMETER DESCRIPTION stream_id

The id of the output data stream

TYPE: str

event

The event which should be sent

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.getFunctionId","title":"getFunctionId()","text":"

Returns the id of the function.

RETURNS DESCRIPTION function_id

Identification object of the StreamPipes function

TYPE: FunctionId

"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.onEvent","title":"onEvent(event, streamId) abstractmethod","text":"

Is called for every event of a data stream.

PARAMETER DESCRIPTION event

The received event from the data stream.

TYPE: Dict[str, Any]

streamId

The id of the data stream which the event belongs to.

TYPE: str

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.onServiceStarted","title":"onServiceStarted(context) abstractmethod","text":"

Is called when the function is started.

PARAMETER DESCRIPTION context

The context in which the function is started.

TYPE: FunctionContext

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.onServiceStopped","title":"onServiceStopped() abstractmethod","text":"

Is called when the function is stopped.

RETURNS DESCRIPTION None"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.requiredStreamIds","title":"requiredStreamIds()","text":"

Get the ids of the streams needed by the function.

RETURNS DESCRIPTION stream_ids

List of the stream ids

TYPE: List[str]

"},{"location":"reference/functions/streampipes_function/#streampipes.functions.streampipes_function.StreamPipesFunction.stop","title":"stop()","text":"

Stops the function and disconnects from the output streams.

"},{"location":"reference/functions/broker/broker/","title":"Broker","text":""},{"location":"reference/functions/broker/broker/#streampipes.functions.broker.broker.Broker","title":"Broker","text":"

Bases: ABC

Abstract implementation of a broker for consumer and publisher.

It contains the basic logic to connect to a data stream.

"},{"location":"reference/functions/broker/broker/#streampipes.functions.broker.broker.Broker.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/broker/#streampipes.functions.broker.broker.Broker.disconnect","title":"disconnect() abstractmethod async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/broker_handler/","title":"Broker handler","text":""},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.SupportedBroker","title":"SupportedBroker","text":"

Bases: Enum

Enum for the supported brokers.

"},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.UnsupportedBrokerError","title":"UnsupportedBrokerError(broker_name)","text":"

Bases: Exception

Exception raised if a broker isn't implemented yet.

"},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.get_broker","title":"get_broker(data_stream, is_publisher=False)","text":"

Derive the broker for the given data stream.

PARAMETER DESCRIPTION data_stream

Data stream instance from which the broker is inferred

TYPE: DataStream

RETURNS DESCRIPTION broker

The corresponding broker instance derived from the data stream.

TYPE: Broker

RAISES DESCRIPTION UnsupportedBrokerError

Is raised when the given data stream belongs to a broker that is currently not supported by StreamPipes Python.
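
A sketch under the assumption that data_stream is a DataStream resource retrieved via the client; connect is a coroutine and must therefore be awaited.

>>> import asyncio
>>> from streampipes.functions.broker.broker_handler import get_broker
>>> consumer = get_broker(data_stream)  # consumer by default
>>> publisher = get_broker(data_stream, is_publisher=True)
>>> asyncio.run(consumer.connect(data_stream))  # subscribe to the stream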

"},{"location":"reference/functions/broker/broker_handler/#streampipes.functions.broker.broker_handler.get_broker_description","title":"get_broker_description(data_stream)","text":"

Derive the description of the broker for the given data stream.

PARAMETER DESCRIPTION data_stream

Data stream instance from which the broker is inferred

TYPE: DataStream

RETURNS DESCRIPTION broker

The corresponding broker description derived from the data stream.

TYPE: SupportedBroker

RAISES DESCRIPTION UnsupportedBrokerError

Is raised when the given data stream belongs to a broker that is currently not supported by StreamPipes Python.

"},{"location":"reference/functions/broker/consumer/","title":"Consumer","text":""},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer","title":"Consumer","text":"

Bases: Broker

Abstract implementation of a consumer for a broker.

A consumer allows subscribing to a data stream.

"},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes and creates a subscription.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer.disconnect","title":"disconnect() abstractmethod async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/consumer/#streampipes.functions.broker.consumer.Consumer.get_message","title":"get_message() abstractmethod","text":"

Get the published messages of the subscription.

RETURNS DESCRIPTION iterator

An async iterator for the messages.

TYPE: AsyncIterator

"},{"location":"reference/functions/broker/output_collector/","title":"Output collector","text":""},{"location":"reference/functions/broker/output_collector/#streampipes.functions.broker.output_collector.OutputCollector","title":"OutputCollector(data_stream)","text":"

Collector for output events. The events are published to an output data stream. Therefore, the output collector establishes a connection to the broker.

PARAMETER DESCRIPTION data_stream

The output data stream that will receive the events.

TYPE: DataStream

ATTRIBUTE DESCRIPTION publisher

The publisher instance that sends the data to StreamPipes

TYPE: Publisher
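
A hedged sketch, assuming data_stream is the DataStream describing the output stream; the event fields are hypothetical.

>>> from streampipes.functions.broker.output_collector import OutputCollector
>>> collector = OutputCollector(data_stream)
>>> collector.collect({"temperature": 21.3})  # publish a hypothetical event
>>> collector.disconnect()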

"},{"location":"reference/functions/broker/output_collector/#streampipes.functions.broker.output_collector.OutputCollector.collect","title":"collect(event)","text":"

Publishes an event to the output stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/output_collector/#streampipes.functions.broker.output_collector.OutputCollector.disconnect","title":"disconnect()","text":"

Disconnects the broker of the output collector.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/publisher/","title":"Publisher","text":""},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher","title":"Publisher","text":"

Bases: Broker

Abstract implementation of a publisher for a broker.

A publisher allows publishing events to a data stream.

"},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher.disconnect","title":"disconnect() abstractmethod async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/publisher/#streampipes.functions.broker.publisher.Publisher.publish_event","title":"publish_event(event) abstractmethod async","text":"

Publish an event to a connected data stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_consumer/","title":"Kafka consumer","text":""},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer","title":"KafkaConsumer","text":"

Bases: Consumer

Implementation of a consumer for Kafka

"},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes and creates a subscription.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_consumer/#streampipes.functions.broker.kafka.kafka_consumer.KafkaConsumer.get_message","title":"get_message()","text":"

Get the published messages of the subscription.

RETURNS DESCRIPTION iterator

An async iterator for the messages.

TYPE: AsyncIterator

"},{"location":"reference/functions/broker/kafka/kafka_message_fetcher/","title":"Kafka message fetcher","text":""},{"location":"reference/functions/broker/kafka/kafka_message_fetcher/#streampipes.functions.broker.kafka.kafka_message_fetcher.KafkaMessage","title":"KafkaMessage(data)","text":"

An internal representation of a Kafka message

PARAMETER DESCRIPTION data

The received Kafka message as a byte array

"},{"location":"reference/functions/broker/kafka/kafka_message_fetcher/#streampipes.functions.broker.kafka.kafka_message_fetcher.KafkaMessageFetcher","title":"KafkaMessageFetcher(consumer)","text":"

Fetches the next message from Kafka

PARAMETER DESCRIPTION consumer

The Kafka consumer

TYPE: Consumer

"},{"location":"reference/functions/broker/kafka/kafka_publisher/","title":"Kafka publisher","text":""},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher","title":"KafkaPublisher","text":"

Bases: Publisher

Implementation of a publisher for Kafka

"},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/kafka/kafka_publisher/#streampipes.functions.broker.kafka.kafka_publisher.KafkaPublisher.publish_event","title":"publish_event(event) async","text":"

Publish an event to a connected data stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_consumer/","title":"Nats consumer","text":""},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer","title":"NatsConsumer","text":"

Bases: Consumer

Implementation of a consumer for NATS

"},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes and creates a subscription.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_consumer/#streampipes.functions.broker.nats.nats_consumer.NatsConsumer.get_message","title":"get_message()","text":"

Get the published messages of the subscription.

RETURNS DESCRIPTION message_iterator

An async iterator for the messages.

TYPE: AsyncIterator

"},{"location":"reference/functions/broker/nats/nats_publisher/","title":"Nats publisher","text":""},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher","title":"NatsPublisher","text":"

Bases: Publisher

Implementation of a publisher for NATS

"},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher.connect","title":"connect(data_stream) async","text":"

Connects to the broker running in StreamPipes.

PARAMETER DESCRIPTION data_stream

Contains the meta information (resources) for a data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher.disconnect","title":"disconnect() async","text":"

Closes the connection to the server.

RETURNS DESCRIPTION None"},{"location":"reference/functions/broker/nats/nats_publisher/#streampipes.functions.broker.nats.nats_publisher.NatsPublisher.publish_event","title":"publish_event(event) async","text":"

Publish an event to a connected data stream.

PARAMETER DESCRIPTION event

The event to be published.

TYPE: Dict[str, Any]

RETURNS DESCRIPTION None"},{"location":"reference/functions/utils/async_iter_handler/","title":"Async iter handler","text":""},{"location":"reference/functions/utils/async_iter_handler/#streampipes.functions.utils.async_iter_handler.AsyncIterHandler","title":"AsyncIterHandler","text":"

Handles asynchronous iterators to retrieve messages one after another in parallel.

"},{"location":"reference/functions/utils/async_iter_handler/#streampipes.functions.utils.async_iter_handler.AsyncIterHandler.anext","title":"anext(stream_id, message) async staticmethod","text":"

Gets the next message from an AsyncIterator.

PARAMETER DESCRIPTION stream_id

The id of the data stream which the message belongs to.

TYPE: str

message

An asynchronous iterator that contains the messages.

TYPE: AsyncIterator

RETURNS DESCRIPTION result

Tuple of the stream id and the next message, or (\"stop\", None) if no message is left.

TYPE: Tuple[str, Optional[Any]]

"},{"location":"reference/functions/utils/async_iter_handler/#streampipes.functions.utils.async_iter_handler.AsyncIterHandler.combine_async_messages","title":"combine_async_messages(messages) async staticmethod","text":"

Continuously gets the next published message from multiple AsyncIterators in parallel.

PARAMETER DESCRIPTION messages

A dictionary with an asynchronous iterator for every stream id.

TYPE: Dict[str, AsyncIterator]

YIELDS DESCRIPTION message

Tuple of the stream id and the next published message.

TYPE: Tuple[str, Any]

"},{"location":"reference/functions/utils/data_stream_context/","title":"Data stream context","text":""},{"location":"reference/functions/utils/data_stream_context/#streampipes.functions.utils.data_stream_context.DataStreamContext","title":"DataStreamContext(functions, schema, broker)","text":"

Container for the context of a data stream.

PARAMETER DESCRIPTION functions

StreamPipes Functions which require the data of this data stream.

TYPE: List[StreamPipesFunction]

schema

The schema of this data stream.

TYPE: DataStream

broker

The consumer to connect to this data stream.

TYPE: Consumer

"},{"location":"reference/functions/utils/data_stream_context/#streampipes.functions.utils.data_stream_context.DataStreamContext.add_function","title":"add_function(function)","text":"

Adds a new StreamPipes Function.

PARAMETER DESCRIPTION function

StreamPipesFunction which requires this data stream.

TYPE: StreamPipesFunction

RETURNS DESCRIPTION None"},{"location":"reference/functions/utils/data_stream_generator/","title":"Data stream generator","text":""},{"location":"reference/functions/utils/data_stream_generator/#streampipes.functions.utils.data_stream_generator.RuntimeType","title":"RuntimeType","text":"

Bases: Enum

Runtime type names for the attributes of a data stream.

ATTRIBUTE DESCRIPTION STRING

BOOLEAN

DOUBLE

FLOAT

INTEGER

LONG

"},{"location":"reference/functions/utils/data_stream_generator/#streampipes.functions.utils.data_stream_generator.create_data_stream","title":"create_data_stream(name, attributes, stream_id=None, broker=SupportedBroker.NATS)","text":"

Creates a data stream

PARAMETER DESCRIPTION name

Name of the data stream to be shown in the UI.

TYPE: str

attributes

Name and types of the attributes.

TYPE: Dict[str, str]

stream_id

The id of this data stream.

TYPE: str DEFAULT: None

RETURNS DESCRIPTION data_stream

The created data stream

TYPE: DataStream
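
A hedged sketch; the stream name and attribute names are hypothetical.

>>> from streampipes.functions.utils.data_stream_generator import (
...     RuntimeType,
...     create_data_stream,
... )
>>> data_stream = create_data_stream(
...     name="example-stream",
...     attributes={"temperature": RuntimeType.FLOAT.value},
... )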

"},{"location":"reference/functions/utils/function_context/","title":"Function context","text":""},{"location":"reference/functions/utils/function_context/#streampipes.functions.utils.function_context.FunctionContext","title":"FunctionContext(function_id, schema, client, streams)","text":"

Container for the context of a StreamPipesFunction.

PARAMETER DESCRIPTION function_id

The id of this function.

TYPE: str

schema

A dictionary which contains the schema of a data stream for each stream id.

TYPE: Dict[str, DataStream]

client

The client to interact with the API.

TYPE: StreamPipesClient

streams

The ids of the streams needed by this function.

TYPE: List[str]

"},{"location":"reference/functions/utils/function_context/#streampipes.functions.utils.function_context.FunctionContext.add_data_stream_schema","title":"add_data_stream_schema(stream_id, data_stream)","text":"

Adds a new data stream for a new stream id.

PARAMETER DESCRIPTION stream_id

The id of the data stream.

TYPE: str

data_stream

The schema of the data stream.

TYPE: DataStream

RETURNS DESCRIPTION None"},{"location":"reference/model/common/","title":"Common","text":"

Classes of the StreamPipes data model that are commonly shared.

"},{"location":"reference/model/common/#streampipes.model.common.ApplicationLink","title":"ApplicationLink","text":"

Bases: BasicModel

Data model of an ApplicationLink in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.ApplicationLink.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.BaseElement","title":"BaseElement","text":"

Bases: BasicModel

Structure of a basic element in the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.BaseElement.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.BasicModel","title":"BasicModel","text":"

Bases: BaseModel

Basic model class used for the whole Python StreamPipes data model.

"},{"location":"reference/model/common/#streampipes.model.common.BasicModel.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.EventGrounding","title":"EventGrounding","text":"

Bases: BasicModel

Data model of an EventGrounding in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.EventGrounding.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.EventProperty","title":"EventProperty","text":"

Bases: BasicModel

Data model of an EventProperty in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.EventProperty.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.EventSchema","title":"EventSchema","text":"

Bases: BasicModel

Data model of an EventSchema in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.EventSchema.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementCapability","title":"MeasurementCapability","text":"

Bases: BasicModel

Data model of a MeasurementCapability in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementCapability.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementObject","title":"MeasurementObject","text":"

Bases: BasicModel

Data model of a MeasurementObject in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.MeasurementObject.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.TopicDefinition","title":"TopicDefinition","text":"

Bases: BasicModel

Data model of a TopicDefinition in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.TopicDefinition.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.TransportFormat","title":"TransportFormat","text":"

Bases: BasicModel

Data model of a TransportFormat in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.TransportFormat.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.TransportProtocol","title":"TransportProtocol","text":"

Bases: BasicModel

Data model of a TransportProtocol in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.TransportProtocol.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.ValueSpecification","title":"ValueSpecification","text":"

Bases: BasicModel

Data model of a ValueSpecification in compliance with the StreamPipes Backend.

"},{"location":"reference/model/common/#streampipes.model.common.ValueSpecification.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/common/#streampipes.model.common.random_letters","title":"random_letters(n)","text":"

Generates a string consisting of random letters.

PARAMETER DESCRIPTION n

number of letters

TYPE: int

RETURNS DESCRIPTION rand_str

String consisting of n random letters

TYPE: str
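
For illustration (the output is random, so the shown value is only an example):

>>> from streampipes.model.common import random_letters
>>> random_letters(5)  # e.g. 'abcxy'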

"},{"location":"reference/model/container/data_lake_measures/","title":"Data lake measures","text":"

Implementation of a resource container for the data lake measures endpoint.

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures","title":"DataLakeMeasures(resources)","text":"

Bases: ResourceContainer

Implementation of the resource container for the data lake measures endpoint.

This resource container is a collection of data lake measures returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried DataLakeMeasure. Furthermore, the resource container makes them accessible in a pythonic manner.
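
A usage sketch, assuming client is an initialized StreamPipesClient:

>>> measures = client.dataLakeMeasureApi.all()
>>> measures.to_pandas()  # overview of all measures as a pandas DataFrame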

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/data_lake_measures/#streampipes.model.container.data_lake_measures.DataLakeMeasures.to_pandas","title":"to_pandas()","text":"

Returns the resource container as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/container/data_streams/","title":"Data streams","text":"

Implementation of a resource container for the data streams endpoint.

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams","title":"DataStreams(resources)","text":"

Bases: ResourceContainer

Implementation of the resource container for the data stream endpoint.

This resource container is a collection of data streams returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried DataStream. Furthermore, the resource container makes them accessible in a pythonic manner.
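
A usage sketch, assuming client is an initialized StreamPipesClient:

>>> streams = client.dataStreamApi.all()
>>> streams.to_dicts(use_source_names=True)  # keys named as in the backend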

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/data_streams/#streampipes.model.container.data_streams.DataStreams.to_pandas","title":"to_pandas()","text":"

Returns the resource container as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/container/resource_container/","title":"Resource container","text":"

General and abstract implementation for a resource container.

A resource container is a collection of resources returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried resources. Furthermore, the resource container makes them accessible in a pythonic manner.

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer","title":"ResourceContainer(resources)","text":"

Bases: ABC

General and abstract implementation for a resource container.

A resource container is a collection of resources returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried resources. Furthermore, the resource container makes them accessible in a pythonic manner.

PARAMETER DESCRIPTION resources

A list of resources to be contained in the ResourceContainer.

TYPE: List[Resource]

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.ResourceContainer.to_pandas","title":"to_pandas()","text":"

Returns the resource container as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.StreamPipesDataModelError","title":"StreamPipesDataModelError(validation_error)","text":"

Bases: Exception

A custom exception to be raised when a validation error occurs during the parsing of StreamPipes API responses.

PARAMETER DESCRIPTION validation_error

The validation error thrown by Pydantic during parsing.

TYPE: ValidationError

"},{"location":"reference/model/container/resource_container/#streampipes.model.container.resource_container.StreamPipesResourceContainerJSONError","title":"StreamPipesResourceContainerJSONError(container_name, json_string)","text":"

Bases: Exception

A custom exception to be raised when the returned JSON string does not match the structure of a resource container.

PARAMETER DESCRIPTION container_name

The class name of the resource container where the invalid data structure was detected.

TYPE: str

json_string

The JSON string that was attempted to be parsed.

TYPE: str

"},{"location":"reference/model/container/versions/","title":"Versions","text":"

Implementation of a resource container for the versions endpoint.

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions","title":"Versions(resources)","text":"

Bases: ResourceContainer

Implementation of the resource container for the versions endpoint.

This resource container is a collection of versions returned by the StreamPipes API. It is capable of parsing the response content directly into a list of queried Version. Furthermore, the resource container makes them accessible in a pythonic manner.

PARAMETER DESCRIPTION resources

A list of resources (Version) to be contained in the ResourceContainer.

TYPE: List[Resource]

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.from_json","title":"from_json(json_string) classmethod","text":"

Creates a ResourceContainer from the given JSON string.

PARAMETER DESCRIPTION json_string

The JSON string returned from the StreamPipes API.

TYPE: str

RETURNS DESCRIPTION container

Instance of the container derived from the JSON definition

TYPE: ResourceContainer

RAISES DESCRIPTION StreamPipesDataModelError

If a resource cannot be mapped to the corresponding Python data model.

StreamPipesResourceContainerJSONError

If JSON response cannot be parsed to a ResourceContainer.

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.to_dicts","title":"to_dicts(use_source_names=False)","text":"

Returns the contained resources as list of dictionaries.

PARAMETER DESCRIPTION use_source_names

Determines whether the field names are named in Python style (=False) or as originally named by StreamPipes (=True).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION dictionary_list

List of resources in dictionary representation. If use_source_names equals True the keys are named as in the StreamPipes backend.

TYPE: List[Dict[str, Any]]

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.to_json","title":"to_json()","text":"

Returns the resource container in the StreamPipes JSON representation.

RETURNS DESCRIPTION json_string: str

JSON representation of the resource container where key names are equal to keys used in the StreamPipes backend

"},{"location":"reference/model/container/versions/#streampipes.model.container.versions.Versions.to_pandas","title":"to_pandas()","text":"

Returns the resource container as a pandas DataFrame.

RETURNS DESCRIPTION resource_container_df

Representation of the resource container as pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/resource/data_lake_measure/","title":"Data lake measure","text":""},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure","title":"DataLakeMeasure","text":"

Bases: Resource

Implementation of a resource for data lake measures.

This resource defines the data model used by the resource container (model.container.DataLakeMeasures). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse, validate the API response, and to easily switch between the Python representation (both serialized and deserialized) and Java representation (serialized only).

"},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data lake measure to be used when creating a pandas Dataframe.

It excludes the following fields: element_id, event_schema, schema_version. Instead of the whole event schema, the number of event properties contained is returned under the column name num_event_properties.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_lake_measure/#streampipes.model.resource.data_lake_measure.DataLakeMeasure.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys are in Python representation or named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_series/","title":"Data series","text":""},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries","title":"DataSeries","text":"

Bases: Resource

Implementation of a resource for data series. This resource defines the data model used by its resource container (model.container.DataLakeMeasures). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse, validate the API response and to easily switch between the Python representation (both serialized and deserialized) and Java representation (serialized only).

Notes
This class will only exist temporarily in its current appearance since\nthere are some inconsistencies in the StreamPipes API.\n
"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data lake series to be used when creating a pandas Dataframe.

It contains only the \"header rows\" (the column names) and \"rows\" that contain the actual data.

RETURNS DESCRIPTION pandas_repr

Dictionary with the keys headers and rows

TYPE: dict[str, Any]

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.from_json","title":"from_json(json_string) classmethod","text":"

Creates an instance of DataSeries from a given JSON string.

This method is used by the resource container to parse the JSON response of the StreamPipes API. Currently, it only supports data lake series that consist of exactly one series of data.

PARAMETER DESCRIPTION json_string

The JSON string the data lake series should be created on.

TYPE: str

RETURNS DESCRIPTION DataSeries

Instance of DataSeries that is created based on the given JSON string.

RAISES DESCRIPTION StreamPipesUnsupportedDataLakeSeries

If the data lake series returned by the StreamPipes API cannot be parsed with the current version of the Python client.

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys are in Python representation or named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_series/#streampipes.model.resource.data_series.DataSeries.to_pandas","title":"to_pandas()","text":"

Returns the data lake series as a pandas DataFrame.

RETURNS DESCRIPTION pd

The data lake series in the form of a pandas DataFrame

TYPE: DataFrame

"},{"location":"reference/model/resource/data_stream/","title":"Data stream","text":""},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream","title":"DataStream(**kwargs)","text":"

Bases: Resource

Implementation of a resource for data streams.

This resource defines the data model used by the resource container (model.container.DataStreams). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse, validate the API response and to easily switch between the Python representation (both serialized and deserialized) and Java representation (serialized only).

"},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data stream to be used when creating a pandas Dataframe.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/data_stream/#streampipes.model.resource.data_stream.DataStream.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys are in Python representation or named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/exceptions/","title":"Exceptions","text":""},{"location":"reference/model/resource/exceptions/#streampipes.model.resource.exceptions.StreamPipesUnsupportedDataSeries","title":"StreamPipesUnsupportedDataSeries(reason=None)","text":"

Bases: Exception

Exception to be raised when the returned data lake series cannot be parsed with the current implementation of the resource.

"},{"location":"reference/model/resource/function_definition/","title":"Function definition","text":""},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition","title":"FunctionDefinition","text":"

Bases: Resource

Configuration for a StreamPipes Function.

This class maps to the FunctionDefinition class in the StreamPipes model. It contains all metadata that are required to register a function at the StreamPipes backend.

ATTRIBUTE DESCRIPTION consumed_streams

List of data streams the function is consuming from

TYPE: List[str]

function_id

identifier object of a StreamPipes function

TYPE: FunctionId

output_data_streams

Map of all output data streams added to the function definition

TYPE: Dict[str, DataStream]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.add_output_data_stream","title":"add_output_data_stream(data_stream)","text":"

Adds an output data stream to the function, which makes it possible to write data back to StreamPipes.

PARAMETER DESCRIPTION data_stream

The schema of the output data stream.

TYPE: DataStream

RETURNS DESCRIPTION self

Instance of the function definition that is extended by the provided DataStream

TYPE: FunctionDefinition
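
A hedged sketch; the stream name and attribute are hypothetical, and create_data_stream is used here to build the output stream.

>>> from streampipes.functions.utils.data_stream_generator import (
...     RuntimeType,
...     create_data_stream,
... )
>>> from streampipes.model.resource.function_definition import FunctionDefinition
>>> output_stream = create_data_stream(
...     name="predictions",
...     attributes={"prediction": RuntimeType.STRING.value},
... )
>>> function_definition = FunctionDefinition().add_output_data_stream(output_stream)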

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a function definition to be used when creating a pandas Dataframe.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.get_output_data_streams","title":"get_output_data_streams()","text":"

Get the output data streams of the function.

RETURNS DESCRIPTION output_streams

Dictionary with every known stream id and the related output stream.

TYPE: Dict[str, DataStream]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.get_output_stream_ids","title":"get_output_stream_ids()","text":"

Get the stream ids of the output data streams.

RETURNS DESCRIPTION output_stream_ids

List of all stream ids

TYPE: List[str]

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionDefinition.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys are in Python representation or named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]
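For illustration (a sketch; the concrete key names depend on the resource and are assumptions here):

backend_repr = function_definition.to_dict()  # keys named as in the backend, e.g. \"functionId\"
python_repr = function_definition.to_dict(use_source_names=False)  # Python naming, e.g. \"function_id\"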

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionId","title":"FunctionId","text":"

Bases: BasicModel

Identification object for a StreamPipes function.

Maps to the FunctionId class defined in the StreamPipes model.

ATTRIBUTE DESCRIPTION id

unique identifier of the function instance

TYPE: str

version

version of the corresponding function

TYPE: int
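A construction sketch (the values are illustrative; the live-data tutorial below creates a FunctionId the same way):

from streampipes.model.resource.function_definition import FunctionId

function_id = FunctionId(id=\"example-function\", version=1)  # illustrative id and version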

"},{"location":"reference/model/resource/function_definition/#streampipes.model.resource.function_definition.FunctionId.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/query_result/","title":"Query result","text":""},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult","title":"QueryResult","text":"

Bases: Resource

Implementation of a resource for a query result. This resource defines the data model used by its resource container (model.container.DataLakeMeasures). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse and validate the API response and to easily switch between the Python representation (both serialized and deserialized) and the Java representation (serialized only).

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of a data lake series to be used when creating a pandas DataFrame.

It contains only the \"header rows\" (the column names) and \"rows\" that contain the actual data.

RETURNS DESCRIPTION dict

Dictionary with the keys headers and rows

RAISES DESCRIPTION StreamPipesUnsupportedDataSeries

If the query result returned by the StreamPipes API cannot be converted to the pandas representation
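Conceptually (a sketch of how the returned dictionary relates to a DataFrame, not a public contract):

import pandas as pd

pandas_repr = query_result.convert_to_pandas_representation()  # `query_result` is an assumed QueryResult
df = pd.DataFrame(data=pandas_repr[\"rows\"], columns=pandas_repr[\"headers\"])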

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys use the Python naming or are named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/query_result/#streampipes.model.resource.query_result.QueryResult.to_pandas","title":"to_pandas()","text":"

Returns the data lake series as a pandas DataFrame.

RETURNS DESCRIPTION df

Pandas df containing the query result

TYPE: DataFrame
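In practice, this is usually called directly on the result of a data lake query, as shown in the tutorials below (client is assumed to be a configured StreamPipesClient):

df = client.dataLakeMeasureApi.get(identifier=\"flow-rate\").to_pandas()
df.head()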

"},{"location":"reference/model/resource/resource/","title":"Resource","text":"

General and abstract implementation for a resource.

A resource defines the data model that is used by a resource container (model.container.resourceContainer).

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource","title":"Resource","text":"

Bases: ABC, BasicModel

General and abstract implementation for a resource.

A resource defines the data model used by a resource container (model.container.resourceContainer). It inherits from Pydantic's BaseModel to get all its superpowers, which are used to parse and validate the API response and to easily switch between the Python representation (both serialized and deserialized) and the Java representation (serialized only).

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource.convert_to_pandas_representation","title":"convert_to_pandas_representation() abstractmethod","text":"

Returns a dictionary representation to be used when creating a pandas DataFrame.

RETURNS DESCRIPTION pandas_repr

Pandas representation of the resource as a dictionary, which is then used by the resource container to create a data frame from a collection of resources.

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/resource/#streampipes.model.resource.resource.Resource.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys use the Python naming or are named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/version/","title":"Version","text":""},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version","title":"Version","text":"

Bases: Resource

Metadata about the version of the connected StreamPipes server.

ATTRIBUTE DESCRIPTION backend_version

version of the StreamPipes backend the client is connected to

TYPE: str

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.Config","title":"Config","text":"

Configuration class for Pydantic. Defines alias generator to convert field names from camelCase (API) to snake_case (Python codebase).

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.convert_to_pandas_representation","title":"convert_to_pandas_representation()","text":"

Returns the dictionary representation of the version metadata to be used when creating a pandas DataFrame.

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.to_dict","title":"to_dict(use_source_names=True)","text":"

Returns the resource in dictionary representation.

PARAMETER DESCRIPTION use_source_names

Indicates whether the dictionary keys use the Python naming or are named as in the StreamPipes backend

DEFAULT: True

RETURNS DESCRIPTION resource

The resource as dictionary representation

TYPE: Dict[str, Any]

"},{"location":"reference/model/resource/version/#streampipes.model.resource.version.Version.validate_backend_version","title":"validate_backend_version(backend_version)","text":"

Validates the backend version of StreamPipes. Sets 'development' if none is returned, since this is the behavior of a StreamPipes backend running in development mode.

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/","title":"Introduction to StreamPipes Python","text":"In\u00a0[\u00a0]: Copied!
%pip install streampipes\n
%pip install streampipes

If you want the current development state, you can also execute:

In\u00a0[\u00a0]: Copied!
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python\n
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python

The corresponding documentation can be found here.

In\u00a0[\u00a0]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(\n        username=\"test@streampipes.apache.org\",\n        api_key=\"API-KEY\",\n    ),\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80\n)\n
config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials( username=\"test@streampipes.apache.org\", api_key=\"API-KEY\", ), host_address=\"localhost\", https_disabled=True, port=80 )

Please be aware that connecting to StreamPipes via an HTTPS connection is currently not supported by the Python client.

Providing secrets like the api_key as plaintext in the source code is an anti-pattern. This is why the StreamPipes client also supports passing the required secrets as environment variables. To do so, you must initialize the credential provider as follows:

In\u00a0[\u00a0]: Copied!
StreamPipesApiKeyCredentials()\n
StreamPipesApiKeyCredentials()

To ensure that the above code works, you must set the environment variables as expected. This can be done as follows:

In\u00a0[\u00a0]: Copied!
import os\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\"

Having the config ready, we can now initialize the actual client.

In\u00a0[\u00a0]: Copied!
client = StreamPipesClient(client_config=config)\n
client = StreamPipesClient(client_config=config)

That's already it. You can check if everything works out by using the following command:

In\u00a0[6]: Copied!
client.describe()\n
client.describe()
2023-02-24 17:05:49,398 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n2023-02-24 17:05:49,457 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n\nHi there!\nYou are connected to a StreamPipes instance running at http://localhost:80.\nThe following StreamPipes resources are available with this client:\n1x DataLakeMeasures\n1x DataStreams\n

This prints a short textual description of the connected StreamPipes instance to the console.

The created client instance serves as the central point of interaction with StreamPipes. You can invoke a variety of commands directly on this object.
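For example, the endpoints used throughout the following tutorials are all reached via this object:

client.dataLakeMeasureApi.all()  # metadata of all data lake measures
client.dataStreamApi.all()  # all available data streams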

Are you curious how you can actually get data out of StreamPipes and make use of it with Python? Then check out the next tutorial on extracting Data from the StreamPipes data lake.

Thanks for reading this introductory tutorial. We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#Introduction-to-StreamPipes-Python","title":"Introduction to StreamPipes Python\u00b6","text":""},{"location":"tutorials/1-introduction-to-streampipes-python-client/#Why-there-is-an-extra-Python-library-for-StreamPipes?","title":"Why there is an extra Python library for StreamPipes?\u00b6","text":"

Apache StreamPipes aims to enable non-technical users to connect and analyze IoT data streams. To achieve this, it provides an easy-to-use and convenient user interface that allows one to connect to an IoT data source and create some visual graphs within a few minutes. While this is the primary use case for Apache StreamPipes, it also offers significant value to those interested in data analysis or data science with IoT data, without the need to handle the complexities of extracting data from devices in a suitable format. In this scenario, StreamPipes helps you connect to your data source and extract the data for you. You can then make the data available outside StreamPipes by writing it to an external system, such as a database or Kafka. While this requires an additional component, you can also extract your data directly from StreamPipes programmatically using the StreamPipes API. For convenience, we also provide a StreamPipes client, available for both Java and Python. Specifically with StreamPipes Python, we want to address the amazing data analytics and data science community in Python and benefit from the great universe of Python libraries out there.

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#How-to-install-StreamPipes-Python?","title":"How to install StreamPipes Python?\u00b6","text":"

Simply use the following pip command:

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#How-to-prepare-the-tutorials","title":"How to prepare the tutorials\u00b6","text":"

In case you want to reproduce the first two tutorials exactly on your end, you need to create a simple pipeline in StreamPipes as demonstrated below.

"},{"location":"tutorials/1-introduction-to-streampipes-python-client/#How-to-configure-the-Python-client","title":"How to configure the Python client\u00b6","text":"

In order to access the resources available in StreamPipes, one must be able to authenticate against the backend. For this purpose, the client currently only supports authentication via an API token that can be generated via the StreamPipes UI, as you can see below.

Having generated the API token, one can directly start initializing a client instance as follows:

"},{"location":"tutorials/2-extracting-data-from-the-streampipes-data-lake/","title":"Extracting Data from the StreamPipes data lake","text":"In\u00a0[1]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
# if you want all necessary dependencies required for this tutorial to be installed,\n# you can simply execute the following command\n%pip install matplotlib streampipes\n
# if you want all necessary dependencies required for this tutorial to be installed, # you can simply execute the following command %pip install matplotlib streampipes In\u00a0[2]: Copied!
import os\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\" In\u00a0[3]: Copied!
config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80\n)\n
config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials(), host_address=\"localhost\", https_disabled=True, port=80 ) In\u00a0[4]: Copied!
client = StreamPipesClient(client_config=config)\n
client = StreamPipesClient(client_config=config)
2023-02-24 17:34:25,860 - streampipes.client.client - [INFO] - [client.py:128] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n

As a first step, we want to get an overview of all data available in the data lake. The data is stored as so-called measures, each of which refers to a data stream stored in the data lake. For this purpose, we use the all() method of the dataLakeMeasure endpoint.

In\u00a0[5]: Copied!
data_lake_measures = client.dataLakeMeasureApi.all()\n
data_lake_measures = client.dataLakeMeasureApi.all()
2023-02-24 17:34:25,929 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n

So let's see how many measures are available:

In\u00a0[6]: Copied!
len(data_lake_measures)\n
len(data_lake_measures) Out[6]:
2

All resources of the StreamPipes Python client support standard Python expressions, e.g., len() and indexing. If not, please let us know.

In\u00a0[7]: Copied!
data_lake_measures[-1]\n
data_lake_measures[-1] Out[7]:
DataLakeMeasure(element_id='3cb6b5e6f107452483d1fd2ccf4bf9f9', measure_name='test', timestamp_field='s0::timestamp', event_schema=EventSchema(event_properties=[EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:EiFnkL', label='Density', description='Denotes the current density of the fluid', runtime_name='density', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=5, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:ghSkQI', label='Mass Flow', description='Denotes the current mass flow in the sensor', runtime_name='mass_flow', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=2, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:cQAUry', label='Sensor ID', description='The ID of the sensor', runtime_name='sensorId', required=False, domain_properties=['https://streampipes.org/vocabulary/examples/watertank/v1/hasSensorId'], property_scope='DIMENSION_PROPERTY', index=1, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#string', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:pbPMyL', label='Sensor Fault Flags', description='Any fault flags of the sensors', runtime_name='sensor_fault_flags', required=False, domain_properties=['http://schema.org/Boolean'], property_scope='MEASUREMENT_PROPERTY', index=6, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#boolean', measurement_unit=None, value_specification=None), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:Qmayhw', label='Temperature', description='Denotes the current temperature in degrees celsius', runtime_name='temperature', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=4, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit='http://qudt.org/vocab/unit#DegreeCelsius', value_specification=ValueSpecification(class_name='org.apache.streampipes.model.schema.QuantitativeValue', element_id=None, min_value=0, max_value=100, step=0.1)), EventProperty(class_name='org.apache.streampipes.model.schema.EventPropertyPrimitive', element_id='sp:eventproperty:YQYhjd', label='Volume Flow', description='Denotes the current volume flow', runtime_name='volume_flow', required=False, domain_properties=['http://schema.org/Number'], property_scope='MEASUREMENT_PROPERTY', index=3, runtime_id=None, runtime_type='http://www.w3.org/2001/XMLSchema#float', measurement_unit=None, value_specification=None)]), pipeline_id=None, pipeline_name=None, pipeline_is_running=False, schema_version='1.1')

To get a more comprehensive overview, you can take a look at the pandas representation:

In\u00a0[8]: Copied!
display(data_lake_measures.to_pandas())\n
display(data_lake_measures.to_pandas()) measure_name timestamp_field pipeline_id pipeline_name pipeline_is_running num_event_properties 0 flow-rate s0::timestamp None None False 6 1 test s0::timestamp None None False 6

So far, we have only retrieved metadata about the available data lake measures. In the following, we will access the actual data of the measure flow-rate.

For this purpose, we will use the get() method of the dataLakeMeasure endpoint.

In\u00a0[9]: Copied!
flow_rate_measure = client.dataLakeMeasureApi.get(identifier=\"flow-rate\")\n
flow_rate_measure = client.dataLakeMeasureApi.get(identifier=\"flow-rate\")
2023-02-24 17:34:26,020 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n

For further processing, the easiest way is to turn the data lake measure into a pandas DataFrame.

In\u00a0[10]: Copied!
flow_rate_pd = flow_rate_measure.to_pandas()\n
flow_rate_pd = flow_rate_measure.to_pandas()

Let's see how many data points we got...

In\u00a0[11]: Copied!
len(flow_rate_pd)\n
len(flow_rate_pd) Out[11]:
1000

... and get a first overview

In\u00a0[12]: Copied!
flow_rate_pd.describe()\n
flow_rate_pd.describe() Out[12]: density mass_flow temperature volume_flow count 1000.000000 1000.000000 1000.000000 1000.000000 mean 45.560337 5.457014 45.480231 5.659558 std 3.201544 3.184959 3.132878 3.122437 min 40.007698 0.004867 40.000992 0.039422 25% 42.819497 2.654101 42.754623 3.021625 50% 45.679264 5.382355 45.435944 5.572553 75% 48.206881 8.183144 48.248473 8.338209 max 50.998310 10.986015 50.964909 10.998676

As a final step, we want to create a plot of both attributes.

In\u00a0[13]: Copied!
import matplotlib.pyplot as plt\nflow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"])\nplt.show()\n
import matplotlib.pyplot as plt flow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"]) plt.show()

For data lake measurements, the get() method is even more powerful than simply returning all the data for a given data lake measurement. We will look at a selection of its parameters below. The full list of supported parameters can be found in the docs. Let's start by referring to the graph we created above, where we used only two columns of our data lake measurement. If we know the relevant columns in advance, we can directly restrict the queried data to a subset of columns by using the columns parameter. columns takes a list of column names as a comma-separated string:

In\u00a0[14]: Copied!
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", columns=\"mass_flow,temperature\").to_pandas()\nflow_rate_pd\n
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", columns=\"mass_flow,temperature\").to_pandas() flow_rate_pd
2023-02-24 17:34:26,492 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n
Out[14]: timestamp mass_flow temperature 0 2023-02-24T16:19:41.472Z 3.309556 44.448483 1 2023-02-24T16:19:41.482Z 5.608580 40.322033 2 2023-02-24T16:19:41.493Z 7.692881 49.239639 3 2023-02-24T16:19:41.503Z 3.632898 49.933754 4 2023-02-24T16:19:41.513Z 0.711260 50.106617 ... ... ... ... 995 2023-02-24T16:19:52.927Z 1.740114 46.558231 996 2023-02-24T16:19:52.94Z 7.211723 48.048622 997 2023-02-24T16:19:52.952Z 7.770180 48.188026 998 2023-02-24T16:19:52.965Z 4.458602 48.280899 999 2023-02-24T16:19:52.977Z 2.592060 47.505951

1000 rows \u00d7 3 columns

By default, the client returns only the first one thousand records of a Data Lake measurement. This can be changed by passing a concrete value for the limit parameter:

In\u00a0[15]: Copied!
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", limit=10000).to_pandas()\nlen(flow_rate_pd)\n
flow_rate_pd = client.dataLakeMeasureApi.get(identifier=\"flow-rate\", limit=10000).to_pandas() len(flow_rate_pd)
2023-02-24 17:34:26,736 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n
Out[15]:
9528

If you want your data to be selected by time of occurrence rather than quantity, you can specify your time window by passing the start_date and end_date parameters:

In\u00a0[16]: Copied!
from datetime import datetime\nflow_rate_pd = client.dataLakeMeasureApi.get(\n    identifier=\"flow-rate\",\n    start_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=0),\n    end_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=1),\n    ).to_pandas()\nflow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"])\nplt.show()\n
from datetime import datetime flow_rate_pd = client.dataLakeMeasureApi.get( identifier=\"flow-rate\", start_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=0), end_date=datetime(year=2023, month=2, day=24, hour=17, minute=21, second=1), ).to_pandas() flow_rate_pd.plot(y=[\"mass_flow\", \"temperature\"]) plt.show()
2023-02-24 17:34:26,899 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:167] [_make_request] - Successfully retrieved all resources.\n

... from this point on, we leave all future processing of the data up to your creativity. Keep in mind: the general syntax used in this tutorial (all(), to_pandas(), get()) applies to all endpoints and associated resources of the StreamPipes Python client.

If you go further and create exciting stuff with data extracted from StreamPipes, please let us know. We are thrilled to see what you as a community will build with the provided client. Furthermore, don't hesitate to discuss feature requests to extend the current functionality with us.

For now, that's all about the StreamPipes client. Read the next tutorial (Getting live data from the StreamPipes data stream) if you are interested in making use of the powerful StreamPipes functions to interact with StreamPipes in an event-based manner.

How do you like this tutorial? We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/2-extracting-data-from-the-streampipes-data-lake/#Extracting-Data-from-the-StreamPipes-data-lake","title":"Extracting Data from the StreamPipes data lake\u00b6","text":"

In the first tutorial (Introduction to the StreamPipes Python client) we took the first steps with the StreamPipes Python client and learned how to set everything up. Now we are ready to get started and want to retrieve some data out of StreamPipes. In this tutorial, we'll focus on the StreamPipes Data Lake, the component where StreamPipes stores data internally. To get started, we'll use the client instance created in the first tutorial.

"},{"location":"tutorials/3-getting-live-data-from-the-streampipes-data-stream/","title":"Getting live data from the StreamPipes data stream","text":"

Note: So far, we have mainly developed support for StreamPipes functions using NATS as the messaging protocol. Consequently, this setup is the most thoroughly tested and should work flawlessly. Visit our first-steps page to see how to start StreamPipes accordingly. Nevertheless, you can also use the other brokers that are currently supported in StreamPipes Python. In case you observe any problems, please reach out to us and file an issue on GitHub.

In\u00a0[1]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
# You can install all required libraries for this tutorial with the following command\n%pip install matplotlib ipython streampipes\n
# You can install all required libraries for this tutorial with the following command %pip install matplotlib ipython streampipes In\u00a0[2]: Copied!
import os\n\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n\n# Use this if you work locally:\nos.environ[\"BROKER-HOST\"] = \"localhost\"  \nos.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker. If Kafka is not running on localhost, KAFKA_ADVERTISED_LISTENERS should be adjusted to the external address\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\" # Use this if you work locally: os.environ[\"BROKER-HOST\"] = \"localhost\" os.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker. If Kafka is not running on localhost, KAFKA_ADVERTISED_LISTENERS should be adjusted to the external address In\u00a0[3]: Copied!
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),\n    host_address=\"localhost\",\n    port=80,\n    https_disabled=True,\n)\nclient = StreamPipesClient(client_config=client_config)\n
client_config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials(), host_address=\"localhost\", port=80, https_disabled=True, ) client = StreamPipesClient(client_config=client_config)
2022-12-14 10:43:37,664 - streampipes.client.client - [INFO] - [client.py:127] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n

Now we can have a look at the available data streams. We can choose one or more streams to receive data from and copy their element_id.

In\u00a0[4]: Copied!
client.dataStreamApi.all().to_pandas()\n
client.dataStreamApi.all().to_pandas()
2022-12-14 10:43:39,944 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:153] [_make_request] - Successfully retrieved all resources.\n
Out[4]: element_id name description icon_url app_id includes_assets includes_locales internally_managed measurement_object index ... uri dom num_transport_protocols num_measurement_capability num_application_links num_included_assets num_connected_to num_category num_event_properties num_included_locales 0 urn:streampipes.apache.org:eventstream:HHoidJ Test2 None None False False True None 0 ... urn:streampipes.apache.org:eventstream:HHoidJ None 1 0 0 0 0 0 7 0 1 urn:streampipes.apache.org:eventstream:uPDKLI Test None None False False True None 0 ... urn:streampipes.apache.org:eventstream:uPDKLI None 1 0 0 0 0 0 7 0

2 rows \u00d7 21 columns

Next we can create a StreamPipesFunction. For this, we need to implement the following three methods:

  • onServiceStarted is called when the function gets started. There you can use the given meta information of the FunctionContext to initialize the function.
  • onEvent is called whenever a new event arrives. The event contains the live data, and you can use the streamId to identify a stream if the function is connected to multiple data streams.
  • onServiceStopped is called when the function gets stopped.

For this tutorial we just create a function that saves every new event in a pandas DataFrame and plots the first column of the DataFrame when the function gets stopped.

(If you want to use the same structure as in Java, you can override the getFunctionId and requiredStreamIds methods instead of using the FunctionDefinition.)

In\u00a0[5]: Copied!
from typing import Dict, Any\nimport pandas as pd\nfrom datetime import datetime\nimport matplotlib.pyplot as plt\nfrom streampipes.functions.function_handler import FunctionHandler\nfrom streampipes.functions.registration import Registration\nfrom streampipes.functions.streampipes_function import StreamPipesFunction\nfrom streampipes.functions.utils.function_context import FunctionContext\nfrom streampipes.model.resource.function_definition import FunctionDefinition, FunctionId\n\nclass ExampleFunction(StreamPipesFunction):\n    def __init__(self, function_definition: FunctionDefinition) -> None:\n        super().__init__(function_definition)\n        # Create the Dataframe to save the live data\n        self.df = pd.DataFrame()\n\n    def onServiceStarted(self, context: FunctionContext):\n        # Get the name of the timestamp field\n        for event_property in context.schema[context.streams[0]].event_schema.event_properties:\n            if event_property.property_scope == \"HEADER_PROPERTY\":\n                self.timestamp = event_property.runtime_name\n\n    def onEvent(self, event: Dict[str, Any], streamId: str):\n        # Convert the unix timestamp to datetime\n        event[self.timestamp] = datetime.fromtimestamp(event[self.timestamp] / 1000)\n        # Add every value of the event to the DataFrame\n        self.df = pd.concat(\n            [self.df, pd.DataFrame({key: [event[key]] for key in event.keys()}).set_index(self.timestamp)]\n        )\n\n    def onServiceStopped(self):\n        # Plot the first column of the Dataframe\n        plt.figure(figsize=(10, 5))\n        plt.xlabel(self.timestamp)\n        plt.ylabel(self.df.columns[0])\n        plt.plot(self.df.iloc[:, 0])\n        plt.show()\n
from typing import Dict, Any import pandas as pd from datetime import datetime import matplotlib.pyplot as plt from streampipes.functions.function_handler import FunctionHandler from streampipes.functions.registration import Registration from streampipes.functions.streampipes_function import StreamPipesFunction from streampipes.functions.utils.function_context import FunctionContext from streampipes.model.resource.function_definition import FunctionDefinition, FunctionId class ExampleFunction(StreamPipesFunction): def __init__(self, function_definition: FunctionDefinition) -> None: super().__init__(function_definition) # Create the Dataframe to save the live data self.df = pd.DataFrame() def onServiceStarted(self, context: FunctionContext): # Get the name of the timestamp field for event_property in context.schema[context.streams[0]].event_schema.event_properties: if event_property.property_scope == \"HEADER_PROPERTY\": self.timestamp = event_property.runtime_name def onEvent(self, event: Dict[str, Any], streamId: str): # Convert the unix timestamp to datetime event[self.timestamp] = datetime.fromtimestamp(event[self.timestamp] / 1000) # Add every value of the event to the DataFrame self.df = pd.concat( [self.df, pd.DataFrame({key: [event[key]] for key in event.keys()}).set_index(self.timestamp)] ) def onServiceStopped(self): # Plot the first column of the Dataframe plt.figure(figsize=(10, 5)) plt.xlabel(self.timestamp) plt.ylabel(self.df.columns[0]) plt.plot(self.df.iloc[:, 0]) plt.show()

Now we can start the function. First we create an instance of the ExampleFunction and insert the element_id of the stream whose data we want to consume. Then we register this function and start all functions by initializing the FunctionHandler. (It's also possible to register multiple functions with .register(...).register(...).)

In\u00a0[6]: Copied!
example_function = ExampleFunction(\n    FunctionDefinition(\n        function_id=FunctionId(id=\"example-function\"),\n        consumed_streams=[\"urn:streampipes.apache.org:eventstream:uPDKLI\"]\n    )\n)\n\nregistration = Registration()\nregistration.register(example_function)\n\nfunction_handler = FunctionHandler(registration, client)\nfunction_handler.initializeFunctions()\n
example_function = ExampleFunction( FunctionDefinition( function_id=FunctionId(id=\"example-function\"), consumed_streams=[\"urn:streampipes.apache.org:eventstream:uPDKLI\"] ) ) registration = Registration() registration.register(example_function) function_handler = FunctionHandler(registration, client) function_handler.initializeFunctions()
2022-12-14 10:43:42,810 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:153] [_make_request] - Successfully retrieved all resources.\n2022-12-14 10:43:42,812 - streampipes.functions.function_handler - [INFO] - [function_handler.py:82] [initializeFunctions] - Using NatsBroker for <__main__.ExampleFunction object at 0x000001F2EF298D90>\n

The DataFrame of the ExampleFunction gets updated whenever a new event arrives. Let's test this by executing the cell below.

In\u00a0[7]: Copied!
import asyncio\nfrom asyncio.exceptions import CancelledError\nfrom IPython.display import clear_output\n\nwhile True:\n    clear_output(wait=True)\n    display(example_function.df)\n    try:\n        await asyncio.sleep(1)\n    except CancelledError:\n        break\n
import asyncio from asyncio.exceptions import CancelledError from IPython.display import clear_output while True: clear_output(wait=True) display(example_function.df) try: await asyncio.sleep(1) except CancelledError: break mass_flow density volume_flow sensor_fault_flags temperature sensorId timestamp 2022-12-14 10:43:43.357 10.955496 47.546290 1.001985 False 44.993413 flowrate02 2022-12-14 10:43:44.371 6.499040 44.392069 2.034402 False 41.232352 flowrate02 2022-12-14 10:43:45.382 10.168300 41.192146 9.724287 False 46.812779 flowrate02 2022-12-14 10:43:46.395 10.849059 50.086308 5.832691 False 45.860412 flowrate02 2022-12-14 10:43:47.410 3.081855 47.254246 8.860531 False 50.505801 flowrate02 ... ... ... ... ... ... ... 2022-12-14 10:44:43.920 1.803572 41.978894 10.294002 False 47.820239 flowrate02 2022-12-14 10:44:44.932 1.967062 42.212883 3.237440 False 49.047258 flowrate02 2022-12-14 10:44:45.934 4.457819 47.561256 0.315024 False 40.223413 flowrate02 2022-12-14 10:44:46.949 8.745343 46.346891 7.439090 False 41.982529 flowrate02 2022-12-14 10:44:47.950 5.828744 47.679720 6.307405 False 42.100354 flowrate02

65 rows \u00d7 6 columns

The while loop just displays the DataFrame every second until the cell is stopped. We could achieve the same result manually by executing example_function.df repeatedly.

You can stop the functions whenever you want by executing the command below.

In\u00a0[\u00a0]: Copied!
function_handler.disconnect()\n
function_handler.disconnect()
2022-12-14 10:44:53,309 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:67] [disconnect] - Stopped connection to stream: urn:streampipes.apache.org:eventstream:uPDKLI\n

That's enough for this tutorial. Now you can try to write your own StreamPipesFunction. All you need to do is create a new class, implement the 4 required methods, and register the function.

Want to see more exciting use cases you can achieve with StreamPipes functions in Python? Then don't hesitate and jump to our next tutorial on applying online machine learning algorithms to StreamPipes data streams with River.

How do you like this tutorial? We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/3-getting-live-data-from-the-streampipes-data-stream/#Getting-live-data-from-the-StreamPipes-data-stream","title":"Getting live data from the StreamPipes data stream\u00b6","text":"

In the last tutorial (Extracting Data from the StreamPipes data lake) we learned how to extract the stored data from a StreamPipes data lake. This tutorial is about the StreamPipes data stream and shows how to get live data from StreamPipes into Python. To do so, we first create the client instance as before.

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/","title":"Using Online Machine Learning on a StreamPipes data stream","text":"In\u00a0[1]: Copied!
from streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n
from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials In\u00a0[\u00a0]: Copied!
# you can install all required dependencies for this tutorial by executing the following command\n%pip install river streampipes\n
# you can install all required dependencies for this tutorial by executing the following command %pip install river streampipes In\u00a0[2]: Copied!
import os\n\nos.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\"\nos.environ[\"SP_API_KEY\"] = \"XXX\"\n\n# Use this if you work locally:\nos.environ[\"BROKER-HOST\"] = \"localhost\"  \nos.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker\n
import os os.environ[\"SP_USERNAME\"] = \"admin@streampipes.apache.org\" os.environ[\"SP_API_KEY\"] = \"XXX\" # Use this if you work locally: os.environ[\"BROKER-HOST\"] = \"localhost\" os.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker In\u00a0[3]: Copied!
client_config = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(),\n    host_address=\"localhost\",\n    port=80,\n    https_disabled=True,\n)\nclient = StreamPipesClient(client_config=client_config)\n
client_config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials(), host_address=\"localhost\", port=80, https_disabled=True, ) client = StreamPipesClient(client_config=client_config)
2023-01-27 16:04:24,784 - streampipes.client.client - [INFO] - [client.py:128] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n
In\u00a0[4]: Copied!
client.dataStreamApi.all().to_pandas()\n
client.dataStreamApi.all().to_pandas()
2023-01-27 16:04:28,212 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:163] [_make_request] - Successfully retrieved all resources.\n
Out[4]: element_id name description icon_url app_id includes_assets includes_locales internally_managed measurement_object index ... dom rev num_transport_protocols num_measurement_capability num_application_links num_included_assets num_connected_to num_category num_event_properties num_included_locales 0 sp:spdatastream:xboBFK Test None None False False True None 0 ... None 5-558c861debc745e1ebae29a266a8bdb9 1 0 0 0 0 0 7 0 1 urn:streampipes.apache.org:eventstream:Wgyrse Test File None None False False True None 0 ... None 4-66548b6b84287011b7cec0876ef82baf 1 0 0 0 0 0 2 0

2 rows \u00d7 22 columns

In\u00a0[5]: Copied!
from river import cluster, compose, preprocessing\nfrom streampipes.function_zoo.river_function import OnlineML\nfrom streampipes.functions.utils.data_stream_generator import RuntimeType\n\nk_means = compose.Pipeline(\n    (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")),\n    (\"scale\", preprocessing.StandardScaler()),\n    (\"k_means\", cluster.KMeans(n_clusters=2)),\n)\n\nclustering = OnlineML(\n    client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=k_means, prediction_type=RuntimeType.INTEGER.value\n)\nclustering.start()\n
from river import cluster, compose, preprocessing from streampipes.function_zoo.river_function import OnlineML from streampipes.functions.utils.data_stream_generator import RuntimeType k_means = compose.Pipeline( (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")), (\"scale\", preprocessing.StandardScaler()), (\"k_means\", cluster.KMeans(n_clusters=2)), ) clustering = OnlineML( client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=k_means, prediction_type=RuntimeType.INTEGER.value ) clustering.start()
2023-01-27 16:04:35,599 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:163] [_make_request] - Successfully retrieved all resources.\n2023-01-27 16:04:35,599 - streampipes.functions.function_handler - [INFO] - [function_handler.py:64] [initializeFunctions] - Create output data stream \"sp:spdatastream:cwKPoo\" for the function \"65cf8b86-bcdf-433e-a1c7-3e920eab55d0\"\n2023-01-27 16:04:37,766 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:163] [_make_request] - Successfully retrieved all resources.\n2023-01-27 16:04:37,767 - streampipes.functions.function_handler - [INFO] - [function_handler.py:78] [initializeFunctions] - Using NatsBroker for RiverFunction\n
2023-01-27 16:04:37,791 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:48] [_makeConnection] - Connected to NATS at localhost:4222\n2023-01-27 16:04:37,791 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:48] [_makeConnection] - Connected to NATS at localhost:4222\n2023-01-27 16:04:37,792 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:58] [createSubscription] - Subscribed to stream: sp:spdatastream:xboBFK\n
In\u00a0[6]: Copied!
clustering.set_learning(False)\n
clustering.set_learning(False) In\u00a0[\u00a0]: Copied!
clustering.stop()\n
clustering.stop()
2023-01-27 16:04:57,303 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:82] [disconnect] - Stopped connection to stream: sp:spdatastream:xboBFK\n2023-01-27 16:04:57,304 - streampipes.functions.broker.nats_broker - [INFO] - [nats_broker.py:82] [disconnect] - Stopped connection to stream: sp:spdatastream:cwKPoo\n
In\u00a0[\u00a0]: Copied!
import pickle\nfrom river import compose, tree\nfrom streampipes.function_zoo.river_function import OnlineML\nfrom streampipes.functions.utils.data_stream_generator import RuntimeType\n\nhoeffding_tree = compose.Pipeline(\n    (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")),\n    (\"hoeffding_tree\", tree.HoeffdingTreeRegressor(grace_period=5)),\n)\n\n\ndef draw_tree(self, event, streamId):\n    \"\"\"Draw the tree and save the image.\"\"\"\n    if self.learning:\n        if self.model[1].n_nodes != None:\n            self.model[1].draw().render(\"hoeffding_tree\", format=\"png\", cleanup=True)\n\n\ndef save_model(self):\n    \"\"\"Save the trained model.\"\"\"\n    with open(\"hoeffding_tree.pkl\", \"wb\") as f:\n        pickle.dump(self.model, f)\n\n\nregressor = OnlineML(\n    client=client,\n    stream_ids=[\"sp:spdatastream:xboBFK\"],\n    model=hoeffding_tree,\n    prediction_type=RuntimeType.FLOAT.value,\n    supervised=True,\n    target_label=\"temperature\",\n    on_event=draw_tree,\n    on_stop=save_model,\n)\nregressor.start()\n
import pickle from river import compose, tree from streampipes.function_zoo.river_function import OnlineML from streampipes.functions.utils.data_stream_generator import RuntimeType hoeffding_tree = compose.Pipeline( (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")), (\"hoeffding_tree\", tree.HoeffdingTreeRegressor(grace_period=5)), ) def draw_tree(self, event, streamId): \"\"\"Draw the tree and save the image.\"\"\" if self.learning: if self.model[1].n_nodes != None: self.model[1].draw().render(\"hoeffding_tree\", format=\"png\", cleanup=True) def save_model(self): \"\"\"Save the trained model.\"\"\" with open(\"hoeffding_tree.pkl\", \"wb\") as f: pickle.dump(self.model, f) regressor = OnlineML( client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=hoeffding_tree, prediction_type=RuntimeType.FLOAT.value, supervised=True, target_label=\"temperature\", on_event=draw_tree, on_stop=save_model, ) regressor.start() In\u00a0[9]: Copied!
regressor.set_learning(False)\n
regressor.set_learning(False) In\u00a0[\u00a0]: Copied!
regressor.stop()\n
regressor.stop() In\u00a0[\u00a0]: Copied!
import pickle\nfrom river import compose, tree\nfrom streampipes.function_zoo.river_function import OnlineML\nfrom streampipes.functions.utils.data_stream_generator import RuntimeType\n\ndecision_tree = compose.Pipeline(\n    (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")),\n    (\"decision_tree\", tree.ExtremelyFastDecisionTreeClassifier(grace_period=5)),\n)\n\n\ndef draw_tree(self, event, streamId):\n    \"\"\"Draw the tree and save the image.\"\"\"\n    if self.learning:\n        if self.model[1].n_nodes != None:\n            self.model[1].draw().render(\"decicion_tree\", format=\"png\", cleanup=True)\n\n\ndef save_model(self):\n    \"\"\"Save the trained model.\"\"\"\n    with open(\"decision_tree.pkl\", \"wb\") as f:\n        pickle.dump(self.model, f)\n\n\nclassifier = OnlineML(\n    client=client,\n    stream_ids=[\"sp:spdatastream:xboBFK\"],\n    model=decision_tree,\n    prediction_type=RuntimeType.BOOLEAN.value,\n    supervised=True,\n    target_label=\"sensor_fault_flags\",\n    on_event=draw_tree,\n    on_stop=save_model,\n)\nclassifier.start()\n
import pickle from river import compose, tree from streampipes.function_zoo.river_function import OnlineML from streampipes.functions.utils.data_stream_generator import RuntimeType decision_tree = compose.Pipeline( (\"drop_features\", compose.Discard(\"sensorId\", \"timestamp\")), (\"decision_tree\", tree.ExtremelyFastDecisionTreeClassifier(grace_period=5)), ) def draw_tree(self, event, streamId): \"\"\"Draw the tree and save the image.\"\"\" if self.learning: if self.model[1].n_nodes != None: self.model[1].draw().render(\"decicion_tree\", format=\"png\", cleanup=True) def save_model(self): \"\"\"Save the trained model.\"\"\" with open(\"decision_tree.pkl\", \"wb\") as f: pickle.dump(self.model, f) classifier = OnlineML( client=client, stream_ids=[\"sp:spdatastream:xboBFK\"], model=decision_tree, prediction_type=RuntimeType.BOOLEAN.value, supervised=True, target_label=\"sensor_fault_flags\", on_event=draw_tree, on_stop=save_model, ) classifier.start() In\u00a0[12]: Copied!
classifier.set_learning(False)\n
classifier.set_learning(False) In\u00a0[\u00a0]: Copied!
classifier.stop()\n
classifier.stop()

That's already it! Isn't it truly easy to apply Online ML with StreamPipes and River? Please go ahead and apply it to your own use cases. We would be happy to hear about them!

Want to see more exciting use cases you can achieve with StreamPipes functions in Python? Then don\u2019t hesitate and jump to our next tutorial on using interoperable machine learning algorithm models with StreamPipes Python and ONNX.

How do you like this tutorial? We hope you like it and would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to them all, we promise!

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#Using-Online-Machine-Learning-on-a-StreamPipes-data-stream","title":"Using Online Machine Learning on a StreamPipes data stream\u00b6","text":"

The last tutorial (Getting live data from the StreamPipes data stream) showed how we can connect to a data stream, and it would be possible to use Online Machine Learning with this approach and train a model with the incoming events in the onEvent method. However, the StreamPipes client also provides an easier way to do this, using the River library for Online Machine Learning. We will have a look at this now.

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#How-to-use-Online-Machine-Learning-with-StreamPipes","title":"How to use Online Machine Learning with StreamPipes\u00b6","text":"

After we configured the client as usual, we can start with the new part. The approach is straightforward, and you can start with the ML part in just 3 steps:

  1. Create a pipeline with River and insert the preprocessing steps and model of your choice.
  2. Configure the OnlineML wrapper to fit to your model and insert the client and required data stream ids.
  3. Start the wrapper and let the learning begin.

A StreamPipesFunction is then started, which trains the model for each new event. It also creates an output data stream which sends the model's predictions back to StreamPipes. This output stream can be seen when creating a new pipeline and can be used like every other data source. So you can use it in a pipeline and save the predictions in a Data Lake. You can also stop and start the training with the set_learning method. To stop the whole function, use the stop method, and if you want to delete the output stream entirely, you can go to the Pipeline Element Installer in StreamPipes and uninstall it.

Now let's take a look at some examples. If you want to execute the examples below you have to create an adapter for the Machine Data Simulator, select the flowrate sensor and insert the stream id of this stream.

"},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#KMeans","title":"KMeans\u00b6","text":""},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#HoeffdingTreeRegressor","title":"HoeffdingTreeRegressor\u00b6","text":""},{"location":"tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/#DecisionTreeClassifier","title":"DecisionTreeClassifier\u00b6","text":""},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/","title":"Applying Interoperable Machine Learning in StreamPipes","text":"In\u00a0[\u00a0]: Copied!
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python\n%pip install scikit-learn==1.4.0 skl2onnx==1.16.0 onnxruntime==1.17.1 matplotlib==3.8.3\n
%pip install git+https://github.com/apache/streampipes.git#subdirectory=streampipes-client-python %pip install scikit-learn==1.4.0 skl2onnx==1.16.0 onnxruntime==1.17.1 matplotlib==3.8.3 In\u00a0[8]: Copied!
import os\nfrom streampipes.client import StreamPipesClient\nfrom streampipes.client.config import StreamPipesClientConfig\nfrom streampipes.client.credential_provider import StreamPipesApiKeyCredentials\n\nos.environ[\"BROKER-HOST\"] = \"localhost\"\nos.environ[\"KAFKA-PORT\"] = \"9094\"  # When using Kafka as message broker\n\nconfig = StreamPipesClientConfig(\n    credential_provider=StreamPipesApiKeyCredentials(\n        username=\"admin@streampipes.apache.org\",\n        api_key=\"TOKEN\",\n    ),\n    host_address=\"localhost\",\n    https_disabled=True,\n    port=80\n)\n\nclient = StreamPipesClient(client_config=config)\n
import os from streampipes.client import StreamPipesClient from streampipes.client.config import StreamPipesClientConfig from streampipes.client.credential_provider import StreamPipesApiKeyCredentials os.environ[\"BROKER-HOST\"] = \"localhost\" os.environ[\"KAFKA-PORT\"] = \"9094\" # When using Kafka as message broker config = StreamPipesClientConfig( credential_provider=StreamPipesApiKeyCredentials( username=\"admin@streampipes.apache.org\", api_key=\"TOKEN\", ), host_address=\"localhost\", https_disabled=True, port=80 ) client = StreamPipesClient(client_config=config)
2024-03-26 10:21:38,538 - streampipes.client.client - [INFO] - [client.py:198] [_set_up_logging] - Logging successfully initialized with logging level INFO.\n2024-03-26 10:21:38,632 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.\n2024-03-26 10:21:38,634 - streampipes.client.client - [INFO] - [client.py:171] [_get_server_version] - The StreamPipes version was successfully retrieved from the backend: 0.95.0. By means of that, authentication via the provided credentials is also tested successfully.\n

The main objective of this tutorial is to demonstrate how to make predictions with an existing and pre-trained ML model using a StreamPipes function and ONNX. Therefore, you can skip the following sections on use case and model training if you already have an existing ONNX model and are only interested in applying it using StreamPipes.

In\u00a0[9]: Copied!
flowrate_df = client.dataLakeMeasureApi.get(\"flow-rate\").to_pandas()\nX = flowrate_df[\"volume_flow\"].values.reshape(-1, 1).astype(\"float32\")\n
flowrate_df = client.dataLakeMeasureApi.get(\"flow-rate\").to_pandas() X = flowrate_df[\"volume_flow\"].values.reshape(-1, 1).astype(\"float32\")
2024-03-26 10:21:48,582 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.\n

Let's fit the model to the data:

In\u00a0[14]: Copied!
from sklearn.ensemble import IsolationForest\n\nmodel = IsolationForest(contamination=0.01)\nmodel.fit(X)\n
from sklearn.ensemble import IsolationForest model = IsolationForest(contamination=0.01) model.fit(X) Out[14]:
IsolationForest(contamination=0.01)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.\u00a0\u00a0IsolationForest?Documentation for IsolationForestiFitted
IsolationForest(contamination=0.01)

The contamination parameter models the proportion of outliers in the data. See the scikit-learn documentation for more information.

Before we convert the model to an ONNX representation, let's do a quick visual analysis of the model results:

In\u00a0[15]: Copied!
import matplotlib.pyplot as plt\n\nflowrate_df[\"anomaly\"] = model.predict(X)\n\nfig, ax = plt.subplots(figsize=(10, 6))\nanomalies = flowrate_df.loc[flowrate_df[\"anomaly\"] == -1, [\"volume_flow\"]]\nax.plot(flowrate_df.index, flowrate_df['volume_flow'], color='black', label='volume_flow')\nax.scatter(anomalies.index, anomalies['volume_flow'], color='red', label='Anomaly')\nplt.legend()\nplt.show()\n
import matplotlib.pyplot as plt flowrate_df[\"anomaly\"] = model.predict(X) fig, ax = plt.subplots(figsize=(10, 6)) anomalies = flowrate_df.loc[flowrate_df[\"anomaly\"] == -1, [\"volume_flow\"]] ax.plot(flowrate_df.index, flowrate_df['volume_flow'], color='black', label='volume_flow') ax.scatter(anomalies.index, anomalies['volume_flow'], color='red', label='Anomaly') plt.legend() plt.show()

Okay, that looks quite reasonable, so let's convert the model to an ONNX representation so that we can make use of it later.

In\u00a0[16]: Copied!
from onnxconverter_common import FloatTensorType\nfrom skl2onnx import to_onnx\n\nmodel_onnx = to_onnx(\n    model,\n    initial_types=[('input', FloatTensorType([None, X.shape[1]]))],\n    target_opset={'ai.onnx.ml': 3, 'ai.onnx': 15, '': 15}\n)\n\nwith open(\"isolation_forest.onnx\", \"wb\") as f:\n    f.write(model_onnx.SerializeToString())\n
In [23]:
import numpy as np
import onnxruntime as rt

from streampipes.functions.broker.broker_handler import get_broker_description
from streampipes.functions.streampipes_function import StreamPipesFunction
from streampipes.functions.utils.data_stream_generator import create_data_stream, RuntimeType
from streampipes.functions.utils.function_context import FunctionContext
from streampipes.model.resource import FunctionDefinition, DataStream

from typing import Dict, Any, List


class ONNXFunction(StreamPipesFunction):

    def __init__(self, feature_names: list[str], input_stream: DataStream):
        # Output data stream that carries the predictions back to StreamPipes;
        # a timestamp attribute is added automatically.
        output_stream = create_data_stream(
            name="flowrate-prediction",
            attributes={
                "is_anomaly": RuntimeType.BOOLEAN.value
            },
            broker=get_broker_description(input_stream)
        )

        # Register the output stream at the function definition,
        # which is then passed to the parent class.
        function_definition = FunctionDefinition(
            consumed_streams=[input_stream.element_id]
        ).add_output_data_stream(output_stream)

        self.feature_names = feature_names
        self.input_name = None
        self.output_name = None
        self.session = None

        super().__init__(function_definition=function_definition)

    def onServiceStarted(self, context: FunctionContext) -> None:
        # Set up the ONNX runtime session once the function is started.
        self.session = rt.InferenceSession(
            path_or_bytes="isolation_forest.onnx",
            providers=rt.get_available_providers(),
        )
        self.input_name = self.session.get_inputs()[0].name
        self.output_name = self.session.get_outputs()[0].name

    def onEvent(self, event: Dict[str, Any], streamId: str) -> None:
        # Extract the feature values from the incoming event ...
        feature_vector = []
        for feature in self.feature_names:
            feature_vector.append(event[feature])

        # ... and pass them to the ONNX runtime session for inference.
        prediction = self.session.run(
            [self.output_name],
            {self.input_name: np.expand_dims(np.array(feature_vector), axis=0).astype("float32")}
        )[0]

        # The Isolation Forest signals an anomaly with the label -1.
        output = {
            "is_anomaly": int(prediction[0]) == -1
        }

        self.add_output(
            stream_id=self.function_definition.get_output_stream_ids()[0],
            event=output
        )

    def onServiceStopped(self) -> None:
        pass

Let's dive a little deeper into the different parts of the function:

  • __init__: First, we need to take care of the data stream that is required to send the predictions from our function to StreamPipes. Thus, we create a dedicated output data stream and provide it with the attributes our output events will consist of (a timestamp attribute is always added automatically). This output data stream needs to be registered at the function definition, which is then passed to the parent class. Lastly, we define some instance variables that are mainly required for the ONNX runtime.

  • onServiceStarted: Here we prepare the ONNX runtime session by creating an InferenceSession and retrieving the corresponding configuration parameters.

  • onEvent: Following the parameter names specified by self.feature_names, we extract all feature values from the current event. Subsequently, the corresponding feature vector is transmitted to the ONNX runtime session. The resulting prediction is then converted into our output event, where a value of -1 signifies an anomaly. Finally, the generated output event is forwarded to StreamPipes.

Having the function code in place, we can start the function with the following:

In [25]:
from streampipes.functions.registration import Registration
from streampipes.functions.function_handler import FunctionHandler

stream = [
    stream
    for stream
    in client.dataStreamApi.all()
    if stream.name == "flow-rate"
][0]

function = ONNXFunction(
    feature_names=["volume_flow"],
    input_stream=stream
)

registration = Registration()
registration.register(function)
function_handler = FunctionHandler(registration, client)
function_handler.initializeFunctions()
2024-03-26 12:39:50,443 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.
2024-03-26 12:39:50,502 - streampipes.functions.function_handler - [INFO] - [function_handler.py:76] [initializeFunctions] - The data stream could not be created.
2024-03-26 12:39:50,503 - streampipes.functions.function_handler - [INFO] - [function_handler.py:78] [initializeFunctions] - This is due to the fact that this data stream already exists. Continuing with the existing data stream.
2024-03-26 12:39:50,503 - streampipes.functions.function_handler - [INFO] - [function_handler.py:84] [initializeFunctions] - Using output data stream 'sp:spdatastream:flowrate-prediction' for function '7c06fa31-9534-4f91-9c50-b7a3607ec3dc'
2024-03-26 12:39:50,548 - streampipes.endpoint.endpoint - [INFO] - [endpoint.py:164] [_make_request] - Successfully retrieved all resources.
2024-03-26 12:39:50,549 - streampipes.functions.function_handler - [INFO] - [function_handler.py:100] [initializeFunctions] - Using KafkaConsumer for ONNXFunction

We can now access the live values of the prediction in the StreamPipes UI, e.g., in the pipeline editor.
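Once you are done experimenting, the function can be stopped again via the function handler. Below is a minimal sketch, assuming the force_stop_functions method of the FunctionHandler as used in our other tutorials:

# Stop all registered functions and close the broker connections.
function_handler.force_stop_functions()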

That's already it. We hope this tutorial serves as an illustration of how ML models can be utilized in StreamPipes with the help of ONNX.

How did you like this tutorial? We would love to receive some feedback from you. Just go to our GitHub discussion page and let us know your impression. We'll read and react to all of it, we promise!

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Applying-Interoperable-Machine-Learning-in-StreamPipes","title":"Applying Interoperable Machine Learning in StreamPipes\u00b6","text":"

The last tutorial (Using Online Machine Learning on a StreamPipes data stream) demonstrated how patterns in streaming data can be learned online. In contrast, this tutorial demonstrates how one can apply a pre-trained machine learning (ML) model to a StreamPipes data stream by making use of ONNX. We will show how StreamPipes can be used for both extracting historical data for training purposes and running inference on live data with a pre-trained model.

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Preparation","title":"Preparation\u00b6","text":"

The following lines configure the client and establish a connection to the StreamPipes instance. If you're not familiar with it or anything is unclear, please have a look at our first tutorial.
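For reference, here is a minimal connection sketch (our own addition; the host address, port, and API key are placeholders that you need to adapt to your own StreamPipes instance):

from streampipes.client import StreamPipesClient
from streampipes.client.config import StreamPipesClientConfig
from streampipes.client.credential_provider import StreamPipesApiKeyCredentials

config = StreamPipesClientConfig(
    credential_provider=StreamPipesApiKeyCredentials(
        username="test@streampipes.apache.org",  # placeholder user
        api_key="YOUR-API-KEY",                  # placeholder API key
    ),
    host_address="localhost",
    https_disabled=True,
    port=80,
)
client = StreamPipesClient(client_config=config)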

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Machine-Learning-Use-Case","title":"Machine Learning Use Case\u00b6","text":"

In this tutorial, we will use data generated by the Machine Data Simulator adapter. More specifically, we will focus on the flowrate data, which consists of various sensor values coming from a water pipe system. Our goal is to keep an eye on the parameter volume_flow, which represents the current volume flow in cubic meters per second. For this parameter, we want to detect anomalies that could indicate problems such as leaks, blockages, etc.

To get the concerned data, we simply need to create an instance of the machine data simulator and persist the data in the data lake:

If you choose to perform the model training step yourself, you will need to wait approximately 15 minutes for enough data to be available for model training. If you want to speed this up, you can configure a lower wait time when creating the adapter. Please be aware that this also influences the inference scenario, since events will then arrive at a higher frequency.

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Model-Training-with-Historic-Data","title":"Model Training with Historic Data\u00b6","text":"

As stated above, the aim of our model is to detect anomalies in the volume_flow parameter. For this task, we will use Isolation Forests. Please note that the focus of this tutorial is not on model training, so the training shown here is deliberately simplified and omits important preparation steps such as standardization.

As a first step, let's query the flowrate data from the StreamPipes data lake and extract the values of volume_flow as a feature:

"},{"location":"tutorials/5-applying-interoperable-machine-learning-in-streampipes/#Model-Inference-with-Live-Data","title":"Model Inference with Live Data\u00b6","text":"

Utilizing a pre-trained model within StreamPipes becomes seamless with the ONNX interoperability standard, enabling effortless application of your existing model on live data streams.

Interacting with live data from StreamPipes is facilitated through StreamPipes functions. Below, we'll create a Python StreamPipes function that leverages an ONNX model to generate predictions for each incoming event, making the results accessible as a data stream within StreamPipes for subsequent steps.

So let's create an ONNXFunction that is capable of applying a model in ONNX representation to a StreamPipes data stream. If you'd like to read more details about how functions are defined, refer to our third tutorial.

"}]} \ No newline at end of file diff --git a/docs-python/dev/sitemap.xml b/docs-python/dev/sitemap.xml index 90253d563..f1852e4cf 100644 --- a/docs-python/dev/sitemap.xml +++ b/docs-python/dev/sitemap.xml @@ -2,242 +2,242 @@ https://streampipes.apache.org/docs/docs/python/latest/latest/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/getting-started/developing/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/getting-started/first-steps/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/getting-started/quickstart/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/client/client/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/client/config/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/client/credential_provider/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/endpoint/endpoint/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/endpoint/exceptions/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/endpoint/api/data_lake_measure/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/endpoint/api/data_stream/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/endpoint/api/version/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/function_zoo/river_function/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/function_handler/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/registration/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/streampipes_function/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/broker/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/broker_handler/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/consumer/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/output_collector/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/publisher/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/kafka/kafka_consumer/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/kafka/kafka_message_fetcher/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/kafka/kafka_publisher/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/nats/nats_consumer/ - 2024-07-13 + 2024-07-27 daily 
https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/broker/nats/nats_publisher/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/utils/async_iter_handler/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/utils/data_stream_context/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/utils/data_stream_generator/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/functions/utils/function_context/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/common/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/container/data_lake_measures/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/container/data_streams/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/container/resource_container/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/container/versions/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/data_lake_measure/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/data_series/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/data_stream/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/exceptions/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/function_definition/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/query_result/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/resource/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/reference/model/resource/version/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/tutorials/1-introduction-to-streampipes-python-client/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/tutorials/2-extracting-data-from-the-streampipes-data-lake/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/tutorials/3-getting-live-data-from-the-streampipes-data-stream/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/tutorials/4-using-online-machine-learning-on-a-streampipes-data-stream/ - 2024-07-13 + 2024-07-27 daily https://streampipes.apache.org/docs/docs/python/latest/latest/tutorials/5-applying-interoperable-machine-learning-in-streampipes/ - 2024-07-13 + 2024-07-27 daily \ No newline at end of file diff --git a/docs-python/dev/sitemap.xml.gz b/docs-python/dev/sitemap.xml.gz index 80645239c761b095d2f9ad232d6830b0d0fde9da..a452771636000599f94c8a9d847519a6ba11ff5f 100644 GIT binary patch (literal 712; base85-encoded payload omitted)