From c24d1cd6a9590684ca05572d6638fac9e939b761 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 28 Oct 2024 19:42:00 +0100 Subject: [PATCH 01/22] wip: add config --- .gitignore | 1 + README.md | 19 +++++ scystream/sdk/config/__init__.py | 0 scystream/sdk/config/config_loader.py | 103 ++++++++++++++++++++++++++ scystream/sdk/core.py | 2 + scystream/sdk/scheduler.py | 4 +- setup.py | 4 +- tests/example_config.yaml | 31 ++++++++ tests/test_core.py | 2 + 9 files changed, 163 insertions(+), 3 deletions(-) create mode 100644 scystream/sdk/config/__init__.py create mode 100644 scystream/sdk/config/config_loader.py create mode 100644 tests/example_config.yaml diff --git a/.gitignore b/.gitignore index 0e61186..36baf00 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__/ dist/ build/ venv/ +.venv/ diff --git a/README.md b/README.md index dbfff7e..f236535 100644 --- a/README.md +++ b/README.md @@ -35,3 +35,22 @@ if __name__ == "__main__": main() ``` + +### Development + +1. Create a venv + +```bash +python3 -m venv .venv +``` + +2. Install the package within the venv + +> [!INFO] +> This will also install all the install_requirements from the setup.py + +```bash +pip install -e .[dev] +``` + +3. Develop! diff --git a/scystream/sdk/config/__init__.py b/scystream/sdk/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scystream/sdk/config/config_loader.py b/scystream/sdk/config/config_loader.py new file mode 100644 index 0000000..215dbab --- /dev/null +++ b/scystream/sdk/config/config_loader.py @@ -0,0 +1,103 @@ +import yaml +from typing import Optional, Dict, Literal, Any, Callable +from pydantic import BaseModel, StrictStr, validator, Field +import os + +""" +This file contains the schema definition, the read function and validation +for the config file. 
+""" + +STRING_TYPE = "string" +INT_TYPE = "int" +FLOAT_TYPE = "float" +BOOL_TYPE = "bool" +LIST_TYPE = "list" +SPARK_TABLE_TYPE = "spark_table" + +CONFIG_FILE_DEFAULT_NAME = "cbc.yaml" + +DataTypes = Literal[STRING_TYPE, INT_TYPE, FLOAT_TYPE, + BOOL_TYPE, LIST_TYPE, SPARK_TABLE_TYPE] + +VALIDATORS: Dict[str, Callable[[Any], bool]] = { + "string": lambda x: isinstance(x, str), + "int": lambda x: isinstance(x, (int)), + "number": lambda x: isinstance(x, (float)), + "bool": lambda x: isinstance(x, (bool)), + "list": lambda x: isinstance(x, (list)), + # spark_table must be of type str + "spark_table": lambda x: isinstance(x, (str)) +} + + +class InputOutputDefinitions(BaseModel): + type: DataTypes + description: Optional[StrictStr] = None + item_type: Optional[DataTypes] = Field( + None, description="Type of items in the list") + table_name: Optional[StrictStr] = Field( + None, description="Name of the spark_table,\ + required if type is spark_table") + example: Optional[DataTypes] = Field( + None, description="Example for the Input/Output" + ) + + """ + If the type is spark_table, table_name must also be set + """ + @validator("table_name", always=True) + def validate_table_name(cls, v, values): + set_type = values.get("type") + if set_type == "spark_table": + if not v: + raise ValueError( + "table_name must be set when type is 'spark_table'") + return v + + """ + Check if the example corresponds with the inputs type + """ + @validator("example") + def validate_example_type(cls, v, values): + expected_type = values.get("type") + + if expected_type in VALIDATORS: + if not VALIDATORS[expected_type](v): + raise ValueError(f"Example must be of type \ + '{expected_type}' when type is '{expected_type}'") + + return v + + +class Entrypoint(BaseModel): + description: StrictStr + inputs: Dict[StrictStr, InputOutputDefinitions] + outputs: Dict[StrictStr, InputOutputDefinitions] + + +class ComputeBlock(BaseModel): + name: StrictStr + description: StrictStr + author: StrictStr + 
entrypoints: Dict[StrictStr, Entrypoint] + + @validator("entrypoints") + def check_entrypoints(cls, v): + if not v: + raise ValueError("At least one entrypoint must be defined.") + return v + + +def load_config(config_path: str = CONFIG_FILE_DEFAULT_NAME) -> ComputeBlock: + """ + Loads a YAML configuration file for workflow unit definitions. + """ + + root_dir = os.path.dirname(os.path.abspath(__file__)) + full_path = os.path.join(root_dir, "..", config_path) + + with open(full_path, "r") as file: + config = yaml.safe_load(file) + + return ComputeBlock(**config) diff --git a/scystream/sdk/core.py b/scystream/sdk/core.py index 47efac8..3965d1c 100644 --- a/scystream/sdk/core.py +++ b/scystream/sdk/core.py @@ -2,6 +2,7 @@ _registered_functions = {} + def entrypoint(func): """Decorator to mark a function as an entrypoint.""" @functools.wraps(func) @@ -10,6 +11,7 @@ def wrapper(*args, **kwargs): _registered_functions[func.__name__] = func return wrapper + def get_registered_functions(): """Returns a dictionary of registered entrypoint functions.""" return _registered_functions diff --git a/scystream/sdk/scheduler.py b/scystream/sdk/scheduler.py index 8e91d18..f403c83 100644 --- a/scystream/sdk/scheduler.py +++ b/scystream/sdk/scheduler.py @@ -1,5 +1,6 @@ from .core import get_registered_functions + class Scheduler: @staticmethod def list_entrypoints(): @@ -7,7 +8,7 @@ def list_entrypoints(): functions = get_registered_functions() for name in functions: print(f"'{name}' is available as an entrypoint.") - + @staticmethod def execute_function(name, *args, **kwargs): functions = get_registered_functions() @@ -15,4 +16,3 @@ def execute_function(name, *args, **kwargs): return functions[name](*args, **kwargs) else: raise Exception(f"No entrypoint found with the name: {name}") - diff --git a/setup.py b/setup.py index e66963e..be1c613 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,9 @@ author_email="evers@time.rwth-aachen.de", license="MIT", packages=find_packages(), - 
install_requires=[], + install_requires=[ + "pydantic>=2.9.2" + ], classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", diff --git a/tests/example_config.yaml b/tests/example_config.yaml new file mode 100644 index 0000000..b46befd --- /dev/null +++ b/tests/example_config.yaml @@ -0,0 +1,31 @@ +workflow_unit: + name: "The first Web-Crawler" + description: "This is a web crawler, it crawls text..." + author: "John Doe" + + entrypoints: + crawl: + description: "Crawl text from specified URLs" + inputs: + url_list: + type: "list" + item_type: "string" + description: "List of URLs to crawl. Can be defined by the user." + example: ["https://example.com", "http://one.com"] + outputs: + text_data: + type: "spark_table" + description: "Crawled text data in a spark table" + name: "text_data_spark" + + analyze_url: + description: "Analyzes if data is crawlable" + inputs: + url-list: + type: "list" + item_type: "string" + description: "List of URLS to check" + example: ["https://example.com"] + outputs: + type: "bool" + description: "True if all urls can be crawled" diff --git a/tests/test_core.py b/tests/test_core.py index b9b5030..10fb7ac 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,6 +1,7 @@ import unittest from scystream_sdk.core import entrypoint, get_registered_functions + class TestEntrypoint(unittest.TestCase): def test_entrypoint_registration(self): @entrypoint @@ -11,5 +12,6 @@ def dummy_function(): self.assertIn("dummy_function", registered) self.assertEqual(registered["dummy_function"](), "Hello") + if __name__ == "__main__": unittest.main() From a77f4beac5d5bad363f26f3d51e890d61c63f625 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Tue, 29 Oct 2024 17:46:55 +0100 Subject: [PATCH 02/22] wip: add yaml dependency --- scystream/sdk/config/config_loader.py | 19 +++++++++++++++---- setup.py | 3 ++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/scystream/sdk/config/config_loader.py 
b/scystream/sdk/config/config_loader.py index 215dbab..4146f6a 100644 --- a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -1,6 +1,6 @@ import yaml from typing import Optional, Dict, Literal, Any, Callable -from pydantic import BaseModel, StrictStr, validator, Field +from pydantic import BaseModel, StrictStr, field_validator, Field import os """ @@ -8,6 +8,10 @@ for the config file. """ +PROJECT_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +print(PROJECT_ROOT_DIR) + STRING_TYPE = "string" INT_TYPE = "int" FLOAT_TYPE = "float" @@ -46,7 +50,7 @@ class InputOutputDefinitions(BaseModel): """ If the type is spark_table, table_name must also be set """ - @validator("table_name", always=True) + @field_validator("table_name") def validate_table_name(cls, v, values): set_type = values.get("type") if set_type == "spark_table": @@ -58,7 +62,7 @@ def validate_table_name(cls, v, values): """ Check if the example corresponds with the inputs type """ - @validator("example") + @field_validator("example") def validate_example_type(cls, v, values): expected_type = values.get("type") @@ -82,13 +86,20 @@ class ComputeBlock(BaseModel): author: StrictStr entrypoints: Dict[StrictStr, Entrypoint] - @validator("entrypoints") + @field_validator("entrypoints") def check_entrypoints(cls, v): if not v: raise ValueError("At least one entrypoint must be defined.") return v +def validate_config(config_path: str = CONFIG_FILE_DEFAULT_NAME) -> bool: + """ + Reads the passed Compute Block YAML definition. + Returns True if the validation using pydantic was successfull + """ + + def load_config(config_path: str = CONFIG_FILE_DEFAULT_NAME) -> ComputeBlock: """ Loads a YAML configuration file for workflow unit definitions. 
diff --git a/setup.py b/setup.py index be1c613..1a176d3 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,8 @@ license="MIT", packages=find_packages(), install_requires=[ - "pydantic>=2.9.2" + "pydantic>=2.9.2", + "PyYAML>=6.0.2" ], classifiers=[ "Programming Language :: Python :: 3", From d2598559bcc7296c958513207e4464063594eeb0 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Wed, 30 Oct 2024 23:05:29 +0100 Subject: [PATCH 03/22] feat: add validation and loading of config file --- README.md | 81 ++++++++++- scystream/sdk/config/config_loader.py | 137 +++++------------- scystream/sdk/config/models.py | 59 ++++++++ setup.py | 2 +- tests/example_config.yaml | 31 ---- tests/test_config.py | 42 ++++++ tests/test_config_files/invalid_datatype.yaml | 29 ++++ .../missing_entrypoints.yaml | 5 + .../test_config_files/missing_table_name.yaml | 16 ++ tests/test_config_files/not_a_yaml.json | 3 + tests/test_config_files/valid_config.yaml | 29 ++++ tests/test_core.py | 2 +- 12 files changed, 302 insertions(+), 134 deletions(-) create mode 100644 scystream/sdk/config/models.py delete mode 100644 tests/example_config.yaml create mode 100644 tests/test_config.py create mode 100644 tests/test_config_files/invalid_datatype.yaml create mode 100644 tests/test_config_files/missing_entrypoints.yaml create mode 100644 tests/test_config_files/missing_table_name.yaml create mode 100644 tests/test_config_files/not_a_yaml.json create mode 100644 tests/test_config_files/valid_config.yaml diff --git a/README.md b/README.md index f236535..fd18e0c 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,75 @@ if __name__ == "__main__": ``` -### Development +### Compute Block Config Files +We expect every repository which will be used within the scystream application +to contain a `Compute Block Config File`, the `cbc.yaml`, within the root directory. + +This yaml-file describes the compute block itself. +It shows the entrypoints, their inputs and outputs. 
+ +This is an example `cbc.yaml`: + +```yaml +name: "The first Web-Crawler" +description: "This is a web crawler, it crawls text..." +author: "John Doe" + +entrypoints: + crawl: + description: "Crawl text from specified URLs" + inputs: + url_list: + type: "list" + item_type: "string" + description: "List of URLs to crawl. Can be defined by the user." + outputs: + text_data: + type: "spark_table" + description: "Crawled text data in a spark table" + table_name: "text_data_spark" + + analyze_url: + description: "Analyzes if data is crawlable" + inputs: + url-list: + type: "list" + item_type: "string" + description: "List of URLS to check" + outputs: + was_sucess: + type: "bool" + description: "True if all urls can be crawled" +``` + +To read and validate such a config file u can proceed as follows: + +```python3 +from scystream.sdk.config.config_loader import load_config + +def main(): + load_config() + +if __name__ == "__main__": + main() +``` + +If you want the file to have another name than `cbc.yaml` or you want the file to be +somewhere else than the root directory you can define that using the parameters the +`load_config` function takes. + +Example: + +```python3 +load_config(config_file_name="test.yaml", config_path="configs/") +``` + +the `config_path` is the path relative to your root directory + + +## Development of the SDK + +### Installation 1. Create a venv @@ -50,7 +118,16 @@ python3 -m venv .venv > This will also install all the install_requirements from the setup.py ```bash -pip install -e .[dev] +pip install -e . ``` 3. Develop! 
+ +### Tests + +To run all the tests run the following command: + +```bash +python3 -m unittest discover -s tests +``` + diff --git a/scystream/sdk/config/config_loader.py b/scystream/sdk/config/config_loader.py index 4146f6a..1826439 100644 --- a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -1,114 +1,53 @@ import yaml -from typing import Optional, Dict, Literal, Any, Callable -from pydantic import BaseModel, StrictStr, field_validator, Field -import os - -""" -This file contains the schema definition, the read function and validation -for the config file. -""" - -PROJECT_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) - -print(PROJECT_ROOT_DIR) - -STRING_TYPE = "string" -INT_TYPE = "int" -FLOAT_TYPE = "float" -BOOL_TYPE = "bool" -LIST_TYPE = "list" -SPARK_TABLE_TYPE = "spark_table" +from typing import Union +from pydantic import ValidationError +from pathlib import Path +from .models import ComputeBlock CONFIG_FILE_DEFAULT_NAME = "cbc.yaml" -DataTypes = Literal[STRING_TYPE, INT_TYPE, FLOAT_TYPE, - BOOL_TYPE, LIST_TYPE, SPARK_TABLE_TYPE] - -VALIDATORS: Dict[str, Callable[[Any], bool]] = { - "string": lambda x: isinstance(x, str), - "int": lambda x: isinstance(x, (int)), - "number": lambda x: isinstance(x, (float)), - "bool": lambda x: isinstance(x, (bool)), - "list": lambda x: isinstance(x, (list)), - # spark_table must be of type str - "spark_table": lambda x: isinstance(x, (str)) -} - - -class InputOutputDefinitions(BaseModel): - type: DataTypes - description: Optional[StrictStr] = None - item_type: Optional[DataTypes] = Field( - None, description="Type of items in the list") - table_name: Optional[StrictStr] = Field( - None, description="Name of the spark_table,\ - required if type is spark_table") - example: Optional[DataTypes] = Field( - None, description="Example for the Input/Output" - ) - - """ - If the type is spark_table, table_name must also be set - """ - @field_validator("table_name") - def 
validate_table_name(cls, v, values): - set_type = values.get("type") - if set_type == "spark_table": - if not v: - raise ValueError( - "table_name must be set when type is 'spark_table'") - return v +def load_config( + config_file_name: str = CONFIG_FILE_DEFAULT_NAME, + config_path: Union[str, Path] = None +) -> ComputeBlock: """ - Check if the example corresponds with the inputs type + Returns and Validates the Compute Block YAML definition. + Returns a ComputeBlock instance if the validation is successfull """ - @field_validator("example") - def validate_example_type(cls, v, values): - expected_type = values.get("type") + try: + file = _find_and_load_config(config_file_name, config_path) + block = ComputeBlock(**file) + return block + except ValidationError as e: + raise ValueError(f"Configuration file validation error: {e}") - if expected_type in VALIDATORS: - if not VALIDATORS[expected_type](v): - raise ValueError(f"Example must be of type \ - '{expected_type}' when type is '{expected_type}'") - return v - - -class Entrypoint(BaseModel): - description: StrictStr - inputs: Dict[StrictStr, InputOutputDefinitions] - outputs: Dict[StrictStr, InputOutputDefinitions] - - -class ComputeBlock(BaseModel): - name: StrictStr - description: StrictStr - author: StrictStr - entrypoints: Dict[StrictStr, Entrypoint] - - @field_validator("entrypoints") - def check_entrypoints(cls, v): - if not v: - raise ValueError("At least one entrypoint must be defined.") - return v - - -def validate_config(config_path: str = CONFIG_FILE_DEFAULT_NAME) -> bool: +def _find_and_load_config( + config_file_name: str, + config_path: Union[str, Path] = None +): """ - Reads the passed Compute Block YAML definition. 
- Returns True if the validation using pydantic was successfull + Loads the compute block config YAML from the projects root directory + returns the loaded file """ + base_path = Path.cwd() + if config_path: + base_path /= Path(config_path) + full_path = base_path / config_file_name -def load_config(config_path: str = CONFIG_FILE_DEFAULT_NAME) -> ComputeBlock: - """ - Loads a YAML configuration file for workflow unit definitions. - """ - - root_dir = os.path.dirname(os.path.abspath(__file__)) - full_path = os.path.join(root_dir, "..", config_path) + if not full_path.is_file(): + raise FileNotFoundError(f"Configuration file '{ + full_path}' not found.") - with open(full_path, "r") as file: - config = yaml.safe_load(file) + try: + with full_path.open("r") as file: + config_data = yaml.safe_load(file) + except FileNotFoundError: + raise FileNotFoundError(f"Configuration file '{ + full_path}' not found.'") + except yaml.YAMLError as e: + raise ValueError(f"Error parsing YAML file: {e}") - return ComputeBlock(**config) + return config_data diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py new file mode 100644 index 0000000..ab0d53b --- /dev/null +++ b/scystream/sdk/config/models.py @@ -0,0 +1,59 @@ +from typing import Optional, Dict, Literal +from pydantic import BaseModel, StrictStr, field_validator, Field + +""" +This file contains the schema definition for the config file. 
+""" + +STRING_TYPE = "string" +INT_TYPE = "int" +FLOAT_TYPE = "float" +BOOL_TYPE = "bool" +LIST_TYPE = "list" +SPARK_TABLE_TYPE = "spark_table" + +DataTypes = Literal[STRING_TYPE, INT_TYPE, FLOAT_TYPE, + BOOL_TYPE, LIST_TYPE, SPARK_TABLE_TYPE] + + +class InputOutputDefinitions(BaseModel): + type: DataTypes + description: Optional[StrictStr] = None + item_type: Optional[DataTypes] = Field( + None, description="Type of items in the list") + table_name: Optional[StrictStr] = Field( + None, description="Name of the spark_table,\ + required if type is spark_table", validate_default=True) + # TODO: Add an optional example field, this could be very helpful for the + # frontend + + """ + If the type is spark_table, table_name must also be set + """ + @field_validator("table_name") + def validate_table_name(cls, v, info): + set_type = info.data.get("type") + if set_type == "spark_table": + if not v: + raise ValueError( + "table_name must be set when type is 'spark_table'") + return v + + +class Entrypoint(BaseModel): + description: StrictStr + inputs: Dict[StrictStr, InputOutputDefinitions] + outputs: Dict[StrictStr, InputOutputDefinitions] + + +class ComputeBlock(BaseModel): + name: StrictStr + description: StrictStr + author: StrictStr + entrypoints: Dict[StrictStr, Entrypoint] + + @field_validator("entrypoints") + def check_entrypoints(cls, v): + if not v: + raise ValueError("At least one entrypoint must be defined.") + return v diff --git a/setup.py b/setup.py index 1a176d3..76ae135 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup( - name="scystream-sdk", + name="scystream_sdk", version="0.1.4", description="The official SDK for developing scystream compute blocks", long_description=open("README.md", "r", encoding="utf-8").read(), diff --git a/tests/example_config.yaml b/tests/example_config.yaml deleted file mode 100644 index b46befd..0000000 --- a/tests/example_config.yaml +++ /dev/null @@ -1,31 +0,0 @@ 
-workflow_unit: - name: "The first Web-Crawler" - description: "This is a web crawler, it crawls text..." - author: "John Doe" - - entrypoints: - crawl: - description: "Crawl text from specified URLs" - inputs: - url_list: - type: "list" - item_type: "string" - description: "List of URLs to crawl. Can be defined by the user." - example: ["https://example.com", "http://one.com"] - outputs: - text_data: - type: "spark_table" - description: "Crawled text data in a spark table" - name: "text_data_spark" - - analyze_url: - description: "Analyzes if data is crawlable" - inputs: - url-list: - type: "list" - item_type: "string" - description: "List of URLS to check" - example: ["https://example.com"] - outputs: - type: "bool" - description: "True if all urls can be crawled" diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..e5b2d2d --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,42 @@ +import unittest +from scystream.sdk.config.config_loader import load_config, ComputeBlock + + +class TestComputeBlockValidation(unittest.TestCase): + TEST_CONFIG_FOLDER = "tests/test_config_files" + + def test_valid_config(self): + try: + compute_block = load_config( + "valid_config.yaml", config_path=self.TEST_CONFIG_FOLDER) + self.assertIsInstance(compute_block, ComputeBlock) + except Exception: + self.fail("ComputeBlock raised an Exception unexpectedly!") + + def test_missing_entrypoints(self): + with self.assertRaises(ValueError): + load_config("missing_entrypoints.yaml", + config_path=self.TEST_CONFIG_FOLDER) + + def test_missing_table_name_for_spark_table(self): + with self.assertRaises(ValueError): + load_config("missing_table_name.yaml", + config_path=self.TEST_CONFIG_FOLDER) + + def test_invalid_datatypes(self): + with self.assertRaises(ValueError): + load_config("invalid_datatype.yaml", + config_path=self.TEST_CONFIG_FOLDER) + + def test_not_a_yaml(self): + with self.assertRaises(ValueError): + load_config("not_a_yaml.json", + 
config_path=self.TEST_CONFIG_FOLDER) + + def test_file_not_found(self): + with self.assertRaises(FileNotFoundError): + load_config("test.yaml") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_config_files/invalid_datatype.yaml b/tests/test_config_files/invalid_datatype.yaml new file mode 100644 index 0000000..84727af --- /dev/null +++ b/tests/test_config_files/invalid_datatype.yaml @@ -0,0 +1,29 @@ +name: "The first Web-Crawler" +description: "This is a web crawler, it crawls text..." +author: "John Doe" + +entrypoints: + crawl: + description: "Crawl text from specified URLs" + inputs: + url_list: + type: "invalid_type" + item_type: "string" + description: "List of URLs to crawl. Can be defined by the user." + outputs: + text_data: + type: "spark_table" + description: "Crawled text data in a spark table" + name: "text_data_spark" + + analyze_url: + description: "Analyzes if data is crawlable" + inputs: + url-list: + type: "list" + item_type: "string" + description: "List of URLS to check" + outputs: + was_sucess: + type: "bool" + description: "True if all urls can be crawled" diff --git a/tests/test_config_files/missing_entrypoints.yaml b/tests/test_config_files/missing_entrypoints.yaml new file mode 100644 index 0000000..32cf852 --- /dev/null +++ b/tests/test_config_files/missing_entrypoints.yaml @@ -0,0 +1,5 @@ +name: "The first Web-Crawler" +description: "This is a web crawler, it crawls text..." +author: "John Doe" +# Missing `entrypoints` field, which should cause validation to fail. + diff --git a/tests/test_config_files/missing_table_name.yaml b/tests/test_config_files/missing_table_name.yaml new file mode 100644 index 0000000..a22fa16 --- /dev/null +++ b/tests/test_config_files/missing_table_name.yaml @@ -0,0 +1,16 @@ +name: "The first Web-Crawler" +description: "This is a web crawler, it crawls text..." 
+author: "John Doe" +entrypoints: + crawl: + description: "Crawl text from specified URLs" + inputs: + url_list: + type: "list" + item_type: "string" + description: "List of URLs to crawl. Can be defined by the user." + outputs: + text_data: + type: "spark_table" + description: "Crawled text data in a spark table" + # Missing `table_name`, which should cause validation to fail. diff --git a/tests/test_config_files/not_a_yaml.json b/tests/test_config_files/not_a_yaml.json new file mode 100644 index 0000000..21da3b2 --- /dev/null +++ b/tests/test_config_files/not_a_yaml.json @@ -0,0 +1,3 @@ +{ + "key": "value" +} diff --git a/tests/test_config_files/valid_config.yaml b/tests/test_config_files/valid_config.yaml new file mode 100644 index 0000000..0e8c994 --- /dev/null +++ b/tests/test_config_files/valid_config.yaml @@ -0,0 +1,29 @@ +name: "The first Web-Crawler" +description: "This is a web crawler, it crawls text..." +author: "John Doe" + +entrypoints: + crawl: + description: "Crawl text from specified URLs" + inputs: + url_list: + type: "list" + item_type: "string" + description: "List of URLs to crawl. Can be defined by the user." 
+ outputs: + text_data: + type: "spark_table" + description: "Crawled text data in a spark table" + table_name: "text_data_spark" + + analyze_url: + description: "Analyzes if data is crawlable" + inputs: + url-list: + type: "list" + item_type: "string" + description: "List of URLS to check" + outputs: + was_sucess: + type: "bool" + description: "True if all urls can be crawled" diff --git a/tests/test_core.py b/tests/test_core.py index 10fb7ac..775ae75 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,5 @@ import unittest -from scystream_sdk.core import entrypoint, get_registered_functions +from scystream.sdk.core import entrypoint, get_registered_functions class TestEntrypoint(unittest.TestCase): From 46d2f9e315232e38560df79ce26352cb861f58e8 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Wed, 30 Oct 2024 23:15:07 +0100 Subject: [PATCH 04/22] style: fix linting --- scystream/sdk/config/config_loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scystream/sdk/config/config_loader.py b/scystream/sdk/config/config_loader.py index 1826439..f68e565 100644 --- a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -38,15 +38,17 @@ def _find_and_load_config( full_path = base_path / config_file_name if not full_path.is_file(): - raise FileNotFoundError(f"Configuration file '{ - full_path}' not found.") + raise FileNotFoundError( + f"Configuration file '{full_path}' not found." 
+ ) try: with full_path.open("r") as file: config_data = yaml.safe_load(file) except FileNotFoundError: - raise FileNotFoundError(f"Configuration file '{ - full_path}' not found.'") + raise FileNotFoundError( + f"Configuration file '{full_path}' not found.'" + ) except yaml.YAMLError as e: raise ValueError(f"Error parsing YAML file: {e}") From 1fbbd39c63f76fe483de93bb88fc3092c66c08d3 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Wed, 30 Oct 2024 23:21:46 +0100 Subject: [PATCH 05/22] style: remove line --- tests/test_core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index acd8055..775ae75 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2,7 +2,6 @@ from scystream.sdk.core import entrypoint, get_registered_functions - class TestEntrypoint(unittest.TestCase): def test_entrypoint_registration(self): @entrypoint From f6e715058a8e79f40ed70fae65268fd5523a6732 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Wed, 30 Oct 2024 23:25:05 +0100 Subject: [PATCH 06/22] docs: fix note --- README.md | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fd18e0c..2e1ef5b 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ python3 -m venv .venv 2. 
Install the package within the venv -> [!INFO] +> [!NOTE] > This will also install all the install_requirements from the setup.py ```bash diff --git a/setup.py b/setup.py index 76ae135..1a176d3 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup( - name="scystream_sdk", + name="scystream-sdk", version="0.1.4", description="The official SDK for developing scystream compute blocks", long_description=open("README.md", "r", encoding="utf-8").read(), From 73c88433b03c16f7bc7170c119c37c2956b9006d Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 4 Nov 2024 14:40:45 +0100 Subject: [PATCH 07/22] feat: add optional to inputs validation --- scystream/sdk/config/models.py | 63 ++++++++++++------- tests/test_config.py | 10 +++ tests/test_config_files/invalid_datatype.yaml | 2 - .../optional_invalid_default.yaml | 31 +++++++++ .../optional_no_default.yaml | 30 +++++++++ tests/test_config_files/valid_config.yaml | 9 +-- 6 files changed, 117 insertions(+), 28 deletions(-) create mode 100644 tests/test_config_files/optional_invalid_default.yaml create mode 100644 tests/test_config_files/optional_no_default.yaml diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index ab0d53b..c83400e 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Literal +from typing import Optional, Dict, Literal, Any, Callable from pydantic import BaseModel, StrictStr, field_validator, Field """ @@ -15,35 +15,53 @@ DataTypes = Literal[STRING_TYPE, INT_TYPE, FLOAT_TYPE, BOOL_TYPE, LIST_TYPE, SPARK_TABLE_TYPE] +VALIDATORS: Dict[DataTypes, Callable[[Any], bool]] = { + STRING_TYPE: lambda v: isinstance(v, str), + INT_TYPE: lambda v: isinstance(v, str), + FLOAT_TYPE: lambda v: isinstance(v, float), + BOOL_TYPE: lambda v: isinstance(v, bool), + LIST_TYPE: lambda v: isinstance(v, list), + # SPARK_TABLE_TYPE should be the name of the spark table (str) + 
SPARK_TABLE_TYPE: lambda v: isinstance(v, str) +} -class InputOutputDefinitions(BaseModel): + +class BaseIOModel(BaseModel): type: DataTypes description: Optional[StrictStr] = None - item_type: Optional[DataTypes] = Field( - None, description="Type of items in the list") - table_name: Optional[StrictStr] = Field( - None, description="Name of the spark_table,\ - required if type is spark_table", validate_default=True) - # TODO: Add an optional example field, this could be very helpful for the - # frontend - - """ - If the type is spark_table, table_name must also be set - """ - @field_validator("table_name") - def validate_table_name(cls, v, info): - set_type = info.data.get("type") - if set_type == "spark_table": - if not v: - raise ValueError( - "table_name must be set when type is 'spark_table'") + + +class InputDefinitions(BaseIOModel): + optional: bool + default_value: Optional[Any] = Field(default=None, validate_default=True) + + @field_validator("default_value") + def validate_default_value(cls, v, info): + optional = info.data.get("optional") + expected_type = info.data.get("type") + + if not optional: + # If field is not optional, default_value does not have to be set + return v + + if v is None: + raise ValueError("default_value must be set when optional is True") + + validator = VALIDATORS.get(expected_type) + if validator and not validator(v): + raise TypeError(f"default_value must be of type {expected_type}") + return v +class OutputDefinitions(BaseIOModel): + pass + + class Entrypoint(BaseModel): description: StrictStr - inputs: Dict[StrictStr, InputOutputDefinitions] - outputs: Dict[StrictStr, InputOutputDefinitions] + inputs: Dict[StrictStr, InputDefinitions] + outputs: Dict[StrictStr, OutputDefinitions] class ComputeBlock(BaseModel): @@ -51,6 +69,7 @@ class ComputeBlock(BaseModel): description: StrictStr author: StrictStr entrypoints: Dict[StrictStr, Entrypoint] + docker_image: Optional[StrictStr] @field_validator("entrypoints") def 
check_entrypoints(cls, v): diff --git a/tests/test_config.py b/tests/test_config.py index e5b2d2d..212cb04 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -37,6 +37,16 @@ def test_file_not_found(self): with self.assertRaises(FileNotFoundError): load_config("test.yaml") + def test_optional_invalid_default(self): + with self.assertRaises(TypeError): + load_config("optional_invalid_default.yaml", + config_path=self.TEST_CONFIG_FOLDER) + + def test_optional_no_default(self): + with self.assertRaises(ValueError): + load_config("optional_no_default.yaml", + config_path=self.TEST_CONFIG_FOLDER) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_config_files/invalid_datatype.yaml b/tests/test_config_files/invalid_datatype.yaml index 84727af..3083e9b 100644 --- a/tests/test_config_files/invalid_datatype.yaml +++ b/tests/test_config_files/invalid_datatype.yaml @@ -8,7 +8,6 @@ entrypoints: inputs: url_list: type: "invalid_type" - item_type: "string" description: "List of URLs to crawl. Can be defined by the user." outputs: text_data: @@ -21,7 +20,6 @@ entrypoints: inputs: url-list: type: "list" - item_type: "string" description: "List of URLS to check" outputs: was_sucess: diff --git a/tests/test_config_files/optional_invalid_default.yaml b/tests/test_config_files/optional_invalid_default.yaml new file mode 100644 index 0000000..7d9c9cf --- /dev/null +++ b/tests/test_config_files/optional_invalid_default.yaml @@ -0,0 +1,31 @@ +name: "The first Web-Crawler" +description: "This is a web crawler, it crawls text..." +author: "John Doe" +docker_image: "https://ghcr.io/sycstream" + +entrypoints: + crawl: + description: "Crawl text from specified URLs" + inputs: + url_list: + type: "list" + description: "List of URLs to crawl. Can be defined by the user." 
+ optional: True + default_value: "a string" + outputs: + text_data: + type: "spark_table" + description: "Crawled text data in a spark table" + table_name: "text_data_spark" + optional: False + + analyze_url: + description: "Analyzes if data is crawlable" + inputs: + url-list: + type: "list" + description: "List of URLS to check" + outputs: + was_sucess: + type: "bool" + description: "True if all urls can be crawled" diff --git a/tests/test_config_files/optional_no_default.yaml b/tests/test_config_files/optional_no_default.yaml new file mode 100644 index 0000000..98c9503 --- /dev/null +++ b/tests/test_config_files/optional_no_default.yaml @@ -0,0 +1,30 @@ +name: "The first Web-Crawler" +description: "This is a web crawler, it crawls text..." +author: "John Doe" +docker_image: "https://ghcr.io/sycstream" + +entrypoints: + crawl: + description: "Crawl text from specified URLs" + inputs: + url_list: + type: "list" + description: "List of URLs to crawl. Can be defined by the user." + optional: True + outputs: + text_data: + type: "spark_table" + description: "Crawled text data in a spark table" + table_name: "text_data_spark" + optional: False + + analyze_url: + description: "Analyzes if data is crawlable" + inputs: + url-list: + type: "list" + description: "List of URLS to check" + outputs: + was_sucess: + type: "bool" + description: "True if all urls can be crawled" diff --git a/tests/test_config_files/valid_config.yaml b/tests/test_config_files/valid_config.yaml index 0e8c994..12bae4d 100644 --- a/tests/test_config_files/valid_config.yaml +++ b/tests/test_config_files/valid_config.yaml @@ -1,6 +1,7 @@ name: "The first Web-Crawler" description: "This is a web crawler, it crawls text..." author: "John Doe" +docker_image: "https://ghcr.io/sycstream" entrypoints: crawl: @@ -8,21 +9,21 @@ entrypoints: inputs: url_list: type: "list" - item_type: "string" description: "List of URLs to crawl. Can be defined by the user." 
+ optional: True + default_value: ["test", "1234"] outputs: text_data: type: "spark_table" description: "Crawled text data in a spark table" - table_name: "text_data_spark" analyze_url: description: "Analyzes if data is crawlable" inputs: - url-list: + url_list: type: "list" - item_type: "string" description: "List of URLS to check" + optional: False outputs: was_sucess: type: "bool" From f0868239391b31d01a15de92c32c805c7db57f51 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 4 Nov 2024 18:02:30 +0100 Subject: [PATCH 08/22] feat: remodel config structure --- README.md | 67 ++++--- scystream/sdk/config/models.py | 164 +++++++++++++----- tests/test_config.py | 15 +- tests/test_config_files/invalid_datatype.yaml | 59 +++++-- .../missing_entrypoints.yaml | 6 +- .../missing_required_fields.yaml | 51 ++++++ .../test_config_files/missing_table_name.yaml | 16 -- .../optional_default_not_set.yaml | 54 ++++++ .../optional_env_key_not_set.yaml | 49 ++++++ .../optional_invalid_default.yaml | 31 ---- .../optional_no_default.yaml | 30 ---- tests/test_config_files/valid_config.yaml | 64 ++++--- 12 files changed, 414 insertions(+), 192 deletions(-) create mode 100644 tests/test_config_files/missing_required_fields.yaml delete mode 100644 tests/test_config_files/missing_table_name.yaml create mode 100644 tests/test_config_files/optional_default_not_set.yaml create mode 100644 tests/test_config_files/optional_env_key_not_set.yaml delete mode 100644 tests/test_config_files/optional_invalid_default.yaml delete mode 100644 tests/test_config_files/optional_no_default.yaml diff --git a/README.md b/README.md index 2e1ef5b..70138bf 100644 --- a/README.md +++ b/README.md @@ -46,35 +46,58 @@ It shows the entrypoints, their inputs and outputs. This is an example `cbc.yaml`: ```yaml -name: "The first Web-Crawler" -description: "This is a web crawler, it crawls text..." +name: "NLP toolbox" +description: "Contains NLP algorithms..." 
author: "John Doe" +docker_image: "https://ghcr.io/nlp-toolbox" entrypoints: - crawl: - description: "Crawl text from specified URLs" + topic_modelling: + description: "Run topic modelling" inputs: - url_list: - type: "list" - item_type: "string" - description: "List of URLs to crawl. Can be defined by the user." - outputs: + language: + description: "The language to use" + type: "env" + env_key: "LANG" + optional: True + default_value: "de" text_data: - type: "spark_table" - description: "Crawled text data in a spark table" - table_name: "text_data_spark" - - analyze_url: - description: "Analyzes if data is crawlable" + description: "Text file. Can be uploaded by the user." + type: "file" + env_key: "TXT_SRC_PATH" + optional: False + db_data: + description: "Information in a database" + type: "db_table" + env_key: "DATA_TABLE_NAME" + table_name: "nlp_information" + optional: True + outputs: + topic_model: + type: "file" + description: "Topic model file" + env_key: "OUTPUT_PATH_TOPIC_MODEL" + file_path: "outputs/output.pkl" + run_durations: + type: "db_table" + description: "Table that contains the run durations per day." 
+ env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + + analyze_runtime: + description: "Analyze the runtimes" inputs: - url-list: - type: "list" - item_type: "string" - description: "List of URLS to check" + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + optional: True outputs: - was_sucess: - type: "bool" - description: "True if all urls can be crawled" + csv_output: + type: "file" + description: "A csv containing statistical information" + env_key: "CSV_OUTPUT_PATH" + file_path: "outputs/statistics.csv" ``` To read and validate such a config file u can proceed as follows: diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index c83400e..b786a25 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -1,67 +1,149 @@ -from typing import Optional, Dict, Literal, Any, Callable +from typing import Optional, Dict, Literal, Union from pydantic import BaseModel, StrictStr, field_validator, Field """ This file contains the schema definition for the config file. 
""" -STRING_TYPE = "string" -INT_TYPE = "int" -FLOAT_TYPE = "float" -BOOL_TYPE = "bool" -LIST_TYPE = "list" -SPARK_TABLE_TYPE = "spark_table" - -DataTypes = Literal[STRING_TYPE, INT_TYPE, FLOAT_TYPE, - BOOL_TYPE, LIST_TYPE, SPARK_TABLE_TYPE] - -VALIDATORS: Dict[DataTypes, Callable[[Any], bool]] = { - STRING_TYPE: lambda v: isinstance(v, str), - INT_TYPE: lambda v: isinstance(v, str), - FLOAT_TYPE: lambda v: isinstance(v, float), - BOOL_TYPE: lambda v: isinstance(v, bool), - LIST_TYPE: lambda v: isinstance(v, list), - # SPARK_TABLE_TYPE should be the name of the spark table (str) - SPARK_TABLE_TYPE: lambda v: isinstance(v, str) -} - - -class BaseIOModel(BaseModel): - type: DataTypes + +class BaseInputModel(BaseModel): description: Optional[StrictStr] = None + optional: bool = False + env_key: Optional[StrictStr] = Field( + default=None, validate_default=True, + description="The env_key describes the key of the environment variable\ + which can be set to override the default value" + ) + + @field_validator("env_key") + def validate_env_key(cls, v, info): + """a + If optional == False, the env_key must be set! As the user must have + the possibility to define the variable. + """ + optional = info.data.get("optional") + + if not optional and v is None: + raise ValueError("If optional is False, the env_key must be set.") + + return v + +class EnvInput(BaseInputModel): + """ + The EnvInput type describes the input of an ENV variable + It should describe one env-variable the compute unit accesses. -class InputDefinitions(BaseIOModel): - optional: bool - default_value: Optional[Any] = Field(default=None, validate_default=True) + The default_value can be overridden, if the env_key is set. + """ + type: Literal["env"] + default_value: Optional[StrictStr] = Field( + default=None, validate_default=True) @field_validator("default_value") def validate_default_value(cls, v, info): + """ + If optional == True, default_value must be set! 
+ """ optional = info.data.get("optional") - expected_type = info.data.get("type") - if not optional: - # If field is not optional, default_value does not have to be set - return v + if optional and v is None: + raise ValueError("If optional is True, default_value must be set.") - if v is None: - raise ValueError("default_value must be set when optional is True") + return v - validator = VALIDATORS.get(expected_type) - if validator and not validator(v): - raise TypeError(f"default_value must be of type {expected_type}") - return v +class FileInput(BaseInputModel): + """ + The FileInput type describes the input for files. + The file_path describes the path to a file on the S3 bucket, + it can be overriden by using the env_key, if set. + + This makes sense, if a user should be able to manually upload + files the compute units wants to access. It does not know the + path to the file while writing the defintion. + """ + type: Literal["file"] + file_path: Optional[StrictStr] = Field( + default=None, validate_default=True, + description="The default value of the FileInput type.\ + Can be overriden." + ) + + @field_validator("file_path") + def validate_file_path(cls, v, info): + """ + If optional == True, file_path must be set! + """ + optional = info.data.get("optional") + + if optional and v is None: + raise ValueError("If optional is True, file_path must be set.") + + +class BaseOutputModel(BaseModel): + description: StrictStr + env_key: StrictStr = Field( + description="The env_key describes the key of the environment variable\ + which can be set to override the default value" + ) + + +class FileOutput(BaseOutputModel): + """ + The FileOutput type describes the output of a file. + The file_path describes the path to a file on the S3 bucket. + """ + type: Literal["file"] + file_path: StrictStr = Field( + desscription="The path to the file on the S3 bucket." 
+ ) + + +class DBTableOutput(BaseOutputModel): + """ + The DBTableOutput type defines a table that provides output data. + The table_name refers to the output table name. + """ + type: Literal["db_table"] + table_name: StrictStr = Field( + description="The name of the output database table." + ) + + +class DBTableInput(BaseInputModel): + """ + The DBTableInput type defines a table that provides input data. + The table_name can be overriden by using the env_key, if set. + + This makes sense, if a previous compute units output db_table should + be used as an input. This table_name is then not known while writing the + definition. + """ + type: Literal["db_table"] + table_name: Optional[StrictStr] = Field( + default=None, validate_default=True, + description="The default value of the DBTableInput type.\ + Can be overriden." + ) + + @field_validator("table_name") + def validate_table_name(cls, v, info): + """ + If optional == True, table_name must be set! + """ + + optional = info.data.get("optional") -class OutputDefinitions(BaseIOModel): - pass + if optional and v is None: + raise ValueError("If optional is True, table_name must be set.") class Entrypoint(BaseModel): description: StrictStr - inputs: Dict[StrictStr, InputDefinitions] - outputs: Dict[StrictStr, OutputDefinitions] + inputs: Dict[StrictStr, Union[EnvInput, FileInput, DBTableInput]] + outputs: Dict[StrictStr, Union[FileInput, DBTableOutput]] class ComputeBlock(BaseModel): diff --git a/tests/test_config.py b/tests/test_config.py index 212cb04..3a426f2 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -18,11 +18,6 @@ def test_missing_entrypoints(self): load_config("missing_entrypoints.yaml", config_path=self.TEST_CONFIG_FOLDER) - def test_missing_table_name_for_spark_table(self): - with self.assertRaises(ValueError): - load_config("missing_table_name.yaml", - config_path=self.TEST_CONFIG_FOLDER) - def test_invalid_datatypes(self): with self.assertRaises(ValueError): 
load_config("invalid_datatype.yaml", @@ -37,14 +32,14 @@ def test_file_not_found(self): with self.assertRaises(FileNotFoundError): load_config("test.yaml") - def test_optional_invalid_default(self): - with self.assertRaises(TypeError): - load_config("optional_invalid_default.yaml", + def test_optional_default_not_set(self): + with self.assertRaises(ValueError): + load_config("optional_default_not_set.yaml", config_path=self.TEST_CONFIG_FOLDER) - def test_optional_no_default(self): + def test_optional_env_key_not_set(self): with self.assertRaises(ValueError): - load_config("optional_no_default.yaml", + load_config("optional_env_key_not_set.yaml", config_path=self.TEST_CONFIG_FOLDER) diff --git a/tests/test_config_files/invalid_datatype.yaml b/tests/test_config_files/invalid_datatype.yaml index 3083e9b..8df5b73 100644 --- a/tests/test_config_files/invalid_datatype.yaml +++ b/tests/test_config_files/invalid_datatype.yaml @@ -1,27 +1,50 @@ -name: "The first Web-Crawler" -description: "This is a web crawler, it crawls text..." +name: "NLP toolbox" +description: "Contains NLP algorithms..." author: "John Doe" +docker_image: "https://ghcr.io/nlp-toolbox" entrypoints: - crawl: - description: "Crawl text from specified URLs" + topic_modelling: + description: "Run topic modelling" inputs: - url_list: + language: + description: "The language to use" type: "invalid_type" - description: "List of URLs to crawl. Can be defined by the user." - outputs: + env_key: "LANG" + optional: True + default_value: "de" text_data: - type: "spark_table" - description: "Crawled text data in a spark table" - name: "text_data_spark" + description: "Text file. Can be uploaded by the user." 
+ type: "file" + env_key: "TXT_SRC_PATH" + optional: False + db_data: + description: "Information in a database" + type: "db_table" + env_key: "DATA_TABLE_NAME" + optional: True + outputs: + topic_model: + type: "file" + description: "Topic model file" + env_key: "OUTPUT_PATH_TOPIC_MODEL" + # Missing file_path here will trigger validation error + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" - analyze_url: - description: "Analyzes if data is crawlable" + analyze_runtime: + description: "Analyze the runtimes" inputs: - url-list: - type: "list" - description: "List of URLS to check" + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + optional: True outputs: - was_sucess: - type: "bool" - description: "True if all urls can be crawled" + csv_output: + type: "file" + description: "A csv containing statistical information" + env_key: "CSV_OUTPUT_PATH" + file_path: "outputs/statistics.csv" diff --git a/tests/test_config_files/missing_entrypoints.yaml b/tests/test_config_files/missing_entrypoints.yaml index 32cf852..e02ed4f 100644 --- a/tests/test_config_files/missing_entrypoints.yaml +++ b/tests/test_config_files/missing_entrypoints.yaml @@ -1,5 +1,5 @@ -name: "The first Web-Crawler" -description: "This is a web crawler, it crawls text..." +name: "NLP toolbox" +description: "Contains NLP algorithms..." author: "John Doe" -# Missing `entrypoints` field, which should cause validation to fail. +docker_image: "https://ghcr.io/nlp-toolbox" diff --git a/tests/test_config_files/missing_required_fields.yaml b/tests/test_config_files/missing_required_fields.yaml new file mode 100644 index 0000000..5187f95 --- /dev/null +++ b/tests/test_config_files/missing_required_fields.yaml @@ -0,0 +1,51 @@ +name: "NLP toolbox" +description: "Contains NLP algorithms..." 
+author: "John Doe" +docker_image: "https://ghcr.io/nlp-toolbox" + +entrypoints: + topic_modelling: + description: "Run topic modelling" + inputs: + language: + description: "The language to use" + type: "env" + env_key: "LANG" + optional: True + default_value: "de" + text_data: + description: "Text file. Can be uploaded by the user." + type: "file" + env_key: "TXT_SRC_PATH" + optional: False + db_data: + description: "Information in a database" + type: "db_table" + env_key: "DATA_TABLE_NAME" + optional: True + outputs: + topic_model: + type: "file" + description: "Topic model file" + env_key: "OUTPUT_PATH_TOPIC_MODEL" + # Missing file_path here will trigger validation error + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + + analyze_runtime: + description: "Analyze the runtimes" + inputs: + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + optional: True + outputs: + csv_output: + type: "file" + description: "A csv containing statistical information" + env_key: "CSV_OUTPUT_PATH" + file_path: "outputs/statistics.csv" + diff --git a/tests/test_config_files/missing_table_name.yaml b/tests/test_config_files/missing_table_name.yaml deleted file mode 100644 index a22fa16..0000000 --- a/tests/test_config_files/missing_table_name.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: "The first Web-Crawler" -description: "This is a web crawler, it crawls text..." -author: "John Doe" -entrypoints: - crawl: - description: "Crawl text from specified URLs" - inputs: - url_list: - type: "list" - item_type: "string" - description: "List of URLs to crawl. Can be defined by the user." - outputs: - text_data: - type: "spark_table" - description: "Crawled text data in a spark table" - # Missing `table_name`, which should cause validation to fail. 
diff --git a/tests/test_config_files/optional_default_not_set.yaml b/tests/test_config_files/optional_default_not_set.yaml new file mode 100644 index 0000000..83ac369 --- /dev/null +++ b/tests/test_config_files/optional_default_not_set.yaml @@ -0,0 +1,54 @@ +name: "NLP toolbox" +description: "Contains NLP algorithms..." +author: "John Doe" +docker_image: "https://ghcr.io/nlp-toolbox" + +entrypoints: + topic_modelling: + description: "Run topic modelling" + inputs: + language: + description: "The language to use" + type: "env" + env_key: "LANG" + optional: True + # Missing default_value here will trigger validation error + text_data: + description: "Text file. Can be uploaded by the user." + type: "file" + env_key: "TXT_SRC_PATH" + optional: False + db_data: + description: "Information in a database" + type: "db_table" + env_key: "DATA_TABLE_NAME" + table_name: "nlp_information" + optional: True + outputs: + topic_model: + type: "file" + description: "Topic model file" + env_key: "OUTPUT_PATH_TOPIC_MODEL" + file_path: "outputs/output.pkl" + run_durations: + description: "A table which contains the run durations per day." + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + + analyze_runtime: + description: "Analyze the runtimes" + inputs: + run_durations: + description: "A table which contains the run durations per day." + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + optional: True + outputs: + csv_output: + type: "file" + description: "A csv containing statistical information" + env_key: "CSV_OUTPUT_PATH" + file_path: "outputs/statistics.csv" + diff --git a/tests/test_config_files/optional_env_key_not_set.yaml b/tests/test_config_files/optional_env_key_not_set.yaml new file mode 100644 index 0000000..b6e2040 --- /dev/null +++ b/tests/test_config_files/optional_env_key_not_set.yaml @@ -0,0 +1,49 @@ +name: "NLP toolbox" +description: "Contains NLP algorithms..." 
+author: "John Doe" +docker_image: "https://ghcr.io/nlp-toolbox" + +entrypoints: + topic_modelling: + description: "Run topic modelling" + inputs: + language: + description: "The language to use" + type: "env" + optional: False + text_data: + description: "Text file. Can be uploaded by the user." + type: "file" + env_key: "TXT_SRC_PATH" + optional: False + db_data: + description: "Information in a database" + type: "db_table" + env_key: "DATA_TABLE_NAME" + optional: True + outputs: + topic_model: + type: "file" + description: "Topic model file" + env_key: "OUTPUT_PATH_TOPIC_MODEL" + file_path: "test/test.pkj" + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + + analyze_runtime: + description: "Analyze the runtimes" + inputs: + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + optional: True + outputs: + csv_output: + type: "file" + description: "A csv containing statistical information" + env_key: "CSV_OUTPUT_PATH" + file_path: "outputs/statistics.csv" + diff --git a/tests/test_config_files/optional_invalid_default.yaml b/tests/test_config_files/optional_invalid_default.yaml deleted file mode 100644 index 7d9c9cf..0000000 --- a/tests/test_config_files/optional_invalid_default.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: "The first Web-Crawler" -description: "This is a web crawler, it crawls text..." -author: "John Doe" -docker_image: "https://ghcr.io/sycstream" - -entrypoints: - crawl: - description: "Crawl text from specified URLs" - inputs: - url_list: - type: "list" - description: "List of URLs to crawl. Can be defined by the user." 
- optional: True - default_value: "a string" - outputs: - text_data: - type: "spark_table" - description: "Crawled text data in a spark table" - table_name: "text_data_spark" - optional: False - - analyze_url: - description: "Analyzes if data is crawlable" - inputs: - url-list: - type: "list" - description: "List of URLS to check" - outputs: - was_sucess: - type: "bool" - description: "True if all urls can be crawled" diff --git a/tests/test_config_files/optional_no_default.yaml b/tests/test_config_files/optional_no_default.yaml deleted file mode 100644 index 98c9503..0000000 --- a/tests/test_config_files/optional_no_default.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: "The first Web-Crawler" -description: "This is a web crawler, it crawls text..." -author: "John Doe" -docker_image: "https://ghcr.io/sycstream" - -entrypoints: - crawl: - description: "Crawl text from specified URLs" - inputs: - url_list: - type: "list" - description: "List of URLs to crawl. Can be defined by the user." - optional: True - outputs: - text_data: - type: "spark_table" - description: "Crawled text data in a spark table" - table_name: "text_data_spark" - optional: False - - analyze_url: - description: "Analyzes if data is crawlable" - inputs: - url-list: - type: "list" - description: "List of URLS to check" - outputs: - was_sucess: - type: "bool" - description: "True if all urls can be crawled" diff --git a/tests/test_config_files/valid_config.yaml b/tests/test_config_files/valid_config.yaml index 12bae4d..5a15dd0 100644 --- a/tests/test_config_files/valid_config.yaml +++ b/tests/test_config_files/valid_config.yaml @@ -1,30 +1,52 @@ -name: "The first Web-Crawler" -description: "This is a web crawler, it crawls text..." +name: "NLP toolbox" +description: "Contains NLP algorithms..." 
author: "John Doe" -docker_image: "https://ghcr.io/sycstream" +docker_image: "https://ghcr.io/nlp-toolbox" entrypoints: - crawl: - description: "Crawl text from specified URLs" + topic_modelling: + description: "Run topic modelling" inputs: - url_list: - type: "list" - description: "List of URLs to crawl. Can be defined by the user." + language: + description: "The language to use" + type: "env" + env_key: "LANG" optional: True - default_value: ["test", "1234"] - outputs: + default_value: "de" text_data: - type: "spark_table" - description: "Crawled text data in a spark table" + description: "Text file. Can be uploaded by the user." + type: "file" + env_key: "TXT_SRC_PATH" + optional: False + db_data: + description: "Information in a database" + type: "db_table" + env_key: "DATA_TABLE_NAME" + table_name: "nlp_information" + optional: True + outputs: + topic_model: + type: "file" + description: "Topic model file" + env_key: "OUTPUT_PATH_TOPIC_MODEL" + file_path: "outputs/output.pkl" + run_durations: + type: "db_table" + description: "Table that contains the run durations per day." 
+ env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" - analyze_url: - description: "Analyzes if data is crawlable" + analyze_runtime: + description: "Analyze the runtimes" inputs: - url_list: - type: "list" - description: "List of URLS to check" - optional: False + run_durations: + type: "db_table" + env_key: "RUN_DURATIONS_TABLE_NAME" + table_name: "run_durations_nlp" + optional: True outputs: - was_sucess: - type: "bool" - description: "True if all urls can be crawled" + csv_output: + type: "file" + description: "A csv containing statistical information" + env_key: "CSV_OUTPUT_PATH" + file_path: "outputs/statistics.csv" From 8d8574af97a766984e11770e5cfd494270e08a34 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 4 Nov 2024 20:30:13 +0100 Subject: [PATCH 09/22] feat: ensure env_key is set --- scystream/sdk/config/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index b786a25..31b1428 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -9,15 +9,15 @@ class BaseInputModel(BaseModel): description: Optional[StrictStr] = None optional: bool = False - env_key: Optional[StrictStr] = Field( - default=None, validate_default=True, + env_key: StrictStr = Field( + validate_default=True, description="The env_key describes the key of the environment variable\ which can be set to override the default value" ) @field_validator("env_key") def validate_env_key(cls, v, info): - """a + """ If optional == False, the env_key must be set! As the user must have the possibility to define the variable. 
""" From 6b56af76107fd1f6af5aebf0bb01d70d3f0f8ccb Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Thu, 7 Nov 2024 15:22:55 +0100 Subject: [PATCH 10/22] feat: allow multiple envs for input configuration --- scystream/sdk/config/models.py | 146 ++---------------- tests/test_config.py | 10 -- tests/test_config_files/invalid_datatype.yaml | 39 ++--- tests/test_config_files/missing_fields.yaml | 42 +++++ .../missing_required_fields.yaml | 51 ------ .../optional_default_not_set.yaml | 54 ------- .../optional_env_key_not_set.yaml | 49 ------ tests/test_config_files/valid_config.yaml | 35 ++--- 8 files changed, 85 insertions(+), 341 deletions(-) create mode 100644 tests/test_config_files/missing_fields.yaml delete mode 100644 tests/test_config_files/missing_required_fields.yaml delete mode 100644 tests/test_config_files/optional_default_not_set.yaml delete mode 100644 tests/test_config_files/optional_env_key_not_set.yaml diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index 31b1428..f57b07f 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -1,149 +1,29 @@ -from typing import Optional, Dict, Literal, Union +from typing import Optional, Dict, Literal from pydantic import BaseModel, StrictStr, field_validator, Field +FILE_TYPE_IDENTIFIER = "file" +DB_TABLE_TYPE_IDENTIFIER = "db_table" + """ This file contains the schema definition for the config file. """ -class BaseInputModel(BaseModel): +class InputOutputModel(BaseModel): + type: Literal[FILE_TYPE_IDENTIFIER, DB_TABLE_TYPE_IDENTIFIER] description: Optional[StrictStr] = None - optional: bool = False - env_key: StrictStr = Field( - validate_default=True, - description="The env_key describes the key of the environment variable\ - which can be set to override the default value" - ) - - @field_validator("env_key") - def validate_env_key(cls, v, info): - """ - If optional == False, the env_key must be set! As the user must have - the possibility to define the variable. 
- """ - optional = info.data.get("optional") - - if not optional and v is None: - raise ValueError("If optional is False, the env_key must be set.") - - return v - - -class EnvInput(BaseInputModel): - """ - The EnvInput type describes the input of an ENV variable - It should describe one env-variable the compute unit accesses. - - The default_value can be overridden, if the env_key is set. - """ - type: Literal["env"] - default_value: Optional[StrictStr] = Field( - default=None, validate_default=True) - - @field_validator("default_value") - def validate_default_value(cls, v, info): - """ - If optional == True, default_value must be set! - """ - optional = info.data.get("optional") - - if optional and v is None: - raise ValueError("If optional is True, default_value must be set.") - - return v - - -class FileInput(BaseInputModel): - """ - The FileInput type describes the input for files. - The file_path describes the path to a file on the S3 bucket, - it can be overriden by using the env_key, if set. - - This makes sense, if a user should be able to manually upload - files the compute units wants to access. It does not know the - path to the file while writing the defintion. - """ - type: Literal["file"] - file_path: Optional[StrictStr] = Field( - default=None, validate_default=True, - description="The default value of the FileInput type.\ - Can be overriden." - ) - - @field_validator("file_path") - def validate_file_path(cls, v, info): - """ - If optional == True, file_path must be set! - """ - - optional = info.data.get("optional") - - if optional and v is None: - raise ValueError("If optional is True, file_path must be set.") - - -class BaseOutputModel(BaseModel): - description: StrictStr - env_key: StrictStr = Field( - description="The env_key describes the key of the environment variable\ - which can be set to override the default value" - ) - - -class FileOutput(BaseOutputModel): - """ - The FileOutput type describes the output of a file. 
- The file_path describes the path to a file on the S3 bucket. - """ - type: Literal["file"] - file_path: StrictStr = Field( - desscription="The path to the file on the S3 bucket." - ) - - -class DBTableOutput(BaseOutputModel): - """ - The DBTableOutput type defines a table that provides output data. - The table_name refers to the output table name. - """ - type: Literal["db_table"] - table_name: StrictStr = Field( - description="The name of the output database table." + config: Optional[Dict[StrictStr, Optional[StrictStr]]] = Field( + default=None, + description="The configuration for the input values\ + (file_path, table_name, etc.)" ) -class DBTableInput(BaseInputModel): - """ - The DBTableInput type defines a table that provides input data. - The table_name can be overriden by using the env_key, if set. - - This makes sense, if a previous compute units output db_table should - be used as an input. This table_name is then not known while writing the - definition. - """ - type: Literal["db_table"] - table_name: Optional[StrictStr] = Field( - default=None, validate_default=True, - description="The default value of the DBTableInput type.\ - Can be overriden." - ) - - @field_validator("table_name") - def validate_table_name(cls, v, info): - """ - If optional == True, table_name must be set! 
- """ - - optional = info.data.get("optional") - - if optional and v is None: - raise ValueError("If optional is True, table_name must be set.") - - class Entrypoint(BaseModel): description: StrictStr - inputs: Dict[StrictStr, Union[EnvInput, FileInput, DBTableInput]] - outputs: Dict[StrictStr, Union[FileInput, DBTableOutput]] + envs: Optional[Dict[StrictStr, StrictStr]] = None + inputs: Dict[StrictStr, InputOutputModel] + outputs: Dict[StrictStr, InputOutputModel] class ComputeBlock(BaseModel): diff --git a/tests/test_config.py b/tests/test_config.py index 3a426f2..149812e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -32,16 +32,6 @@ def test_file_not_found(self): with self.assertRaises(FileNotFoundError): load_config("test.yaml") - def test_optional_default_not_set(self): - with self.assertRaises(ValueError): - load_config("optional_default_not_set.yaml", - config_path=self.TEST_CONFIG_FOLDER) - - def test_optional_env_key_not_set(self): - with self.assertRaises(ValueError): - load_config("optional_env_key_not_set.yaml", - config_path=self.TEST_CONFIG_FOLDER) - if __name__ == "__main__": unittest.main() diff --git a/tests/test_config_files/invalid_datatype.yaml b/tests/test_config_files/invalid_datatype.yaml index 8df5b73..e38fe43 100644 --- a/tests/test_config_files/invalid_datatype.yaml +++ b/tests/test_config_files/invalid_datatype.yaml @@ -6,45 +6,36 @@ docker_image: "https://ghcr.io/nlp-toolbox" entrypoints: topic_modelling: description: "Run topic modelling" - inputs: - language: - description: "The language to use" - type: "invalid_type" - env_key: "LANG" - optional: True - default_value: "de" + envs: + LANG: "de" + inputs: text_data: - description: "Text file. Can be uploaded by the user." - type: "file" - env_key: "TXT_SRC_PATH" - optional: False + description: "Text file. Must be uploaded by the user." 
+ type: "invalid_type" + config: + TXT_SRC_PATH: null db_data: description: "Information in a database" type: "db_table" - env_key: "DATA_TABLE_NAME" - optional: True + config: + DATA_TABLE_NAME: "test_db_table" outputs: topic_model: type: "file" description: "Topic model file" - env_key: "OUTPUT_PATH_TOPIC_MODEL" - # Missing file_path here will trigger validation error + config: + OUTPUT_PATH_TOPIC_MODEL: null run_durations: type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" + config: + DURATIONS_TABLE_NAME: "run_durations_table" analyze_runtime: description: "Analyze the runtimes" inputs: run_durations: - type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - optional: True + type: "db_table" outputs: csv_output: type: "file" - description: "A csv containing statistical information" - env_key: "CSV_OUTPUT_PATH" - file_path: "outputs/statistics.csv" + description: "A csv containing statistical information" diff --git a/tests/test_config_files/missing_fields.yaml b/tests/test_config_files/missing_fields.yaml new file mode 100644 index 0000000..db761de --- /dev/null +++ b/tests/test_config_files/missing_fields.yaml @@ -0,0 +1,42 @@ +name: "NLP toolbox" +description: "Contains NLP algorithms..." +author: "John Doe" +docker_image: "https://ghcr.io/nlp-toolbox" + +entrypoints: + topic_modelling: + envs: + LANGUAGE: "de" + inputs: + text_data: + description: "Text file. Can be uploaded by the user." + config: + TXT_SRC_PATH: null + db_data: + description: "Information in a database" + type: "db_table" + config: + DATA_TABLE_NAME: "nlp_information" + outputs: + topic_model: + description: "Topic model file" + config: + OUTPUT_PATH_TOPIC_MODEL: null + run_durations: + type: "db_table" + description: "Table that contains the run durations per day." 
+ config: + RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" + + analyze_runtime: + description: "Analyze the runtimes" + inputs: + run_durations: + config: + RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" + outputs: + csv_output: + type: "file" + description: "A csv containing statistical information" + config: + CSV_OUTPUT_PATH: "outputs/statistics.csv" diff --git a/tests/test_config_files/missing_required_fields.yaml b/tests/test_config_files/missing_required_fields.yaml deleted file mode 100644 index 5187f95..0000000 --- a/tests/test_config_files/missing_required_fields.yaml +++ /dev/null @@ -1,51 +0,0 @@ -name: "NLP toolbox" -description: "Contains NLP algorithms..." -author: "John Doe" -docker_image: "https://ghcr.io/nlp-toolbox" - -entrypoints: - topic_modelling: - description: "Run topic modelling" - inputs: - language: - description: "The language to use" - type: "env" - env_key: "LANG" - optional: True - default_value: "de" - text_data: - description: "Text file. Can be uploaded by the user." 
- type: "file" - env_key: "TXT_SRC_PATH" - optional: False - db_data: - description: "Information in a database" - type: "db_table" - env_key: "DATA_TABLE_NAME" - optional: True - outputs: - topic_model: - type: "file" - description: "Topic model file" - env_key: "OUTPUT_PATH_TOPIC_MODEL" - # Missing file_path here will trigger validation error - run_durations: - type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - - analyze_runtime: - description: "Analyze the runtimes" - inputs: - run_durations: - type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - optional: True - outputs: - csv_output: - type: "file" - description: "A csv containing statistical information" - env_key: "CSV_OUTPUT_PATH" - file_path: "outputs/statistics.csv" - diff --git a/tests/test_config_files/optional_default_not_set.yaml b/tests/test_config_files/optional_default_not_set.yaml deleted file mode 100644 index 83ac369..0000000 --- a/tests/test_config_files/optional_default_not_set.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: "NLP toolbox" -description: "Contains NLP algorithms..." -author: "John Doe" -docker_image: "https://ghcr.io/nlp-toolbox" - -entrypoints: - topic_modelling: - description: "Run topic modelling" - inputs: - language: - description: "The language to use" - type: "env" - env_key: "LANG" - optional: True - # Missing default_value here will trigger validation error - text_data: - description: "Text file. Can be uploaded by the user." - type: "file" - env_key: "TXT_SRC_PATH" - optional: False - db_data: - description: "Information in a database" - type: "db_table" - env_key: "DATA_TABLE_NAME" - table_name: "nlp_information" - optional: True - outputs: - topic_model: - type: "file" - description: "Topic model file" - env_key: "OUTPUT_PATH_TOPIC_MODEL" - file_path: "outputs/output.pkl" - run_durations: - description: "A table which contains the run durations per day." 
- type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - - analyze_runtime: - description: "Analyze the runtimes" - inputs: - run_durations: - description: "A table which contains the run durations per day." - type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - optional: True - outputs: - csv_output: - type: "file" - description: "A csv containing statistical information" - env_key: "CSV_OUTPUT_PATH" - file_path: "outputs/statistics.csv" - diff --git a/tests/test_config_files/optional_env_key_not_set.yaml b/tests/test_config_files/optional_env_key_not_set.yaml deleted file mode 100644 index b6e2040..0000000 --- a/tests/test_config_files/optional_env_key_not_set.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: "NLP toolbox" -description: "Contains NLP algorithms..." -author: "John Doe" -docker_image: "https://ghcr.io/nlp-toolbox" - -entrypoints: - topic_modelling: - description: "Run topic modelling" - inputs: - language: - description: "The language to use" - type: "env" - optional: False - text_data: - description: "Text file. Can be uploaded by the user." 
- type: "file" - env_key: "TXT_SRC_PATH" - optional: False - db_data: - description: "Information in a database" - type: "db_table" - env_key: "DATA_TABLE_NAME" - optional: True - outputs: - topic_model: - type: "file" - description: "Topic model file" - env_key: "OUTPUT_PATH_TOPIC_MODEL" - file_path: "test/test.pkj" - run_durations: - type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - - analyze_runtime: - description: "Analyze the runtimes" - inputs: - run_durations: - type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - optional: True - outputs: - csv_output: - type: "file" - description: "A csv containing statistical information" - env_key: "CSV_OUTPUT_PATH" - file_path: "outputs/statistics.csv" - diff --git a/tests/test_config_files/valid_config.yaml b/tests/test_config_files/valid_config.yaml index 5a15dd0..6d05169 100644 --- a/tests/test_config_files/valid_config.yaml +++ b/tests/test_config_files/valid_config.yaml @@ -6,47 +6,42 @@ docker_image: "https://ghcr.io/nlp-toolbox" entrypoints: topic_modelling: description: "Run topic modelling" + envs: + LANGUAGE: "de" inputs: - language: - description: "The language to use" - type: "env" - env_key: "LANG" - optional: True - default_value: "de" text_data: description: "Text file. Can be uploaded by the user." type: "file" - env_key: "TXT_SRC_PATH" - optional: False + config: + TXT_SRC_PATH: null db_data: description: "Information in a database" type: "db_table" - env_key: "DATA_TABLE_NAME" - table_name: "nlp_information" - optional: True + config: + DATA_TABLE_NAME: "nlp_information" outputs: topic_model: type: "file" description: "Topic model file" - env_key: "OUTPUT_PATH_TOPIC_MODEL" - file_path: "outputs/output.pkl" + config: + OUTPUT_PATH_TOPIC_MODEL: null run_durations: type: "db_table" description: "Table that contains the run durations per day." 
- env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" + config: + RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" analyze_runtime: description: "Analyze the runtimes" inputs: run_durations: + description: "Teble that contains all runtimes and dates" type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - optional: True + config: + RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" outputs: csv_output: type: "file" description: "A csv containing statistical information" - env_key: "CSV_OUTPUT_PATH" - file_path: "outputs/statistics.csv" + config: + CSV_OUTPUT_PATH: "outputs/statistics.csv" From 8cc227000d4c061fda4f8e726a11d5b8bc487273 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Thu, 7 Nov 2024 15:32:19 +0100 Subject: [PATCH 11/22] docs: update readme --- README.md | 37 +++++++++++------------ scystream/sdk/config/models.py | 15 ++++++--- tests/test_config_files/valid_config.yaml | 2 ++ 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 70138bf..50052fa 100644 --- a/README.md +++ b/README.md @@ -54,50 +54,47 @@ docker_image: "https://ghcr.io/nlp-toolbox" entrypoints: topic_modelling: description: "Run topic modelling" + envs: + LANGUAGE: "de" inputs: - language: - description: "The language to use" - type: "env" - env_key: "LANG" - optional: True - default_value: "de" text_data: description: "Text file. Can be uploaded by the user." 
type: "file" - env_key: "TXT_SRC_PATH" - optional: False + config: + TXT_SRC_PATH: null db_data: description: "Information in a database" type: "db_table" - env_key: "DATA_TABLE_NAME" - table_name: "nlp_information" - optional: True + config: + DATA_TABLE_NAME: "nlp_information" + DB_HOST: "time.rwth-aachen.de" + DB_PORT: 1234 outputs: topic_model: type: "file" description: "Topic model file" - env_key: "OUTPUT_PATH_TOPIC_MODEL" - file_path: "outputs/output.pkl" + config: + OUTPUT_PATH_TOPIC_MODEL: null run_durations: type: "db_table" description: "Table that contains the run durations per day." - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" + config: + RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" analyze_runtime: description: "Analyze the runtimes" inputs: run_durations: + description: "Teble that contains all runtimes and dates" type: "db_table" - env_key: "RUN_DURATIONS_TABLE_NAME" - table_name: "run_durations_nlp" - optional: True + config: + RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" outputs: csv_output: type: "file" description: "A csv containing statistical information" - env_key: "CSV_OUTPUT_PATH" - file_path: "outputs/statistics.csv" + config: + CSV_OUTPUT_PATH: "outputs/statistics.csv" ``` To read and validate such a config file u can proceed as follows: diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index f57b07f..18cdc9a 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -1,5 +1,6 @@ -from typing import Optional, Dict, Literal -from pydantic import BaseModel, StrictStr, field_validator, Field +from typing import Optional, Dict, Literal, Union +from pydantic import BaseModel, StrictStr, field_validator, Field, \ + StrictInt, StrictFloat FILE_TYPE_IDENTIFIER = "file" DB_TABLE_TYPE_IDENTIFIER = "db_table" @@ -12,9 +13,13 @@ class InputOutputModel(BaseModel): type: Literal[FILE_TYPE_IDENTIFIER, DB_TABLE_TYPE_IDENTIFIER] description: Optional[StrictStr] = None - 
config: Optional[Dict[StrictStr, Optional[StrictStr]]] = Field( - default=None, - description="The configuration for the input values\ + config: Optional[ + Dict[ + StrictStr, + Optional[Union[StrictStr, StrictInt, StrictFloat]] + ]] = Field( + default=None, + description="The configuration for the input values\ (file_path, table_name, etc.)" ) diff --git a/tests/test_config_files/valid_config.yaml b/tests/test_config_files/valid_config.yaml index 6d05169..e479e71 100644 --- a/tests/test_config_files/valid_config.yaml +++ b/tests/test_config_files/valid_config.yaml @@ -19,6 +19,8 @@ entrypoints: type: "db_table" config: DATA_TABLE_NAME: "nlp_information" + DB_HOST: "time.rwth-aachen.de" + DB_PORT: 1234 outputs: topic_model: type: "file" From 61738de87e69b7f2a032d5daca5cbd8712d2c5fc Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Thu, 7 Nov 2024 16:40:04 +0100 Subject: [PATCH 12/22] style: add better comments --- scystream/sdk/config/models.py | 42 +++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index 18cdc9a..22859c9 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -11,6 +11,18 @@ class InputOutputModel(BaseModel): + """ + Represents configuration for inputs or outputs in a ComputeBlock. + + The configuration is defined as a dictionary with key-value pairs, where: + - The key is the name of an environment variable (e.., `FILE_PATH`, + `TABLE_NAME`). + - The value is the default value for that environment variable, which can + be a string, integer, or float. + + If a value is explicitly set to `null`, validation will fail unless the + ENV-Variable is manually set by the ComputeBlock user. 
+ """ type: Literal[FILE_TYPE_IDENTIFIER, DB_TABLE_TYPE_IDENTIFIER] description: Optional[StrictStr] = None config: Optional[ @@ -25,13 +37,41 @@ class InputOutputModel(BaseModel): class Entrypoint(BaseModel): + """ + Represents an entrypoint within a ComputeBlock. + + An entrypoint includes: + - A description of the entrypoint's purpose. + - A dictionary of environment variables (`envs`), where each key-value + pair represents an environment variable and its default value. + - These variables should be shared variables across the entrypoint + - Input and output configurations, each described by the + `InputOutputModel`. + + If an environment variable’s value is set to `None` in the configuration, + the ComputeBlock user must provide that variable during runtime, or else + the process will fail. + """ description: StrictStr - envs: Optional[Dict[StrictStr, StrictStr]] = None + envs: Optional[Dict[StrictStr, StrictStr] + ] = None # Todo can be set to Null inputs: Dict[StrictStr, InputOutputModel] outputs: Dict[StrictStr, InputOutputModel] class ComputeBlock(BaseModel): + """ + Represents a ComputeBlock configuration, which describes the compute + process, including entrypoints, inputs, and outputs. + + A ComputeBlock is defined by: + - A name, description, and author. + - One or more entrypoints that specify how data is passed into and out of + the compute process. + - Optionally, a Docker image to specify the execution environment. + + At least one entrypoint must be defined for the ComputeBlock to be valid. 
+ """ name: StrictStr description: StrictStr author: StrictStr From c5434011074d0caea5550513117bcc0dad12e705 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Sat, 9 Nov 2024 23:41:43 +0100 Subject: [PATCH 13/22] feat: add more datatypes for env keys --- scystream/sdk/config/models.py | 12 ++++++++---- tests/test_config_files/valid_config.yaml | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index 22859c9..cca2008 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Literal, Union +from typing import Optional, Dict, Literal, Union, List from pydantic import BaseModel, StrictStr, field_validator, Field, \ StrictInt, StrictFloat @@ -28,7 +28,7 @@ class InputOutputModel(BaseModel): config: Optional[ Dict[ StrictStr, - Optional[Union[StrictStr, StrictInt, StrictFloat]] + Optional[Union[StrictStr, StrictInt, StrictFloat, List, bool]] ]] = Field( default=None, description="The configuration for the input values\ @@ -53,8 +53,12 @@ class Entrypoint(BaseModel): the process will fail. 
""" description: StrictStr - envs: Optional[Dict[StrictStr, StrictStr] - ] = None # Todo can be set to Null + envs: Optional[ + Dict[ + StrictStr, + Optional[Union[StrictStr, StrictInt, StrictFloat, List, bool]] + ] + ] = None inputs: Dict[StrictStr, InputOutputModel] outputs: Dict[StrictStr, InputOutputModel] diff --git a/tests/test_config_files/valid_config.yaml b/tests/test_config_files/valid_config.yaml index e479e71..ed443a1 100644 --- a/tests/test_config_files/valid_config.yaml +++ b/tests/test_config_files/valid_config.yaml @@ -21,6 +21,8 @@ entrypoints: DATA_TABLE_NAME: "nlp_information" DB_HOST: "time.rwth-aachen.de" DB_PORT: 1234 + TXT_SRC_PATH: ["test.txt", "hi.txt"] # for testing purposes + IS_INDEXED: True outputs: topic_model: type: "file" From 771b5b1e58d2d7ca207d453b5a32274ac296301f Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Sun, 10 Nov 2024 19:23:46 +0100 Subject: [PATCH 14/22] feat: add basic validation --- README.md | 123 ++++++++++++++++------ scystream/sdk/core.py | 33 ++++-- scystream/sdk/env/settings.py | 30 ++++++ setup.py | 3 +- tests/test_config_files/valid_config.yaml | 2 +- tests/test_core.py | 2 +- tests/test_settings.py | 50 +++++++++ 7 files changed, 202 insertions(+), 41 deletions(-) create mode 100644 scystream/sdk/env/settings.py create mode 100644 tests/test_settings.py diff --git a/README.md b/README.md index 50052fa..a5a87dc 100644 --- a/README.md +++ b/README.md @@ -8,40 +8,27 @@ You can install the package via pip once it's published: pip install scystream-sdk ``` -## Usage - -```python3 -from scystream.sdk.core import entrypoint -from scystream.sdk.scheduler import Scheduler - - -@entrypoint -def example_task(): - print("Executing example_task...") +### Compute Blocks and their configs +One of the central concepts of scystream are the so-called **Compute Blocks**. +A Compute Block describes an independent programm, that acts as some kind of worker +which will be scheduled using the scystream-core application. 
+This worker executes a task (e.g. a NLP task, a crwaling task). -@entrypoint -def another_task(task_name): - print(f"Executing another_task with task name: {task_name}") +Each worker can have multiple entrypoints, each aiming to solve one task. +These entrypoints can be configured from the outside using the **Settings**. +These are basically ENV-Variables, which will be parsed & validated using pydantic. +This SDK aims to implement helper functions and other requirements we expect each +Compute Block to have. -def main(): - Scheduler.list_entrypoints() - Scheduler.execute_function("example_task") - Scheduler.execute_function("another_task", "ScheduledTask") +To understand the concept of such a Compute Block even more, take a look at the +config below. - -if __name__ == "__main__": - main() - -``` - -### Compute Block Config Files We expect every repository which will be used within the scystream application -to contain a `Compute Block Config File`, the `cbc.yaml`, within the root directory. - -This yaml-file describes the compute block itself. -It shows the entrypoints, their inputs and outputs. +to contain a **Compute Block Config File**, the `cbc.yaml`, within the root directory. +This `cbc.yaml` will be used to define the entrypoints, the inputs & outputs each +Compute Block offers, necessary for the scystream-frontend to understand. This is an example `cbc.yaml`: @@ -85,7 +72,7 @@ entrypoints: description: "Analyze the runtimes" inputs: run_durations: - description: "Teble that contains all runtimes and dates" + description: "Table that contains all runtimes and dates" type: "db_table" config: RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" @@ -97,7 +84,10 @@ entrypoints: CSV_OUTPUT_PATH: "outputs/statistics.csv" ``` -To read and validate such a config file u can proceed as follows: +For now, you have to write this config file on your own. However, at some +point you will be able to generate this config from your code. 
+ +To read and validate such a config file you can proceed as follows: ```python3 from scystream.sdk.config.config_loader import load_config @@ -121,15 +111,86 @@ load_config(config_file_name="test.yaml", config_path="configs/") the `config_path` is the path relative to your root directory +## Basic Usage of the SDK + +```python3 +from scystream.sdk.core import entrypoint +from scystream.sdk.scheduler import Scheduler + + +@entrypoint +def example_task(): + print("Executing example_task...") + + +@entrypoint +def another_task(task_name): + print(f"Executing another_task with task name: {task_name}") + + +def main(): + Scheduler.list_entrypoints() + Scheduler.execute_function("example_task") + Scheduler.execute_function("another_task", "ScheduledTask") + + +if __name__ == "__main__": + main() + +``` + +## Defining Settings and Using them. + +Earlier, we already wrote about **Settings**. +Each Input & Output can be configured using these settings. +There are also Global Settings, refered to as `envs` in the `cbc.yaml` + +Below you can find a simple example of how we define & validate these settings. +Therefore you should use the `BaseENVSettings` class. + +```python3 +from scystream.sdk.core import entrypoint +from scystream.sdk.env.settings import BaseENVSettings + +class GlobalSettings(BaseENVSettings): + LANGUAGE: str = "de" + +class TopicModellingEntrypointSettings(BaseENVSettings): + TXT_SRC_PATH: str # if no default provided, setting this ENV manually is a MUST + +@entrypoint(TopicModellingEntrypointSettings) # Pass it to the Entrypoint +def topic_modelling(settings): + print(f"Running topic modelling, using file: {settings.TXT_SRC_PATH}") + +@entrypoint +def test_entrypint(): + print("This entrypoint does not have any configs.") +``` + +We recommend defining your `GlobalSettings` in an extra file and "exporting" the loaded +Settings to make them accessible to other files. 
+See an example below: + +```python3 +from scystream.sdk.env.settings import BaseENVSettings + +class GlobalSettings(BaseENVSettings): + LANGUAGE: str = "de" + +GLOBAL_SETTINGS = GlobalSettings.load_settings() +``` + +You can then use the loaded `GLOBAL_SETTINGS` in your other files, by importing them. ## Development of the SDK ### Installation -1. Create a venv +1. Create a venv and use it ```bash python3 -m venv .venv +source .venv/bin/activate ``` 2. Install the package within the venv diff --git a/scystream/sdk/core.py b/scystream/sdk/core.py index 3965d1c..07ee5b1 100644 --- a/scystream/sdk/core.py +++ b/scystream/sdk/core.py @@ -1,15 +1,34 @@ import functools +from typing import Callable, Type, Optional +from .env.settings import BaseENVSettings +from pydantic import ValidationError + _registered_functions = {} -def entrypoint(func): - """Decorator to mark a function as an entrypoint.""" - @functools.wraps(func) - def wrapper(*args, **kwargs): - return func(*args, **kwargs) - _registered_functions[func.__name__] = func - return wrapper +def entrypoint(settings_class: Optional[Type[BaseENVSettings]] = None): + """ + Decorator to mark a function as an entrypoint. + It also loads and injects the settings of the entrypoint. 
+ """ + def decorator(func: Callable): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if settings_class is not None: + # Load settings + try: + settings = settings_class.load_settings() + except ValidationError as e: + raise ValueError(f"Invalid environment configuration: {e}") + + return func(settings, *args, **kwargs) + else: + return func(*args, **kwargs) + + _registered_functions[func.__name__] = wrapper + return wrapper + return decorator def get_registered_functions(): diff --git a/scystream/sdk/env/settings.py b/scystream/sdk/env/settings.py new file mode 100644 index 0000000..9eab887 --- /dev/null +++ b/scystream/sdk/env/settings.py @@ -0,0 +1,30 @@ +from pydantic_settings import BaseSettings, SettingsConfigDict +from typing import Type + +ENV_FILE_ENCODING = "utf-8" + + +class BaseENVSettings(BaseSettings): + """ + This class acts as the BaseClass which can be used to define custom + ENV-Variables which can be used across the ComputeBlock & for entrypoints + This definition, and pydantic, will then take care of validating the envs + """ + + model_config = SettingsConfigDict( + env_file_encoding=ENV_FILE_ENCODING, + case_sensitive=True, + extra="ignore" + ) + + @classmethod + def load_settings( + cls: Type["BaseENVSettings"], + env_file: str = ".env" + ) -> "BaseENVSettings": + """ + load_settings loads the env file. The name of the env_file can be + passed as an argument. 
+ Returns the parsed ENVs + """ + return cls(_env_file=env_file, _env_file_encoding=ENV_FILE_ENCODING) diff --git a/setup.py b/setup.py index 1a176d3..411a077 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,8 @@ packages=find_packages(), install_requires=[ "pydantic>=2.9.2", - "PyYAML>=6.0.2" + "PyYAML>=6.0.2", + "pydantic-settings>=2.6.1" ], classifiers=[ "Programming Language :: Python :: 3", diff --git a/tests/test_config_files/valid_config.yaml b/tests/test_config_files/valid_config.yaml index ed443a1..73d0c3c 100644 --- a/tests/test_config_files/valid_config.yaml +++ b/tests/test_config_files/valid_config.yaml @@ -39,7 +39,7 @@ entrypoints: description: "Analyze the runtimes" inputs: run_durations: - description: "Teble that contains all runtimes and dates" + description: "Table that contains all runtimes and dates" type: "db_table" config: RUN_DURATIONS_TABLE_NAME: "run_durations_nlp" diff --git a/tests/test_core.py b/tests/test_core.py index 775ae75..f9a19ec 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,7 +4,7 @@ class TestEntrypoint(unittest.TestCase): def test_entrypoint_registration(self): - @entrypoint + @entrypoint() def dummy_function(): return "Hello" diff --git a/tests/test_settings.py b/tests/test_settings.py new file mode 100644 index 0000000..755f6d9 --- /dev/null +++ b/tests/test_settings.py @@ -0,0 +1,50 @@ +import unittest +import os +from scystream.sdk.core import entrypoint +from scystream.sdk.env.settings import BaseENVSettings + + +class WithDefaultSettings(BaseENVSettings): + DUMMY_SETTING: str = "this is a dummy setting" + + +class NoDefaultSetting(BaseENVSettings): + DUMMY_SETTING: str + + +class TestSettings(unittest.TestCase): + def test_entrypoint_with_setting_default(self): + @entrypoint(WithDefaultSettings) + def with_default_settings(settings): + return settings.DUMMY_SETTING + + result = with_default_settings() + self.assertEqual(result, "this is a dummy setting") + + """ + environment is set + """ + 
os.environ["DUMMY_SETTING"] = "overridden setting" + result = with_default_settings() + self.assertEqual(result, "overridden setting") + del os.environ["DUMMY_SETTING"] + + def test_entrypoint_with_no_setting_default(self): + @entrypoint(NoDefaultSetting) + def with_no_default_settings(settings): + return settings.DUMMY_SETTING + + with self.assertRaises(ValueError): + with_no_default_settings() + + """ + environemnt is set + """ + os.environ["DUMMY_SETTING"] = "required setting" + result = with_no_default_settings() + self.assertEqual(result, "required setting") + del os.environ["DUMMY_SETTING"] + + +if __name__ == "__main__": + unittest.main() From dde0e500b661aa16c4a6abade78c056639ae72cf Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 11 Nov 2024 03:27:19 +0100 Subject: [PATCH 15/22] feat: add validation --- scystream/sdk/core.py | 2 +- scystream/sdk/env/settings.py | 66 +++++++++++++---- tests/test_settings.py | 132 ++++++++++++++++++++++++++++------ 3 files changed, 163 insertions(+), 37 deletions(-) diff --git a/scystream/sdk/core.py b/scystream/sdk/core.py index 07ee5b1..bd1414f 100644 --- a/scystream/sdk/core.py +++ b/scystream/sdk/core.py @@ -18,7 +18,7 @@ def wrapper(*args, **kwargs): if settings_class is not None: # Load settings try: - settings = settings_class.load_settings() + settings = settings_class.from_env() except ValidationError as e: raise ValueError(f"Invalid environment configuration: {e}") diff --git a/scystream/sdk/env/settings.py b/scystream/sdk/env/settings.py index 9eab887..d1aba76 100644 --- a/scystream/sdk/env/settings.py +++ b/scystream/sdk/env/settings.py @@ -1,16 +1,18 @@ +from pathlib import Path from pydantic_settings import BaseSettings, SettingsConfigDict -from typing import Type +from typing import Union, List, get_type_hints +from pydantic import Field ENV_FILE_ENCODING = "utf-8" class BaseENVSettings(BaseSettings): """ - This class acts as the BaseClass which can be used to define custom - ENV-Variables which can be used 
across the ComputeBlock & for entrypoints - This definition, and pydantic, will then take care of validating the envs - """ + Allow kwargs to propagate to any fields whose default factory extends + BaseSettings, + This is mostly to allow _env_file to be passed through. + """ model_config = SettingsConfigDict( env_file_encoding=ENV_FILE_ENCODING, case_sensitive=True, @@ -18,13 +20,51 @@ class BaseENVSettings(BaseSettings): ) @classmethod - def load_settings( - cls: Type["BaseENVSettings"], - env_file: str = ".env" - ) -> "BaseENVSettings": + def from_env( + cls, + env_file: Union[str, Path, List[Union[str, Path]]] = None, + *args, + **kwargs + ): + return cls(propagate_kwargs={"_env_file": env_file}, *args, **kwargs) + + @classmethod + def _basesettings_fields(cls): """ - load_settings loads the env file. The name of the env_file can be - passed as an argument. - Returns the parsed ENVs + :return a dict of field_name: default_factory for any fields that + extend BaseSettings """ - return cls(_env_file=env_file, _env_file_encoding=ENV_FILE_ENCODING) + type_hints = get_type_hints(cls) + return { + name: typ for name, typ in type_hints.items() + if isinstance(typ, type) and issubclass(typ, BaseSettings) + + } + + @classmethod + def _propagate_kwargs(cls, kwargs): + """ + Any settings that extend BaseSettings be passed the kwargs. + """ + sub_settings = cls._basesettings_fields() + for name, field_type in sub_settings.items(): + kwargs[name] = field_type(**kwargs) + return kwargs + + def __init_subclass__(cls, **kwargs): + """ + Automatically set up nested settings fields with default_factory. 
+ """ + super().__init_subclass__(**kwargs) + type_hints = get_type_hints(cls) + for field_name, field_type in type_hints.items(): + if isinstance(field_type, type) and issubclass( + field_type, BaseSettings): + # Set a default factory for nested BaseSettings fields + default_field = Field(default_factory=field_type) + setattr(cls, field_name, default_field) + + def __init__(self, propagate_kwargs=None, *args, **kwargs): + if propagate_kwargs: + kwargs = self._propagate_kwargs(propagate_kwargs) + super().__init__(*args, **kwargs) diff --git a/tests/test_settings.py b/tests/test_settings.py index 755f6d9..4aceebd 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -2,48 +2,134 @@ import os from scystream.sdk.core import entrypoint from scystream.sdk.env.settings import BaseENVSettings +from scystream.sdk.scheduler import Scheduler + + +class DummyInputSettings(BaseENVSettings): + DUMMY_INPUT: str = "test" class WithDefaultSettings(BaseENVSettings): - DUMMY_SETTING: str = "this is a dummy setting" + DUMMY_GLOBAL: str = "dummy global var" + + dummy_input_settings: DummyInputSettings + + +class DummyInputSettingsNoDef(BaseENVSettings): + DUMMY_INPUT: str + + +class WithoutDefaultSettings(BaseENVSettings): + DUMMY_GLOBAL: str + + dummy_input_settings_no_def: DummyInputSettingsNoDef + + +class WithoutDefaultNoNesting(BaseENVSettings): + TEST: str = "teststr" + MUST_SET: str + + +class SubOne(BaseENVSettings): + ONE: str + TWO: str + + +class SubTwo(BaseENVSettings): + TEST: str + NO_DEF: str -class NoDefaultSetting(BaseENVSettings): - DUMMY_SETTING: str +class TwoSubclasses(BaseENVSettings): + GLOBAL: str + + input_one: SubOne + input_two: SubTwo class TestSettings(unittest.TestCase): def test_entrypoint_with_setting_default(self): @entrypoint(WithDefaultSettings) def with_default_settings(settings): - return settings.DUMMY_SETTING + return settings.dummy_input_settings.DUMMY_INPUT result = with_default_settings() - self.assertEqual(result, "this is a 
dummy setting") + self.assertEqual(result, "test") - """ - environment is set - """ - os.environ["DUMMY_SETTING"] = "overridden setting" + # set environ + os.environ["DUMMY_INPUT"] = "overridden setting" result = with_default_settings() + # check if overriding works self.assertEqual(result, "overridden setting") - del os.environ["DUMMY_SETTING"] - def test_entrypoint_with_no_setting_default(self): - @entrypoint(NoDefaultSetting) - def with_no_default_settings(settings): - return settings.DUMMY_SETTING + del os.environ["DUMMY_INPUT"] + + def test_entrypoint_no_setting_default_one(self): + @entrypoint(WithoutDefaultSettings) + def without_def_settings(settings): + print("test...") + # do we fail if environments not set with self.assertRaises(ValueError): - with_no_default_settings() - - """ - environemnt is set - """ - os.environ["DUMMY_SETTING"] = "required setting" - result = with_no_default_settings() - self.assertEqual(result, "required setting") - del os.environ["DUMMY_SETTING"] + Scheduler.execute_function("without_def_settings") + + def test_entrypoint_no_setting_default_two(self): + @entrypoint(WithoutDefaultSettings) + def without_def_settings(settings): + return ( + settings.DUMMY_GLOBAL, + settings.dummy_input_settings_no_def.DUMMY_INPUT + ) + + # set environments + os.environ["DUMMY_GLOBAL"] = "dummy global" + os.environ["DUMMY_INPUT"] = "dummy input" + + # check if environments have been set + result = without_def_settings() + self.assertEqual(result[0], "dummy global") + self.assertEqual(result[1], "dummy input") + + del os.environ["DUMMY_GLOBAL"] + del os.environ["DUMMY_INPUT"] + + def test_entrypoint_no_setting_defautl_three(self): + @entrypoint(WithoutDefaultNoNesting) + def no_nesting(settings): + print("testing...") + + with self.assertRaises(ValueError): + Scheduler.execute_function("no_nesting") + + def test_two_subs(self): + @entrypoint(TwoSubclasses) + def two_subs(settings): + return ( + settings.GLOBAL, + settings.input_one.ONE, + 
settings.input_one.TWO, + settings.input_two.TEST, + settings.input_two.NO_DEF + ) + + os.environ["GLOBAL"] = "global" + os.environ["ONE"] = "one" + os.environ["TWO"] = "two" + os.environ["TEST"] = "test" + os.environ["NO_DEF"] = "no_def" + + result = two_subs() + self.assertEqual(result[0], "global") + self.assertEqual(result[1], "one") + self.assertEqual(result[2], "two") + self.assertEqual(result[3], "test") + self.assertEqual(result[4], "no_def") + + del os.environ["GLOBAL"] + del os.environ["ONE"] + del os.environ["TWO"] + del os.environ["TEST"] + del os.environ["NO_DEF"] if __name__ == "__main__": From 1e797700232b8a3eb0b6a4be7b6767fca4b82be7 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 11 Nov 2024 03:38:26 +0100 Subject: [PATCH 16/22] docs: update readme --- README.md | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index a5a87dc..c8f39d1 100644 --- a/README.md +++ b/README.md @@ -152,36 +152,29 @@ Therefore you should use the `BaseENVSettings` class. 
from scystream.sdk.core import entrypoint from scystream.sdk.env.settings import BaseENVSettings -class GlobalSettings(BaseENVSettings): - LANGUAGE: str = "de" +class TextDataInputSettings(BaseENVSettings): + TXT_SRC_PATH: str # no default provided, manual setting is a MUST + +class DBDataInputSettings(BaseENVSettings): + DATA_TABLE_NAME: str = "nlp_information" + DB_HOST: str = "time.rwth-aachen.de" + DB_PORT: str = 1234 class TopicModellingEntrypointSettings(BaseENVSettings): - TXT_SRC_PATH: str # if no default provided, setting this ENV manually is a MUST + LANGUAGE: str = "de" + + text_data: TextDataInputSettings + db_data: DBDataInputSettings @entrypoint(TopicModellingEntrypointSettings) # Pass it to the Entrypoint def topic_modelling(settings): - print(f"Running topic modelling, using file: {settings.TXT_SRC_PATH}") + print(f"Running topic modelling, using file: {settings.text_data.TXT_SRC_PATH}") @entrypoint def test_entrypint(): print("This entrypoint does not have any configs.") ``` -We recommend defining your `GlobalSettings` in an extra file and "exporting" the loaded -Settings to make them accessible to other files. -See an example below: - -```python3 -from scystream.sdk.env.settings import BaseENVSettings - -class GlobalSettings(BaseENVSettings): - LANGUAGE: str = "de" - -GLOBAL_SETTINGS = GlobalSettings.load_settings() -``` - -You can then use the loaded `GLOBAL_SETTINGS` in your other files, by importing them. 
- ## Development of the SDK ### Installation From f3553ad80e7f04b2d9b7b587e725279ed4f6ce27 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 11 Nov 2024 14:50:37 +0100 Subject: [PATCH 17/22] style: rename base class --- README.md | 22 +++++++++++++--------- scystream/sdk/core.py | 6 +++--- scystream/sdk/env/settings.py | 6 +++++- tests/test_settings.py | 18 +++++++++--------- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index c8f39d1..65034fa 100644 --- a/README.md +++ b/README.md @@ -118,12 +118,12 @@ from scystream.sdk.core import entrypoint from scystream.sdk.scheduler import Scheduler -@entrypoint +@entrypoint() def example_task(): print("Executing example_task...") -@entrypoint +@entrypoint() def another_task(task_name): print(f"Executing another_task with task name: {task_name}") @@ -146,35 +146,39 @@ Each Input & Output can be configured using these settings. There are also Global Settings, refered to as `envs` in the `cbc.yaml` Below you can find a simple example of how we define & validate these settings. -Therefore you should use the `BaseENVSettings` class. +Therefore you should use the `EnvSettings` class. 
```python3 from scystream.sdk.core import entrypoint -from scystream.sdk.env.settings import BaseENVSettings +from scystream.sdk.env.settings import EnvSettings -class TextDataInputSettings(BaseENVSettings): +class TextDataInputSettings(EnvSettings): TXT_SRC_PATH: str # no default provided, manual setting is a MUST -class DBDataInputSettings(BaseENVSettings): +class DBDataInputSettings(EnvSettings): DATA_TABLE_NAME: str = "nlp_information" DB_HOST: str = "time.rwth-aachen.de" DB_PORT: str = 1234 -class TopicModellingEntrypointSettings(BaseENVSettings): +class TopicModellingEntrypointSettings(EnvSettings): LANGUAGE: str = "de" text_data: TextDataInputSettings db_data: DBDataInputSettings @entrypoint(TopicModellingEntrypointSettings) # Pass it to the Entrypoint -def topic_modelling(settings): +def topic_modelling(settings): # The settings param is automatically injected to your function, you can use it print(f"Running topic modelling, using file: {settings.text_data.TXT_SRC_PATH}") -@entrypoint +@entrypoint() def test_entrypint(): print("This entrypoint does not have any configs.") ``` +Of course, you will also be able to use your settings in other files/directories. +For that, just import your desired setting and use the `get_settings()` function. +It will load the configurations correctly. + ## Development of the SDK ### Installation diff --git a/scystream/sdk/core.py b/scystream/sdk/core.py index bd1414f..5dd8b12 100644 --- a/scystream/sdk/core.py +++ b/scystream/sdk/core.py @@ -1,13 +1,13 @@ import functools from typing import Callable, Type, Optional -from .env.settings import BaseENVSettings +from .env.settings import EnvSettings from pydantic import ValidationError _registered_functions = {} -def entrypoint(settings_class: Optional[Type[BaseENVSettings]] = None): +def entrypoint(settings_class: Optional[Type[EnvSettings]] = None): """ Decorator to mark a function as an entrypoint. It also loads and injects the settings of the entrypoint. 
@@ -18,7 +18,7 @@ def wrapper(*args, **kwargs): if settings_class is not None: # Load settings try: - settings = settings_class.from_env() + settings = settings_class.get_settings() except ValidationError as e: raise ValueError(f"Invalid environment configuration: {e}") diff --git a/scystream/sdk/env/settings.py b/scystream/sdk/env/settings.py index d1aba76..b217b93 100644 --- a/scystream/sdk/env/settings.py +++ b/scystream/sdk/env/settings.py @@ -6,7 +6,7 @@ ENV_FILE_ENCODING = "utf-8" -class BaseENVSettings(BaseSettings): +class EnvSettings(BaseSettings): """ Allow kwargs to propagate to any fields whose default factory extends BaseSettings, @@ -51,6 +51,10 @@ def _propagate_kwargs(cls, kwargs): kwargs[name] = field_type(**kwargs) return kwargs + @classmethod + def get_settings(cls): + return cls.from_env(env_file=".env") + def __init_subclass__(cls, **kwargs): """ Automatically set up nested settings fields with default_factory. diff --git a/tests/test_settings.py b/tests/test_settings.py index 4aceebd..b6144d2 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -1,46 +1,46 @@ import unittest import os from scystream.sdk.core import entrypoint -from scystream.sdk.env.settings import BaseENVSettings +from scystream.sdk.env.settings import EnvSettings from scystream.sdk.scheduler import Scheduler -class DummyInputSettings(BaseENVSettings): +class DummyInputSettings(EnvSettings): DUMMY_INPUT: str = "test" -class WithDefaultSettings(BaseENVSettings): +class WithDefaultSettings(EnvSettings): DUMMY_GLOBAL: str = "dummy global var" dummy_input_settings: DummyInputSettings -class DummyInputSettingsNoDef(BaseENVSettings): +class DummyInputSettingsNoDef(EnvSettings): DUMMY_INPUT: str -class WithoutDefaultSettings(BaseENVSettings): +class WithoutDefaultSettings(EnvSettings): DUMMY_GLOBAL: str dummy_input_settings_no_def: DummyInputSettingsNoDef -class WithoutDefaultNoNesting(BaseENVSettings): +class WithoutDefaultNoNesting(EnvSettings): TEST: str = 
"teststr" MUST_SET: str -class SubOne(BaseENVSettings): +class SubOne(EnvSettings): ONE: str TWO: str -class SubTwo(BaseENVSettings): +class SubTwo(EnvSettings): TEST: str NO_DEF: str -class TwoSubclasses(BaseENVSettings): +class TwoSubclasses(EnvSettings): GLOBAL: str input_one: SubOne From bf8936164c819a8bc1a47e5043ab35b2820fef40 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Mon, 11 Nov 2024 22:00:58 +0100 Subject: [PATCH 18/22] feat: add input output abstraction to settings --- scystream/sdk/config/config_loader.py | 23 ++++++++ scystream/sdk/config/models.py | 9 ++- scystream/sdk/core.py | 81 ++++++++++++++++++++++++++- scystream/sdk/env/settings.py | 16 ++++++ scystream/sdk/scheduler.py | 2 +- 5 files changed, 124 insertions(+), 7 deletions(-) diff --git a/scystream/sdk/config/config_loader.py b/scystream/sdk/config/config_loader.py index f68e565..13760e2 100644 --- a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -7,6 +7,18 @@ CONFIG_FILE_DEFAULT_NAME = "cbc.yaml" +def _remove_empty_dicts(data): + """ + Remove keys with empty dictionaries from a nested structure. 
+ """ + if isinstance(data, dict): + return {k: _remove_empty_dicts(v) for k, v in data.items() if v != {}} + elif isinstance(data, list): + return [_remove_empty_dicts(i) for i in data] + else: + return data + + def load_config( config_file_name: str = CONFIG_FILE_DEFAULT_NAME, config_path: Union[str, Path] = None @@ -18,11 +30,22 @@ def load_config( try: file = _find_and_load_config(config_file_name, config_path) block = ComputeBlock(**file) + # TODO: Check if envs && input/output configs correspond to the + # loaded one return block except ValidationError as e: raise ValueError(f"Configuration file validation error: {e}") +def generate_yaml_from_compute_block( + compute_block: ComputeBlock, + output_path: Path +): + cleaned_data = _remove_empty_dicts(compute_block.dict()) + with output_path.open("w") as file: + yaml.dump(cleaned_data, file, default_flow_style=False) + + def _find_and_load_config( config_file_name: str, config_path: Union[str, Path] = None diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index cca2008..bcc67de 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -4,6 +4,8 @@ FILE_TYPE_IDENTIFIER = "file" DB_TABLE_TYPE_IDENTIFIER = "db_table" +# TODO: reevaluate the identifier +TODO_TYPE_IDENTIFIER = "TODO: SetType" """ This file contains the schema definition for the config file. @@ -23,7 +25,8 @@ class InputOutputModel(BaseModel): If a value is explicitly set to `null`, validation will fail unless the ENV-Variable is manually set by the ComputeBlock user. 
""" - type: Literal[FILE_TYPE_IDENTIFIER, DB_TABLE_TYPE_IDENTIFIER] + type: Literal[FILE_TYPE_IDENTIFIER, + DB_TABLE_TYPE_IDENTIFIER, TODO_TYPE_IDENTIFIER] description: Optional[StrictStr] = None config: Optional[ Dict[ @@ -59,8 +62,8 @@ class Entrypoint(BaseModel): Optional[Union[StrictStr, StrictInt, StrictFloat, List, bool]] ] ] = None - inputs: Dict[StrictStr, InputOutputModel] - outputs: Dict[StrictStr, InputOutputModel] + inputs: Optional[Dict[StrictStr, InputOutputModel]] = None + outputs: Optional[Dict[StrictStr, InputOutputModel]] = None class ComputeBlock(BaseModel): diff --git a/scystream/sdk/core.py b/scystream/sdk/core.py index 5dd8b12..58cf18d 100644 --- a/scystream/sdk/core.py +++ b/scystream/sdk/core.py @@ -1,8 +1,11 @@ import functools - -from typing import Callable, Type, Optional +from typing import Callable, Type, Optional, Union from .env.settings import EnvSettings from pydantic import ValidationError +from scystream.sdk.config.models import ComputeBlock, Entrypoint, \ + InputOutputModel +from pydantic_core import PydanticUndefinedType +from scystream.sdk.env.settings import InputSettings, OutputSettings _registered_functions = {} @@ -18,6 +21,9 @@ def wrapper(*args, **kwargs): if settings_class is not None: # Load settings try: + # TODO: 1. LoadSettings + # TODO: 2. Generate config from settings (only for the entrypoint) + # TODO: 3. 
Validate if generated config and given config are same settings = settings_class.get_settings() except ValidationError as e: raise ValueError(f"Invalid environment configuration: {e}") @@ -26,11 +32,80 @@ def wrapper(*args, **kwargs): else: return func(*args, **kwargs) - _registered_functions[func.__name__] = wrapper + _registered_functions[func.__name__] = { + "function": wrapper, + "settings": settings_class + } return wrapper return decorator def get_registered_functions(): """Returns a dictionary of registered entrypoint functions.""" + print(_registered_functions) return _registered_functions + + +def _get_pydantic_default_value_or_none(value): + if type(value.default) is PydanticUndefinedType: + return None + return value.default + + +def _build_input_output_dict_from_class( + subject: Union[InputSettings, OutputSettings] +): + config_dict = {} + for key, value in subject.model_fields.items(): + config_dict[key] = _get_pydantic_default_value_or_none(value) + return InputOutputModel( + type="TODO: SetType", + description="", + config=config_dict + ) + + +def generate_compute_block() -> ComputeBlock: + """ + Converts the Settings to a ComputeBlock + """ + entrypoints = {} + for entrypoint, func in _registered_functions.items(): + envs = {} + inputs = {} + outputs = {} + + if func["settings"]: + entrypoint_settings_class = func["settings"] + for key, value in entrypoint_settings_class.model_fields.items(): + if ( + isinstance(value.default_factory, type) and + issubclass(value.default_factory, InputSettings) + ): + inputs[key] = _build_input_output_dict_from_class( + value.default_factory + ) + elif ( + isinstance(value.default_factory, type) and + issubclass(value.default_factory, OutputSettings) + ): + outputs[key] = _build_input_output_dict_from_class( + value.default_factory + ) + else: + envs[key] = _get_pydantic_default_value_or_none(value) + + entrypoints[entrypoint] = Entrypoint( + description="", + envs=envs, + inputs=inputs, + outputs=outputs + ) + + 
return ComputeBlock( + name="", + description="", + author="", + entrypoints=entrypoints, + docker_image="" + ) diff --git a/scystream/sdk/env/settings.py b/scystream/sdk/env/settings.py index b217b93..e0c27c8 100644 --- a/scystream/sdk/env/settings.py +++ b/scystream/sdk/env/settings.py @@ -72,3 +72,19 @@ def __init__(self, propagate_kwargs=None, *args, **kwargs): if propagate_kwargs: kwargs = self._propagate_kwargs(propagate_kwargs) super().__init__(*args, **kwargs) + + +class InputSettings(EnvSettings): + """ + Abstraction-Layer for inputs + could be extended + """ + pass + + +class OutputSettings(EnvSettings): + """ + Abstraction-Layer for outputs + could be exended + """ + pass diff --git a/scystream/sdk/scheduler.py b/scystream/sdk/scheduler.py index f403c83..c610897 100644 --- a/scystream/sdk/scheduler.py +++ b/scystream/sdk/scheduler.py @@ -13,6 +13,6 @@ def list_entrypoints(): def execute_function(name, *args, **kwargs): functions = get_registered_functions() if name in functions: - return functions[name](*args, **kwargs) + return functions[name]["function"](*args, **kwargs) else: raise Exception(f"No entrypoint found with the name: {name}") From 55dac98ead5c3b3b1e2fd27b7536a9728d296225 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Tue, 12 Nov 2024 03:08:09 +0100 Subject: [PATCH 19/22] feat: validate in outputs with load_and_validate func --- scystream/sdk/config/compute_block_utils.py | 72 ++++++++++++++++++ scystream/sdk/config/config_loader.py | 24 ++++-- scystream/sdk/config/models.py | 57 ++++++++++++-- scystream/sdk/core.py | 82 ++------------------- scystream/sdk/env/settings.py | 2 - tests/test_config.py | 19 ++--- tests/test_core.py | 2 +- 7 files changed, 158 insertions(+), 100 deletions(-) create mode 100644 scystream/sdk/config/compute_block_utils.py diff --git a/scystream/sdk/config/compute_block_utils.py b/scystream/sdk/config/compute_block_utils.py new file mode 100644 index 0000000..6c6f8bb --- /dev/null +++ 
b/scystream/sdk/config/compute_block_utils.py @@ -0,0 +1,72 @@ +from pydantic_core import PydanticUndefinedType +from scystream.sdk.config.models import ComputeBlock, Entrypoint, \ + InputOutputModel +from scystream.sdk.env.settings import InputSettings, \ + OutputSettings +from typing import Union +from scystream.sdk.core import get_registered_functions + + +def _get_pydantic_default_value_or_none(value): + if type(value.default) is PydanticUndefinedType: + return None + return value.default + + +def _build_input_output_dict_from_class( + subject: Union[InputSettings, OutputSettings] +): + config_dict = {} + for key, value in subject.model_fields.items(): + config_dict[key] = _get_pydantic_default_value_or_none(value) + return InputOutputModel( + type="TODO: SetType", + description="", + config=config_dict + ) + + +def get_compute_block() -> ComputeBlock: + """ + Converts Entrypoints & Settings to a ComputeBlock + """ + entrypoints = {} + for entrypoint, func in get_registered_functions().items(): + envs = {} + inputs = {} + outputs = {} + + if func["settings"]: + entrypoint_settings_class = func["settings"] + for key, value in entrypoint_settings_class.model_fields.items(): + if ( + isinstance(value.default_factory, type) and + issubclass(value.default_factory, InputSettings) + ): + inputs[key] = _build_input_output_dict_from_class( + value.default_factory + ) + elif ( + isinstance(value.default_factory, type) and + issubclass(value.default_factory, OutputSettings) + ): + outputs[key] = _build_input_output_dict_from_class( + value.default_factory + ) + else: + envs[key] = _get_pydantic_default_value_or_none(value) + + entrypoints[entrypoint] = Entrypoint( + description="", + envs=envs, + inputs=inputs, + outputs=outputs + ) + + return ComputeBlock( + name="", + description="", + author="", + entrypoints=entrypoints, + docker_image="" + ) diff --git a/scystream/sdk/config/config_loader.py b/scystream/sdk/config/config_loader.py index 13760e2..49bdc97 100644 --- 
a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -3,6 +3,7 @@ from pydantic import ValidationError from pathlib import Path from .models import ComputeBlock +from scystream.sdk.config.compute_block_utils import get_compute_block CONFIG_FILE_DEFAULT_NAME = "cbc.yaml" @@ -19,9 +20,9 @@ def _remove_empty_dicts(data): return data -def load_config( +def load_and_validate_config( config_file_name: str = CONFIG_FILE_DEFAULT_NAME, - config_path: Union[str, Path] = None + config_path: Union[str, Path] = None, ) -> ComputeBlock: """ Returns and Validates the Compute Block YAML definition. @@ -29,15 +30,24 @@ def load_config( """ try: file = _find_and_load_config(config_file_name, config_path) - block = ComputeBlock(**file) - # TODO: Check if envs && input/output configs correspond to the - # loaded one - return block + block_from_cfg = ComputeBlock(**file) + block_from_code = get_compute_block() + + if ( + block_from_cfg != block_from_code + ): + # check the total config + raise ValueError( + "The entrypoint configs (envs, inputs, outputs) defined in " + "your config yaml do not correspond with the entrypoint " + "settings defined in your code." + ) + return block_from_code except ValidationError as e: raise ValueError(f"Configuration file validation error: {e}") -def generate_yaml_from_compute_block( +def generate_config_from_compute_block( compute_block: ComputeBlock, output_path: Path ): diff --git a/scystream/sdk/config/models.py b/scystream/sdk/config/models.py index bcc67de..29313bf 100644 --- a/scystream/sdk/config/models.py +++ b/scystream/sdk/config/models.py @@ -4,13 +4,9 @@ FILE_TYPE_IDENTIFIER = "file" DB_TABLE_TYPE_IDENTIFIER = "db_table" -# TODO: reevaluate the identifier +# TODO: reevaluate identifier TODO_TYPE_IDENTIFIER = "TODO: SetType" -""" -This file contains the schema definition for the config file. 
-""" - class InputOutputModel(BaseModel): """ @@ -38,6 +34,20 @@ class InputOutputModel(BaseModel): (file_path, table_name, etc.)" ) + def __eq__(self, other): + """ + Compares the configuration keys only, as other attributes + are not relevant for determining equality at this stage. + """ + if isinstance(other, InputOutputModel): + return ( + self._sorted_config() == other._sorted_config() + ) + return False + + def _sorted_config(self): + return dict(sorted(self.config.items() if self.config else {})) + class Entrypoint(BaseModel): """ @@ -65,6 +75,28 @@ class Entrypoint(BaseModel): inputs: Optional[Dict[StrictStr, InputOutputModel]] = None outputs: Optional[Dict[StrictStr, InputOutputModel]] = None + def __eq__(self, other): + """ + Compares the envs, inputs, outputs only, as other attributes + are not relevant for determining equality at this stage. + """ + if isinstance(other, Entrypoint): + return ( + self._sorted_envs() == other._sorted_envs() and + self._sorted_inputs() == other._sorted_inputs() and + self._sorted_outputs() == other._sorted_outputs() + ) + return False + + def _sorted_envs(self): + return dict(sorted(self.envs.items()) if self.envs else {}) + + def _sorted_inputs(self): + return dict(sorted(self.inputs.items()) if self.inputs else {}) + + def _sorted_outputs(self): + return dict(sorted(self.outputs.items()) if self.outputs else {}) + class ComputeBlock(BaseModel): """ @@ -90,3 +122,18 @@ def check_entrypoints(cls, v): if not v: raise ValueError("At least one entrypoint must be defined.") return v + + def __eq__(self, other): + """ + Compares the entrypoints only, as other attributes + are not relevant for determining equality at this stage. 
+ """ + + if isinstance(other, ComputeBlock): + return ( + self._sorted_entrypoints() == other._sorted_entrypoints() + ) + return False + + def _sorted_entrypoints(self): + return {key: value for key, value in sorted(self.entrypoints.items())} diff --git a/scystream/sdk/core.py b/scystream/sdk/core.py index 58cf18d..c514d3b 100644 --- a/scystream/sdk/core.py +++ b/scystream/sdk/core.py @@ -1,11 +1,7 @@ import functools -from typing import Callable, Type, Optional, Union -from .env.settings import EnvSettings +from typing import Callable, Type, Optional +from scystream.sdk.env.settings import EnvSettings from pydantic import ValidationError -from scystream.sdk.config.models import ComputeBlock, Entrypoint, \ - InputOutputModel -from pydantic_core import PydanticUndefinedType -from scystream.sdk.env.settings import InputSettings, OutputSettings _registered_functions = {} @@ -19,15 +15,15 @@ def decorator(func: Callable): @functools.wraps(func) def wrapper(*args, **kwargs): if settings_class is not None: - # Load settings + # TODO: validate the entrypoint settings with the config yaml + try: - # TODO: 1. LoadSettings - # TODO: 2. Generate config from settings (only for the entrypoint) - # TODO: 3. 
Validate if generated config and given config are same + # load the settings settings = settings_class.get_settings() except ValidationError as e: raise ValueError(f"Invalid environment configuration: {e}") + # inject the settings return func(settings, *args, **kwargs) else: return func(*args, **kwargs) @@ -42,70 +38,4 @@ def wrapper(*args, **kwargs): def get_registered_functions(): """Returns a dictionary of registered entrypoint functions.""" - print(_registered_functions) return _registered_functions - - -def _get_pydantic_default_value_or_none(value): - if type(value.default) is PydanticUndefinedType: - return None - return value.default - - -def _build_input_output_dict_from_class( - subject: Union[InputSettings, OutputSettings] -): - config_dict = {} - for key, value in subject.model_fields.items(): - config_dict[key] = _get_pydantic_default_value_or_none(value) - return InputOutputModel( - type="TODO: SetType", - description="", - config=config_dict - ) - - -def generate_compute_block() -> ComputeBlock: - """ - Converts the Settings to a ComputeBlock - """ - entrypoints = {} - for entrypoint, func in _registered_functions.items(): - envs = {} - inputs = {} - outputs = {} - - if func["settings"]: - entrypoint_settings_class = func["settings"] - for key, value in entrypoint_settings_class.model_fields.items(): - if ( - isinstance(value.default_factory, type) and - issubclass(value.default_factory, InputSettings) - ): - inputs[key] = _build_input_output_dict_from_class( - value.default_factory - ) - elif ( - isinstance(value.default_factory, type) and - issubclass(value.default_factory, OutputSettings) - ): - outputs[key] = _build_input_output_dict_from_class( - value.default_factory - ) - else: - envs[key] = _get_pydantic_default_value_or_none(value) - - entrypoints[entrypoint] = Entrypoint( - description="", - envs=envs, - inputs=inputs, - outputs=outputs - ) - - return ComputeBlock( - name="", - description="", - author="", - entrypoints=entrypoints, - 
docker_image="" - ) diff --git a/scystream/sdk/env/settings.py b/scystream/sdk/env/settings.py index e0c27c8..981353e 100644 --- a/scystream/sdk/env/settings.py +++ b/scystream/sdk/env/settings.py @@ -79,7 +79,6 @@ class InputSettings(EnvSettings): Abstraction-Layer for inputs could be extended """ - pass class OutputSettings(EnvSettings): @@ -87,4 +86,3 @@ class OutputSettings(EnvSettings): Abstraction-Layer for outputs could be exended """ - pass diff --git a/tests/test_config.py b/tests/test_config.py index 149812e..3426024 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,5 +1,6 @@ import unittest -from scystream.sdk.config.config_loader import load_config, ComputeBlock +from scystream.sdk.config.config_loader import load_and_validate_config, \ + ComputeBlock class TestComputeBlockValidation(unittest.TestCase): @@ -7,7 +8,7 @@ class TestComputeBlockValidation(unittest.TestCase): def test_valid_config(self): try: - compute_block = load_config( + compute_block = load_and_validate_config( "valid_config.yaml", config_path=self.TEST_CONFIG_FOLDER) self.assertIsInstance(compute_block, ComputeBlock) except Exception: @@ -15,22 +16,22 @@ def test_valid_config(self): def test_missing_entrypoints(self): with self.assertRaises(ValueError): - load_config("missing_entrypoints.yaml", - config_path=self.TEST_CONFIG_FOLDER) + load_and_validate_config("missing_entrypoints.yaml", + config_path=self.TEST_CONFIG_FOLDER) def test_invalid_datatypes(self): with self.assertRaises(ValueError): - load_config("invalid_datatype.yaml", - config_path=self.TEST_CONFIG_FOLDER) + load_and_validate_config("invalid_datatype.yaml", + config_path=self.TEST_CONFIG_FOLDER) def test_not_a_yaml(self): with self.assertRaises(ValueError): - load_config("not_a_yaml.json", - config_path=self.TEST_CONFIG_FOLDER) + load_and_validate_config("not_a_yaml.json", + config_path=self.TEST_CONFIG_FOLDER) def test_file_not_found(self): with self.assertRaises(FileNotFoundError): - 
load_config("test.yaml") + load_and_validate_config("test.yaml") if __name__ == "__main__": diff --git a/tests/test_core.py b/tests/test_core.py index f9a19ec..27107ed 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -10,7 +10,7 @@ def dummy_function(): registered = get_registered_functions() self.assertIn("dummy_function", registered) - self.assertEqual(registered["dummy_function"](), "Hello") + self.assertEqual(registered["dummy_function"]["function"](), "Hello") if __name__ == "__main__": From 5e12337d9250f104530638806a570c9cf942bd7d Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Tue, 12 Nov 2024 17:23:52 +0100 Subject: [PATCH 20/22] feat: validate on execute and custom validation function --- README.md | 185 +++++++++++--------- scystream/sdk/config/compute_block_utils.py | 10 +- scystream/sdk/config/config_loader.py | 60 ++++--- scystream/sdk/config/entrypoints.py | 12 ++ scystream/sdk/core.py | 18 +- scystream/sdk/scheduler.py | 9 +- 6 files changed, 168 insertions(+), 126 deletions(-) create mode 100644 scystream/sdk/config/entrypoints.py diff --git a/README.md b/README.md index 65034fa..7bb036c 100644 --- a/README.md +++ b/README.md @@ -8,22 +8,93 @@ You can install the package via pip once it's published: pip install scystream-sdk ``` -### Compute Blocks and their configs +## Introduction + One of the central concepts of scystream are the so-called **Compute Blocks**. A Compute Block describes an independent programm, that acts as some kind of worker which will be scheduled using the scystream-core application. -This worker executes a task (e.g. a NLP task, a crwaling task). +This worker executes a task (e.g. a NLP task, a crawling task). + +This SDK aims to provide helper functions and all other requirements you need to implement +a custom Compute Block on your own. Each worker can have multiple entrypoints, each aiming to solve one task. These entrypoints can be configured from the outside using the **Settings**. 
These are basically ENV-Variables, which will be parsed & validated using pydantic. -This SDK aims to implement helper functions and other requirements we expect each -Compute Block to have. +You can either set "global" Settings (for the entrypoint), by using the `envs` block. +Or you can set "input/output-related" Settings by using the `config` block in each input/output. + +## Basic Usage of the SDK + +```python3 +from scystream.sdk.core import entrypoint +from scystream.sdk.scheduler import Scheduler + + +@entrypoint() +def example_task(): + print("Executing example_task...") + + +@entrypoint() +def another_task(task_name): + print(f"Executing another_task with task name: {task_name}") + + +def main(): + Scheduler.list_entrypoints() + Scheduler.execute_function("example_task") + Scheduler.execute_function("another_task", "ScheduledTask") + + +if __name__ == "__main__": + main() + +``` + +## Defining Settings and Using them. + +Earlier, we already wrote about **Settings**. +Each Input & Output can be configured using these settings. +There are also Global Settings, refered to as `envs` in the `cbc.yaml` + +Below you can find a simple example of how we define & validate these settings. +Therefore you should use the `EnvSettings` class. 
+ +```python3 +from scystream.sdk.core import entrypoint +from scystream.sdk.env.settings import EnvSettings + +class TextDataInputSettings(EnvSettings): + TXT_SRC_PATH: str # no default provided, manual setting is a MUST + +class DBDataInputSettings(EnvSettings): + DATA_TABLE_NAME: str = "nlp_information" + DB_HOST: str = "time.rwth-aachen.de" + DB_PORT: str = 1234 + +class TopicModellingEntrypointSettings(EnvSettings): + LANGUAGE: str = "de" + + text_data: TextDataInputSettings + db_data: DBDataInputSettings + +@entrypoint(TopicModellingEntrypointSettings) # Pass it to the Entrypoint +def topic_modelling(settings): # The settings param is automatically injected to your function, you can use it + print(f"Running topic modelling, using file: {settings.text_data.TXT_SRC_PATH}") + +@entrypoint() +def test_entrypint(): + print("This entrypoint does not have any configs.") +``` + +Of course, you will also be able to use your settings in other files/directories. +For that, just import your desired setting and use the `get_settings()` function. +It will load the configurations correctly. -To understand the concept of such a Compute Block even more, take a look at the -config below. +## Compute Block Config We expect every repository which will be used within the scystream application to contain a **Compute Block Config File**, the `cbc.yaml`, within the root directory. @@ -84,101 +155,53 @@ entrypoints: CSV_OUTPUT_PATH: "outputs/statistics.csv" ``` -For now, you have to write this config file on your own. However, at some -point you will be able to generate this config from your code. 
+### Generating a config -To read and validate such a config file you can proceed as follows: +After writing the functionality of your ComputeBlock (see more below) you can generate +the corresponding `cbc.yaml` by using the following function: ```python3 -from scystream.sdk.config.config_loader import load_config +from scystream.sdk.config.config_loader import generate_config_from_compute_block +from scystream.sdk.config.compute_block_utils import get_compute_block +from pathlib import Path -def main(): - load_config() +@entrypoint() +def example_entrypoint(): + print("Example...") if __name__ == "__main__": - main() + compute_block = get_compute_block() + generate_config_from_compute_block(cb, Path("cbc.yaml")) ``` -If you want the file to have another name than `cbc.yaml` or you want the file to be -somewhere else than the root directory you can define that using the parameters the -`load_config` function takes. +This will take all the entrypoints, their defined settings, and generate a config from them. -Example: +> [!NOTE] +> Make sure to edit the generated config by your user-defined metadata +> (e.g. author, description, docker_image, ...) -```python3 -load_config(config_file_name="test.yaml", config_path="configs/") -``` +### Validating a config -the `config_path` is the path relative to your root directory +Of course, you can also write the config completely on your own. -## Basic Usage of the SDK +> [!NOTE] +> When using `Scheduler.execute_function("entrypoint")` the Settings for the +> entrypoint and the config will be validated. +> If the Settings do not correspond to the definition in the yaml, execution will not be possible. 
-```python3 -from scystream.sdk.core import entrypoint -from scystream.sdk.scheduler import Scheduler +To validate the config, you can also use a helper function like this: +```python3 +from scystream.sdk.config.config_loader import validate_config_with_code @entrypoint() -def example_task(): - print("Executing example_task...") - - -@entrypoint() -def another_task(task_name): - print(f"Executing another_task with task name: {task_name}") - - -def main(): - Scheduler.list_entrypoints() - Scheduler.execute_function("example_task") - Scheduler.execute_function("another_task", "ScheduledTask") - +def example_entrypoint(): + print("Example...") if __name__ == "__main__": - main() - + validate_config_with_code() ``` -## Defining Settings and Using them. - -Earlier, we already wrote about **Settings**. -Each Input & Output can be configured using these settings. -There are also Global Settings, refered to as `envs` in the `cbc.yaml` - -Below you can find a simple example of how we define & validate these settings. -Therefore you should use the `EnvSettings` class. 
- -```python3 -from scystream.sdk.core import entrypoint -from scystream.sdk.env.settings import EnvSettings - -class TextDataInputSettings(EnvSettings): - TXT_SRC_PATH: str # no default provided, manual setting is a MUST - -class DBDataInputSettings(EnvSettings): - DATA_TABLE_NAME: str = "nlp_information" - DB_HOST: str = "time.rwth-aachen.de" - DB_PORT: str = 1234 - -class TopicModellingEntrypointSettings(EnvSettings): - LANGUAGE: str = "de" - - text_data: TextDataInputSettings - db_data: DBDataInputSettings - -@entrypoint(TopicModellingEntrypointSettings) # Pass it to the Entrypoint -def topic_modelling(settings): # The settings param is automatically injected to your function, you can use it - print(f"Running topic modelling, using file: {settings.text_data.TXT_SRC_PATH}") - -@entrypoint() -def test_entrypint(): - print("This entrypoint does not have any configs.") -``` - -Of course, you will also be able to use your settings in other files/directories. -For that, just import your desired setting and use the `get_settings()` function. -It will load the configurations correctly. 
- ## Development of the SDK ### Installation diff --git a/scystream/sdk/config/compute_block_utils.py b/scystream/sdk/config/compute_block_utils.py index 6c6f8bb..cd1ebbe 100644 --- a/scystream/sdk/config/compute_block_utils.py +++ b/scystream/sdk/config/compute_block_utils.py @@ -1,10 +1,10 @@ +from typing import Union from pydantic_core import PydanticUndefinedType from scystream.sdk.config.models import ComputeBlock, Entrypoint, \ InputOutputModel from scystream.sdk.env.settings import InputSettings, \ OutputSettings -from typing import Union -from scystream.sdk.core import get_registered_functions +from scystream.sdk.config.entrypoints import get_registered_functions def _get_pydantic_default_value_or_none(value): @@ -58,9 +58,9 @@ def get_compute_block() -> ComputeBlock: entrypoints[entrypoint] = Entrypoint( description="", - envs=envs, - inputs=inputs, - outputs=outputs + envs=envs if envs != {} else None, + inputs=inputs if inputs != {} else None, + outputs=outputs if outputs != {} else None ) return ComputeBlock( diff --git a/scystream/sdk/config/config_loader.py b/scystream/sdk/config/config_loader.py index 49bdc97..a50aef5 100644 --- a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -2,47 +2,58 @@ from typing import Union from pydantic import ValidationError from pathlib import Path -from .models import ComputeBlock +from scystream.sdk.config.models import ComputeBlock, Entrypoint, \ + InputOutputModel from scystream.sdk.config.compute_block_utils import get_compute_block CONFIG_FILE_DEFAULT_NAME = "cbc.yaml" -def _remove_empty_dicts(data): +def _compare_configs( + config_from_yaml: Union[ComputeBlock, Entrypoint, InputOutputModel], + config_from_code: Union[ComputeBlock, Entrypoint, InputOutputModel], + name="block" +): """ - Remove keys with empty dictionaries from a nested structure. + Compares two configurations and raises a ValueError if they don't match. 
""" - if isinstance(data, dict): - return {k: _remove_empty_dicts(v) for k, v in data.items() if v != {}} - elif isinstance(data, list): - return [_remove_empty_dicts(i) for i in data] + if config_from_yaml != config_from_code: + raise ValueError( + f"The {name} configs (envs, inputs, outputs) defined\ + in your config YAML do not match the settings defined\ + in your code." + ) + + +def validate_config_with_code( + config_file_name: str = CONFIG_FILE_DEFAULT_NAME, + config_path: Union[str, Path] = None, + entrypoint_name: str = None +): + block_from_cfg = load_config(config_file_name, config_path) + block_from_code = get_compute_block() + + if entrypoint_name: + _compare_configs( + block_from_cfg.entrypoints[entrypoint_name], + block_from_code.entrypoints[entrypoint_name] + ) else: - return data + _compare_configs(block_from_cfg, block_from_code) -def load_and_validate_config( +def load_config( config_file_name: str = CONFIG_FILE_DEFAULT_NAME, config_path: Union[str, Path] = None, ) -> ComputeBlock: """ - Returns and Validates the Compute Block YAML definition. - Returns a ComputeBlock instance if the validation is successfull + Returns the Compute Block defined by the passed yaml. + Returns a ComputeBlock instance if the syntax-validation is successfull """ try: file = _find_and_load_config(config_file_name, config_path) block_from_cfg = ComputeBlock(**file) - block_from_code = get_compute_block() - - if ( - block_from_cfg != block_from_code - ): - # check the total config - raise ValueError( - "The entrypoint configs (envs, inputs, outputs) defined in " - "your config yaml do not correspond with the entrypoint " - "settings defined in your code." 
- ) - return block_from_code + return block_from_cfg except ValidationError as e: raise ValueError(f"Configuration file validation error: {e}") @@ -51,9 +62,8 @@ def generate_config_from_compute_block( compute_block: ComputeBlock, output_path: Path ): - cleaned_data = _remove_empty_dicts(compute_block.dict()) with output_path.open("w") as file: - yaml.dump(cleaned_data, file, default_flow_style=False) + yaml.dump(compute_block.dict(), file, default_flow_style=False) def _find_and_load_config( diff --git a/scystream/sdk/config/entrypoints.py b/scystream/sdk/config/entrypoints.py new file mode 100644 index 0000000..ee48825 --- /dev/null +++ b/scystream/sdk/config/entrypoints.py @@ -0,0 +1,12 @@ +_registered_functions = {} + + +def register_entrypoint(func_name, func, settings_class): + _registered_functions[func_name] = { + "function": func, + "settings": settings_class + } + + +def get_registered_functions(): + return _registered_functions diff --git a/scystream/sdk/core.py b/scystream/sdk/core.py index c514d3b..abae394 100644 --- a/scystream/sdk/core.py +++ b/scystream/sdk/core.py @@ -1,9 +1,8 @@ import functools from typing import Callable, Type, Optional -from scystream.sdk.env.settings import EnvSettings from pydantic import ValidationError - -_registered_functions = {} +from scystream.sdk.config.entrypoints import register_entrypoint +from scystream.sdk.env.settings import EnvSettings def entrypoint(settings_class: Optional[Type[EnvSettings]] = None): @@ -15,8 +14,7 @@ def decorator(func: Callable): @functools.wraps(func) def wrapper(*args, **kwargs): if settings_class is not None: - # TODO: validate the entrypoint settings with the config yaml - + # Load the settings try: # load the settings settings = settings_class.get_settings() @@ -28,14 +26,6 @@ def wrapper(*args, **kwargs): else: return func(*args, **kwargs) - _registered_functions[func.__name__] = { - "function": wrapper, - "settings": settings_class - } + register_entrypoint(func.__name__, wrapper, 
settings_class) return wrapper return decorator - - -def get_registered_functions(): - """Returns a dictionary of registered entrypoint functions.""" - return _registered_functions diff --git a/scystream/sdk/scheduler.py b/scystream/sdk/scheduler.py index c610897..5549348 100644 --- a/scystream/sdk/scheduler.py +++ b/scystream/sdk/scheduler.py @@ -1,4 +1,5 @@ -from .core import get_registered_functions +from scystream.sdk.config.entrypoints import get_registered_functions +from scystream.sdk.config.config_loader import validate_config_with_code class Scheduler: @@ -11,6 +12,12 @@ def list_entrypoints(): @staticmethod def execute_function(name, *args, **kwargs): + """ + Validate the in code defined entrypoints + with the settings defined in the cfg file + """ + validate_config_with_code(entrypoint_name=name) + functions = get_registered_functions() if name in functions: return functions[name]["function"](*args, **kwargs) From 1799e7d76c0a85feaca58419921c98af2478a283 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Tue, 12 Nov 2024 19:27:26 +0100 Subject: [PATCH 21/22] tests: add and refactor tests --- scystream/sdk/config/config_loader.py | 47 +++-- scystream/sdk/config/entrypoints.py | 4 + tests/test_config.py | 28 +-- tests/test_core.py | 7 +- tests/test_setting_files/simple_cfg.yaml | 21 +++ .../simple_cfg_entrypoint_inv.yaml | 22 +++ .../simple_cfg_entrypoint_v.yaml | 35 ++++ .../simple_cfg_invalid.yaml | 25 +++ .../without_default_settings.yaml | 15 ++ tests/test_settings.py | 176 ++++++++++-------- 10 files changed, 278 insertions(+), 102 deletions(-) create mode 100644 tests/test_setting_files/simple_cfg.yaml create mode 100644 tests/test_setting_files/simple_cfg_entrypoint_inv.yaml create mode 100644 tests/test_setting_files/simple_cfg_entrypoint_v.yaml create mode 100644 tests/test_setting_files/simple_cfg_invalid.yaml create mode 100644 tests/test_setting_files/without_default_settings.yaml diff --git a/scystream/sdk/config/config_loader.py 
b/scystream/sdk/config/config_loader.py index a50aef5..d09213e 100644 --- a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -9,6 +9,31 @@ CONFIG_FILE_DEFAULT_NAME = "cbc.yaml" +class SDKConfig: + """ + This is a singleton class that holds the configuration of + the sdk. + For now, it only holds the config_path which points to + the cbc.yaml. + """ + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super(SDKConfig, cls).__new__(cls) + cls._instance.config_path = CONFIG_FILE_DEFAULT_NAME + return cls._instance + + def set_config_path(self, config_path: str): + self.config_path = config_path + + def get_config_path(self) -> str: + return self.config_path + + +global_config = SDKConfig() + + def _compare_configs( config_from_yaml: Union[ComputeBlock, Entrypoint, InputOutputModel], config_from_code: Union[ComputeBlock, Entrypoint, InputOutputModel], @@ -26,11 +51,9 @@ def _compare_configs( def validate_config_with_code( - config_file_name: str = CONFIG_FILE_DEFAULT_NAME, - config_path: Union[str, Path] = None, entrypoint_name: str = None ): - block_from_cfg = load_config(config_file_name, config_path) + block_from_cfg = load_config() block_from_code = get_compute_block() if entrypoint_name: @@ -42,16 +65,13 @@ def validate_config_with_code( _compare_configs(block_from_cfg, block_from_code) -def load_config( - config_file_name: str = CONFIG_FILE_DEFAULT_NAME, - config_path: Union[str, Path] = None, -) -> ComputeBlock: +def load_config() -> ComputeBlock: """ Returns the Compute Block defined by the passed yaml. 
Returns a ComputeBlock instance if the syntax-validation is successfull """ try: - file = _find_and_load_config(config_file_name, config_path) + file = _find_and_load_config() block_from_cfg = ComputeBlock(**file) return block_from_cfg except ValidationError as e: @@ -66,19 +86,14 @@ def generate_config_from_compute_block( yaml.dump(compute_block.dict(), file, default_flow_style=False) -def _find_and_load_config( - config_file_name: str, - config_path: Union[str, Path] = None -): +def _find_and_load_config(): """ Loads the compute block config YAML from the projects root directory returns the loaded file """ - base_path = Path.cwd() - if config_path: - base_path /= Path(config_path) + config_path = global_config.get_config_path() - full_path = base_path / config_file_name + full_path = Path.cwd() / config_path if not full_path.is_file(): raise FileNotFoundError( diff --git a/scystream/sdk/config/entrypoints.py b/scystream/sdk/config/entrypoints.py index ee48825..8b285c5 100644 --- a/scystream/sdk/config/entrypoints.py +++ b/scystream/sdk/config/entrypoints.py @@ -10,3 +10,7 @@ def register_entrypoint(func_name, func, settings_class): def get_registered_functions(): return _registered_functions + + +def TEST_reset_registered_functions(): + _registered_functions.clear() diff --git a/tests/test_config.py b/tests/test_config.py index 3426024..e50f3be 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,6 +1,6 @@ import unittest -from scystream.sdk.config.config_loader import load_and_validate_config, \ - ComputeBlock +from scystream.sdk.config.config_loader import load_config, \ + ComputeBlock, global_config class TestComputeBlockValidation(unittest.TestCase): @@ -8,30 +8,36 @@ class TestComputeBlockValidation(unittest.TestCase): def test_valid_config(self): try: - compute_block = load_and_validate_config( - "valid_config.yaml", config_path=self.TEST_CONFIG_FOLDER) + global_config.set_config_path( + f"{self.TEST_CONFIG_FOLDER}/valid_config.yaml") + 
compute_block = load_config() self.assertIsInstance(compute_block, ComputeBlock) except Exception: self.fail("ComputeBlock raised an Exception unexpectedly!") def test_missing_entrypoints(self): + global_config.set_config_path( + f"{self.TEST_CONFIG_FOLDER}/missing_entrypoints.yaml") with self.assertRaises(ValueError): - load_and_validate_config("missing_entrypoints.yaml", - config_path=self.TEST_CONFIG_FOLDER) + load_config() def test_invalid_datatypes(self): + global_config.set_config_path( + f"{self.TEST_CONFIG_FOLDER}/invalid_datatype.yaml") with self.assertRaises(ValueError): - load_and_validate_config("invalid_datatype.yaml", - config_path=self.TEST_CONFIG_FOLDER) + load_config() def test_not_a_yaml(self): + global_config.set_config_path( + f"{self.TEST_CONFIG_FOLDER}/not_a_yaml.json") with self.assertRaises(ValueError): - load_and_validate_config("not_a_yaml.json", - config_path=self.TEST_CONFIG_FOLDER) + load_config() def test_file_not_found(self): + global_config.set_config_path( + f"{self.TEST_CONFIG_FOLDER}/testyamll") with self.assertRaises(FileNotFoundError): - load_and_validate_config("test.yaml") + load_config() if __name__ == "__main__": diff --git a/tests/test_core.py b/tests/test_core.py index 27107ed..921164b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,8 +1,13 @@ import unittest -from scystream.sdk.core import entrypoint, get_registered_functions +from scystream.sdk.core import entrypoint +from scystream.sdk.config.entrypoints import get_registered_functions +from scystream.sdk.config.entrypoints import TEST_reset_registered_functions class TestEntrypoint(unittest.TestCase): + def tearDown(self): + TEST_reset_registered_functions() + def test_entrypoint_registration(self): @entrypoint() def dummy_function(): diff --git a/tests/test_setting_files/simple_cfg.yaml b/tests/test_setting_files/simple_cfg.yaml new file mode 100644 index 0000000..6db4ded --- /dev/null +++ b/tests/test_setting_files/simple_cfg.yaml @@ -0,0 +1,21 @@ 
+name: +author: +description: +docker_image: +entrypoints: + example_entrypoint: + description: + envs: + LANGUAGE: de + inputs: + input_one: + config: + TEST: test + description: + type: 'TODO: SetType' + outputs: + output_one: + config: + OUT: out + description: + type: 'TODO: SetType' diff --git a/tests/test_setting_files/simple_cfg_entrypoint_inv.yaml b/tests/test_setting_files/simple_cfg_entrypoint_inv.yaml new file mode 100644 index 0000000..485b8aa --- /dev/null +++ b/tests/test_setting_files/simple_cfg_entrypoint_inv.yaml @@ -0,0 +1,22 @@ +name: +author: +description: +docker_image: +entrypoints: + example_entrypoint: + description: + envs: + LANGUAGE: de + inputs: + input_one: + config: + TEST: test + ADDITIONAL: tesing # SHOULD FAIL BECAUSE NOT IN SETTINGS CLASS + description: + type: 'TODO: SetType' + outputs: + output_one: + config: + OUT: out + description: + type: 'TODO: SetType' diff --git a/tests/test_setting_files/simple_cfg_entrypoint_v.yaml b/tests/test_setting_files/simple_cfg_entrypoint_v.yaml new file mode 100644 index 0000000..342b784 --- /dev/null +++ b/tests/test_setting_files/simple_cfg_entrypoint_v.yaml @@ -0,0 +1,35 @@ +name: +author: +description: +docker_image: +entrypoints: + example_entrypoint: + description: + envs: + LANGUAGE: de + inputs: + input_one: + config: + TEST: test + description: + type: 'TODO: SetType' + outputs: + output_one: + config: + OUT: out + description: + type: 'TODO: SetType' + test_entryping: + # This entrypoint is not defined in the Settings passed to the code; + # however, as example_entrypoint is being called in the test, + # this should not fail + description: + envs: + TEST: null + ONE: test + inputs: + test_inp: + config: + TESTER: test + description: + type: 'TODO: SetType' diff --git a/tests/test_setting_files/simple_cfg_invalid.yaml b/tests/test_setting_files/simple_cfg_invalid.yaml new file mode 100644 index 0000000..3fc49ff --- /dev/null +++ b/tests/test_setting_files/simple_cfg_invalid.yaml @@ -0,0 
+1,25 @@ +name: +author: +description: +docker_image: +entrypoints: + example_entrypoint: + description: + envs: + LANGUAGE: de + inputs: + input_one: + config: + TEST: test + description: + type: 'TODO: SetType' + outputs: + output_one: + config: + OUT: out + description: + type: 'TODO: SetType' + again_entrypoint: + description: + envs: + HI: null diff --git a/tests/test_setting_files/without_default_settings.yaml b/tests/test_setting_files/without_default_settings.yaml new file mode 100644 index 0000000..e688d90 --- /dev/null +++ b/tests/test_setting_files/without_default_settings.yaml @@ -0,0 +1,15 @@ +name: +author: +description: +docker_image: +entrypoints: + without_def_settings: + description: + envs: + LANGUAGE: null + inputs: + input_one: + config: + TEST: null + description: + type: 'TODO: SetType' diff --git a/tests/test_settings.py b/tests/test_settings.py index b6144d2..640d90f 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -1,135 +1,163 @@ import unittest import os from scystream.sdk.core import entrypoint -from scystream.sdk.env.settings import EnvSettings +from scystream.sdk.env.settings import EnvSettings, InputSettings, \ + OutputSettings from scystream.sdk.scheduler import Scheduler +from scystream.sdk.config.config_loader import global_config +from scystream.sdk.config.config_loader import validate_config_with_code +from scystream.sdk.config.entrypoints import TEST_reset_registered_functions +# Validate Cfgs -class DummyInputSettings(EnvSettings): - DUMMY_INPUT: str = "test" +class SimpleSettingsInputOne(InputSettings): + TEST: str = "test" -class WithDefaultSettings(EnvSettings): - DUMMY_GLOBAL: str = "dummy global var" - dummy_input_settings: DummyInputSettings +class SimpleSettingsOutputOne(OutputSettings): + OUT: str = "out" -class DummyInputSettingsNoDef(EnvSettings): - DUMMY_INPUT: str +class SimpleSettings(EnvSettings): + LANGUAGE: str = "de" + input_one: SimpleSettingsInputOne + output_one: SimpleSettingsOutputOne 
-class WithoutDefaultSettings(EnvSettings): - DUMMY_GLOBAL: str +# WithoutDefaults - dummy_input_settings_no_def: DummyInputSettingsNoDef +class WithoutDefaultsInputOne(InputSettings): + TEST: str -class WithoutDefaultNoNesting(EnvSettings): - TEST: str = "teststr" - MUST_SET: str +class WithoutDefaults(EnvSettings): + LANGUAGE: str # MUST BE SET -class SubOne(EnvSettings): - ONE: str - TWO: str + input_one: WithoutDefaultsInputOne -class SubTwo(EnvSettings): - TEST: str - NO_DEF: str +class TestSettings(unittest.TestCase): + TEST_SETTINGS_FILES = "tests/test_setting_files/" + def tearDown(self): + TEST_reset_registered_functions() -class TwoSubclasses(EnvSettings): - GLOBAL: str + def test_entrypoint_yaml_cfg_different_to_code_cfg(self): + # Tests if the passed settings to entrypoint config is different + # to the one in yaml + @entrypoint(SimpleSettings) + def example_entrypoint(settings): + print("Running example_entrypoint...") - input_one: SubOne - input_two: SubTwo + global_config.set_config_path( + f"{self.TEST_SETTINGS_FILES}/simple_cfg_entrypoint_inv.yaml" + ) + with self.assertRaises(ValueError): + Scheduler.execute_function("example_entrypoint") + + def test_entrypoint_yaml_cfg_not_different_to_code_cfg(self): + # Tests if the passed settings to entrypoint config is different + # to the one in yaml + # HINT: TOTAL CONFIG does not fit, only the entrypoint ones fits + @entrypoint(SimpleSettings) + def example_entrypoint(settings): + print("Running example_entrypoint...") + + global_config.set_config_path( + f"{self.TEST_SETTINGS_FILES}/simple_cfg_entrypoint_v.yaml" + ) + + try: + Scheduler.execute_function("example_entrypoint") + except Exception: + self.fail("") + + def test_validate_cfgs_no_error(self): + # Tests if validate_config_with_code works if config and settings + # correspond + @entrypoint(SimpleSettings) + def example_entrypoint(settings): + print(f"{settings}....") + + global_config.set_config_path( + 
f"{self.TEST_SETTINGS_FILES}/simple_cfg.yaml") + + try: + validate_config_with_code() + except Exception: + self.fail( + "validate_config_with_code raised an Exception unexpectedly!") + + def test_validate_cfgs_error(self): + # Tests if validate_config_with_code works if config and settings + # do not correspond + @entrypoint(SimpleSettings) + def example_entrypoint(settings): + print(f"{settings}....") + + global_config.set_config_path( + f"{self.TEST_SETTINGS_FILES}/simple_cfg_invalid.yaml") + + with self.assertRaises(ValueError): + validate_config_with_code() -class TestSettings(unittest.TestCase): def test_entrypoint_with_setting_default(self): - @entrypoint(WithDefaultSettings) + # Tests if defaults and overriding defaults with ENvs works + # We use SimpleSettings as they all have a default + @entrypoint(SimpleSettings) def with_default_settings(settings): - return settings.dummy_input_settings.DUMMY_INPUT + return settings.input_one.TEST + + global_config.set_config_path( + f"{self.TEST_SETTINGS_FILES}/simple_cfg.yaml") result = with_default_settings() self.assertEqual(result, "test") # set environ - os.environ["DUMMY_INPUT"] = "overridden setting" + os.environ["TEST"] = "overridden setting" result = with_default_settings() # check if overriding works self.assertEqual(result, "overridden setting") - del os.environ["DUMMY_INPUT"] + del os.environ["TEST"] def test_entrypoint_no_setting_default_one(self): - @entrypoint(WithoutDefaultSettings) + # Tests if fails, if ENVs that MUST be set, are not set + @entrypoint(WithoutDefaults) def without_def_settings(settings): print("test...") + global_config.set_config_path( + f"{self.TEST_SETTINGS_FILES}/without_default_settings.yaml") + # do we fail if environments not set with self.assertRaises(ValueError): Scheduler.execute_function("without_def_settings") def test_entrypoint_no_setting_default_two(self): - @entrypoint(WithoutDefaultSettings) + # Tests if it works, if ENVs that MUST be set, are actually set + 
@entrypoint(WithoutDefaults) def without_def_settings(settings): return ( - settings.DUMMY_GLOBAL, - settings.dummy_input_settings_no_def.DUMMY_INPUT + settings.LANGUAGE, + settings.input_one.TEST ) # set environments - os.environ["DUMMY_GLOBAL"] = "dummy global" - os.environ["DUMMY_INPUT"] = "dummy input" + os.environ["LANGUAGE"] = "dummy global" + os.environ["TEST"] = "dummy input" # check if environments have been set result = without_def_settings() self.assertEqual(result[0], "dummy global") self.assertEqual(result[1], "dummy input") - del os.environ["DUMMY_GLOBAL"] - del os.environ["DUMMY_INPUT"] - - def test_entrypoint_no_setting_defautl_three(self): - @entrypoint(WithoutDefaultNoNesting) - def no_nesting(settings): - print("testing...") - - with self.assertRaises(ValueError): - Scheduler.execute_function("no_nesting") - - def test_two_subs(self): - @entrypoint(TwoSubclasses) - def two_subs(settings): - return ( - settings.GLOBAL, - settings.input_one.ONE, - settings.input_one.TWO, - settings.input_two.TEST, - settings.input_two.NO_DEF - ) - - os.environ["GLOBAL"] = "global" - os.environ["ONE"] = "one" - os.environ["TWO"] = "two" - os.environ["TEST"] = "test" - os.environ["NO_DEF"] = "no_def" - - result = two_subs() - self.assertEqual(result[0], "global") - self.assertEqual(result[1], "one") - self.assertEqual(result[2], "two") - self.assertEqual(result[3], "test") - self.assertEqual(result[4], "no_def") - - del os.environ["GLOBAL"] - del os.environ["ONE"] - del os.environ["TWO"] + del os.environ["LANGUAGE"] del os.environ["TEST"] - del os.environ["NO_DEF"] if __name__ == "__main__": From e9eb8d67e529149ca8254d97cfba60e26698e1e1 Mon Sep 17 00:00:00 2001 From: PaulKalho Date: Tue, 12 Nov 2024 19:54:05 +0100 Subject: [PATCH 22/22] feat: export important functions directly --- README.md | 16 +++++++++--- scystream/sdk/config/__init__.py | 6 +++++ scystream/sdk/config/config_loader.py | 2 +- scystream/sdk/scheduler.py | 2 +- tests/test_config.py | 4 +-- 
tests/test_setting_files/ref.yaml | 21 ++++++++++++++++ tests/test_settings.py | 36 +++++++++++++++++++++++++-- 7 files changed, 78 insertions(+), 9 deletions(-) create mode 100644 tests/test_setting_files/ref.yaml diff --git a/README.md b/README.md index 7bb036c..c989026 100644 --- a/README.md +++ b/README.md @@ -161,8 +161,7 @@ After writing the functionality of your ComputeBlock (see more below) you can ge the corresponding `cbc.yaml` by using the following function: ```python3 -from scystream.sdk.config.config_loader import generate_config_from_compute_block -from scystream.sdk.config.compute_block_utils import get_compute_block +from scystream.sdk.config import generate_config_from_compute_block, get_compute_block from pathlib import Path @entrypoint() @@ -182,6 +181,17 @@ This will take all the entrypoints, their defined settings, and generate a confi ### Validating a config +If you want your `cbc.yaml` to be located in a different directory or have a different name, you +have to configure that accordingly: + +```python3 +from scystream.sdk.config import global_config + +if __name__ == "__main__": + # Set the config_path + global_config.set_config_path("custom_dir/custom_name.yaml") +``` + Of course, you can also write the config completely on your own. > [!NOTE] @@ -192,7 +202,7 @@ Of course, you can also write the config completely on your own. 
To validate the config, you can also use a helper function like this: ```python3 -from scystream.sdk.config.config_loader import validate_config_with_code +from scystream.sdk.config import validate_config_with_code @entrypoint() def example_entrypoint(): diff --git a/scystream/sdk/config/__init__.py b/scystream/sdk/config/__init__.py index e69de29..94120e7 100644 --- a/scystream/sdk/config/__init__.py +++ b/scystream/sdk/config/__init__.py @@ -0,0 +1,6 @@ +from .config_loader import global_config, \ + validate_config_with_code, load_config +from .compute_block_utils import get_compute_block + +__all__ = ["global_config", "validate_config_with_code", + "load_config", "EnvSettings", "get_compute_block"] diff --git a/scystream/sdk/config/config_loader.py b/scystream/sdk/config/config_loader.py index d09213e..ccaf120 100644 --- a/scystream/sdk/config/config_loader.py +++ b/scystream/sdk/config/config_loader.py @@ -83,7 +83,7 @@ def generate_config_from_compute_block( output_path: Path ): with output_path.open("w") as file: - yaml.dump(compute_block.dict(), file, default_flow_style=False) + yaml.dump(compute_block.model_dump(), file, default_flow_style=False) def _find_and_load_config(): diff --git a/scystream/sdk/scheduler.py b/scystream/sdk/scheduler.py index 5549348..10e6bb0 100644 --- a/scystream/sdk/scheduler.py +++ b/scystream/sdk/scheduler.py @@ -1,5 +1,5 @@ from scystream.sdk.config.entrypoints import get_registered_functions -from scystream.sdk.config.config_loader import validate_config_with_code +from scystream.sdk.config import validate_config_with_code class Scheduler: diff --git a/tests/test_config.py b/tests/test_config.py index e50f3be..fa1b28c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,6 +1,6 @@ import unittest -from scystream.sdk.config.config_loader import load_config, \ - ComputeBlock, global_config +from scystream.sdk.config import global_config, load_config +from scystream.sdk.config.models import ComputeBlock class 
TestComputeBlockValidation(unittest.TestCase): diff --git a/tests/test_setting_files/ref.yaml b/tests/test_setting_files/ref.yaml new file mode 100644 index 0000000..b722477 --- /dev/null +++ b/tests/test_setting_files/ref.yaml @@ -0,0 +1,21 @@ +author: +description: +docker_image: +entrypoints: + example_entrypoint: + description: + envs: + LANGUAGE: de + inputs: + input_one: + config: + TEST: test + description: + type: 'TODO: SetType' + outputs: + output_one: + config: + OUT: out + description: + type: 'TODO: SetType' +name: diff --git a/tests/test_settings.py b/tests/test_settings.py index 640d90f..27d9fef 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -4,9 +4,12 @@ from scystream.sdk.env.settings import EnvSettings, InputSettings, \ OutputSettings from scystream.sdk.scheduler import Scheduler -from scystream.sdk.config.config_loader import global_config -from scystream.sdk.config.config_loader import validate_config_with_code +from scystream.sdk.config.config_loader import global_config, \ + validate_config_with_code, get_compute_block, \ + generate_config_from_compute_block from scystream.sdk.config.entrypoints import TEST_reset_registered_functions +from pathlib import Path +import yaml # Validate Cfgs @@ -44,6 +47,35 @@ class TestSettings(unittest.TestCase): def tearDown(self): TEST_reset_registered_functions() + def test_generate_config_from_code(self): + generated_config_path = Path(f"{self.TEST_SETTINGS_FILES}/gen.yaml") + reference_config_path = Path(f"{self.TEST_SETTINGS_FILES}/ref.yaml") + + @entrypoint(SimpleSettings) + def example_entrypoint(settings): + print("Running...") + + try: + cb = get_compute_block() + generate_config_from_compute_block( + cb, generated_config_path) + except Exception as e: + self.fail(f"Exception raised unexpectedly: {e}") + + with generated_config_path.open("r") as gen_file: + generated_yaml = yaml.safe_load(gen_file) + + with reference_config_path.open("r") as ref_file: + reference_yaml = 
yaml.safe_load(ref_file) + + # Compare the contents + self.assertEqual( + generated_yaml, reference_yaml, + "Generated YAML does not match the reference YAML" + ) + + generated_config_path.unlink() + def test_entrypoint_yaml_cfg_different_to_code_cfg(self): # Tests if the passed settings to entrypoint config is different # to the one in yaml