diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5f1edc0d0..ac7cd4c67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -121,7 +121,7 @@ jobs: python-version: ["3.10"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -167,7 +167,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v2 @@ -177,8 +177,9 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build images - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v5 with: + context: . push: true # Will only build if this is not here build-args: | LOGPREP_VERSION=dev diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 60cacdcc1..817321118 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -91,7 +91,7 @@ jobs: python-version: ["3.10"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/publish-latest-dev-release-to-pypi.yml b/.github/workflows/publish-latest-dev-release-to-pypi.yml index 6556dc33a..f5f6f9a9e 100644 --- a/.github/workflows/publish-latest-dev-release-to-pypi.yml +++ b/.github/workflows/publish-latest-dev-release-to-pypi.yml @@ -12,7 +12,7 @@ jobs: if: github.event.pull_request.merged == true steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Initialize Python uses: actions/setup-python@v1 @@ -44,7 +44,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v2 @@ -54,7 +54,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and Push Docker Image - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v5 with: push: true # Will only build if this is not here build-args: | diff --git a/.github/workflows/publish-release-to-pypi.yml b/.github/workflows/publish-release-to-pypi.yml index 869cbf409..9ea10c8d6 100644 --- a/.github/workflows/publish-release-to-pypi.yml +++ b/.github/workflows/publish-release-to-pypi.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest name: Build Logprep steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Initialize Python uses: actions/setup-python@v1 @@ -58,7 +58,7 @@ jobs: needs: publish-latest-release-to-pypi steps: - name: Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v2 @@ -68,7 +68,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and Push Docker Image - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v5 with: push: true # Will only build if this is not here build-args: | diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f5cd06cc5..2279916e6 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -32,4 +32,5 @@ python: - method: pip path: . extra_requirements: + - dev - doc diff --git a/CHANGELOG.md b/CHANGELOG.md index e280b1c7e..45caac297 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,19 +2,27 @@ ## next release ### Breaking - ### Features +* retrieve oauth token automatically from different oauth endpoints by introducing an additional file to + define the credentials for every configuration source + +#### Improvements +### Bugfix + +## v10.0.4 ### Improvements * refactor logprep build process and requirements management ### Bugfix +* fix `generic_adder` not creating new field from type `list` + ## v10.0.3 ### Bugfix * fix loading of configuration inside the `AutoRuleCorpusTester` for `logprep test integration` -* fix auto rule tester (`test unit`), which was broken after adding support for multiple configuration files and resolving paths in configuration files +* fix auto rule tester (`test unit`), which was broken after adding support for multiple configuration files and resolving paths in configuration files ## v10.0.2 ### Bugfix diff --git a/README.md b/README.md index 6ae353be5..075c1e382 100644 --- a/README.md +++ b/README.md @@ -15,29 +15,26 @@ Logprep allows to collect, process and forward log messages from various data sources. Log messages are being read and written by so-called connectors. -Currently, connectors for Kafka, Opensearch, Opensearch and JSON(L) files exist. -Additionally, an Input Connector for HTTP Input is provided, which starts an uvicorn server and -accepts log message via POST Requests. +Currently, connectors for Kafka, Opensearch, ElasticSearch, S3, HTTP and JSON(L) files exist. -The log messages are processed step-by-step by a pipeline of processors, +The log messages are processed in serial by a pipeline of processors, where each processor modifies an event that is being passed through. The main idea is that each processor performs a simple task that is easy to carry out. -Once the log massage is passed through all processors in the pipeline the resulting +Once the log message is passed through all processors in the pipeline the resulting message is sent to a configured output connector. -Logprep is designed to be expandable with new connectors and processors. - Logprep is primarily designed to process log messages. Generally, Logprep can handle JSON messages, allowing further applications besides log handling. This readme provides basic information about the following topics: - [About Logprep](#about-logprep) -- [Getting Started](#getting-started) -- [Docker Quickstart](#logprep-quickstart-environment) -- [Event Generation](#event-generation) -- [Documentation](#documentation) +- [Getting Started](https://logprep.readthedocs.io/en/latest/getting_started.html) +- [Docker Quickstart](https://logprep.readthedocs.io/en/latest/getting_started.html#logprep-quickstart-environment) +- [Event Generation](https://logprep.readthedocs.io/en/latest/user_manual/execution.html#event-generation) +- [Documentation](https://logprep.readthedocs.io/en/latest) - [Contributing](#contributing) -- [License](#license) +- [License](LICENSE) +- [Changelog](CHANGELOG.md) More detailed information can be found in the [Documentation](https://logprep.readthedocs.io/en/latest/). @@ -235,84 +232,9 @@ Details about the rule language and how to write rules for the processors can be ## Getting Started -### Installation - -Python should be present on the system, currently supported are the versions 3.10 - 3.12. - -To install Logprep you have following options: - -**1. Option:** Installation via PyPI: - -This option is recommended if you just want to use the latest release of logprep. -``` -pip install logprep -``` - -**2. Option:** Installation via Git Repository: - -This option is recommended if you are interested in the latest developments and might want to -contribute to them. -``` -git clone https://github.com/fkie-cad/Logprep.git -cd Logprep -pip install . -``` +For installation instructions see: https://logprep.readthedocs.io/en/latest/getting_started.html#installation +For execution instructions see: https://logprep.readthedocs.io/en/latest/getting_started.html#run-logprep -**3. Option:** Installation via Github Release - -This option is recommended if you just want to try out the latest developments. -``` -pip install git+https://github.com/fkie-cad/Logprep.git@latest -``` - -**4. Option:** Docker build from current commit - -This option can be used to build a container image from a specific commit -``` -docker build -t logprep . -docker run logprep --help -``` - -### Testing - -We use `pytest` as our testing framework. We have unittests located in `./tests/unit` and -acceptance tests located in `./tests/acceptance`. - -Run all tests with: - -``` -pytest ./tests --cov=logprep --cov-report=xml -vvv -``` - -### Running Logprep - -Depending on how you have installed Logprep you have different choices to run Logprep as well. -If you have installed it via PyPI or the Github Development release just run: - -``` -logprep run $CONFIG -``` - -If you have installed Logprep via cloning the repository then you should run it via: - -``` -PYTHONPATH="." python3 logprep/run_logprep.py run $CONFIG -``` - -Where `$CONFIG` is the path or uri to a configuration file (see the documentation about the -[configuration](https://logprep.readthedocs.io/en/latest/user_manual/configuration/index.html)). -The next sections all assume an installation via pip - -### Verifying Configuration - -The following command can be executed to verify the configuration file without having to run Logprep: - -``` -logprep test config $CONFIG -``` - -Where `$CONFIG` is the path or uri to a configuration file (see the documentation about the -[configuration](https://logprep.readthedocs.io/en/latest/user_manual/configuration/index.html)). ### Reload the Configuration @@ -324,210 +246,6 @@ If the configuration does not pass a consistency check, then an error message is Logprep keeps running with the previous configuration. The configuration should be then checked and corrected on the basis of the error message. -## Logprep Quickstart Environment - -To demonstrate the functionality of logprep this repo comes with a complete `kafka`, `lokgprep` and -`opensearch` stack. -To get it running `docker` and `docker-compose` (version >= 1.28) must be first installed. -The docker-compose file is located in the directory `quickstart`. -A prerequisite is to run `sysctl -w vm.max_map_count=262144`, otherwise Opensearch might not -properly start. - -The environment can either be started with a Logprep container or without one: - -### Run without Logprep Container (default) - - 1. Run from within the `quickstart` directory: - ```bash - docker-compose up -d - ``` - It starts and connects `Kafka`, `logprep`, `Opensearch` and `Opensearch Dashboards`. - 2. Run Logprep against loaded environment from main `Logprep` directory: - ```bash - logprep run quickstart/exampledata/config/pipeline.yml - ``` - -### Run with Logprep Container - - * Run from within the `quickstart` directory: - ```bash - docker-compose --profile logprep up -d - ``` - -### Run with getting config from http server with basic authentication - - * Run from within the `quickstart` directory: - ```bash - docker-compose --profile basic_auth up -d - ``` - * Run within the project root directory: - ```bash - export LOGPREP_CONFIG_AUTH_USERNAME="user" - export LOGPREP_CONFIG_AUTH_PASSWORD="password" - logprep http://localhost:8081/config/pipeline.yml - ``` - -### Run with getting config from FDA with oauth2 authentication - -Start logprep by using the oauth2 profile with docker-compose: - - ```bash - docker-compose --profile oauth2 up -d - ``` - - -In order to run logprep with the FDA configuration it is necessary to set the following environment -variables: - -- `LOGPREP_CONFIG_AUTH_METHOD=oauth` -- `LOGPREP_CONFIG_AUTH_TOKEN=` (can be intercepted from the FDA<->Keycloak http requests) -- `LOGPREP_INPUT=` -- `LOGPREP_OUTPUT=` -- `LOGPREP_OPTIONS=` - -Once they are set logprep can be started with: - -```bash -logprep run "http://localhost:8000/api/v1/pipelines?stage=prod&logclass=ExampleClass" -``` - -### Interacting with the Quickstart Environment - -The start up takes a few seconds to complete, but once everything is up -and running it is possible to write JSON events into Kafka and read the processed events in -Opensearch Dashboards. Following services are available after start up: - -| Service | Location | User | Password | -|:-----------------------|:------------------|:---------|:---------| -| Kafka: | `localhost:9092` | / | / | -| Kafka Exporter: | `localhost:9308` | / | / | -| Logprep metrics: | `localhost:8001` | / | / | -| Opensearch: | `localhost:9200` | / | / | -| Opensearch Dashboards: | `localhost:5601` | / | / | -| Grafana Dashboards: | `localhost:3000` | admin | admin | -| Prometheus: | `localhost:9090` | / | / | -| Nginx: | `localhost:8081` | user | password | -| Keycloak: | `localhost:8080` | admin | admin | -| Keycloak Postgres: | `localhost:5432` | keycloak | bitnami | -| FDA: | `localhost:8002` | logprep | logprep | -| FDA Postgres: | `localhost:25432` | fda | fda | - -The example rules that are used in the docker instance of Logprep can be found -in `quickstart/exampledata/rules`. -Example events that trigger for the example rules can be found in -`quickstart/exampledata/input_logdata/logclass/test_input.jsonl`. -These events can be added to Kafka with the following command: - -```bash -(docker exec -i kafka kafka-console-producer.sh --bootstrap-server 127.0.0.1:9092 --topic consumer) < exampledata/input_logdata/logclass/test_input.jsonl -``` - -Once the events have been processed for the first time, the new indices *processed*, *sre* -and *pseudonyms* should be available in Opensearch Dashboards. - -The environment can be stopped via `docker-compose down`. - -## Event Generation - -Logprep has the additional functionality of generating events and sending them to two -different targets. -It can send events to kafka, while also loading events from kafka or reading them from file, -and it can send events to a http endpoint as POST requests. - -Following sections describe the usage of these event generators. - -### Kafka -The kafka load-tester can send a configurable amount of documents to Kafka. -The documents that are being send can be obtained either from Kafka or from a file with JSON lines. - -It can be configured how many documents should be retrieved from Kafka (if Kafka is used as source) -and how many documents will be sent. -Documents obtained from Kafka won't be written down to disk. - -The documents will be sent repeatedly until the desired amount has been sent. -The `tags` field and the `_index` field of each document will be set to `load-tester`. -Furthermore, a field `load-tester-unique` with a unique value will be added to each document every -time a document is sent. -This is done to prevent that repeatedly sent documents are identical. - -To find out more about the usage of the kafka load-tester execute: - -```bash -logprep generate kafka --help -``` - -### Configuration -The kafka load-tester is configured via a YAML file. -It must have the following format: - -```yaml -logging_level: LOG_LEVEL # Default: "INFO" -source_count: INTERGER # Number of documents to obtain form Kafka -count: INTERGER # Number of documents to send -process_count: INTERGER # Number of processes (default: 1) -profile: BOOL # Shows profiling data (default: false) -target_send_per_sec: INTERGER # Desired number of documents to send per second with each process. Setting it to 0 sends as much as possible (default: 0). - -kafka: - bootstrap_servers: # List of bootstrap servers - - URL:PORT # i.e. "127.0.0.1:9092" - consumer: # Kafka consumer - topic: STRING # Topic to obtain documents from - group_id: STRING # Should be different from the group_id of the Logprep Consumer, otherwise the offset in Logprep will be changed! - timeout: FLOAT # Timeout for retrieving documents (default: 1.0) - producer: # Kafka producer - acks: STRING/INTERGER # Determines if sending should be acknowledged (default: 0) - compression_type: STRING # Compression type (default: "none") - topic: STRING # Topic to send documents to - queue_buffering_max_messages: INTEGER # Batch for sending documents (default: 10000) - linger_ms: INTEGER # Time to wait before a batch is sent if the max wasn't reached before (default: 5000) - flush_timeout: FLOAT # Timeout to flush the producer (default 30.0) - ssl: # SSL authentication (Optional) - ca_location: STRING - certificate_location: STRING - key: - location: STRING - password: STRING # Optional -``` -Unused parameters must be removed or commented. - -### Http - -The http endpoint allows for generating events based on templated sample files which are stored -inside a dataset directory. - -The dataset directory with the sample files has to have the following format: - -``` - | - Test-Logs-Directory - | | - Test-Logs-Class-1-Directory - | | | - config.yaml - | | | - Test-Logs-1.jsonl - | | | - Test-Logs-2.jsonl - | | - Test-Logs-Class-2-Directory - | | | - config.yaml - | | | - Test-Logs-A.jsonl - | | | - Test-Logs-B.jsonl -``` - -While the jsonl event files can have arbitrary names, the `config.yaml` needs to be called exactly -that. It also needs to follow the following schema: - -```yaml -target_path: /endpoint/logsource/path -timestamps: - - key: TIMESTAMP_FIELD_1 - format: "%Y%m%d" - - key: TIMESTAMP_FIELD_1 - format: "%H%M%S" - time_shift: "+0200" # Optional, sets time shift in hours and minutes, if needed ([+-]HHMM) -``` - -To find out more about the usage of the http event generator execute: - -```bash -logprep generate http --help -``` ## Documentation @@ -548,7 +266,3 @@ A HTML documentation can be then found in `doc/_build/html/index.html`. Every contribution is highly appreciated. If you have ideas or improvements feel free to create a fork and open a pull requests. Issues and engagement in open discussions are also welcome. - -## License - -Logprep is distributed under the LGPL-2.1 License. See LICENSE file for more information. diff --git a/doc/source/_images/Credentials.svg b/doc/source/_images/Credentials.svg new file mode 100644 index 000000000..af60dfd49 --- /dev/null +++ b/doc/source/_images/Credentials.svg @@ -0,0 +1 @@ +
use GetterFactory to create getter for given target
use GetterFactory to...
load file from path given in env variable
LOGPREP_CREDENTIALS_FILE
load file from path given in en...
expected credentials object/ authentication method is matched to given credentials in file
expected credentials object/ a...
if method is an oauth authentication method: post request is sent to token endpoint containing a payload depending on the auth method that has been chosen 
if method is an oauth authenticatio...
token is retrieved and authorization header is set
token is retrieved a...
getter target is requested
getter target is re...
if the authentication was successful the getter target is retrieved
if the authenticatio...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/doc/source/development/architecture/diagramms/Credentials.drawio b/doc/source/development/architecture/diagramms/Credentials.drawio new file mode 100644 index 000000000..2db4d1580 --- /dev/null +++ b/doc/source/development/architecture/diagramms/Credentials.drawio @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/development/coding_examples.rst b/doc/source/development/coding_examples.rst index fb1c6b787..d3d3377d4 100644 --- a/doc/source/development/coding_examples.rst +++ b/doc/source/development/coding_examples.rst @@ -8,6 +8,7 @@ Processor Case Examples notebooks/processor_examples/calculator.ipynb notebooks/processor_examples/dissector.ipynb notebooks/processor_examples/field_manager.ipynb + notebooks/processor_examples/generic_adder.ipynb notebooks/processor_examples/grokker.ipynb notebooks/processor_examples/geo_ip_enricher_custom_outputfields.ipynb notebooks/processor_examples/ip_informer.ipynb diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst index f26d17cf9..d8a8166e4 100644 --- a/doc/source/development/index.rst +++ b/doc/source/development/index.rst @@ -11,3 +11,4 @@ Development register_a_new_component testing coding_examples + programaticly_start_logprep \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/generic_adder.ipynb b/doc/source/development/notebooks/processor_examples/generic_adder.ipynb new file mode 100644 index 000000000..16a826a13 --- /dev/null +++ b/doc/source/development/notebooks/processor_examples/generic_adder.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generic Adder\n", + "\n", + "This presentations goal it to introduce the features of the `Generic Adder` and how to configure it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want add fields or values depending on a matching filter." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " 'message': {\n", + " \"time_in_ms\": \"bla\",\n", + " \"tags\": [\"hello\"]\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " 'message': {\n", + " \"time_in_ms\": \"bla\",\n", + " \"tags\": [\"hello\", \"new\"]\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"almighty generic adder\":{ \n", + " \"type\": \"generic_adder\",\n", + " \"specific_rules\": [{\"filter\": \"*\", \"generic_adder\": {\"extend_target_list\": True, \"add\": {\"message.tags\": \"New\"}} }],\n", + " \"generic_rules\": [],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "generic_adder" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "calculator = Factory.create(processor_config, mock_logger)\n", + "calculator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'message': {'time_in_ms': 'bla', 'tags': ['hello']}}\n", + "after: {'message': {'time_in_ms': 'bla', 'tags': ['hello', 'New']}}\n", + "False\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "calculator.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + "print(mydocument == expected)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/development/programaticly_start_logprep.rst b/doc/source/development/programaticly_start_logprep.rst new file mode 100644 index 000000000..646490110 --- /dev/null +++ b/doc/source/development/programaticly_start_logprep.rst @@ -0,0 +1,91 @@ +Start Logprep programaticly +=========================== + +It is possible to make use of the Logprep :ref:`pipeline_config` in plain python, without any +input or output connectors or further configurations. +If on the other hand you want to make use of the input connector preprocessors you have to at least +use an input connector like the DummyInput. +The integration in python + +An example with input connector and preprocessors could look like this: + +.. code-block:: python + + from logprep.framework.pipeline import Pipeline + + event = { + "some": "data", + "test_pre_detector": "bad_information" + } + config = { + "pipeline": [ + { + "predetector": { + "type": "pre_detector", + "specific_rules": [ + "quickstart/exampledata/rules/pre_detector/specific" + ], + "generic_rules": [ + "quickstart/exampledata/rules/pre_detector/generic" + ], + "pre_detector_topic": "output_topic" + } + } + ], + "input": { + "my_input":{ + "type": "dummy_input", + "documents": [event], + "preprocessing": { + "log_arrival_time_target_field": "arrival_time" + } + } + } + } + pipeline = Pipeline(config=config) + extra_outputs = pipeline.process_pipeline() + +An example without input connector and preprocessors could look like this: + +.. code-block:: python + + from logprep.framework.pipeline import Pipeline + + event = { + "some": "data", + "test_pre_detector": "bad_information" + } + config = { + "pipeline": [ + { + "predetector": { + "type": "pre_detector", + "specific_rules": [ + "quickstart/exampledata/rules/pre_detector/specific" + ], + "generic_rules": [ + "quickstart/exampledata/rules/pre_detector/generic" + ], + "pre_detector_topic": "output_topic" + } + } + ], + } + pipeline = Pipeline(config=config) + extra_outputs = pipeline.process_event(event) + + +.. hint:: + + To make use of preprocessors call :code:`pipeline.process_pipeline()`. + Calling the respective method multiple times will result in iterating through the list of input + events. + To call the pipeline without input connector call :code:`pipeline.process_event(event)`. + + +.. warning:: + + When using the pipeline like this Logprep does not store any events or errors in an + designated output. + All relevant information are returned to the user and have to be taken care of the user + themself. diff --git a/doc/source/development/testing.rst b/doc/source/development/testing.rst index e463063ea..8aff3c987 100644 --- a/doc/source/development/testing.rst +++ b/doc/source/development/testing.rst @@ -11,13 +11,13 @@ Run acceptance tests with: .. code-block:: bash - pytest tests/acceptance + pytest tests/acceptance --cov=logprep --cov-report=xml -vvv or unittests with: .. code-block:: bash - pytest tests/unit + pytest tests/unit --cov=logprep --cov-report=xml -vvv Log Messages ============ diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst index aac0c474d..0f12a9852 100644 --- a/doc/source/getting_started.rst +++ b/doc/source/getting_started.rst @@ -56,7 +56,6 @@ To see if the installation was successful run :code:`docker run logprep --versio Run Logprep =========== -Depending on how you have installed Logprep you have different choices to run Logprep as well. If you have installed it via PyPI or the Github Development release just run: .. code-block:: bash @@ -64,96 +63,121 @@ If you have installed it via PyPI or the Github Development release just run: logprep run $CONFIG Where :code:`$CONFIG` is the path to a configuration file. -For more information see the :ref:`configuration` section. - -Integrate Logprep in Python -=========================== - -It is possible to make use of the Logprep :ref:`pipeline_config` in plain python, without any -input or output connectors or further configurations. -If on the other hand you want to make use of the input connector preprocessors you have to at least -use an input connector like the DummyInput. -The integration in python - -An example with input connector and preprocessors could look like this: - -.. code-block:: python - - from logprep.framework.pipeline import Pipeline - - event = { - "some": "data", - "test_pre_detector": "bad_information" - } - config = { - "pipeline": [ - { - "predetector": { - "type": "pre_detector", - "specific_rules": [ - "quickstart/exampledata/rules/pre_detector/specific" - ], - "generic_rules": [ - "quickstart/exampledata/rules/pre_detector/generic" - ], - "pre_detector_topic": "output_topic" - } - } - ], - "input": { - "my_input":{ - "type": "dummy_input", - "documents": [event], - "preprocessing": { - "log_arrival_time_target_field": "arrival_time" - } - } - } - } - pipeline = Pipeline(config=config) - extra_outputs = pipeline.process_pipeline() - -An example without input connector and preprocessors could look like this: - -.. code-block:: python - - from logprep.framework.pipeline import Pipeline - - event = { - "some": "data", - "test_pre_detector": "bad_information" - } - config = { - "pipeline": [ - { - "predetector": { - "type": "pre_detector", - "specific_rules": [ - "quickstart/exampledata/rules/pre_detector/specific" - ], - "generic_rules": [ - "quickstart/exampledata/rules/pre_detector/generic" - ], - "pre_detector_topic": "output_topic" - } - } - ], - } - pipeline = Pipeline(config=config) - extra_outputs = pipeline.process_event(event) - - -.. hint:: - - To make use of preprocessors call :code:`pipeline.process_pipeline()`. - Calling the respective method multiple times will result in iterating through the list of input - events. - To call the pipeline without input connector call :code:`pipeline.process_event(event)`. - - -.. warning:: - - When using the pipeline like this Logprep does not store any events or errors in an - designated output. - All relevant information are returned to the user and have to be taken care of the user - themself. +For more information on running logprep with different configruation files or running +logprep with configruation from an api see the :ref:`configuration` section. + + + +Logprep Quickstart Environment +============================== + +To demonstrate the functionality of logprep this repo comes with a complete `kafka`, `logprep` and +`opensearch` stack. +To get it running `docker` with compose support must be first installed. +The docker compose file is located in the directory `quickstart`. +A prerequisite is to run `sysctl -w vm.max_map_count=262144`, otherwise Opensearch might not +properly start. + +The environment can either be started with a Logprep container or without one: + +Run without Logprep Container (default) +--------------------------------------- + + 1. Run from within the `quickstart` directory: + + .. code-block:: bash + + docker compose up -d + + It starts and connects `Kafka`, `logprep`, `Opensearch` and `Opensearch Dashboards`. + 2. Run Logprep against loaded environment from main `Logprep` directory: + + .. code-block:: bash + + logprep run quickstart/exampledata/config/pipeline.yml + + +Run with Logprep Container +-------------------------- + + * Run from within the `quickstart` directory: + + .. code-block:: bash + + docker compose --profile logprep up -d + + +Run with getting config from http server with basic authentication +------------------------------------------------------------------ + + * Run from within the `quickstart` directory: + + .. code-block:: bash + + docker compose --profile basic_auth up -d + + * Run within the project root directory: + + .. code-block:: bash + + export LOGPREP_CREDENTIALS_FILE="quickstart/exampledata/config/credentials.yml" + logprep run http://localhost:8081/config/pipeline.yml + + +Run with getting config from FDA with oauth2 authentication +----------------------------------------------------------- + +Start logprep by using the oauth2 profile with docker compose: + + .. code-block:: bash + + export LOGPREP_CREDENTIALS_FILE="quickstart/exampledata/config/credentials.yml" + docker compose --profile oauth2 up -d + + +Once they are set logprep can be started from the project root directory with: + +.. code-block:: bash + + logprep run "http://localhost:8002/api/v1/pipelines?stage=prod&logclass=ExampleClass" + + +Interacting with the Quickstart Environment +------------------------------------------- + +The start up takes a few seconds to complete, but once everything is up +and running it is possible to write JSON events into Kafka and read the processed events in +Opensearch Dashboards. Following services are available after start up: + +====================== ================= ======== ======== +Service Location User Password +====================== ================= ======== ======== +Kafka: `localhost:9092` / / +Kafka Exporter: `localhost:9308` / / +Logprep metrics: `localhost:8001` / / +Opensearch: `localhost:9200` / / +Opensearch Dashboards: `localhost:5601` / / +Grafana Dashboards: `localhost:3000` admin admin +Prometheus: `localhost:9090` / / +Nginx: `localhost:8081` user password +Keycloak: `localhost:8080` admin admin +Keycloak Postgres: `localhost:5432` keycloak bitnami +FDA: `localhost:8002` logprep logprep +FDA Postgres: `localhost:25432` fda fda +====================== ================= ======== ======== + +The example rules that are used in the docker instance of Logprep can be found +in `quickstart/exampledata/rules`. +Example events that trigger for the example rules can be found in +`quickstart/exampledata/input_logdata/logclass/test_input.jsonl`. +These events can be added to Kafka with the following command: + +.. code-block:: bash + + (docker exec -i kafka kafka-console-producer.sh --bootstrap-server 127.0.0.1:9092 --topic consumer) < exampledata/input_logdata/logclass/test_input.jsonl + + +Once the events have been processed for the first time, the new indices *processed*, *sre* +and *pseudonyms* should be available in Opensearch Dashboards. + +The environment can be stopped via :code:`docker compose down`. \ No newline at end of file diff --git a/doc/source/user_manual/configuration/getter.rst b/doc/source/user_manual/configuration/getter.rst index 7feb20b88..dfaa4a07b 100644 --- a/doc/source/user_manual/configuration/getter.rst +++ b/doc/source/user_manual/configuration/getter.rst @@ -3,8 +3,5 @@ Getters ======= -.. automodule:: logprep.util.getter - :members: - :no-undoc-members: - - +.. autoclass:: logprep.util.getter.FileGetter +.. autoclass:: logprep.util.getter.HttpGetter diff --git a/doc/source/user_manual/execution.rst b/doc/source/user_manual/execution.rst index 43633a740..7f6edf8ce 100644 --- a/doc/source/user_manual/execution.rst +++ b/doc/source/user_manual/execution.rst @@ -14,3 +14,117 @@ To get help on the different parameters use: .. code-block:: bash logprep --help + + +Event Generation +---------------- + +Logprep has the additional functionality of generating events and sending them to two +different targets. +It can send events to kafka, while also loading events from kafka or reading them from file, +and it can send events to a http endpoint as POST requests. + +Following sections describe the usage of these event generators. + +Kafka +^^^^^ + +The kafka load-tester can send a configurable amount of documents to Kafka. +The documents that are being send can be obtained either from Kafka or from a file with JSON lines. + +It can be configured how many documents should be retrieved from Kafka (if Kafka is used as source) +and how many documents will be sent. +Documents obtained from Kafka won't be written down to disk. + +The documents will be sent repeatedly until the desired amount has been sent. +The `tags` field and the `_index` field of each document will be set to `load-tester`. +Furthermore, a field `load-tester-unique` with a unique value will be added to each document every +time a document is sent. +This is done to prevent that repeatedly sent documents are identical. + +To find out more about the usage of the kafka load-tester execute: + +.. code-block:: bash + + logprep generate kafka --help + + +Configuration +""""""""""""" + +The kafka load-tester is configured via a YAML file. +It must have the following format: + +.. code-block:: yaml + :caption: Example configuration file for the kafka load-tester + + logging_level: LOG_LEVEL # Default: "INFO" + source_count: INTERGER # Number of documents to obtain form Kafka + count: INTERGER # Number of documents to send + process_count: INTERGER # Number of processes (default: 1) + profile: BOOL # Shows profiling data (default: false) + target_send_per_sec: INTERGER # Desired number of documents to send per second with each process. Setting it to 0 sends as much as possible (default: 0). + + kafka: + bootstrap_servers: # List of bootstrap servers + - URL:PORT # i.e. "127.0.0.1:9092" + consumer: # Kafka consumer + topic: STRING # Topic to obtain documents from + group_id: STRING # Should be different from the group_id of the Logprep Consumer, otherwise the offset in Logprep will be changed! + timeout: FLOAT # Timeout for retrieving documents (default: 1.0) + producer: # Kafka producer + acks: STRING/INTERGER # Determines if sending should be acknowledged (default: 0) + compression_type: STRING # Compression type (default: "none") + topic: STRING # Topic to send documents to + queue_buffering_max_messages: INTEGER # Batch for sending documents (default: 10000) + linger_ms: INTEGER # Time to wait before a batch is sent if the max wasn't reached before (default: 5000) + flush_timeout: FLOAT # Timeout to flush the producer (default 30.0) + ssl: # SSL authentication (Optional) + ca_location: STRING + certificate_location: STRING + key: + location: STRING + password: STRING # Optional + +Unused parameters must be removed or commented. + +Http +^^^^ + +The http endpoint allows for generating events based on templated sample files which are stored +inside a dataset directory. + +The dataset directory with the sample files has to have the following format: + +.. code-block:: bash + + | - Test-Logs-Directory + | | - Test-Logs-Class-1-Directory + | | | - config.yaml + | | | - Test-Logs-1.jsonl + | | | - Test-Logs-2.jsonl + | | - Test-Logs-Class-2-Directory + | | | - config.yaml + | | | - Test-Logs-A.jsonl + | | | - Test-Logs-B.jsonl + +While the jsonl event files can have arbitrary names, the `config.yaml` needs to be called exactly +that. It also needs to follow the following schema: + +.. code-block:: yaml + :caption: Example configuration file for the http event generator + + target_path: /endpoint/logsource/path + timestamps: + - key: TIMESTAMP_FIELD_1 + format: "%Y%m%d" + - key: TIMESTAMP_FIELD_1 + format: "%H%M%S" + time_shift: "+0200" # Optional, sets time shift in hours and minutes, if needed ([+-]HHMM) + +To find out more about the usage of the http event generator execute: + +.. code-block:: bash + + logprep generate http --help + diff --git a/doc/source/user_manual/verification.rst b/doc/source/user_manual/verification.rst index 3c6c739d9..d92bb5a43 100644 --- a/doc/source/user_manual/verification.rst +++ b/doc/source/user_manual/verification.rst @@ -1,13 +1,13 @@ Verifying the Configuration =========================== -Verification is automatically performed on starting Logprep. +Verification of the given configruation is automatically performed on starting Logprep. The following command can be used to verify the configuration without running Logprep: .. code-block:: bash :caption: Directly with Python - PYTHONPATH="." python3 logprep/run_logprep.py test config $CONFIG + logprep test config $CONFIG .. code-block:: bash :caption: With PEX file diff --git a/logprep/connector/confluent_kafka/input.py b/logprep/connector/confluent_kafka/input.py index 9565c6a8f..df31044b8 100644 --- a/logprep/connector/confluent_kafka/input.py +++ b/logprep/connector/confluent_kafka/input.py @@ -383,7 +383,7 @@ def _get_raw_event(self, timeout: float) -> bytearray: kafka_error = message.error() if kafka_error: raise CriticalInputError( - self, "A confluent-kafka record contains an error code", kafka_error + self, "A confluent-kafka record contains an error code", str(kafka_error) ) self._last_valid_records[message.partition()] = message labels = {"description": f"topic: {self._config.topic} - partition: {message.partition()}"} diff --git a/logprep/processor/generic_adder/rule.py b/logprep/processor/generic_adder/rule.py index e29064352..f49b7e77a 100644 --- a/logprep/processor/generic_adder/rule.py +++ b/logprep/processor/generic_adder/rule.py @@ -98,7 +98,7 @@ from attrs import define, field, validators -from logprep.processor.base.rule import Rule, InvalidRuleDefinitionError +from logprep.processor.base.rule import InvalidRuleDefinitionError, Rule from logprep.processor.field_manager.rule import FieldManagerRule from logprep.util.getter import GetterFactory @@ -119,7 +119,7 @@ class Config(Rule.Config): add: dict = field( validator=validators.deep_mapping( key_validator=validators.instance_of(str), - value_validator=validators.instance_of((str, bool)), + value_validator=validators.instance_of((str, bool, list)), ), default={}, ) diff --git a/logprep/util/auto_rule_tester/auto_rule_tester.py b/logprep/util/auto_rule_tester/auto_rule_tester.py index 563f579df..a5704b062 100644 --- a/logprep/util/auto_rule_tester/auto_rule_tester.py +++ b/logprep/util/auto_rule_tester/auto_rule_tester.py @@ -37,7 +37,7 @@ .. code-block:: bash :caption: Directly with Python - PYTHONPATH="." python3 logprep/run_logprep.py test unit $CONFIG + logprep test unit $CONFIG .. code-block:: bash :caption: With PEX file diff --git a/logprep/util/configuration.py b/logprep/util/configuration.py index c5507a361..87aa21bff 100644 --- a/logprep/util/configuration.py +++ b/logprep/util/configuration.py @@ -21,6 +21,109 @@ logprep run http://api/v1/pipeline http://api/v1/addition_processor_pipline /path/to/conector.yaml +Configuration File Structure +---------------------------- + +.. code-block:: yaml + :caption: full configuration file example + + version: config-1.0 + process_count: 2 + timeout: 5 + logger: + level: INFO + input: + kafka: + type: confluentkafka_input + topic: consumer + offset_reset_policy: smallest + kafka_config: + bootstrap.servers: localhost:9092 + group.id: test + output: + kafka: + type: confluentkafka_output + topic: producer + error_topic: producer_error + flush_timeout: 30 + send_timeout: 2 + kafka_config: + bootstrap.servers: localhost:9092 + pipeline: + - labelername: + type: labeler + schema: quickstart/exampledata/rules/labeler/schema.json + include_parent_labels: true + specific_rules: + - quickstart/exampledata/rules/labeler/specific + generic_rules: + - quickstart/exampledata/rules/labeler/generic + + - dissectorname: + type: dissector + specific_rules: + - quickstart/exampledata/rules/dissector/specific/ + generic_rules: + - quickstart/exampledata/rules/dissector/generic/ + + - dropper: + type: dropper + specific_rules: + - quickstart/exampledata/rules/dropper/specific + generic_rules: + - quickstart/exampledata/rules/dropper/generic + - filter: "test_dropper" + dropper: + drop: + - drop_me + description: "..." + + - pre_detector: + type: pre_detector + specific_rules: + - quickstart/exampledata/rules/pre_detector/specific + generic_rules: + - quickstart/exampledata/rules/pre_detector/generic + outputs: + - opensearch: sre + tree_config: quickstart/exampledata/rules/pre_detector/tree_config.json + alert_ip_list_path: quickstart/exampledata/rules/pre_detector/alert_ips.yml + + - amides: + type: amides + specific_rules: + - quickstart/exampledata/rules/amides/specific + generic_rules: + - quickstart/exampledata/rules/amides/generic + models_path: quickstart/exampledata/models/model.zip + num_rule_attributions: 10 + max_cache_entries: 1000000 + decision_threshold: 0.32 + + - pseudonymizer: + type: pseudonymizer + pubkey_analyst: quickstart/exampledata/rules/pseudonymizer/example_analyst_pub.pem + pubkey_depseudo: quickstart/exampledata/rules/pseudonymizer/example_depseudo_pub.pem + regex_mapping: quickstart/exampledata/rules/pseudonymizer/regex_mapping.yml + hash_salt: a_secret_tasty_ingredient + outputs: + - opensearch: pseudonyms + specific_rules: + - quickstart/exampledata/rules/pseudonymizer/specific/ + generic_rules: + - quickstart/exampledata/rules/pseudonymizer/generic/ + max_cached_pseudonyms: 1000000 + + - calculator: + type: calculator + specific_rules: + - filter: "test_label: execute" + calculator: + target_field: "calculation" + calc: "1 + 1" + generic_rules: [] + + The options under :code:`input`, :code:`output` and :code:`pipeline` are passed to factories in Logprep. They contain settings for each separate processor and connector. @@ -39,7 +142,7 @@ The following config file will be valid by setting the given environment variables: .. code-block:: yaml - :caption: pipeline.yml config file + :caption: pipeline.yml config file with environment variables version: $LOGPREP_VERSION process_count: $LOGPREP_PROCESS_COUNT @@ -108,6 +211,7 @@ from logprep.factory_error import FactoryError, InvalidConfigurationError from logprep.processor.base.exceptions import InvalidRuleDefinitionError from logprep.util import getter +from logprep.util.credentials import CredentialsEnvNotFoundError from logprep.util.defaults import DEFAULT_CONFIG_LOCATION from logprep.util.getter import GetterFactory, GetterNotFoundError from logprep.util.json_handling import list_json_files_in_directory @@ -328,7 +432,7 @@ def from_sources(cls, config_paths: Iterable[str] = None) -> "Configuration": try: config = Configuration.from_source(config_path) configs.append(config) - except (GetterNotFoundError, RequestException) as error: + except (GetterNotFoundError, RequestException, CredentialsEnvNotFoundError) as error: raise ConfigGetterException(f"{config_path} {error}") from error except FileNotFoundError as error: raise ConfigGetterException( diff --git a/logprep/util/credentials.py b/logprep/util/credentials.py new file mode 100644 index 000000000..597b7f357 --- /dev/null +++ b/logprep/util/credentials.py @@ -0,0 +1,576 @@ +""" +Authentication for HTTP Getters +------------------------------- + +In order for Logprep to choose the correct authentication method the +:code:`LOGPREP_CREDENTIALS_FILE` environment variable has to be set. +This file should provide the credentials that are needed and can either be +in yaml or in json format. +To use the authentication, the given credentials file has to be +filled with the correct values that correspond to the method you want to use. + +.. code-block:: yaml + :caption: Example for credentials file + + "http://target.url": + # example for token given directly via file + token_file: # won't be refreshed if expired + "http://target.url": + # example for token given directly inline + token: # won't be refreshed if expired + "http://target.url": + # example for OAuth2 Client Credentials Grant + endpoint: + client_id: + client_secret_file: + "http://target.url": + # example for OAuth2 Client Credentials Grant with inline secret + endpoint: + client_id: + client_secret: + "http://target.url": + # example for OAuth2 Resource Owner Password Credentials Grant with authentication for a confidential client + endpoint: + username: + password_file: + client_id: # optional if required + client_secret_file: # optional if required + "http://target.url": + # example for OAuth2 Resource Owner Password Credentials Grant for a public unconfidential client + endpoint: + username: + password_file: + "http://target.url": + # example for OAuth2 Resource Owner Password Credentials Grant for a public unconfidential client with inline password + endpoint: + username: + password: + "http://target.url": + # example for Basic Authentication + username: + password_file: + "http://target.url": + # example for Basic Authentication with inline password + username: + password: # will be overwritten if 'password_file' is given + +Options for the credentials file are: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: logprep.util.credentials.BasicAuthCredentials + :members: username, password + :no-index: +.. autoclass:: logprep.util.credentials.OAuth2ClientFlowCredentials + :members: endpoint, client_id, client_secret + :no-index: +.. autoclass:: logprep.util.credentials.OAuth2PasswordFlowCredentials + :members: endpoint, client_id, client_secret, username, password + :no-index: + +Authentication Process: +^^^^^^^^^^^^^^^^^^^^^^^ +.. figure:: ../../_images/Credentials.svg + :align: left + +""" + +import json +import logging +import os +from base64 import b64encode +from datetime import datetime, timedelta +from pathlib import Path +from urllib.parse import urlparse + +import requests +from attrs import define, field, validators +from requests import HTTPError, Session +from requests.adapters import HTTPAdapter +from ruamel.yaml import YAML +from ruamel.yaml.error import YAMLError +from urllib3 import Retry + +from logprep.factory_error import InvalidConfigurationError + +yaml = YAML(typ="safe", pure=True) + + +class CredentialsBadRequestError(Exception): + """Raised when the API returns a 400 Bad Request error""" + + +class CredentialsEnvNotFoundError(Exception): + """Raised when the API returns a 401 Not Found""" + + +class CredentialsFactory: + """Factory class to create credentials for a given target URL.""" + + _logger = logging.getLogger(__name__) + + @classmethod + def from_target(cls, target_url: str) -> "Credentials": + """Factory method to create a credentials object based on the credentials stored in the + environment variable :code:`LOGPREP_CREDENTIALS_FILE`. + Based on these credentials the expected authentication method is chosen and represented + by the corresponding credentials object. + + Parameters + ---------- + target_url : str + target against which to authenticate with the given credentials + + Returns + ------- + credentials: Credentials + Credentials object representing the correct authorization method + + """ + credentials_file_path = os.environ.get("LOGPREP_CREDENTIALS_FILE") + if credentials_file_path is None: + return None + raw_content: dict = cls._get_content(Path(credentials_file_path)) + domain = urlparse(target_url).netloc + scheme = urlparse(target_url).scheme + credential_mapping = raw_content.get(f"{scheme}://{domain}") + credentials = cls.from_dict(credential_mapping) + return credentials + + @staticmethod + def _get_content(file_path: Path) -> dict: + """gets content from credentials file + file can be either json or yaml + + Parameters + ---------- + file_path : Path + path to credentials file given in :code:`LOGPREP_CREDENTIALS_FILE` + + Returns + ------- + file_content: dict + content from file + + Raises + ------ + InvalidConfigurationError + raises when credentials have wrong type or when credentials file + is invalid + """ + try: + file_content = file_path.read_text(encoding="utf-8") + try: + return json.loads(file_content) + except (json.JSONDecodeError, ValueError): + return yaml.load(file_content) + except (TypeError, YAMLError) as error: + raise InvalidConfigurationError( + f"Invalid credentials file: {file_path} {error.args[0]}" + ) from error + except FileNotFoundError as error: + raise InvalidConfigurationError( + f"Environment variable has wrong credentials file path: {file_path}" + ) from error + + @staticmethod + def _resolve_secret_content(credential_mapping: dict): + """gets content from given secret_file in credentials file and updates + credentials_mapping with this content. + + This file should only contain the content of the given secret e.g. the client secret. + + Parameters + ---------- + credentials_mapping : dict + content from given credentials mapping + """ + secret_content = { + credential_type.removesuffix("_file"): Path(credential_content).read_text( + encoding="utf-8" + ) + for credential_type, credential_content in credential_mapping.items() + if "_file" in credential_type + } + for credential_type in secret_content: + credential_mapping.pop(f"{credential_type}_file") + credential_mapping.update(secret_content) + + @classmethod + def from_dict(cls, credential_mapping: dict) -> "Credentials": + """matches the given credentials of the credentials mapping with the expected credential object""" + if credential_mapping: + cls._resolve_secret_content(credential_mapping) + try: + return cls._match_credentials(credential_mapping) + except TypeError as error: + raise InvalidConfigurationError( + f"Wrong type in given credentials file on argument: {error.args[0]}" + ) from error + + @classmethod + def _match_credentials(cls, credential_mapping: dict) -> "Credentials": + """matches the given credentials of a given mapping to the expected credential object + + Parameters + ---------- + credential_mapping : dict + mapping of given credentials used for authentication against target + + Returns + ------- + Credentials + expected credentials object representing the correct authentication method + """ + match credential_mapping: + case {"token": token, **extra_params}: + if extra_params: + cls._logger.warning( + "Other parameters were given: %s but OAuth token authorization was chosen", + extra_params.keys(), + ) + return OAuth2TokenCredentials(token=token) + case { + "endpoint": endpoint, + "client_id": client_id, + "client_secret": client_secret, + "username": username, + "password": password, + **extra_params, + }: + if extra_params: + cls._logger.warning( + "Other parameters were given: %s but OAuth password authorization for confidential clients was chosen", + extra_params.keys(), + ) + return OAuth2PasswordFlowCredentials( + endpoint=endpoint, + client_id=client_id, + client_secret=client_secret, + username=username, + password=password, + ) + case { + "endpoint": endpoint, + "client_id": client_id, + "client_secret": client_secret, + **extra_params, + }: + if extra_params: + cls._logger.warning( + "Other parameters were given: %s but OAuth client authorization was chosen", + extra_params.keys(), + ) + return OAuth2ClientFlowCredentials( + endpoint=endpoint, client_id=client_id, client_secret=client_secret + ) + case { + "endpoint": endpoint, + "username": username, + "password": password, + **extra_params, + }: + if extra_params: + cls._logger.warning( + "Other parameters were given: %s but OAuth password authorization was chosen", + extra_params.keys(), + ) + return OAuth2PasswordFlowCredentials( + endpoint=endpoint, username=username, password=password + ) + case {"username": username, "password": password, **extra_params}: + if extra_params: + cls._logger.warning( + "Other parameters were given but Basic authentication was chosen: %s", + extra_params.keys(), + ) + return BasicAuthCredentials(username=username, password=password) + case _: + cls._logger.warning("No matching credentials authentication could be found.") + return None + + +@define(kw_only=True) +class AccessToken: + """A simple dataclass to hold the token and its expiry time.""" + + token: str = field(validator=validators.instance_of(str), repr=False) + """token used for athentication against the target""" + refresh_token: str = field( + validator=validators.instance_of((str, type(None))), default=None, repr=False + ) + """is used incase the token is expired""" + expires_in: int = field( + validator=validators.instance_of(int), + default=0, + converter=lambda x: 0 if x is None else int(x), + ) + """time the token stays valid""" + expiry_time: datetime = field( + validator=validators.instance_of((datetime, type(None))), init=False + ) + """time when token is expired""" + + def __attrs_post_init__(self): + self.expiry_time = datetime.now() + timedelta(seconds=self.expires_in) + + def __str__(self) -> str: + return self.token + + @property + def is_expired(self) -> bool: + """Checks if the token is already expired.""" + if self.expires_in == 0: + return False + return datetime.now() > self.expiry_time + + +@define(kw_only=True) +class Credentials: + """Abstract Base Class for Credentials""" + + _logger = logging.getLogger(__name__) + + _session: Session = field(validator=validators.instance_of((Session, type(None))), default=None) + + def get_session(self): + if self._session is None: + self._session = Session() + max_retries = 3 + retries = Retry(total=max_retries, status_forcelist=[500, 502, 503, 504]) + self._session.mount("https://", HTTPAdapter(max_retries=retries)) + self._session.mount("http://", HTTPAdapter(max_retries=retries)) + return self._session + + def _no_authorization_header(self, session): + """checks if authorization header already exists in the given request session""" + return session.headers.get("Authorization") is None + + def _handle_bad_requests_errors(self, response): + """handles requests with status code 400 and raises Error + + Parameters + ---------- + response : Response + signifies the respone from the post request sent while retrieving the token + + Raises + ------ + CredentialsBadRequestError + raises error with status code 400 + """ + try: + response.raise_for_status() + except HTTPError as error: + if response.status_code == 400: + raise CredentialsBadRequestError( + f"Authentication failed with status code 400 Bad Request: {response.json().get('error')}" + ) from error + raise + + +@define(kw_only=True) +class BasicAuthCredentials(Credentials): + """Basic Authentication Credentials + This is used for authenticating with Basic Authentication""" + + username: str = field(validator=validators.instance_of(str)) + """The username for the basic authentication.""" + password: str = field(validator=validators.instance_of(str), repr=False) + """The password for the basic authentication.""" + + def get_session(self) -> Session: + """the request session used for basic authentication containing the username and password + which are set as the authentication parameters + + :meta private: + + Returns + ------- + session: Session + session with username and password used for the authentication + """ + session = super().get_session() + session.auth = (self.username, self.password) + return session + + +@define(kw_only=True) +class OAuth2TokenCredentials(Credentials): + """OAuth2 Bearer Token Credentials + This is used for authenticating with an API that uses OAuth2 Bearer Tokens. + The Token is not refreshed automatically. If it expires, the requests will + fail with http status code `401`. + """ + + token: AccessToken = field( + validator=validators.instance_of(AccessToken), + converter=lambda token: AccessToken(token=token), + repr=False, + ) + """The OAuth2 Bearer Token. This is used to authenticate.""" + + def get_session(self) -> Session: + """request session with Bearer Token set in the authorization header""" + session = super().get_session() + session.headers["Authorization"] = f"Bearer {self.token}" + return session + + +@define(kw_only=True) +class OAuth2PasswordFlowCredentials(Credentials): + """OAuth2 Resource Owner Password Credentials Grant as described in + https://datatracker.ietf.org/doc/html/rfc6749#section-4.3 + + Token refresh is implemented as described in + https://datatracker.ietf.org/doc/html/rfc6749#section-6 + """ + + endpoint: str = field(validator=validators.instance_of(str)) + """The token endpoint for the OAuth2 server. This is used to request the token.""" + password: str = field(validator=validators.instance_of(str), repr=False) + """the password for the token request""" + username: str = field(validator=validators.instance_of(str)) + """the username for the token request""" + timeout: int = field(validator=validators.instance_of(int), default=1) + """The timeout for the token request. Defaults to 1 second.""" + client_id: str = field(validator=validators.instance_of((str, type(None))), default=None) + """The client id for the token request. This is used to identify the client. (Optional)""" + client_secret: str = field( + validator=validators.instance_of((str, type(None))), default=None, repr=False + ) + """The client secret for the token request. This is used to authenticate the client. (Optional)""" + _token: AccessToken = field( + validator=validators.instance_of((AccessToken, type(None))), + init=False, + repr=False, + ) + + def get_session(self) -> Session: + session = super().get_session() + payload = None + if self._no_authorization_header(session): + payload = { + "grant_type": "password", + "username": self.username, + "password": self.password, + } + session.headers["Authorization"] = f"Bearer {self._get_token(payload)}" + + if self._token.is_expired and self._token.refresh_token is not None: + session = Session() + payload = { + "grant_type": "refresh_token", + "refresh_token": self._token.refresh_token, + } + session.headers["Authorization"] = f"Bearer {self._get_token(payload)}" + self._session = session + return session + + def _get_token(self, payload: dict[str, str]) -> AccessToken: + """sends a post request containing the payload to the token endpoint to retrieve + the token. + If status code 400 is recieved a Bad Request Error is raised. + + Parameters + ---------- + payload : dict[str, str] + contains credentials and the OAuth2 grant type for the given token endpoint to retrieve + the token + Returns + ------- + _token: AccessToken + returns access token to be used, refresh token to be used when + token is expired and the expiry time of the given access token + """ + headers = {} + if self.client_id and self.client_secret: + client_secrets = b64encode( + f"{self.client_id}:{self.client_secret}".encode("utf-8") + ).decode("utf-8") + headers |= {"Authorization": f"Basic {client_secrets}"} + response = requests.post( + url=self.endpoint, + data=payload, + timeout=self.timeout, + headers=headers, + ) + self._handle_bad_requests_errors(response) + token_response = response.json() + access_token = token_response.get("access_token") + refresh_token = token_response.get("refresh_token") + expires_in = token_response.get("expires_in") + self._token = AccessToken( + token=access_token, refresh_token=refresh_token, expires_in=expires_in + ) + return self._token + + +@define(kw_only=True) +class OAuth2ClientFlowCredentials(Credentials): + """OAuth2 Client Credentials Flow Implementation as described in + https://datatracker.ietf.org/doc/html/rfc6749#section-1.3.4 + """ + + endpoint: str = field(validator=validators.instance_of(str)) + """The token endpoint for the OAuth2 server. This is used to request the token.""" + client_id: str = field(validator=validators.instance_of(str)) + """The client id for the token request. This is used to identify the client.""" + client_secret: str = field(validator=validators.instance_of(str), repr=False) + """The client secret for the token request. This is used to authenticate the client.""" + timeout: int = field(validator=validators.instance_of(int), default=1) + """The timeout for the token request. Defaults to 1 second.""" + _token: AccessToken = field( + validator=validators.instance_of((AccessToken, type(None))), init=False, repr=False + ) + + def get_session(self) -> Session: + """Retrieves or creates session with token in authorization header. + If no authorization header is set yet, a post request containing only + the grant type as payload is sent to the token endpoint given in the + credentials file to retrieve the token. + + The client secret and a client id given in the credentials file are used to + authenticate against the token endpoint. + + Returns + ------- + Session + a request session with the retrieved token set in the authorization header + + """ + session = super().get_session() + if "Authorization" in session.headers and self._token.is_expired: + session = Session() + if self._no_authorization_header(session): + session.headers["Authorization"] = f"Bearer {self._get_token()}" + return session + + def _get_token(self) -> AccessToken: + """send post request to token endpoint + to retrieve access token using the client credentials grant. + If received status code is 400 a Bad Request Error is raised. + + Returns + ------- + _token: AccessToken + AccessToken object containing the token, the refresh token and the expiry time + """ + payload = { + "grant_type": "client_credentials", + } + client_secrets = b64encode(f"{self.client_id}:{self.client_secret}".encode("utf-8")).decode( + "utf-8" + ) + headers = {"Authorization": f"Basic {client_secrets}"} + response = requests.post( + url=self.endpoint, + data=payload, + timeout=self.timeout, + headers=headers, + ) + self._handle_bad_requests_errors(response) + token_response = response.json() + access_token = token_response.get("access_token") + expires_in = token_response.get("expires_in") + self._token = AccessToken(token=access_token, expires_in=expires_in) + return self._token diff --git a/logprep/util/decorators.py b/logprep/util/decorators.py index 56ef81262..62bd99c63 100644 --- a/logprep/util/decorators.py +++ b/logprep/util/decorators.py @@ -2,8 +2,8 @@ import errno import os -from functools import wraps import signal +from functools import wraps def timeout(seconds=100, error_message=os.strerror(errno.ETIME)): diff --git a/logprep/util/defaults.py b/logprep/util/defaults.py index 5859f76e5..38f02d911 100644 --- a/logprep/util/defaults.py +++ b/logprep/util/defaults.py @@ -2,3 +2,5 @@ DEFAULT_CONFIG_LOCATION = "file:///etc/logprep/pipeline.yml" DEFAULT_LOG_FORMAT = "%(asctime)-15s %(name)-5s %(levelname)-8s: %(message)s" +ENV_NAME_LOGPREP_CREDENTIALS_FILE = "LOGPREP_CREDENTIALS_FILE" + diff --git a/logprep/util/getter.py b/logprep/util/getter.py index 9b88178db..28fe4ecb9 100644 --- a/logprep/util/getter.py +++ b/logprep/util/getter.py @@ -12,11 +12,16 @@ import requests from attrs import define, field, validators -from requests.auth import HTTPBasicAuth from logprep._version import get_versions from logprep.abc.exceptions import LogprepException from logprep.abc.getter import Getter +from logprep.util.credentials import ( + Credentials, + CredentialsEnvNotFoundError, + CredentialsFactory, +) +from logprep.util.defaults import ENV_NAME_LOGPREP_CREDENTIALS_FILE class GetterNotFoundError(LogprepException): @@ -47,6 +52,7 @@ def from_string(cls, getter_string: str) -> "Getter": """ protocol, target = cls._dissect(getter_string) target = cls._expand_variables(target, os.environ) + # get credentials if protocol is None: protocol = "file" if protocol == "file": @@ -88,29 +94,20 @@ def get_raw(self) -> bytearray: @define(kw_only=True) class HttpGetter(Getter): - """get files from a api or simple web server. + """Get files from an api or simple web server. - Matching string examples: - - * Simple http target: :code:`http://your.target/file.yml` - * Simple https target: :code:`https://your.target/file.json` - - if you want to use basic auth, then you have to set the environment variables + Matching string examples: - * :code:`LOGPREP_CONFIG_AUTH_USERNAME=<your_username>` - * :code:`LOGPREP_CONFIG_AUTH_PASSWORD=<your_password>` + * Simple http target: :code:`http://your.target/file.yml` + * Simple https target: :code:`https://your.target/file.json` - if you want to use oauth, then you have to set the environment variables - - * :code:`LOGPREP_CONFIG_AUTH_TOKEN=<your_token>` - * :code:`LOGPREP_CONFIG_AUTH_METHOD=oauth` + .. automodule:: logprep.util.credentials + :no-index: """ - _sessions: dict = {} + _credentials_registry: dict[str, Credentials] = {} - _username: str = field(validator=validators.optional(validators.instance_of(str)), default=None) - _password: str = field(validator=validators.optional(validators.instance_of(str)), default=None) _headers: dict = field(validator=validators.instance_of(dict), factory=dict) def __attrs_post_init__(self): @@ -122,47 +119,43 @@ def __attrs_post_init__(self): if target_match.group("username") or target_match.group("password"): raise NotImplementedError( "Basic auth credentials via commandline are not supported." - "Please use environment variables " - "LOGPREP_CONFIG_AUTH_USERNAME and LOGPREP_CONFIG_AUTH_PASSWORD instead." + "Please use the credential file in connection with the " + f"environment variable '{ENV_NAME_LOGPREP_CREDENTIALS_FILE}' to authenticate." ) - self._set_credentials() - def _set_credentials(self): - if os.environ.get("LOGPREP_CONFIG_AUTH_METHOD") == "oauth": - if token := os.environ.get("LOGPREP_CONFIG_AUTH_TOKEN"): - self._headers.update({"Authorization": f"Bearer {token}"}) - self._username = None - self._password = None - self._username = os.environ.get("LOGPREP_CONFIG_AUTH_USERNAME") - self._password = os.environ.get("LOGPREP_CONFIG_AUTH_PASSWORD") + @property + def url(self) -> str: + """Returns the url of the target.""" + return f"{self.protocol}://{self.target}" + + @property + def credentials(self) -> Credentials: + """get credentials for target from environment variable""" + creds = None + if ENV_NAME_LOGPREP_CREDENTIALS_FILE in os.environ: + creds = CredentialsFactory.from_target(self.url) + return creds if creds else Credentials() def get_raw(self) -> bytearray: """gets the content from a http server via uri""" - basic_auth = None - username, password = self._username, self._password - if username is not None: - basic_auth = HTTPBasicAuth(username, password) - url = f"{self.protocol}://{self.target}" - domain = urlparse(url).netloc - if domain not in self._sessions: - domain_session = requests.Session() - self._sessions.update({domain: domain_session}) - session = self._sessions.get(domain) - if basic_auth: - session.auth = basic_auth - retries = 3 - resp = None - while resp is None: - try: - resp = session.get( - url=url, - timeout=5, - allow_redirects=True, - headers=self._headers, + domain = urlparse(self.url).netloc + scheme = urlparse(self.url).scheme + domain_uri = f"{scheme}://{domain}" + if domain_uri not in self._credentials_registry: + self._credentials_registry.update({domain_uri: self.credentials}) + session = self._credentials_registry.get(domain_uri).get_session() + resp = session.get(url=self.url, timeout=5, allow_redirects=True, headers=self._headers) + try: + resp.raise_for_status() + except requests.exceptions.HTTPError as error: + if not error.response.status_code == 401: + raise error + if os.environ.get(ENV_NAME_LOGPREP_CREDENTIALS_FILE): + raise error + raise CredentialsEnvNotFoundError( + ( + "Credentials file not found. Please set the environment variable " + f"'{ENV_NAME_LOGPREP_CREDENTIALS_FILE}'" ) - except requests.exceptions.RequestException as error: - retries -= 1 - if retries == 0: - raise error - resp.raise_for_status() + ) from error return resp.content diff --git a/logprep/util/rule_dry_runner.py b/logprep/util/rule_dry_runner.py index 774ce2818..0a1375960 100644 --- a/logprep/util/rule_dry_runner.py +++ b/logprep/util/rule_dry_runner.py @@ -11,7 +11,7 @@ .. code-block:: bash :caption: Directly with Python - PYTHONPATH="." python3 logprep/run_logprep.py test dry-run $CONFIG $EVENTS + logprep test dry-run $CONFIG $EVENTS .. code-block:: bash :caption: With a PEX file diff --git a/pyproject.toml b/pyproject.toml index 82e1bd56f..3268ef7c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,15 +18,20 @@ license = { file = "LICENSE" } classifiers = [ "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", + "Intended Audience :: Information Technology", - "License :: LGPL-2.1", + "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + + "Operating System :: POSIX :: Linux", + + "Topic :: Security", + "Topic :: System :: Logging", ] keywords = [ "elasticsearch", diff --git a/quickstart/docker-compose.yml b/quickstart/docker-compose.yml index 40f517b2b..d94a2ea51 100644 --- a/quickstart/docker-compose.yml +++ b/quickstart/docker-compose.yml @@ -161,7 +161,7 @@ services: - ../quickstart/exampledata/config/keycloak:/docker-entrypoint-initdb.d fda-api: container_name: fda-api - image: registry.gitlab.com/z-e-u-s/fda/fda-backend:0.16.0 + image: registry.gitlab.com/z-e-u-s/fda/fda-backend:0.21.0 network_mode: host expose: - 8000 @@ -172,14 +172,15 @@ services: POSTGRES_HOST: localhost POSTGRES_PORT: 25432 DJANGO_SECRET_KEY: "django-insecure-*w($$5i@@iq%!ygufa%%@nfdplt(!e#hoahnjy^@6xdutl8mlqz" - ALLOWED_HOSTS: '["*"]' - CORS_ALLOWED_ORIGINS: '["http://localhost"]' + ALLOWED_HOSTS: "*" + CORS_ALLOWED_ORIGINS: "http://localhost" KEYCLOAK_SERVER_URL: "http://localhost:8080" KEYCLOAK_REALM: logprep KEYCLOAK_CLIENT_ID: fda-backend KEYCLOAK_CLIENT_SECRET: tYfkKygb1g2Hf6fmAInoq3XPK1OILbSp KEYCLOAK_CLIENT_ID_FOR_AUTHZ_ROLES: fda SERVICE_BASE_URL: 'http://localhost:8000/' + SECURE_REDIRECT_EXEMPT: '[".*localhost.*"]' depends_on: fda-db: condition: service_healthy @@ -205,7 +206,7 @@ services: timeout: 5s retries: 10 fda-frontend: - image: registry.gitlab.com/z-e-u-s/fda/fda-frontend:0.16.0 + image: registry.gitlab.com/z-e-u-s/fda/fda-frontend:0.21.0 container_name: fda-frontend network_mode: host restart: always diff --git a/quickstart/exampledata/config/credentials.yml b/quickstart/exampledata/config/credentials.yml new file mode 100644 index 000000000..8e99890d7 --- /dev/null +++ b/quickstart/exampledata/config/credentials.yml @@ -0,0 +1,9 @@ +"http://localhost:8002": + endpoint: http://localhost:8080/realms/logprep/protocol/openid-connect/token + username: logprep + password: logprep + client_id: fda + client_secret: tYfkKygb1g2Hf6fmAInoq3XPK1OILbSp +"http://localhost:8081": + username: user + password: password diff --git a/tests/unit/connector/test_confluent_kafka_input.py b/tests/unit/connector/test_confluent_kafka_input.py index f777c7bbf..b7ba6dda0 100644 --- a/tests/unit/connector/test_confluent_kafka_input.py +++ b/tests/unit/connector/test_confluent_kafka_input.py @@ -8,7 +8,7 @@ from unittest import mock import pytest -from confluent_kafka import OFFSET_BEGINNING, KafkaException +from confluent_kafka import OFFSET_BEGINNING, KafkaError, KafkaException from logprep.abc.input import ( CriticalInputError, @@ -67,7 +67,16 @@ def test_get_next_returns_none_if_no_records(self, _): @mock.patch("logprep.connector.confluent_kafka.input.Consumer") def test_get_next_raises_critical_input_exception_for_invalid_confluent_kafka_record(self, _): mock_record = mock.MagicMock() - mock_record.error = mock.MagicMock(return_value="An arbitrary confluent-kafka error") + mock_record.error = mock.MagicMock( + return_value=KafkaError( + error=3, + reason="Subscribed topic not available: (Test Instance Name) : Broker: Unknown topic or partition", + fatal=False, + retriable=False, + txn_requires_abort=False, + ) + ) + mock_record.value = mock.MagicMock(return_value=None) self.object._consumer.poll = mock.MagicMock(return_value=mock_record) with pytest.raises( @@ -76,7 +85,7 @@ def test_get_next_raises_critical_input_exception_for_invalid_confluent_kafka_re r"CriticalInputError in ConfluentKafkaInput \(Test Instance Name\) - " r"Kafka Input: testserver:9092: " r"A confluent-kafka record contains an error code -> " - r"An arbitrary confluent-kafka error" + r"KafkaError{code=UNKNOWN_TOPIC_OR_PART,val=3,str=\"Subscribed topic not available: \(Test Instance Name\) : Broker: Unknown topic or partition\"}" ), ): _, _ = self.object.get_next(1) @@ -156,7 +165,7 @@ def test_get_next_raises_critical_input_error_if_not_a_dict(self, _): self.object.get_next(1) @mock.patch("logprep.connector.confluent_kafka.input.Consumer") - def test_get_next_raises_critical_input_error_if_unvalid_json(self, _): + def test_get_next_raises_critical_input_error_if_invalid_json(self, _): mock_record = mock.MagicMock() mock_record.error = mock.MagicMock() mock_record.error.return_value = None diff --git a/tests/unit/connector/test_real_kafka.py b/tests/unit/connector/test_real_kafka.py index 3f5237087..36e6a5568 100644 --- a/tests/unit/connector/test_real_kafka.py +++ b/tests/unit/connector/test_real_kafka.py @@ -24,7 +24,7 @@ def setup_module(): if not in_ci: subprocess.run( - ["docker-compose", "-f", "quickstart/docker-compose.yml", "up", "-d", "kafka"] + ["docker", "compose", "-f", "quickstart/docker-compose.yml", "up", "-d", "kafka"] ) diff --git a/tests/unit/processor/generic_adder/test_generic_adder.py b/tests/unit/processor/generic_adder/test_generic_adder.py index 109a0026c..b66f11fdc 100644 --- a/tests/unit/processor/generic_adder/test_generic_adder.py +++ b/tests/unit/processor/generic_adder/test_generic_adder.py @@ -282,6 +282,19 @@ class TestGenericAdder(BaseProcessorTestCase): "dotted": {"added": {"field": "yet_another_value"}}, }, ), + ( + "Extend list field with 'extend_target_list' enabled", + { + "filter": "*", + "generic_adder": { + "add": { + "some_added_field": ["some value"], + }, + }, + }, + {"extend_generic_test": "Test"}, + {"extend_generic_test": "Test", "some_added_field": ["some value"]}, + ), ] failure_test_cases = [ # testcase, rule, event, expected, error_message diff --git a/tests/unit/util/test_credentials.py b/tests/unit/util/test_credentials.py new file mode 100644 index 000000000..bd61733c9 --- /dev/null +++ b/tests/unit/util/test_credentials.py @@ -0,0 +1,998 @@ +# pylint: disable=missing-docstring +# pylint: disable=protected-access +import re +from datetime import datetime, timedelta +from unittest import mock + +import pytest +import requests +import responses +from requests import Session +from responses import matchers + +from logprep.factory_error import InvalidConfigurationError +from logprep.util.credentials import ( + AccessToken, + BasicAuthCredentials, + Credentials, + CredentialsBadRequestError, + CredentialsFactory, + OAuth2ClientFlowCredentials, + OAuth2PasswordFlowCredentials, + OAuth2TokenCredentials, +) + + +class TestBasicAuthCredentials: + + @pytest.mark.parametrize( + "testcase, kwargs, error, error_message", + [ + ( + "invalid because no kwargs", + {}, + TypeError, + r"missing \d required keyword-only argument", + ), + ( + "invalid because username is not a string", + {"username": 123, "password": "password"}, + TypeError, + r"must be <class \'str\'>", + ), + ( + "invalid because password is missing", + {"username": "user"}, + TypeError, + r"missing \d required keyword-only argument", + ), + ( + "valid", + {"username": "user", "password": "password"}, + None, + None, + ), + ( + "invalid, because password not a string", + {"username": "user", "password": 1.2}, + TypeError, + r"must be <class \'str\'>", + ), + ], + ) + def test_init(self, testcase, kwargs, error, error_message): + if error is None: + _ = BasicAuthCredentials(**kwargs) + else: + with pytest.raises(error, match=error_message): + _ = BasicAuthCredentials(**kwargs) + + def test_get_session_returns_session(self): + test = BasicAuthCredentials(username="user", password="password") + assert test.get_session() is not None + + def test_get_session_returns_session_with_auth(self): + test = BasicAuthCredentials(username="user", password="password") + session = test.get_session() + assert session.auth == ("user", "password") + + +class TestOAuth2TokenCredentials: + + @pytest.mark.parametrize( + "testcase, kwargs, error, error_message", + [ + ( + "invalid because no kwargs", + {}, + TypeError, + r"missing \d required keyword-only argument", + ), + ( + "invalid because token is not a string", + {"token": 216742}, + TypeError, + r"must be <class 'str'>", + ), + ( + "valid token", + {"token": "hioinnjdijskjdhfue672534kmsdk"}, + None, + None, + ), + ], + ) + def test_init(self, testcase, kwargs, error, error_message): + if error is None: + test = OAuth2TokenCredentials(**kwargs) + else: + with pytest.raises(error, match=error_message): + test = OAuth2TokenCredentials(**kwargs) + + def test_get_session_returns_session(self): + test = OAuth2TokenCredentials(token="tooooooken") + assert test.get_session() is not None + + def test_get_session_returns_session_with_auth(self): + test = OAuth2TokenCredentials(token="tooooooken") + session = test.get_session() + assert session.headers.get("Authorization") == "Bearer tooooooken" + + +class TestOAuth2PasswordFlowCredentials: + + @pytest.mark.parametrize( + "testcase, kwargs, error, error_message", + [ + ( + "invalid because no kwargs", + {}, + TypeError, + r"missing \d required keyword-only argument", + ), + ( + "invalid because kwarg is not string", + { + "endpoint": 12345, + "password": "hskwmksölkpwksmksksksmk", + "username": "test_user", + }, + TypeError, + r"must be <class 'str'>", + ), + ( + "invalid because one kwarg is missing", + { + "password": "hskwmksölkpwksmksksksmk", + "username": "test_user", + }, + TypeError, + r"missing \d required keyword-only argument", + ), + ( + "valid", + { + "endpoint": "https://some.endpoint/endpoint", + "password": "hskwmksölkpwksmksksksmk", + "username": "test_user", + }, + None, + None, + ), + ( + "valid with optional timeout", + { + "endpoint": "https://some.endpoint/endpoint", + "password": "hskwmksölkpwksmksksksmk", + "username": "test_user", + "timeout": 123, + }, + None, + None, + ), + ( + "invalid with refresh token", + { + "endpoint": "https://some.endpoint/endpoint", + "password": "hskwmksölkpwksmksksksmk", + "username": "test_user", + "refresh_token": "refresh_token", + }, + TypeError, + r"got an unexpected keyword argument 'refresh_token'", + ), + ( + "invalid with expiry time", + { + "endpoint": "https://some.endpoint/endpoint", + "password": "hskwmksölkpwksmksksksmk", + "username": "test_user", + "expiry_time": "2022-12-12 12:12:12", + }, + TypeError, + r"got an unexpected keyword argument 'expiry_time'", + ), + ( + "valid with client credentials", + { + "endpoint": "https://some.endpoint/endpoint", + "password": "hskwmksölkpwksmksksksmk", + "username": "test_user", + "client_id": "client_id", + "client_secret": "client_secret", + }, + None, + None, + ), + ], + ) + def test_init(self, testcase, error, kwargs, error_message): + if error is None: + test = OAuth2PasswordFlowCredentials(**kwargs) + else: + with pytest.raises(error, match=error_message): + test = OAuth2PasswordFlowCredentials(**kwargs) + + @responses.activate + def test_get_session_returns_session(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={"access_token": "toooooken"}, + ) + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + password="password", + username="user", + ) + assert test.get_session() is not None + + @responses.activate + def test_get_session_returns_session_with_token(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "toooooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "password", + "username": "user", + "password": "password", + } + ), + matchers.header_matcher({"Content-Type": "application/x-www-form-urlencoded"}), + ], + ) + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + password="password", + username="user", + ) + session = test.get_session() + assert session.headers.get("Authorization") == "Bearer toooooken" + + @responses.activate + def test_get_session_returns_session_with_token_and_uses_client_creds(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "toooooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "password", + "username": "user", + "password": "password", + } + ), + matchers.header_matcher( + { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": "Basic Y2xpZW50X2lkOmNsaWVudF9zZWNyZXQ=", + } + ), + ], + ) + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + password="password", + username="user", + client_id="client_id", + client_secret="client_secret", + ) + session = test.get_session() + assert session.headers.get("Authorization") == "Bearer toooooken" + + @responses.activate + def test_get_session_sets_refresh_token_and_expiry_time(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "toooooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "password", + "username": "user", + "password": "password", + } + ), + matchers.header_matcher({"Content-Type": "application/x-www-form-urlencoded"}), + ], + ) + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + password="password", + username="user", + ) + mock_now = datetime.now() + expected_expiry_time = mock_now + timedelta(seconds=3600) + with mock.patch("logprep.util.credentials.datetime") as mock_datetime: + mock_datetime.now.return_value = mock_now + session = test.get_session() + assert session.headers.get("Authorization").startswith("Bearer") + assert test._token.refresh_token == "refresh_token123123" + assert test._token.expiry_time == expected_expiry_time + + @responses.activate + def test_get_session_uses_refresh_token_if_token_is_expired(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "new toooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "refresh_token", + "refresh_token": "refresh1234", + } + ), + matchers.header_matcher({"Content-Type": "application/x-www-form-urlencoded"}), + ], + ) + # start prepare mock state after getting first authorization token + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + password="password", + username="user", + ) + test._session = Session() + test._session.headers.update({"Authorization": "Bearer bla"}) + test._token = AccessToken( + token="doesnotmatter", refresh_token="refresh1234", expires_in=3600 + ) + mock_expiry_time = datetime.now() - timedelta(seconds=3600) + test._token.expiry_time = mock_expiry_time # expire the token + # end prepare mock + session = test.get_session() + assert session.headers.get("Authorization") == "Bearer new toooken", "new should be used" + assert test._token.refresh_token == "refresh_token123123", "new refresh token should be set" + # next refresh with new refresh token + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "very new token", + "expires_in": 3600, + "refresh_token": "next_refresh_token", + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "refresh_token", + "refresh_token": "refresh_token123123", + } + ), + matchers.header_matcher({"Content-Type": "application/x-www-form-urlencoded"}), + ], + ) + test._token.expiry_time = mock_expiry_time # expire the token + new_session = test.get_session() + assert new_session is not session, "new session should be returned for every refresh" + + @responses.activate + def test_get_session_does_not_refresh_token_if_not_expired(self): + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + password="password", + username="user", + ) + test._token = AccessToken(token="bla", refresh_token="refresh1234", expires_in=3600) + test._session = Session() + test._session.headers.update({"Authorization": "Bearer bla"}) + session = test.get_session() # should not lead to an exception + assert session + + @pytest.mark.parametrize( + "error_reason", + [ + "invalid_request", + "invalid_client", + "invalid_grant", + "unauthorized_client", + "unsupported_grant_type", + ], + ) + @responses.activate + def test_get_session_error_handling(self, error_reason): + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + username="user", + password="password", + ) + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "error": error_reason, + }, + status=400, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "password", + "username": "user", + "password": "password", + } + ), + matchers.header_matcher( + { + "Content-Type": "application/x-www-form-urlencoded", + } + ), + ], + ) + error_message = rf"Authentication failed with status code 400 Bad Request: {error_reason}" + with pytest.raises(CredentialsBadRequestError, match=error_message): + _ = test.get_session() + + +class TestOAuth2ClientFlowCredentials: + + @pytest.mark.parametrize( + "testcase, kwargs, error, error_message", + [ + ( + "invalid because no kwargs", + {}, + TypeError, + r"missing \d required keyword-only argument", + ), + ( + "invalid because kwargs missing", + {"endpoint": "https://some.url/endpoint", "client_id": "test_id"}, + TypeError, + r"missing \d required keyword-only argument", + ), + ( + "invalid because invalid kwarg", + { + "endpoint": "https://some.url/endpoint", + "client_id": "some_id", + "client_secret": 1253.67484, + }, + TypeError, + r"must be <class 'str'>", + ), + ( + "valid", + { + "endpoint": "https://some.url/endpoint", + "client_id": "some_id", + "client_secret": "hijijsmmakaksjasd", + }, + None, + None, + ), + ], + ) + def test_init(self, testcase, kwargs, error, error_message): + if error is None: + test = OAuth2ClientFlowCredentials(**kwargs) + else: + with pytest.raises(error, match=error_message): + test = OAuth2ClientFlowCredentials(**kwargs) + + @responses.activate + def test_get_session_returns_session(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={"access_token": "toooooken"}, + ) + test = OAuth2PasswordFlowCredentials( + endpoint="https://the.endpoint", + password="password", + username="user", + ) + assert test.get_session() is not None + + @responses.activate + def test_get_session_returns_session_with_auth(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "toooooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "client_credentials", + } + ), + matchers.header_matcher( + { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": "Basic YWxsbWlnaHR5X2NsaWVudF9pZDp2ZXJ5IHNlY3JldCBwYXNzd29yZA==", + } + ), + ], + ) + test = OAuth2ClientFlowCredentials( + endpoint="https://the.endpoint", + client_secret="very secret password", + client_id="allmighty_client_id", + ) + session = test.get_session() + assert session.headers.get("Authorization") == "Bearer toooooken" + + @responses.activate + def test_get_session_does_not_requests_new_token_if_not_expired(self): + test = OAuth2ClientFlowCredentials( + endpoint="https://the.endpoint", + client_secret="very secret password", + client_id="allmighty_client_id", + ) + test._token = AccessToken(token="bla", expires_in=3600) + test._session = Session() + test._session.headers.update({"Authorization": "Bearer bla"}) + session = test.get_session(), "should not raise" + assert session + + @responses.activate + def test_get_session_refreshes_token(self): + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "new toooken", + "expires_in": 3600, + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "client_credentials", + } + ), + matchers.header_matcher( + { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": "Basic YWxsbWlnaHR5X2NsaWVudF9pZDp2ZXJ5IHNlY3JldCBwYXNzd29yZA==", + } + ), + ], + ) + # start prepare mock state after getting first authorization token + test = OAuth2ClientFlowCredentials( + endpoint="https://the.endpoint", + client_secret="very secret password", + client_id="allmighty_client_id", + ) + test._session = Session() + test._session.headers.update({"Authorization": "Bearer bla"}) + test._token = AccessToken(token="doesnotmatter", expires_in=3600) + mock_expiry_time = datetime.now() - timedelta(seconds=3600) + test._token.expiry_time = mock_expiry_time # expire the token + # end prepare mock + session = test.get_session() + assert session.headers.get("Authorization") == "Bearer new toooken", "new should be used" + # next refresh with new refresh token + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "access_token": "very new token", + "expires_in": 3600, + }, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "client_credentials", + } + ), + matchers.header_matcher( + { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": "Basic YWxsbWlnaHR5X2NsaWVudF9pZDp2ZXJ5IHNlY3JldCBwYXNzd29yZA==", + } + ), + ], + ) + test._token.expiry_time = mock_expiry_time # expire the token + new_session = test.get_session() + assert new_session is not session, "new session should be returned for every refresh" + + @pytest.mark.parametrize( + "error_reason", + [ + "invalid_request", + "invalid_client", + "invalid_grant", + "unauthorized_client", + "unsupported_grant_type", + ], + ) + @responses.activate + def test_get_session_error_handling(self, error_reason): + test = OAuth2ClientFlowCredentials( + endpoint="https://the.endpoint", + client_secret="very secret password", + client_id="allmighty_client_id", + ) + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "error": error_reason, + }, + status=400, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "client_credentials", + } + ), + matchers.header_matcher( + { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": "Basic YWxsbWlnaHR5X2NsaWVudF9pZDp2ZXJ5IHNlY3JldCBwYXNzd29yZA==", + } + ), + ], + ) + error_message = rf"Authentication failed with status code 400 Bad Request: {error_reason}" + with pytest.raises(CredentialsBadRequestError, match=error_message): + _ = test.get_session() + + @responses.activate + def test_get_session_error_handling_for_status_code_not_400(self): + test = OAuth2ClientFlowCredentials( + endpoint="https://the.endpoint", + client_secret="very secret password", + client_id="allmighty_client_id", + ) + responses.add( + responses.POST, + "https://the.endpoint", + json={ + "error": "this is a custom application error", + }, + status=503, + match=[ + matchers.urlencoded_params_matcher( + { + "grant_type": "client_credentials", + } + ), + matchers.header_matcher( + { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": "Basic YWxsbWlnaHR5X2NsaWVudF9pZDp2ZXJ5IHNlY3JldCBwYXNzd29yZA==", + } + ), + ], + ) + error_message = r"Service Unavailable for url" + with pytest.raises(requests.HTTPError, match=error_message): + _ = test.get_session() + + +class TestCredentialsFactory: + + @pytest.mark.parametrize( + "testcase, credential_file_content, instance, error", + [ + ( + "Return BasicAuthCredential object", + """--- +"https://some.url": + username: test + password: test +""", + BasicAuthCredentials, + None, + ), + ( + "Return OAuthPasswordFlowCredential object", + """--- +"https://some.url": + endpoint: https://endpoint.end + username: test + password: test +""", + OAuth2PasswordFlowCredentials, + None, + ), + ( + "Return OAuthClientFlowCredential object", + """--- +"https://some.url": + endpoint: https://endpoint.end + client_id: test + client_secret: test +""", + OAuth2ClientFlowCredentials, + None, + ), + ( + "Return OAuthTokenCredential object", + """--- +"https://some.url": + token: "jsoskdmoiewjdoeijkxsmoiqw8jdiowd0" +""", + OAuth2TokenCredentials, + None, + ), + ( + "Return None if credentials are missing", + """--- +"https://some.url": +""", + type(None), + None, + ), + ( + "Return None if wrong URL is given", + """--- +"https://some.other.url": + token: "jsoskdmoiewjdoeijkxsmoiqw8jdiowd0" +""", + type(None), + None, + ), + ( + "Raises InvalidConfigurationError credentials file is invalid yml", + """--- +"https://some.url": + password no colon here + username: test + endpoint: https://endpoint.end +""", + None, + InvalidConfigurationError, + ), + ( + "Return OAuthClientFlowCredential object when credentials file is valid json", + """ +{ + "https://some.url": { + "endpoint": "https://endpoint.end", + "client_id": "test", + "client_secret": "test" + } +} +""", + OAuth2ClientFlowCredentials, + None, + ), + ( + "Raise InvalidConfigurationError when credentials file is invalid json", + """ +{ + "https://some.url": + "endpoint": "https://endpoint.end", + "client_id": "test", + "client_secret": "test" +""", + None, + InvalidConfigurationError, + ), + ( + "Return OAuth2PassowordFlowCredentials object with additional client_id in credentials file", + """--- +"https://some.url": + endpoint: https://endpoint.end + client_id: test + username: test + password: test +""", + OAuth2PasswordFlowCredentials, + None, + ), + ( + "Return OAuthTokenCredential object when username, passowrd, client_id and client_secret are also given", + """--- +"https://some.url": + endpoint: https://endpoint.end + client_id: test + username: test + client_secret: test + password: test + token: "73475289038didjhwxnwnxwoiencn" + +""", + OAuth2TokenCredentials, + None, + ), + ( + "Raise InvalidConfigurationError if credentials have wrong type", + """--- +"https://some.url": + endpoint: https://endpoint.end + username: 123 + password: test + client_secret: 456 + +""", + None, + InvalidConfigurationError, + ), + ( + "Return OAuthClientFlowCredential object when username passowrd are also given", + """--- +"https://some.url": + endpoint: https://endpoint.end + client_id: test + username: test + password: test + client_secret: test +""", + OAuth2PasswordFlowCredentials, + None, + ), + ( + "Return None if no matching credentials class is found", + """--- +"https://some.url": + endpoint: https://endpoint.end + username: test + client_secret: test +""", + type(None), + None, + ), + ( + "Error", + """--- +"https://some.url": + endpoint: https://endpoint.end + username: test + password: +""", + type(None), + InvalidConfigurationError, + ), + ], + ) + def test_credentials_returns_expected_credential_object( + self, testcase, credential_file_content, instance, tmp_path, error + ): + credential_file_path = tmp_path / "credentials" + credential_file_path.write_text(credential_file_content) + mock_env = {"LOGPREP_CREDENTIALS_FILE": str(credential_file_path)} + with mock.patch.dict("os.environ", mock_env): + if error is not None: + with pytest.raises(error): + creds = CredentialsFactory.from_target("https://some.url/configuration") + else: + creds = CredentialsFactory.from_target("https://some.url/configuration") + assert isinstance(creds, instance), testcase + + def test_credentials_returns_none_if_env_not_set(self): + creds = CredentialsFactory.from_target("https://some.url/configuration") + assert creds is None + + def test_credentials_from_root_url(self, tmp_path): + credential_file_path = tmp_path / "credentials.yml" + credential_file_path.write_text( + """--- +"http://some.url": + endpoint: https://endpoint.end + client_id: test + client_secret: test +""" + ) + mock_env = {"LOGPREP_CREDENTIALS_FILE": str(credential_file_path)} + with mock.patch.dict("os.environ", mock_env): + creds = CredentialsFactory.from_target("http://some.url") + assert isinstance(creds, OAuth2ClientFlowCredentials) + + def test_credentials_is_none_on_invalid_credentials_file_path(self): + mock_env = {"LOGPREP_CREDENTIALS_FILE": "this is something useless"} + with mock.patch.dict("os.environ", mock_env): + with pytest.raises(InvalidConfigurationError, match=r"wrong credentials file path"): + creds = CredentialsFactory.from_target("https://some.url") + assert creds is None + + @pytest.mark.parametrize( + "testcase, type_of_secret, endpoint, secret_content, instance", + [ + ( + "Return OAuthPasswordFlowCredential object when password file is given", + "password_file", + "endpoint: https://endpoint.end", + "hiansdnjskwuthisisaverysecretsecret", + OAuth2PasswordFlowCredentials, + ), + ( + "Return OAuthClientFlowCredentials object when client secret file is given", + "client_secret_file", + "endpoint: https://endpoint.end", + "hiansdnjskwuthisisaverysecretsecret", + OAuth2ClientFlowCredentials, + ), + ( + "Return OAuthTokenCredential object when token file is given", + "token_file", + "endpoint: https://endpoint.end", + "hiansdnjskwuthisisaverysecretsecret", + OAuth2TokenCredentials, + ), + ( + "Return BasicAuthCredential object when no endpoint is given and password_file is given", + "password_file", + "", + "hiansdnjskwuthisisaverysecretsecret", + BasicAuthCredentials, + ), + ], + ) + def test_credentials_reads_secret_file_content( + self, tmp_path, testcase, type_of_secret, endpoint, secret_content, instance + ): + credential_file_path = tmp_path / "credentials.yml" + secret_file_path = tmp_path / "secret.txt" + credential_file_path.write_text( + f"""--- +"http://some.url": + {endpoint} + username: testuser + client_id: testid + {type_of_secret}: {secret_file_path} +""" + ) + secret_file_path.write_text(secret_content) + mock_env = {"LOGPREP_CREDENTIALS_FILE": str(credential_file_path)} + with mock.patch.dict("os.environ", mock_env): + creds = CredentialsFactory.from_target("http://some.url/configuration") + assert isinstance(creds, instance), testcase + + def test_credentials_reads_secret_file_content_from_every_given_file(self, tmp_path): + credential_file_path = tmp_path / "credentials.yml" + secret_file_path_0 = tmp_path / "secret-0.txt" + secret_file_path_1 = tmp_path / "secret-1.txt" + + credential_file_path.write_text( + f"""--- +"http://some.url": + endpoint: "https://endpoint.end" + username: testuser + client_id: testid + client_secret_file: {secret_file_path_0} + password_file: {secret_file_path_1} +""" + ) + secret_file_path_0.write_text("thisismysecretsecretclientsecret") + secret_file_path_1.write_text("thisismysecorndsecretsecretpasswordsecret") + + mock_env = {"LOGPREP_CREDENTIALS_FILE": str(credential_file_path)} + with mock.patch.dict("os.environ", mock_env): + creds = CredentialsFactory.from_target("http://some.url/configuration") + assert isinstance(creds, Credentials) + + @mock.patch.object(CredentialsFactory, "_logger") + def test_warning_logged_when_extra_params_given(self, mock_logger): + credentials_file_content_with_extra_params = { + "endpoint": "https://endpoint.end", + "client_id": "test", + "client_secret": "test", + "username": "user1", + "password": "password", + "extra_param": "extra", + } + creds = CredentialsFactory.from_dict(credentials_file_content_with_extra_params) + mock_logger.warning.assert_called_once() + assert re.search( + r"OAuth password authorization for confidential clients", + mock_logger.mock_calls[0][1][0], + ) diff --git a/tests/unit/util/test_getter.py b/tests/unit/util/test_getter.py index d17a1e802..2241fab94 100644 --- a/tests/unit/util/test_getter.py +++ b/tests/unit/util/test_getter.py @@ -3,18 +3,21 @@ # pylint: disable=line-too-long # pylint: disable=unspecified-encoding # pylint: disable=protected-access +import json import os +import uuid +from datetime import datetime, timedelta from pathlib import Path from unittest import mock import pytest +import requests.exceptions import responses -from requests.auth import HTTPBasicAuth -from requests.exceptions import Timeout from responses import matchers from ruamel.yaml import YAML from logprep._version import get_versions +from logprep.util.credentials import Credentials, CredentialsEnvNotFoundError from logprep.util.getter import ( FileGetter, GetterFactory, @@ -48,33 +51,21 @@ def test_from_string_sets_protocol_and_target( assert my_getter.protocol == expected_protocol assert my_getter.target == expected_target + @mock.patch.dict("os.environ", {"PYTEST_TEST_TARGET": "the-web-target"}) def test_getter_expands_from_environment(self): - os.environ["PYTEST_TEST_TARGET"] = "the-web-target" url = "https://${PYTEST_TEST_TARGET}" my_getter = GetterFactory.from_string(url) assert my_getter.target == "the-web-target" - def test_getter_expands_not_set_environment_to_blank(self): - if "PYTEST_TEST_TOKEN" in os.environ: - os.environ.pop("PYTEST_TEST_TOKEN") - if "PYTEST_TEST_TARGET" in os.environ: - os.environ.pop("PYTEST_TEST_TARGET") - url = "https://oauth:${PYTEST_TEST_TOKEN}@randomtarget/${PYTEST_TEST_TARGET}" - my_getter = GetterFactory.from_string(url) - assert my_getter._password is None - assert my_getter.target == "oauth:@randomtarget/" - + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_expands_environment_variables_in_content(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) testfile = tmp_path / "test_getter.json" testfile.write_text("this is my $PYTEST_TEST_TOKEN") my_getter = GetterFactory.from_string(str(testfile)) assert my_getter.get() == "this is my mytoken" + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_expands_setted_environment_variables_and_missing_to_blank(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) - if "LOGPREP_MISSING_TOKEN" in os.environ: - os.environ.pop("LOGPREP_MISSING_TOKEN") testfile = tmp_path / "test_getter.json" testfile.write_text("this is my $PYTEST_TEST_TOKEN, and this is my $LOGPREP_MISSING_TOKEN") my_getter = GetterFactory.from_string(str(testfile)) @@ -82,19 +73,17 @@ def test_getter_expands_setted_environment_variables_and_missing_to_blank(self, assert "LOGPREP_MISSING_TOKEN" in my_getter.missing_env_vars assert len(my_getter.missing_env_vars) == 1 + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_expands_only_uppercase_variable_names(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) testfile = tmp_path / "test_getter.json" testfile.write_text("this is my $PYTEST_TEST_TOKEN, and this is my $pytest_test_token") my_getter = GetterFactory.from_string(str(testfile)) assert my_getter.get() == "this is my mytoken, and this is my $pytest_test_token" + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_expands_setted_environment_variables_and_missing_to_blank_with_braced_variables( self, tmp_path ): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) - if "LOGPREP_MISSING_TOKEN" in os.environ: - os.environ.pop("LOGPREP_MISSING_TOKEN") testfile = tmp_path / "test_getter.json" testfile.write_text( "this is my ${PYTEST_TEST_TOKEN}, and this is my ${LOGPREP_MISSING_TOKEN}" @@ -102,32 +91,31 @@ def test_getter_expands_setted_environment_variables_and_missing_to_blank_with_b my_getter = GetterFactory.from_string(str(testfile)) assert my_getter.get() == "this is my mytoken, and this is my " + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_expands_only_uppercase_variable_names_with_braced_variables(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) testfile = tmp_path / "test_getter.json" testfile.write_text("this is my ${PYTEST_TEST_TOKEN}, and this is my ${not_a_token}") my_getter = GetterFactory.from_string(str(testfile)) assert my_getter.get() == "this is my mytoken, and this is my ${not_a_token}" + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_ignores_list_comparison_logprep_list_variable(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) testfile = tmp_path / "test_getter.json" testfile.write_text("this is my ${PYTEST_TEST_TOKEN}, and this is my ${LOGPREP_LIST}") my_getter = GetterFactory.from_string(str(testfile)) assert my_getter.get() == "this is my mytoken, and this is my ${LOGPREP_LIST}" assert len(my_getter.missing_env_vars) == 0 + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken", "LOGPREP_LIST": "foo"}) def test_getter_ignores_list_comparison_logprep_list_variable_if_set(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) - os.environ.update({"LOGPREP_LIST": "foo"}) testfile = tmp_path / "test_getter.json" testfile.write_text("this is my ${PYTEST_TEST_TOKEN}, and this is my ${LOGPREP_LIST}") my_getter = GetterFactory.from_string(str(testfile)) assert my_getter.get() == "this is my mytoken, and this is my ${LOGPREP_LIST}" assert len(my_getter.missing_env_vars) == 0 + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_expands_environment_variables_in_yaml_content(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) testfile = tmp_path / "test_getter.json" testfile.write_text( """--- @@ -147,8 +135,8 @@ def test_getter_expands_environment_variables_in_yaml_content(self, tmp_path): } assert my_getter.get_yaml() == expected + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken"}) def test_getter_expands_only_whitelisted_in_yaml_content(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) testfile = tmp_path / "test_getter.json" testfile.write_text( """--- @@ -169,9 +157,8 @@ def test_getter_expands_only_whitelisted_in_yaml_content(self, tmp_path): } assert my_getter.get_yaml() == expected + @mock.patch.dict("os.environ", {"PYTEST_TEST_TOKEN": "mytoken", "LOGPREP_LIST": "foo"}) def test_getter_does_not_reduces_double_dollar_for_unvalid_prefixes(self, tmp_path): - os.environ.update({"PYTEST_TEST_TOKEN": "mytoken"}) - os.environ.update({"LOGPREP_LIST": "foo"}) testfile = tmp_path / "test_getter.json" testfile.write_text( "this is my $PYTEST_TEST_TOKEN, and this is my $$UNVALID_PREFIXED_TOKEN" @@ -294,6 +281,11 @@ def test_get_returns_binary_content(self): {"second_dict": {"key": ["valid_list_element", "valid_list_element"]}}, ], ), + ( + "get_yaml", + b"""""", + {}, + ), ], ) def test_parses_content(self, method_name, input_content, expected_output): @@ -305,6 +297,7 @@ def test_parses_content(self, method_name, input_content, expected_output): class TestHttpGetter: + def test_factory_returns_http_getter_for_http(self): http_getter = GetterFactory.from_string("http://testfile.json") assert isinstance(http_getter, HttpGetter) @@ -356,29 +349,6 @@ def test_sends_logprep_version_in_user_agent(self): http_getter = GetterFactory.from_string("https://the-target/file") http_getter.get() - @responses.activate - def test_provides_oauth_compliant_headers_if_token_is_set_via_env(self): - mock_env = { - "LOGPREP_CONFIG_AUTH_METHOD": "oauth", - "LOGPREP_CONFIG_AUTH_TOKEN": "ajhsdfpoweiurjdfs239487", - } - - logprep_version = get_versions().get("version") - responses.get( - url="https://the.target.url/targetfile", - match=[ - matchers.header_matcher( - { - "User-Agent": f"Logprep version {logprep_version}", - "Authorization": "Bearer ajhsdfpoweiurjdfs239487", - } - ) - ], - ) - with mock.patch.dict("os.environ", mock_env): - http_getter = GetterFactory.from_string("https://the.target.url/targetfile") - http_getter.get() - def test_raises_on_try_to_set_credentials_from_url_string(self): with pytest.raises( NotImplementedError, match="Basic auth credentials via commandline are not supported" @@ -387,27 +357,191 @@ def test_raises_on_try_to_set_credentials_from_url_string(self): "https://oauth:ajhsdfpoweiurjdfs239487@the.target.url/targetfile" ) + @mock.patch("urllib3.connectionpool.HTTPConnectionPool._get_conn") + def test_raises_requestexception_after_3_retries(self, getconn_mock): + getconn_mock.return_value.getresponse.side_effect = [ + mock.MagicMock(status=500), # one initial request and three retries + mock.MagicMock(status=502), + mock.MagicMock(status=500), + mock.MagicMock(status=500), + mock.MagicMock(status=500), # fourth is not considered because of raise + ] + http_getter = GetterFactory.from_string("https://does-not-matter/bar") + with pytest.raises(requests.exceptions.RequestException, match="Max retries exceed"): + http_getter.get() + assert getconn_mock.return_value.request.mock_calls == [ + # one initial request and three retries + mock.call("GET", "/bar", body=None, headers=mock.ANY), + mock.call("GET", "/bar", body=None, headers=mock.ANY), + mock.call("GET", "/bar", body=None, headers=mock.ANY), + mock.call("GET", "/bar", body=None, headers=mock.ANY), + ] + + @mock.patch("urllib3.connectionpool.HTTPConnectionPool._get_conn") + def test_get_does_one_successful_request_after_two_failed(self, getconn_mock): + getconn_mock.return_value.getresponse.side_effect = [ + mock.MagicMock(status=500), + mock.MagicMock(status=502), + mock.MagicMock(status=200), + ] + http_getter = GetterFactory.from_string("https://does-not-matter/bar") + http_getter.get() + assert getconn_mock.return_value.request.mock_calls == [ + mock.call("GET", "/bar", body=None, headers=mock.ANY), + mock.call("GET", "/bar", body=None, headers=mock.ANY), + mock.call("GET", "/bar", body=None, headers=mock.ANY), + ] + + def test_credentials_returns_credential_object_if_no_credentials(self): + http_getter = GetterFactory.from_string("https://does-not-matter/bar") + assert isinstance(http_getter.credentials, Credentials) + + def test_credentials_returns_credentials_if_set(self, tmp_path): + credentials_file_content = { + "https://does-not-matter": { + "username": "myuser", + "password": "mypassword", + } + } + credentials_file: Path = tmp_path / "credentials.json" + credentials_file.write_text(json.dumps(credentials_file_content)) + with mock.patch.dict("os.environ", {"LOGPREP_CREDENTIALS_FILE": str(credentials_file)}): + http_getter = GetterFactory.from_string("https://does-not-matter/bar") + assert isinstance(http_getter.credentials, Credentials) + + @responses.activate + def test_get_raw_gets_token_before_request(self, tmp_path): + domain = str(uuid.uuid4()) + responses.add( + responses.POST, + "https://the.krass.endpoint/token", + json={ + "access_token": "toooooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + ) + responses.add( + responses.GET, + f"https://{domain}/bar", + json={"key": "the cooooontent"}, + match=[matchers.header_matcher({"Authorization": "Bearer toooooken"})], + ) + credentials_file_content = { + f"https://{domain}": { + "username": "myuser", + "password": "mypassword", + "endpoint": "https://the.krass.endpoint/token", + } + } + credentials_file: Path = tmp_path / "credentials.json" + credentials_file.write_text(json.dumps(credentials_file_content)) + with mock.patch.dict("os.environ", {"LOGPREP_CREDENTIALS_FILE": str(credentials_file)}): + http_getter = GetterFactory.from_string(f"https://{domain}/bar") + return_content = http_getter.get_json() + assert return_content == {"key": "the cooooontent"} + responses.assert_call_count("https://the.krass.endpoint/token", 1) + responses.assert_call_count(f"https://{domain}/bar", 1) + @responses.activate - def test_provides_basic_authentication_creds_from_environment(self): - mock_env = { - "LOGPREP_CONFIG_AUTH_USERNAME": "myusername", - "LOGPREP_CONFIG_AUTH_PASSWORD": "mypassword", + def test_get_raw_reuses_existing_session(self, tmp_path): + domain = str(uuid.uuid4()) + responses.add( + responses.POST, + "https://the.krass.endpoint/token", + json={ + "access_token": "toooooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + ) + responses.add( + responses.GET, + f"https://{domain}/bar", + json={"key": "the cooooontent"}, + match=[matchers.header_matcher({"Authorization": "Bearer toooooken"})], + ) + credentials_file_content = { + f"https://{domain}": { + "username": "myuser", + "password": "mypassword", + "endpoint": "https://the.krass.endpoint/token", + } } + credentials_file: Path = tmp_path / "credentials.json" + credentials_file.write_text(json.dumps(credentials_file_content)) + with mock.patch.dict("os.environ", {"LOGPREP_CREDENTIALS_FILE": str(credentials_file)}): + http_getter = GetterFactory.from_string(f"https://{domain}/bar") + return_content = http_getter.get_json() + return_content = http_getter.get_json() + assert return_content == {"key": "the cooooontent"} + responses.assert_call_count("https://the.krass.endpoint/token", 1) + responses.assert_call_count(f"https://{domain}/bar", 2) + + @responses.activate + def test_get_raw_refreshes_token_if_expired(self, tmp_path): + domain = str(uuid.uuid4()) + responses.add( + responses.POST, + "https://the.krass.endpoint/token", + json={ + "access_token": "toooooken", + "expires_in": 3600, + "refresh_token": "refresh_token123123", + }, + ) responses.add( responses.GET, - "https://the.target.url/targetfile", + f"https://{domain}/bar", + json={"key": "the cooooontent"}, + match=[matchers.header_matcher({"Authorization": "Bearer toooooken"})], ) - with mock.patch.dict("os.environ", mock_env): - http_getter = GetterFactory.from_string("https://the.target.url/targetfile") - http_getter.get() - assert http_getter._sessions["the.target.url"].auth == HTTPBasicAuth( - "myusername", "mypassword" + credentials_file_content = { + f"https://{domain}": { + "username": "myuser", + "password": "mypassword", + "endpoint": "https://the.krass.endpoint/token", + } + } + credentials_file: Path = tmp_path / "credentials.json" + credentials_file.write_text(json.dumps(credentials_file_content)) + with mock.patch.dict("os.environ", {"LOGPREP_CREDENTIALS_FILE": str(credentials_file)}): + http_getter: HttpGetter = GetterFactory.from_string(f"https://{domain}/bar") + return_content = http_getter.get_json() + assert return_content == {"key": "the cooooontent"} + responses.assert_call_count("https://the.krass.endpoint/token", 1) + responses.assert_call_count(f"https://{domain}/bar", 1) + # expire token + http_getter._credentials_registry.get(f"https://{domain}")._token.expiry_time = ( + datetime.now() - timedelta(seconds=3600) + ) + return_content = http_getter.get_json() + + @responses.activate + def test_get_raw_raises_if_credential_file_env_not_set_and_unauthorizes(self): + domain = str(uuid.uuid4()) + responses.add( + responses.GET, + f"https://{domain}/bar", + status=401, ) + with pytest.raises(CredentialsEnvNotFoundError): + http_getter: HttpGetter = GetterFactory.from_string(f"https://{domain}/bar") + http_getter.get_json() @responses.activate - def test_raises_requestexception_after_3_retries(self): - responses.add(responses.GET, "https://does-not-matter", Timeout()) - http_getter = GetterFactory.from_string("https://does-not-matter") - with pytest.raises(Timeout): - http_getter.get() - responses.assert_call_count("https://does-not-matter", 3) + def test_get_raw_raises_if_credential_file_env_set_and_unauthorizes(self): + domain = str(uuid.uuid4()) + responses.add( + responses.GET, + f"https://{domain}/bar", + status=401, + ) + with pytest.raises(requests.exceptions.HTTPError) as error: + http_getter: HttpGetter = GetterFactory.from_string(f"https://{domain}/bar") + with mock.patch.dict( + "os.environ", + {"LOGPREP_CREDENTIALS_FILE": "quickstart/exampledata/config/credentials.yml"}, + ): + http_getter.get_json() + assert error.value.response.status_code == 401