Skip to content

Commit

Permalink
241 feature load files from http is broken because of file validator (#243)
Browse files Browse the repository at this point in the history

* Refactor http input acceptance test
* add acceptance test for full pipeline run
* add convert_to_http_config
* remove file validation
Co-authored-by: dtrai2 <[email protected]>
  • Loading branch information
ekneg54 authored Dec 8, 2022
1 parent 68bcd21 commit d459e73
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ jobs:
pylint --rcfile=.pylintrc --fail-under 9.5 ${{ steps.changed-files.outputs.all_changed_files }}
- name: Run tests and collect coverage
run: pytest tests --cov=logprep --cov-report=xml
run: pytest tests/unit --cov=logprep --cov-report=xml

- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ jobs:
pylint --rcfile=.pylintrc --fail-under 9.5 ${{ steps.changed-files.outputs.all_changed_files }}
- name: Run tests and collect coverage
run: pytest tests --cov=logprep --cov-report=xml
run: pytest tests/unit --cov=logprep --cov-report=xml

- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v2
Expand Down
3 changes: 1 addition & 2 deletions logprep/processor/labeler/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
from logprep.abc import Processor
from logprep.processor.labeler.labeling_schema import LabelingSchema
from logprep.processor.labeler.rule import LabelerRule
from logprep.util.validators import file_validator, json_validator


class Labeler(Processor):
Expand All @@ -48,7 +47,7 @@ class Labeler(Processor):
class Config(Processor.Config):
"""Labeler Configurations"""

schema: str = field(validator=[file_validator, json_validator])
schema: str = field(validator=[validators.instance_of(str)])
"""Path to a labeling schema file. For string format see :ref:`getters`."""
include_parent_labels: Optional[bool] = field(
default=False, validator=validators.optional(validator=validators.instance_of(bool))
Expand Down
8 changes: 5 additions & 3 deletions logprep/processor/normalizer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from logprep.processor.normalizer.rule import NormalizerRule
from logprep.util.getter import GetterFactory
from logprep.util.helper import add_field_to, get_dotted_field_value
from logprep.util.validators import file_validator, directory_validator
from logprep.util.validators import directory_validator


class Normalizer(Processor):
Expand All @@ -57,10 +57,12 @@ class Normalizer(Processor):
class Config(Processor.Config):
"""config description for Normalizer"""

regex_mapping: str = field(validator=file_validator)
regex_mapping: str = field(validator=validators.instance_of(str))
"""Path to regex mapping file with regex keywords that are replaced with regex expressions
by the normalizer. For string format see :ref:`getters`."""
html_replace_fields: Optional[str] = field(default=None, validator=file_validator)
html_replace_fields: Optional[str] = field(
default=None, validator=[validators.optional(validators.instance_of(str))]
)
"""Path to yaml file with html replace fields. For string format see :ref:`getters`"""
count_grok_pattern_matches: Optional[dict] = field(
default=None, validator=validators.optional(validators.instance_of(dict))
Expand Down
5 changes: 3 additions & 2 deletions logprep/processor/pre_detector/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from logprep.abc import Processor
from logprep.processor.pre_detector.ip_alerter import IPAlerter
from logprep.processor.pre_detector.rule import PreDetectorRule
from logprep.util.validators import file_validator


class PreDetectorError(BaseException):
Expand All @@ -55,7 +54,9 @@ class Config(Processor.Config):
A Kafka topic for the detection results of the Predetector.
Results in this topic can be linked to the original event via a `pre_detector_id`.
"""
alert_ip_list_path: str = field(default=None, validator=file_validator)
alert_ip_list_path: str = field(
default=None, validator=validators.optional(validators.instance_of(str))
)
"""
Path to a YML file or a list of paths to YML files with dictionaries of IPs.
For string format see :ref:`getters`.
Expand Down
8 changes: 4 additions & 4 deletions logprep/processor/pseudonymizer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from logprep.util.cache import Cache
from logprep.util.getter import GetterFactory
from logprep.util.hasher import SHA256Hasher
from logprep.util.validators import file_validator, list_of_urls_validator
from logprep.util.validators import list_of_urls_validator


class Pseudonymizer(Processor):
Expand All @@ -60,20 +60,20 @@ class Config(Processor.Config):
These are not the pseudonymized events, but just the pseudonyms with the encrypted real
values.
"""
pubkey_analyst: str = field(validator=file_validator)
pubkey_analyst: str = field(validator=validators.instance_of(str))
"""
Path to the public key of an analyst. For string format see :ref:`getters`.
* /var/git/analyst_pub.pem"""
pubkey_depseudo: str = field(validator=file_validator)
pubkey_depseudo: str = field(validator=validators.instance_of(str))
"""
Path to the public key for depseudonymization. For string format see :ref:`getters`.
* /var/git/depseudo_pub.pem
"""
hash_salt: str = field(validator=validators.instance_of(str))
"""A salt that is used for hashing."""
regex_mapping: str = field(validator=file_validator)
regex_mapping: str = field(validator=validators.instance_of(str))
"""
Path to a file (for string format see :ref:`getters`) with a regex mapping for pseudonymization, i.e.:
Expand Down
3 changes: 1 addition & 2 deletions logprep/processor/template_replacer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from logprep.processor.template_replacer.rule import TemplateReplacerRule
from logprep.util.getter import GetterFactory
from logprep.util.helper import get_dotted_field_value
from logprep.util.validators import file_validator


class TemplateReplacerError(BaseException):
Expand All @@ -52,7 +51,7 @@ class TemplateReplacer(Processor):
class Config(Processor.Config):
"""TemplateReplacer config"""

template: str = field(validator=file_validator)
template: str = field(validator=validators.instance_of(str))
"""
Path to a YML file (for path format see :ref:`getters`) with a list of replacements in the
format `%{provider_name}-%{event_id}: %{new_message}`.
Expand Down
90 changes: 90 additions & 0 deletions tests/acceptance/test_full_configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# pylint: disable=missing-docstring
import contextlib
from pathlib import Path
import threading
import socketserver
import http.server
import re
from tests.acceptance.util import (
get_full_pipeline,
get_default_logprep_config,
start_logprep,
stop_logprep,
convert_to_http_config,
)
from logprep.util.json_handling import dump_config_as_file


class TestServer(socketserver.TCPServer):
    """Minimal HTTP file server used to serve the generated config over http."""

    # allow immediate rebinding of the port between test runs
    allow_reuse_address = True

    @classmethod
    def run_http_server(cls, port=32000):
        """Serve the current working directory on *port* until shutdown() is called."""
        with TestServer(("", port), http.server.SimpleHTTPRequestHandler) as httpd:
            try:
                cls.httpd = httpd
                cls.httpd.serve_forever()
            finally:
                cls.httpd.server_close()

    @classmethod
    @contextlib.contextmanager
    def run_in_thread(cls):
        """Context manager to run the server in a separate thread"""
        cls.thread = threading.Thread(target=cls.run_http_server)
        cls.thread.start()
        try:
            yield
        finally:
            # always stop the server and join the thread, even if the
            # with-body raised, so the thread cannot outlive the test
            cls.httpd.shutdown()
            cls.thread.join()

    @classmethod
    def stop(cls):
        """Idempotent cleanup; safe to call even if the server never started."""
        if hasattr(cls, "httpd"):
            cls.httpd.shutdown()
        if hasattr(cls, "thread"):
            cls.thread.join()


def teardown_function():
    """Remove the generated config and stop server/logprep after each test."""
    config_file = Path("generated_config.yml")
    config_file.unlink(missing_ok=True)
    TestServer.stop()
    stop_logprep()


def test_start_of_logprep_with_full_configuration_from_file(tmp_path):
    """Start logprep with every processor configured from a local file and
    assert that the startup log contains no error markers.
    """
    pipeline = get_full_pipeline()
    config = get_default_logprep_config(pipeline, with_hmac=False)
    config_path = str(tmp_path / "generated_config.yml")
    dump_config_as_file(config_path, config)
    proc = start_logprep(config_path)
    while True:
        output = proc.stdout.readline().decode("utf8")
        # readline returns "" once the process has died; fail loudly
        # instead of spinning on empty reads forever
        assert output, "logprep terminated before startup completed"
        assert not re.search("Invalid", output)
        assert not re.search("Exception", output)
        assert not re.search("critical", output)
        assert not re.search("Error", output)
        assert not re.search("ERROR", output)
        if re.search("Startup complete", output):
            break


def test_start_of_logprep_with_full_configuration_http():
    """Start logprep with every processor configured, loading the config via
    http from a local test server, and assert the startup log is error free.
    """
    pipeline = get_full_pipeline()
    config = get_default_logprep_config(pipeline, with_hmac=False)
    endpoint = "http://localhost:32000"
    config = convert_to_http_config(config, endpoint)
    config_path = "generated_config.yml"
    dump_config_as_file(config_path, config)
    with TestServer.run_in_thread():
        proc = start_logprep(f"{endpoint}/{config_path}")
        while True:
            output = proc.stdout.readline().decode("utf8")
            # readline returns "" once the process has died; fail loudly
            # instead of spinning on empty reads forever
            assert output, "logprep terminated before startup completed"
            assert not re.search("Invalid", output)
            assert not re.search("Exception", output)
            assert not re.search("critical", output)
            assert not re.search("Error", output)
            assert not re.search("ERROR", output)
            if re.search("Startup complete", output):
                break
84 changes: 39 additions & 45 deletions tests/acceptance/test_http_input.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,23 @@
# pylint: disable=missing-docstring
# pylint: disable=line-too-long
import os
import re
import signal
import subprocess
import sys
import time
from logging import DEBUG, basicConfig, getLogger

import pytest
import requests

from logprep.util.json_handling import dump_config_as_file
from tests.acceptance.util import get_default_logprep_config
from tests.acceptance.util import (
get_default_logprep_config,
start_logprep,
wait_for_output,
stop_logprep,
)

basicConfig(level=DEBUG, format="%(asctime)-15s %(name)-5s %(levelname)-8s: %(message)s")
logger = getLogger("Logprep-Test")


def start_logprep(config_path: str) -> subprocess.Popen:
    """Launch logprep with the given config path and return the process handle."""
    command = f"{sys.executable} logprep/run_logprep.py {config_path}"
    return subprocess.Popen(  # nosemgrep
        command,
        shell=True,
        env={"PYTHONPATH": "."},
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        close_fds=True,
    )


def wait_for_output(proc, expected_output):
    """Block until *expected_output* appears in a line of *proc*'s stdout.

    Fails with AssertionError if the process closes its stdout (i.e. it
    terminated) before the expected output was seen, instead of spinning
    forever on empty reads.
    """
    output = proc.stdout.readline()
    while expected_output not in output.decode("utf8"):
        # an empty read means EOF -> the process is gone
        assert output, f"process ended before printing: {expected_output}"
        output = proc.stdout.readline()
        time.sleep(0.1)  # nosemgrep


@pytest.fixture(name="config")
def config_fixture():
pipeline = [
Expand Down Expand Up @@ -74,14 +54,7 @@ def config_fixture():


def teardown_function():
# cleanup processes
output = subprocess.check_output("ps -x | grep run_logprep", shell=True) # nosemgrep
for line in output.decode("utf8").splitlines():
process_id = re.match(r"^\s+(\d+)\s.+", line).group(1)
try:
os.kill(int(process_id), signal.SIGKILL)
except ProcessLookupError:
pass
stop_logprep()


def test_http_input_accepts_message_for_single_pipeline(tmp_path, config):
Expand All @@ -91,7 +64,8 @@ def test_http_input_accepts_message_for_single_pipeline(tmp_path, config):
dump_config_as_file(config_path, config)
proc = start_logprep(config_path)
wait_for_output(proc, "Uvicorn running on https://127.0.0.1:9000")
requests.post("https://127.0.0.1:9000/plaintext", data="my message", verify=False) # nosemgrep
# nosemgrep
requests.post("https://127.0.0.1:9000/plaintext", data="my message", verify=False, timeout=5)
time.sleep(0.5) # nosemgrep
assert "my message" in output_path.read_text()

Expand All @@ -104,11 +78,19 @@ def test_http_input_accepts_message_for_two_pipelines(tmp_path, config):
dump_config_as_file(config_path, config)
proc = start_logprep(config_path)
wait_for_output(proc, "Uvicorn running on https://127.0.0.1:9001")
requests.post( # nosemgrep
"https://127.0.0.1:9000/plaintext", data="my first message", verify=False
# nosemgrep
requests.post(
"https://127.0.0.1:9000/plaintext",
data="my first message",
verify=False,
timeout=5,
)
requests.post( # nosemgrep
"https://127.0.0.1:9001/plaintext", data="my second message", verify=False
# nosemgrep
requests.post(
"https://127.0.0.1:9001/plaintext",
data="my second message",
verify=False,
timeout=5,
)
time.sleep(0.5) # nosemgrep
output_content = output_path.read_text()
Expand All @@ -124,14 +106,26 @@ def test_http_input_accepts_message_for_three_pipelines(tmp_path, config):
dump_config_as_file(config_path, config)
proc = start_logprep(config_path)
wait_for_output(proc, "Uvicorn running on https://127.0.0.1:9002")
requests.post( # nosemgrep
"https://127.0.0.1:9000/plaintext", data="my first message", verify=False
# nosemgrep
requests.post(
"https://127.0.0.1:9000/plaintext",
data="my first message",
verify=False,
timeout=5,
)
requests.post( # nosemgrep
"https://127.0.0.1:9001/plaintext", data="my second message", verify=False
# nosemgrep
requests.post(
"https://127.0.0.1:9001/plaintext",
data="my second message",
verify=False,
timeout=5,
)
requests.post( # nosemgrep
"https://127.0.0.1:9002/plaintext", data="my third message", verify=False
# nosemgrep
requests.post(
"https://127.0.0.1:9002/plaintext",
data="my third message",
verify=False,
timeout=5,
)
time.sleep(0.5) # nosemgrep
output_content = output_path.read_text()
Expand Down
Loading

0 comments on commit d459e73

Please sign in to comment.