Skip to content

Commit

Permalink
fix multiple outputs test
Browse files (browse the repository at this point in the history)
  • Loading branch information
ekneg54 committed Sep 25, 2024
1 parent fb3818b commit 5fc1ea2
Showing 1 changed file with 90 additions and 92 deletions.
182 changes: 90 additions & 92 deletions tests/acceptance/test_multiple_outputs.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
# pylint: disable=missing-docstring
# pylint: disable=line-too-long
import tempfile
import time
from pathlib import Path

@@ -17,67 +18,85 @@ def wait_for_interval(interval):

@pytest.fixture(name="config")
def get_config():
return Configuration(
**{
"version": "1",
"logger": {"level": "DEBUG"},
"process_count": 1,
"timeout": 0.1,
"profile_pipelines": False,
"pipeline": [
{
"dissector": {
"type": "dissector",
"specific_rules": ["tests/testdata/acceptance/dissector/rules/specific"],
"generic_rules": ["tests/testdata/acceptance/dissector/rules/generic"],
}
},
{
"selective_extractor": {
"type": "selective_extractor",
"specific_rules": [
"tests/testdata/acceptance/selective_extractor/rules/specific"
],
"generic_rules": [
"tests/testdata/acceptance/selective_extractor/rules/generic"
],
}
},
{
"pseudonymizer": {
"type": "pseudonymizer",
"pubkey_analyst": "tests/testdata/acceptance/pseudonymizer/example_analyst_pub.pem",
"pubkey_depseudo": "tests/testdata/acceptance/pseudonymizer/example_depseudo_pub.pem",
"hash_salt": "a_secret_tasty_ingredient",
"outputs": [{"jsonl": "pseudonyms"}],
"specific_rules": [
"tests/testdata/acceptance/pseudonymizer/rules_static/specific"
],
"generic_rules": [
"tests/testdata/acceptance/pseudonymizer/rules_static/generic"
],
"regex_mapping": "tests/testdata/acceptance/pseudonymizer/rules_static/regex_mapping.yml",
"max_cached_pseudonyms": 1000000,
}
},
{
"pre_detector": {
"type": "pre_detector",
"outputs": [{"jsonl": "pre_detector_topic"}],
"generic_rules": ["tests/testdata/acceptance/pre_detector/rules/generic"],
"specific_rules": ["tests/testdata/acceptance/pre_detector/rules/specific"],
"tree_config": "tests/testdata/acceptance/pre_detector/tree_config.json",
}
},
],
"input": {
"jsonl": {
"type": "jsonl_input",
"documents_path": "tests/testdata/input_logdata/kafka_raw_event_for_pre_detector.jsonl",
config = {
"version": "1",
"logger": {"level": "DEBUG"},
"process_count": 1,
"timeout": 0.1,
"profile_pipelines": False,
"restart_count": -1,
"pipeline": [
{
"dissector": {
"type": "dissector",
"specific_rules": ["tests/testdata/acceptance/dissector/rules/specific"],
"generic_rules": ["tests/testdata/acceptance/dissector/rules/generic"],
}
},
}
)
{
"selective_extractor": {
"type": "selective_extractor",
"specific_rules": [
"tests/testdata/acceptance/selective_extractor/rules/specific"
],
"generic_rules": [
"tests/testdata/acceptance/selective_extractor/rules/generic"
],
}
},
{
"pseudonymizer": {
"type": "pseudonymizer",
"pubkey_analyst": "tests/testdata/acceptance/pseudonymizer/example_analyst_pub.pem",
"pubkey_depseudo": "tests/testdata/acceptance/pseudonymizer/example_depseudo_pub.pem",
"hash_salt": "a_secret_tasty_ingredient",
"outputs": [{"second_output": "pseudonyms"}],
"specific_rules": [
"tests/testdata/acceptance/pseudonymizer/rules_static/specific"
],
"generic_rules": [
"tests/testdata/acceptance/pseudonymizer/rules_static/generic"
],
"regex_mapping": "tests/testdata/acceptance/pseudonymizer/rules_static/regex_mapping.yml",
"max_cached_pseudonyms": 1000000,
}
},
{
"pre_detector": {
"type": "pre_detector",
"outputs": [{"jsonl": "pre_detector_topic"}],
"generic_rules": ["tests/testdata/acceptance/pre_detector/rules/generic"],
"specific_rules": ["tests/testdata/acceptance/pre_detector/rules/specific"],
"tree_config": "tests/testdata/acceptance/pre_detector/tree_config.json",
}
},
],
"input": {
"jsonl": {
"type": "jsonl_input",
"documents_path": "tests/testdata/input_logdata/selective_extractor_events.jsonl",
}
},
"output": {
"jsonl": {
"type": "jsonl_output",
"output_file": tempfile.mkstemp(suffix="output1.jsonl")[1],
"output_file_custom": tempfile.mkstemp(suffix="custom1.jsonl")[1],
},
"second_output": {
"type": "jsonl_output",
"output_file": tempfile.mkstemp(suffix="output2.jsonl")[1],
"output_file_custom": tempfile.mkstemp(suffix="custom2.jsonl")[1],
},
},
"error_output": {
"jsonl": {
"type": "jsonl_output",
"output_file": tempfile.mkstemp(suffix="error.jsonl")[1],
}
},
}
return Configuration(**config)


def setup_function():
@@ -89,43 +108,22 @@ def teardown_function():


def test_full_pipeline_run_with_two_outputs(tmp_path: Path, config: Configuration):
output_path1 = tmp_path / "output1.jsonl"
output_path_custom1 = tmp_path / "output_custom1.jsonl"
output_path_error = tmp_path / "output_error.jsonl"
output_path2 = tmp_path / "output2.jsonl"
output_path_custom2 = tmp_path / "output_custom2.jsonl"
config.input["jsonl"][
"documents_path"
] = "tests/testdata/input_logdata/selective_extractor_events.jsonl"
config.output = {
"jsonl": {
"type": "jsonl_output",
"output_file": f"{str(output_path1)}",
"output_file_custom": f"{str(output_path_custom1)}",
},
"second_output": {
"type": "jsonl_output",
"output_file": f"{str(output_path2)}",
"output_file_custom": f"{str(output_path_custom2)}",
},
}
config.error_output = {
"jsonl": {
"type": "jsonl_output",
"output_file": f"{str(output_path_error)}",
}
}
output_path1 = Path(config.output["jsonl"]["output_file"])
output_path_custom1 = Path(config.output["jsonl"]["output_file_custom"])
output_path_error = Path(config.error_output["jsonl"]["output_file"])
output_path2 = Path(config.output["second_output"]["output_file"])
output_path_custom2 = Path(config.output["second_output"]["output_file_custom"])
config_path = tmp_path / "generated_config.yml"
config_path.write_text(config.as_yaml())
proc = start_logprep(str(config_path))
wait_for_output(proc, "no documents left")
stop_logprep(proc)
assert output_path1.read_text(), "output is not empty"
assert (
output_path1.read_text() == output_path2.read_text()
assert output_path1.read_text("utf8"), "output is not empty"
assert output_path1.read_text("utf8") == output_path2.read_text(
"utf8"
), "stored output in both default outputs"
assert output_path_custom1.read_text(), "stored custom output in output with name 'jsonl'"
assert (
not output_path_custom2.read_text()
assert output_path_custom1.read_text("utf8"), "stored custom output in output with name 'jsonl'"
assert not output_path_custom2.read_text(
"utf8"
), "stored custom output not in output with name 'second_output'"
assert not output_path_error.read_text(), "no errors in processing"
assert not output_path_error.read_text("utf8"), "no errors in processing"

0 comments on commit 5fc1ea2

Please sign in to comment.