fix multiple outputs test

fkie-cad · Sep 25, 2024 · 5fc1ea2
1 parent fb3818b
commit 5fc1ea2
Showing 1 changed file with 90 additions and 92 deletions.
diff --git a/tests/acceptance/test_multiple_outputs.py b/tests/acceptance/test_multiple_outputs.py
@@ -1,5 +1,6 @@
 # pylint: disable=missing-docstring
 # pylint: disable=line-too-long
+import tempfile
 import time
 from pathlib import Path
 
@@ -17,67 +18,85 @@ def wait_for_interval(interval):
 
 @pytest.fixture(name="config")
 def get_config():
-    return Configuration(
-        **{
-            "version": "1",
-            "logger": {"level": "DEBUG"},
-            "process_count": 1,
-            "timeout": 0.1,
-            "profile_pipelines": False,
-            "pipeline": [
-                {
-                    "dissector": {
-                        "type": "dissector",
-                        "specific_rules": ["tests/testdata/acceptance/dissector/rules/specific"],
-                        "generic_rules": ["tests/testdata/acceptance/dissector/rules/generic"],
-                    }
-                },
-                {
-                    "selective_extractor": {
-                        "type": "selective_extractor",
-                        "specific_rules": [
-                            "tests/testdata/acceptance/selective_extractor/rules/specific"
-                        ],
-                        "generic_rules": [
-                            "tests/testdata/acceptance/selective_extractor/rules/generic"
-                        ],
-                    }
-                },
-                {
-                    "pseudonymizer": {
-                        "type": "pseudonymizer",
-                        "pubkey_analyst": "tests/testdata/acceptance/pseudonymizer/example_analyst_pub.pem",
-                        "pubkey_depseudo": "tests/testdata/acceptance/pseudonymizer/example_depseudo_pub.pem",
-                        "hash_salt": "a_secret_tasty_ingredient",
-                        "outputs": [{"jsonl": "pseudonyms"}],
-                        "specific_rules": [
-                            "tests/testdata/acceptance/pseudonymizer/rules_static/specific"
-                        ],
-                        "generic_rules": [
-                            "tests/testdata/acceptance/pseudonymizer/rules_static/generic"
-                        ],
-                        "regex_mapping": "tests/testdata/acceptance/pseudonymizer/rules_static/regex_mapping.yml",
-                        "max_cached_pseudonyms": 1000000,
-                    }
-                },
-                {
-                    "pre_detector": {
-                        "type": "pre_detector",
-                        "outputs": [{"jsonl": "pre_detector_topic"}],
-                        "generic_rules": ["tests/testdata/acceptance/pre_detector/rules/generic"],
-                        "specific_rules": ["tests/testdata/acceptance/pre_detector/rules/specific"],
-                        "tree_config": "tests/testdata/acceptance/pre_detector/tree_config.json",
-                    }
-                },
-            ],
-            "input": {
-                "jsonl": {
-                    "type": "jsonl_input",
-                    "documents_path": "tests/testdata/input_logdata/kafka_raw_event_for_pre_detector.jsonl",
+    config = {
+        "version": "1",
+        "logger": {"level": "DEBUG"},
+        "process_count": 1,
+        "timeout": 0.1,
+        "profile_pipelines": False,
+        "restart_count": -1,
+        "pipeline": [
+            {
+                "dissector": {
+                    "type": "dissector",
+                    "specific_rules": ["tests/testdata/acceptance/dissector/rules/specific"],
+                    "generic_rules": ["tests/testdata/acceptance/dissector/rules/generic"],
                 }
             },
-        }
-    )
+            {
+                "selective_extractor": {
+                    "type": "selective_extractor",
+                    "specific_rules": [
+                        "tests/testdata/acceptance/selective_extractor/rules/specific"
+                    ],
+                    "generic_rules": [
+                        "tests/testdata/acceptance/selective_extractor/rules/generic"
+                    ],
+                }
+            },
+            {
+                "pseudonymizer": {
+                    "type": "pseudonymizer",
+                    "pubkey_analyst": "tests/testdata/acceptance/pseudonymizer/example_analyst_pub.pem",
+                    "pubkey_depseudo": "tests/testdata/acceptance/pseudonymizer/example_depseudo_pub.pem",
+                    "hash_salt": "a_secret_tasty_ingredient",
+                    "outputs": [{"second_output": "pseudonyms"}],
+                    "specific_rules": [
+                        "tests/testdata/acceptance/pseudonymizer/rules_static/specific"
+                    ],
+                    "generic_rules": [
+                        "tests/testdata/acceptance/pseudonymizer/rules_static/generic"
+                    ],
+                    "regex_mapping": "tests/testdata/acceptance/pseudonymizer/rules_static/regex_mapping.yml",
+                    "max_cached_pseudonyms": 1000000,
+                }
+            },
+            {
+                "pre_detector": {
+                    "type": "pre_detector",
+                    "outputs": [{"jsonl": "pre_detector_topic"}],
+                    "generic_rules": ["tests/testdata/acceptance/pre_detector/rules/generic"],
+                    "specific_rules": ["tests/testdata/acceptance/pre_detector/rules/specific"],
+                    "tree_config": "tests/testdata/acceptance/pre_detector/tree_config.json",
+                }
+            },
+        ],
+        "input": {
+            "jsonl": {
+                "type": "jsonl_input",
+                "documents_path": "tests/testdata/input_logdata/selective_extractor_events.jsonl",
+            }
+        },
+        "output": {
+            "jsonl": {
+                "type": "jsonl_output",
+                "output_file": tempfile.mkstemp(suffix="output1.jsonl")[1],
+                "output_file_custom": tempfile.mkstemp(suffix="custom1.jsonl")[1],
+            },
+            "second_output": {
+                "type": "jsonl_output",
+                "output_file": tempfile.mkstemp(suffix="output2.jsonl")[1],
+                "output_file_custom": tempfile.mkstemp(suffix="custom2.jsonl")[1],
+            },
+        },
+        "error_output": {
+            "jsonl": {
+                "type": "jsonl_output",
+                "output_file": tempfile.mkstemp(suffix="error.jsonl")[1],
+            }
+        },
+    }
+    return Configuration(**config)
 
 
 def setup_function():
@@ -89,43 +108,22 @@ def teardown_function():
 
 
 def test_full_pipeline_run_with_two_outputs(tmp_path: Path, config: Configuration):
-    output_path1 = tmp_path / "output1.jsonl"
-    output_path_custom1 = tmp_path / "output_custom1.jsonl"
-    output_path_error = tmp_path / "output_error.jsonl"
-    output_path2 = tmp_path / "output2.jsonl"
-    output_path_custom2 = tmp_path / "output_custom2.jsonl"
-    config.input["jsonl"][
-        "documents_path"
-    ] = "tests/testdata/input_logdata/selective_extractor_events.jsonl"
-    config.output = {
-        "jsonl": {
-            "type": "jsonl_output",
-            "output_file": f"{str(output_path1)}",
-            "output_file_custom": f"{str(output_path_custom1)}",
-        },
-        "second_output": {
-            "type": "jsonl_output",
-            "output_file": f"{str(output_path2)}",
-            "output_file_custom": f"{str(output_path_custom2)}",
-        },
-    }
-    config.error_output = {
-        "jsonl": {
-            "type": "jsonl_output",
-            "output_file": f"{str(output_path_error)}",
-        }
-    }
+    output_path1 = Path(config.output["jsonl"]["output_file"])
+    output_path_custom1 = Path(config.output["jsonl"]["output_file_custom"])
+    output_path_error = Path(config.error_output["jsonl"]["output_file"])
+    output_path2 = Path(config.output["second_output"]["output_file"])
+    output_path_custom2 = Path(config.output["second_output"]["output_file_custom"])
     config_path = tmp_path / "generated_config.yml"
     config_path.write_text(config.as_yaml())
     proc = start_logprep(str(config_path))
     wait_for_output(proc, "no documents left")
     stop_logprep(proc)
-    assert output_path1.read_text(), "output is not empty"
-    assert (
-        output_path1.read_text() == output_path2.read_text()
+    assert output_path1.read_text("utf8"), "output is not empty"
+    assert output_path1.read_text("utf8") == output_path2.read_text(
+        "utf8"
     ), "stored output in both default outputs"
-    assert output_path_custom1.read_text(), "stored custom output in output with name 'jsonl'"
-    assert (
-        not output_path_custom2.read_text()
+    assert output_path_custom1.read_text("utf8"), "stored custom output in output with name 'jsonl'"
+    assert not output_path_custom2.read_text(
+        "utf8"
     ), "stored custom output not in output with name 'second_output'"
-    assert not output_path_error.read_text(), "no errors in processing"
+    assert not output_path_error.read_text("utf8"), "no errors in processing"