From cfc4d8ab066002cd3d5e56b9b3980edb2ba87f81 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Tue, 17 Dec 2024 16:15:25 +0100 Subject: [PATCH 01/23] First positive tests of Lucene compliant filter --- logprep/filter/lucene_filter.py | 36 ++++++++++++++++++++++--- tests/unit/filter/test_lucene_filter.py | 24 +++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/logprep/filter/lucene_filter.py b/logprep/filter/lucene_filter.py index 703eec5fb..8b851e535 100644 --- a/logprep/filter/lucene_filter.py +++ b/logprep/filter/lucene_filter.py @@ -314,6 +314,8 @@ def _collect_children(self, tree: luqum.tree) -> List[FilterExpression]: return expressions def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]: + # ok also hier bin ich denke ich richtig. ich muss jetzt mal ueberlegen, was hier was macht. und dann kann ich + # entscheiden wie ich es umsetze if isinstance(tree.expr, (Phrase, Word)): key = tree.name.replace("\\", "") key = key.split(".") @@ -323,6 +325,15 @@ def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]: value = self._strip_quote_from_string(tree.expr.value) value = self._remove_lucene_escaping(value) return self._get_filter_expression(key, value) + elif isinstance(tree.expr, Regex): + key = tree.name.replace("\\", "") + key = key.split(".") + if tree.expr.value == "null": + return Null(key) + + value = self._strip_quote_from_string(tree.expr.value) + value = self._remove_lucene_escaping(value) + return self._get_filter_expression_regex(key, value) return None def _get_filter_expression( @@ -338,6 +349,7 @@ def _get_filter_expression( if self._special_fields.items(): for sf_key, sf_value in self._special_fields.items(): if sf_value is True or dotted_field in sf_value: + # Todo: this has to be removed at the end of the ticket. if sf_key == "regex_fields": logger.warning( "[Deprecated]: regex_fields are no longer necessary. " @@ -346,12 +358,30 @@ def _get_filter_expression( return self._special_fields_map[sf_key](key, value) - if value.startswith("/") and value.endswith("/"): - value = value.strip("/") - return RegExFilterExpression(key, value) + #Todo: this has to be removed at the end of the ticket. + + # if hasattr(self._tree, 'expr') and isinstance(self._tree.expr, Regex): + # value = value.strip("/") + # return RegExFilterExpression(key, value) + # if value.startswith("/") and value.endswith("/"): + # value = value.strip("/") + # return RegExFilterExpression(key, value) return StringFilterExpression(key, value) + def _get_filter_expression_regex( + self, key: List[str], value + ) -> Union[RegExFilterExpression, StringFilterExpression]: + key_and_modifier = key[-1].split("|") + if len(key_and_modifier) == 2: + if key_and_modifier[-1] == "re": + return RegExFilterExpression(key[:-1] + key_and_modifier[:-1], value) + + + value = value.strip("/") + return RegExFilterExpression(key, value) + + @staticmethod def _create_value_expression(word: luqum.tree) -> Union[Exists, Always]: value = word.value.replace("\\", "") diff --git a/tests/unit/filter/test_lucene_filter.py b/tests/unit/filter/test_lucene_filter.py index 2873a7a8c..cd03b8792 100644 --- a/tests/unit/filter/test_lucene_filter.py +++ b/tests/unit/filter/test_lucene_filter.py @@ -482,3 +482,27 @@ def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_o RegExFilterExpression(["regex_key_one"], ".*value.*"), StringFilterExpression(["key_two"], "value"), ) + + def test_new_lucene_compliance(self): + lucene_filter = LuceneFilter.create( + 'regex_key_one:".*value.*"' + ) + + assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") + + def test_new_lucene_compliance2(self): + lucene_filter = LuceneFilter.create( + 'regex_key_one:/.*value.*/' + ) + + assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") + + def test_new_lucene_compliance3(self): + lucene_filter = LuceneFilter.create( + 'regex_key_one:/.*value.*/ AND key_two: "/.*value.*/"', + ) + + assert lucene_filter == And(RegExFilterExpression(["regex_key_one"], ".*value.*"), + StringFilterExpression(["key_two"], "/.*value.*/")) + + From f2a099e15d250d51bb1591bcfc91d2cb5974326a Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Tue, 17 Dec 2024 16:28:07 +0100 Subject: [PATCH 02/23] Remove comments --- logprep/filter/lucene_filter.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/logprep/filter/lucene_filter.py b/logprep/filter/lucene_filter.py index 8b851e535..48cee13ef 100644 --- a/logprep/filter/lucene_filter.py +++ b/logprep/filter/lucene_filter.py @@ -314,8 +314,6 @@ def _collect_children(self, tree: luqum.tree) -> List[FilterExpression]: return expressions def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]: - # ok also hier bin ich denke ich richtig. ich muss jetzt mal ueberlegen, was hier was macht. und dann kann ich - # entscheiden wie ich es umsetze if isinstance(tree.expr, (Phrase, Word)): key = tree.name.replace("\\", "") key = key.split(".") @@ -349,7 +347,6 @@ def _get_filter_expression( if self._special_fields.items(): for sf_key, sf_value in self._special_fields.items(): if sf_value is True or dotted_field in sf_value: - # Todo: this has to be removed at the end of the ticket. if sf_key == "regex_fields": logger.warning( "[Deprecated]: regex_fields are no longer necessary. " @@ -358,15 +355,6 @@ def _get_filter_expression( return self._special_fields_map[sf_key](key, value) - #Todo: this has to be removed at the end of the ticket. - - # if hasattr(self._tree, 'expr') and isinstance(self._tree.expr, Regex): - # value = value.strip("/") - # return RegExFilterExpression(key, value) - # if value.startswith("/") and value.endswith("/"): - # value = value.strip("/") - # return RegExFilterExpression(key, value) - return StringFilterExpression(key, value) def _get_filter_expression_regex( From f7427b67b158c03ff9e52c2dbc0c3cb49931b550 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 10:25:48 +0100 Subject: [PATCH 03/23] Refactoring --- logprep/filter/lucene_filter.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/logprep/filter/lucene_filter.py b/logprep/filter/lucene_filter.py index 48cee13ef..15fc70bb2 100644 --- a/logprep/filter/lucene_filter.py +++ b/logprep/filter/lucene_filter.py @@ -334,13 +334,22 @@ def _create_field(self, tree: luqum.tree) -> Optional[FilterExpression]: return self._get_filter_expression_regex(key, value) return None - def _get_filter_expression( - self, key: List[str], value - ) -> Union[RegExFilterExpression, StringFilterExpression]: + @staticmethod + def _check_key_and_modifier(key, value): key_and_modifier = key[-1].split("|") if len(key_and_modifier) == 2: if key_and_modifier[-1] == "re": return RegExFilterExpression(key[:-1] + key_and_modifier[:-1], value) + return None + + + def _get_filter_expression( + self, key: List[str], value + ) -> Union[RegExFilterExpression, StringFilterExpression]: + + key_and_modifier_check = LuceneTransformer._check_key_and_modifier(key, value) + if key_and_modifier_check is not None: + return key_and_modifier_check dotted_field = ".".join(key) @@ -360,11 +369,10 @@ def _get_filter_expression( def _get_filter_expression_regex( self, key: List[str], value ) -> Union[RegExFilterExpression, StringFilterExpression]: - key_and_modifier = key[-1].split("|") - if len(key_and_modifier) == 2: - if key_and_modifier[-1] == "re": - return RegExFilterExpression(key[:-1] + key_and_modifier[:-1], value) + key_and_modifier_check = LuceneTransformer._check_key_and_modifier(key, value) + if key_and_modifier_check is not None: + return key_and_modifier_check value = value.strip("/") return RegExFilterExpression(key, value) From 5511ef8789a33fd71eebd811306a8391b89b03f8 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 10:34:18 +0100 Subject: [PATCH 04/23] Straighten up tests --- tests/unit/filter/test_lucene_filter.py | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/tests/unit/filter/test_lucene_filter.py b/tests/unit/filter/test_lucene_filter.py index cd03b8792..67764b0cb 100644 --- a/tests/unit/filter/test_lucene_filter.py +++ b/tests/unit/filter/test_lucene_filter.py @@ -458,7 +458,7 @@ def test_create_filter_error(self, testcase, input_str, message): def test_creates_lucene_compliance_filter_two_matching_regex_keys_of_two(self): lucene_filter = LuceneFilter.create( - 'regex_key_one: "/.*value.*/" AND regex_key_two: "/.*value.*/"', + 'regex_key_one: /.*value.*/ AND regex_key_two: /.*value.*/', ) assert lucene_filter == And( @@ -466,38 +466,21 @@ def test_creates_lucene_compliance_filter_two_matching_regex_keys_of_two(self): RegExFilterExpression(["regex_key_two"], ".*value.*"), ) - def test_creates_lucene_compliance_filter_one_regex_key(self): + def test_creates_StringFilter_not_Regex(self): lucene_filter = LuceneFilter.create( 'regex_key_one: "/.*value.*/"', ) - assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") - - def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_of_two(self): - lucene_filter = LuceneFilter.create( - 'regex_key_one: "/.*value.*/" AND key_two: "value"', - ) - - assert lucene_filter == And( - RegExFilterExpression(["regex_key_one"], ".*value.*"), - StringFilterExpression(["key_two"], "value"), - ) + assert lucene_filter == StringFilterExpression(["regex_key_one"], "/.*value.*/") def test_new_lucene_compliance(self): - lucene_filter = LuceneFilter.create( - 'regex_key_one:".*value.*"' - ) - - assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") - - def test_new_lucene_compliance2(self): lucene_filter = LuceneFilter.create( 'regex_key_one:/.*value.*/' ) assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") - def test_new_lucene_compliance3(self): + def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_of_two2(self): lucene_filter = LuceneFilter.create( 'regex_key_one:/.*value.*/ AND key_two: "/.*value.*/"', ) From 5d4ebd87fe1cb7d08868de3d75ea58c1ba6f7fdb Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 11:08:41 +0100 Subject: [PATCH 05/23] Update regex notebook --- .../notebooks/processor_examples/regex.ipynb | 119 ++++++++++++++++-- 1 file changed, 106 insertions(+), 13 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 2c05d28bb..6aa0c6756 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -21,7 +21,7 @@ " 'data_stream': {\n", " 'dataset': 'windows', \n", " 'namespace': 'devopslab', \n", - " 'type': 'logs'\n", + " 'type': '/logs/'\n", " }, \n", " '_op_type': 'create'\n", " }\n", @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -109,12 +109,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\n", - "[Deprecation warning]: regex_fields are no longer necessary. Use lucene regex annotation.\n", - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create', '_index': 'logs-windows-devopslab'}\n", - "True\n" + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "False\n" ] } ], @@ -137,6 +134,47 @@ "concat_with_rule(rule_yaml)\n" ] }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "False\n" + ] + } + ], + "source": [ + "rule_yaml = \"\"\"---\n", + "filter: 'data_stream.type: \"/d.*lo.*/\"' \n", + "regex_fields:\n", + " - \"data_stream.type\"\n", + "concatenator:\n", + " source_fields:\n", + " - data_stream.type\n", + " - data_stream.dataset\n", + " - data_stream.namespace\n", + " target_field: _index\n", + " separator: \"-\"\n", + " overwrite_target: false\n", + " delete_source_fields: false\n", + "\"\"\"\n", + "\n", + "concat_with_rule(rule_yaml)\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -146,22 +184,62 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", + "False\n" + ] + } + ], + "source": [ + "rule_yaml = \"\"\"---\n", + "filter: 'data_stream.type: /.*lo.*/' \n", + "concatenator:\n", + " source_fields:\n", + " - data_stream.type\n", + " - data_stream.dataset\n", + " - data_stream.namespace\n", + " target_field: _index\n", + " separator: \"-\"\n", + " overwrite_target: false\n", + " delete_source_fields: false\n", + "\"\"\"\n", + "concat_with_rule(rule_yaml)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create', '_index': 'logs-windows-devopslab'}\n", - "True\n" + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "False\n" ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \"/.*lo.*/\"' \n", + "filter: 'data_stream.type: \".*lo.*\"' \n", + "regex_fields:\n", + " - \"data_stream.type\"\n", "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -172,8 +250,23 @@ " overwrite_target: false\n", " delete_source_fields: false\n", "\"\"\"\n", + "\n", "concat_with_rule(rule_yaml)\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 1f8404d895806277cf29a305f1d3b3f80299e014 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 11:16:29 +0100 Subject: [PATCH 06/23] Update regex notebook 2 --- .../notebooks/processor_examples/regex.ipynb | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 6aa0c6756..dc867deb5 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -136,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -151,14 +151,14 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", "False\n" ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \"/d.*lo.*/\"' \n", + "filter: 'data_stream.type: \"/.*lo.*/\"' \n", "regex_fields:\n", " - \"data_stream.type\"\n", "concatenator:\n", @@ -215,31 +215,32 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 38, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", + "False\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "False\n" + "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", + "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", + "/tmp/ipykernel_3404/1194941343.py:1: SyntaxWarning: invalid escape sequence '\\/'\n", + " rule_yaml = \"\"\"---\n" ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \".*lo.*\"' \n", - "regex_fields:\n", - " - \"data_stream.type\"\n", + "filter: 'data_stream.type: /\\/.*lo.*/' \n", "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -250,8 +251,7 @@ " overwrite_target: false\n", " delete_source_fields: false\n", "\"\"\"\n", - "\n", - "concat_with_rule(rule_yaml)\n" + "concat_with_rule(rule_yaml)" ] }, { From 02aa59e8b243aa6c7e856708ccd113289209014d Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 16:29:48 +0100 Subject: [PATCH 07/23] Update regex notebook 3 --- .../notebooks/processor_examples/regex.ipynb | 41 +++++++------------ tests/unit/filter/test_lucene_filter.py | 26 ++++++++++++ 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index dc867deb5..c28f68bed 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -81,9 +81,7 @@ " concatenator = Factory.create(processor_config)\n", " print(f\"before: {mydocument}\")\n", " concatenator.process(mydocument)\n", - " print(f\"after: {mydocument}\")\n", - " print(mydocument == expected)\n", - " " + " print(f\"after: {mydocument}\")" ] }, { @@ -95,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 53, "metadata": {}, "outputs": [ { @@ -109,15 +107,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create', '_index': '/123log+/-windows-devopslab'}\n", "False\n" ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \".*lo.*\"' \n", + "filter: 'data_stream.type: \".*lo.*\"'\n", "regex_fields:\n", " - \"data_stream.type\"\n", "concatenator:\n", @@ -136,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 78, "metadata": {}, "outputs": [ { @@ -151,14 +149,14 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", "False\n" ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \"/.*lo.*/\"' \n", + "filter: 'data_stream.type: \"/logs/\"' \n", "regex_fields:\n", " - \"data_stream.type\"\n", "concatenator:\n", @@ -171,7 +169,7 @@ " overwrite_target: false\n", " delete_source_fields: false\n", "\"\"\"\n", - "\n", + "/\n", "concat_with_rule(rule_yaml)\n" ] }, @@ -184,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 77, "metadata": {}, "outputs": [ { @@ -199,7 +197,7 @@ ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: /.*lo.*/' \n", + "filter: 'data_stream.type: /.*log.*/' \n", "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -215,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -226,21 +224,12 @@ "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", "False\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", - "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", - "/tmp/ipykernel_3404/1194941343.py:1: SyntaxWarning: invalid escape sequence '\\/'\n", - " rule_yaml = \"\"\"---\n" - ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: /\\/.*lo.*/' \n", + "filter: 'data_stream.type: /\\\\/.*lo.*/' \n", + " \n", "concatenator:\n", " source_fields:\n", " - data_stream.type\n", diff --git a/tests/unit/filter/test_lucene_filter.py b/tests/unit/filter/test_lucene_filter.py index 67764b0cb..b957f46f9 100644 --- a/tests/unit/filter/test_lucene_filter.py +++ b/tests/unit/filter/test_lucene_filter.py @@ -488,4 +488,30 @@ def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_o assert lucene_filter == And(RegExFilterExpression(["regex_key_one"], ".*value.*"), StringFilterExpression(["key_two"], "/.*value.*/")) + def test_new_lucene_compliance2(self): + lucene_filter = LuceneFilter.create( + 'regex_key_one:/\/.*value.*/' + ) + + lucene_filter = LuceneFilter.create( + 'regex_key_one:/\\/.*value.*/' + ) + + # lucene_filter = LuceneFilter.create( + # 'regex_key_one: "\/.*value.*"', + # special_fields={"regex_fields": ["regex_key_one"]}, + # ) + + # in opensearch mit lucene ausprobiert: /\/value/ um /value zu finden + # opensearch lucene regex ohne escapen funktioniert nicht. also //value// geht nicht, so wie es im notebook + # mit dem regex_field ist. + + # so wie es hier ist funktioniert (mti /\/ ebenso wie mit /\\/ weil im Code sowieso beides auf /\\/ gesetzt wird) + # im Notebook mit yaml doc muss allerdings mit \\/ escaped werden, dass es funktioniert. + # Das waere noch gut, wenn man das nicht muesste. + + + + assert lucene_filter == RegExFilterExpression(["regex_key_one"], "\/.*value.*") + #assert lucene_filter == RegExFilterExpression(["regex_key_one"], "/.*value.*") From 8bbe4f1f36885d968769f09c073a9c9e88134e8a Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 19 Dec 2024 13:23:08 +0100 Subject: [PATCH 08/23] Cleaning up --- .../notebooks/processor_examples/regex.ipynb | 60 +++++++++++++++---- tests/unit/filter/test_lucene_filter.py | 30 +++------- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index c28f68bed..ef3372362 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -134,7 +134,16 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Das ist meiner nach falsch und sollte nur mit \"\\/logs\\/\" funktionieren" + ] + }, + { + "cell_type": "code", + "execution_count": 79, "metadata": {}, "outputs": [ { @@ -149,7 +158,7 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", "False\n" ] } @@ -212,8 +221,44 @@ ] }, { + "metadata": {}, "cell_type": "code", - "execution_count": 71, + "outputs": [], + "execution_count": null, + "source": "### Fehlerwarnung mit nur einem Escape." + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "rule_yaml = \"\"\"---\n", + "filter: 'data_stream.type: /\\/lo.*/' \n", + " \n", + "concatenator:\n", + " source_fields:\n", + " - data_stream.type\n", + " - data_stream.dataset\n", + " - data_stream.namespace\n", + " target_field: _index\n", + " separator: \"-\"\n", + " overwrite_target: false\n", + " delete_source_fields: false\n", + "\"\"\"\n", + "concat_with_rule(rule_yaml)" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "### Zweimal Escapen ist richtig und funktioniert. Würde sich allerdings nicht eins zu eins in Opensearch kopieren lassen. " + }, + { + "cell_type": "code", + "execution_count": 87, "metadata": {}, "outputs": [ { @@ -228,7 +273,7 @@ ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: /\\\\/.*lo.*/' \n", + "filter: 'data_stream.type: /\\\\/lo.*/' \n", " \n", "concatenator:\n", " source_fields:\n", @@ -243,13 +288,6 @@ "concat_with_rule(rule_yaml)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, diff --git a/tests/unit/filter/test_lucene_filter.py b/tests/unit/filter/test_lucene_filter.py index b957f46f9..460e25fd0 100644 --- a/tests/unit/filter/test_lucene_filter.py +++ b/tests/unit/filter/test_lucene_filter.py @@ -480,7 +480,7 @@ def test_new_lucene_compliance(self): assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") - def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_of_two2(self): + def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_of_two(self): lucene_filter = LuceneFilter.create( 'regex_key_one:/.*value.*/ AND key_two: "/.*value.*/"', ) @@ -488,30 +488,16 @@ def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_o assert lucene_filter == And(RegExFilterExpression(["regex_key_one"], ".*value.*"), StringFilterExpression(["key_two"], "/.*value.*/")) - def test_new_lucene_compliance2(self): - lucene_filter = LuceneFilter.create( - 'regex_key_one:/\/.*value.*/' - ) - + def test_new_lucene_compliance_double_escape(self): lucene_filter = LuceneFilter.create( 'regex_key_one:/\\/.*value.*/' ) - # lucene_filter = LuceneFilter.create( - # 'regex_key_one: "\/.*value.*"', - # special_fields={"regex_fields": ["regex_key_one"]}, - # ) - - # in opensearch mit lucene ausprobiert: /\/value/ um /value zu finden - # opensearch lucene regex ohne escapen funktioniert nicht. also //value// geht nicht, so wie es im notebook - # mit dem regex_field ist. - - # so wie es hier ist funktioniert (mti /\/ ebenso wie mit /\\/ weil im Code sowieso beides auf /\\/ gesetzt wird) - # im Notebook mit yaml doc muss allerdings mit \\/ escaped werden, dass es funktioniert. - # Das waere noch gut, wenn man das nicht muesste. - - + assert lucene_filter == RegExFilterExpression(["regex_key_one"], "\/.*value.*") + def test_new_lucene_compliance_single_escape(self): + lucene_filter = LuceneFilter.create( + 'regex_key_one:/\/.*value.*/' + ) - assert lucene_filter == RegExFilterExpression(["regex_key_one"], "\/.*value.*") - #assert lucene_filter == RegExFilterExpression(["regex_key_one"], "/.*value.*") + assert lucene_filter == RegExFilterExpression(["regex_key_one"], "\/.*value.*") \ No newline at end of file From 1eea975de85df2f767a6aaefa5ab9a481f2d53d4 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 19 Dec 2024 13:26:53 +0100 Subject: [PATCH 09/23] Black formatting. --- tests/unit/filter/test_lucene_filter.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tests/unit/filter/test_lucene_filter.py b/tests/unit/filter/test_lucene_filter.py index 460e25fd0..8ab949619 100644 --- a/tests/unit/filter/test_lucene_filter.py +++ b/tests/unit/filter/test_lucene_filter.py @@ -458,7 +458,7 @@ def test_create_filter_error(self, testcase, input_str, message): def test_creates_lucene_compliance_filter_two_matching_regex_keys_of_two(self): lucene_filter = LuceneFilter.create( - 'regex_key_one: /.*value.*/ AND regex_key_two: /.*value.*/', + "regex_key_one: /.*value.*/ AND regex_key_two: /.*value.*/", ) assert lucene_filter == And( @@ -474,9 +474,7 @@ def test_creates_StringFilter_not_Regex(self): assert lucene_filter == StringFilterExpression(["regex_key_one"], "/.*value.*/") def test_new_lucene_compliance(self): - lucene_filter = LuceneFilter.create( - 'regex_key_one:/.*value.*/' - ) + lucene_filter = LuceneFilter.create("regex_key_one:/.*value.*/") assert lucene_filter == RegExFilterExpression(["regex_key_one"], ".*value.*") @@ -485,19 +483,17 @@ def test_creates_lucene_compliance_filter_one_matching_one_missmatch_regex_key_o 'regex_key_one:/.*value.*/ AND key_two: "/.*value.*/"', ) - assert lucene_filter == And(RegExFilterExpression(["regex_key_one"], ".*value.*"), - StringFilterExpression(["key_two"], "/.*value.*/")) + assert lucene_filter == And( + RegExFilterExpression(["regex_key_one"], ".*value.*"), + StringFilterExpression(["key_two"], "/.*value.*/"), + ) def test_new_lucene_compliance_double_escape(self): - lucene_filter = LuceneFilter.create( - 'regex_key_one:/\\/.*value.*/' - ) + lucene_filter = LuceneFilter.create("regex_key_one:/\\/.*value.*/") assert lucene_filter == RegExFilterExpression(["regex_key_one"], "\/.*value.*") def test_new_lucene_compliance_single_escape(self): - lucene_filter = LuceneFilter.create( - 'regex_key_one:/\/.*value.*/' - ) + lucene_filter = LuceneFilter.create("regex_key_one:/\/.*value.*/") - assert lucene_filter == RegExFilterExpression(["regex_key_one"], "\/.*value.*") \ No newline at end of file + assert lucene_filter == RegExFilterExpression(["regex_key_one"], "\/.*value.*") From 99fe68a4d9c38ef0e22b5ee97b8f4e256ffb6784 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 19 Dec 2024 13:28:33 +0100 Subject: [PATCH 10/23] Black formatting 2. --- logprep/filter/lucene_filter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/logprep/filter/lucene_filter.py b/logprep/filter/lucene_filter.py index 15fc70bb2..5b7268403 100644 --- a/logprep/filter/lucene_filter.py +++ b/logprep/filter/lucene_filter.py @@ -342,7 +342,6 @@ def _check_key_and_modifier(key, value): return RegExFilterExpression(key[:-1] + key_and_modifier[:-1], value) return None - def _get_filter_expression( self, key: List[str], value ) -> Union[RegExFilterExpression, StringFilterExpression]: @@ -367,7 +366,7 @@ def _get_filter_expression( return StringFilterExpression(key, value) def _get_filter_expression_regex( - self, key: List[str], value + self, key: List[str], value ) -> Union[RegExFilterExpression, StringFilterExpression]: key_and_modifier_check = LuceneTransformer._check_key_and_modifier(key, value) @@ -377,7 +376,6 @@ def _get_filter_expression_regex( value = value.strip("/") return RegExFilterExpression(key, value) - @staticmethod def _create_value_expression(word: luqum.tree) -> Union[Exists, Always]: value = word.value.replace("\\", "") From 65cb4dee79716262110374c639eb4e49c024ccd8 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 19 Dec 2024 14:11:21 +0100 Subject: [PATCH 11/23] Docu update --- logprep/filter/lucene_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logprep/filter/lucene_filter.py b/logprep/filter/lucene_filter.py index 5b7268403..bcf5aa853 100644 --- a/logprep/filter/lucene_filter.py +++ b/logprep/filter/lucene_filter.py @@ -70,7 +70,7 @@ :linenos: :caption: Example - filter: 'ip_address: "/192\.168\.0\..*/"' + filter: 'ip_address: /192\.168\.0\..*/' [Deprecated, but still functional] The field with the regex pattern must be added to the optional field From 5685730523f38385c1b6b686f082bbd24ce1be60 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 19 Dec 2024 14:47:57 +0100 Subject: [PATCH 12/23] Test update --- tests/unit/processor/labeler/test_labeler_rule.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/processor/labeler/test_labeler_rule.py b/tests/unit/processor/labeler/test_labeler_rule.py index 72afb22da..a2ebdeb23 100644 --- a/tests/unit/processor/labeler/test_labeler_rule.py +++ b/tests/unit/processor/labeler/test_labeler_rule.py @@ -218,7 +218,7 @@ def test_null_returns_true_for_matching_document(self): def test_lucene_regex_matches_returns_true_for_matching_document(self): rule_definition = { - "filter": 'applyrule: "/.*yes.*/"', + "filter": 'applyrule: /.*yes.*/', "labeler": {"label": {"reporter": ["windows"]}}, } rule = LabelerRule._create_from_dict(rule_definition) @@ -228,7 +228,7 @@ def test_lucene_regex_matches_returns_true_for_matching_document(self): def test_lucene_regex_matches_returns_false_for_non_matching_document(self): rule_definition = { - "filter": 'applyrule: "/.*yes.*/"', + "filter": 'applyrule: /.*yes.*/', "labeler": {"label": {"reporter": ["windows"]}}, } rule = LabelerRule._create_from_dict(rule_definition) @@ -245,7 +245,7 @@ def test_lucene_regex_matches_returns_false_for_non_matching_document(self): def test_complex_lucene_regex_matches_returns_true_for_matching_document(self): rule_definition = { - "filter": r'applyrule: "/(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/"', + "filter": r'applyrule: /(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/', # pylint: disable=line-too-long "labeler": {"label": {"reporter": ["windows"]}}, } @@ -257,7 +257,7 @@ def test_complex_lucene_regex_matches_returns_true_for_matching_document(self): def test_complex_lucene_regex_does_not_match_returns_true_for_matching_document(self): rule_definition = { - "filter": r'applyrule: "/(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/"', + "filter": r'applyrule: /(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/', # pylint: disable=line-too-long "labeler": {"label": {"reporter": ["windows"]}}, } From aaf1327a27634a2d51922f1499b509bb777b4cbd Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 19 Dec 2024 14:59:55 +0100 Subject: [PATCH 13/23] Black formatting --- tests/unit/processor/labeler/test_labeler_rule.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/processor/labeler/test_labeler_rule.py b/tests/unit/processor/labeler/test_labeler_rule.py index a2ebdeb23..aade96836 100644 --- a/tests/unit/processor/labeler/test_labeler_rule.py +++ b/tests/unit/processor/labeler/test_labeler_rule.py @@ -218,7 +218,7 @@ def test_null_returns_true_for_matching_document(self): def test_lucene_regex_matches_returns_true_for_matching_document(self): rule_definition = { - "filter": 'applyrule: /.*yes.*/', + "filter": "applyrule: /.*yes.*/", "labeler": {"label": {"reporter": ["windows"]}}, } rule = LabelerRule._create_from_dict(rule_definition) @@ -228,7 +228,7 @@ def test_lucene_regex_matches_returns_true_for_matching_document(self): def test_lucene_regex_matches_returns_false_for_non_matching_document(self): rule_definition = { - "filter": 'applyrule: /.*yes.*/', + "filter": "applyrule: /.*yes.*/", "labeler": {"label": {"reporter": ["windows"]}}, } rule = LabelerRule._create_from_dict(rule_definition) @@ -245,7 +245,7 @@ def test_lucene_regex_matches_returns_false_for_non_matching_document(self): def test_complex_lucene_regex_matches_returns_true_for_matching_document(self): rule_definition = { - "filter": r'applyrule: /(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/', + "filter": r"applyrule: /(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/", # pylint: disable=line-too-long "labeler": {"label": {"reporter": ["windows"]}}, } @@ -257,7 +257,7 @@ def test_complex_lucene_regex_matches_returns_true_for_matching_document(self): def test_complex_lucene_regex_does_not_match_returns_true_for_matching_document(self): rule_definition = { - "filter": r'applyrule: /(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/', + "filter": r"applyrule: /(?:(?=.*[a-z])(?:(?=.*[A-Z])(?=.*[\d\W])|(?=.*\W)(?=.*\d))|(?=.*\W)(?=.*[A-Z])(?=.*\d)).{8,}/", # pylint: disable=line-too-long "labeler": {"label": {"reporter": ["windows"]}}, } From c04fbf5b77f116d44ca64c11b74fd97fe2e3d3b5 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Tue, 7 Jan 2025 12:31:23 +0100 Subject: [PATCH 14/23] Improving Notebook. --- .../notebooks/processor_examples/regex.ipynb | 158 ++++-------------- 1 file changed, 31 insertions(+), 127 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index ef3372362..9b41dae8a 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -5,18 +5,34 @@ "metadata": {}, "source": [ "# Lucene regex filter\n", - "This presentations contains an example of a filter with a lucene conform regular expression. \n", + "This presentations contains an example of a filter with a Lucene conform regular expression. \n", "A concatenator that merges different fields form an event is used as a processor for demonstrating the filter function. \n", "\n", - "Until now it was necessary to flag keys of values that contain a regular expression with regex_fields. " + "Until now it was necessary to flag the keys in regex_fields, when the value was containing a regular expression. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set document and define concatenator process to test the filter" ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "import sys\n", + "sys.path.insert(0,\"../../../../../\")\n", + "import tempfile\n", + "from copy import deepcopy\n", + "from pathlib import Path\n", + "\n", + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", "document = {\n", " 'data_stream': {\n", " 'dataset': 'windows', \n", @@ -34,30 +50,7 @@ " }, \n", " '_op_type': 'create', \n", " '_index': 'logs-windows-devopslab'\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define process" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.insert(0,\"../../../../../\")\n", - "import tempfile\n", - "from copy import deepcopy\n", - "from pathlib import Path\n", - "\n", - "from unittest import mock\n", - "from logprep.factory import Factory\n", + " }\n", "\n", "rule_path = Path(tempfile.gettempdir()) / \"concatenator\"\n", "rule_path.mkdir(exist_ok=True)\n", @@ -88,12 +81,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### regex_fields version" + "### Former version with explicit regex_fields annotation" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -107,9 +100,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create', '_index': '/123log+/-windows-devopslab'}\n", - "False\n" + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" ] } ], @@ -132,66 +124,16 @@ "concat_with_rule(rule_yaml)\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## Das ist meiner nach falsch und sollte nur mit \"\\/logs\\/\" funktionieren" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", - "False\n" - ] - } - ], - "source": [ - "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \"/logs/\"' \n", - "regex_fields:\n", - " - \"data_stream.type\"\n", - "concatenator:\n", - " source_fields:\n", - " - data_stream.type\n", - " - data_stream.dataset\n", - " - data_stream.namespace\n", - " target_field: _index\n", - " separator: \"-\"\n", - " overwrite_target: false\n", - " delete_source_fields: false\n", - "\"\"\"\n", - "/\n", - "concat_with_rule(rule_yaml)\n" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Lucene conform version without the need of regex_fields" + "### New Lucene conform version without the need of regex_fields" ] }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -199,8 +141,7 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", - "False\n" + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" ] } ], @@ -221,44 +162,15 @@ ] }, { + "cell_type": "markdown", "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": "### Fehlerwarnung mit nur einem Escape." - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, "source": [ - "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: /\\/lo.*/' \n", - " \n", - "concatenator:\n", - " source_fields:\n", - " - data_stream.type\n", - " - data_stream.dataset\n", - " - data_stream.namespace\n", - " target_field: _index\n", - " separator: \"-\"\n", - " overwrite_target: false\n", - " delete_source_fields: false\n", - "\"\"\"\n", - "concat_with_rule(rule_yaml)" + "### Escaping a slash. One escape is needed for yml format, the other one for Lucene syntax. " ] }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": "### Zweimal Escapen ist richtig und funktioniert. Würde sich allerdings nicht eins zu eins in Opensearch kopieren lassen. " - }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -266,8 +178,7 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", - "False\n" + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" ] } ], @@ -287,13 +198,6 @@ "\"\"\"\n", "concat_with_rule(rule_yaml)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From c62a31fa9520fb2ffd8876fd6949e147029d7396 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 11:08:41 +0100 Subject: [PATCH 15/23] Update regex notebook --- .../notebooks/processor_examples/regex.ipynb | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 9b41dae8a..29867251c 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -86,7 +86,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD "execution_count": 2, +======= + "execution_count": 20, +>>>>>>> 21ac87d3 (Update regex notebook) "metadata": {}, "outputs": [ { @@ -101,7 +105,12 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +<<<<<<< HEAD "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" +======= + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "False\n" +>>>>>>> 21ac87d3 (Update regex notebook) ] } ], @@ -124,6 +133,47 @@ "concat_with_rule(rule_yaml)\n" ] }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "False\n" + ] + } + ], + "source": [ + "rule_yaml = \"\"\"---\n", + "filter: 'data_stream.type: \"/d.*lo.*/\"' \n", + "regex_fields:\n", + " - \"data_stream.type\"\n", + "concatenator:\n", + " source_fields:\n", + " - data_stream.type\n", + " - data_stream.dataset\n", + " - data_stream.namespace\n", + " target_field: _index\n", + " separator: \"-\"\n", + " overwrite_target: false\n", + " delete_source_fields: false\n", + "\"\"\"\n", + "\n", + "concat_with_rule(rule_yaml)\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -133,7 +183,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD "execution_count": 3, +======= + "execution_count": 31, +>>>>>>> 21ac87d3 (Update regex notebook) "metadata": {}, "outputs": [ { @@ -141,13 +195,22 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +<<<<<<< HEAD "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" +======= + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", + "False\n" +>>>>>>> 21ac87d3 (Update regex notebook) ] } ], "source": [ "rule_yaml = \"\"\"---\n", +<<<<<<< HEAD "filter: 'data_stream.type: /.*log.*/' \n", +======= + "filter: 'data_stream.type: /.*lo.*/' \n", +>>>>>>> 21ac87d3 (Update regex notebook) "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -162,6 +225,7 @@ ] }, { +<<<<<<< HEAD "cell_type": "markdown", "metadata": {}, "source": [ @@ -174,18 +238,43 @@ "metadata": {}, "outputs": [ { +======= + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" + ] + }, + { +>>>>>>> 21ac87d3 (Update regex notebook) "name": "stdout", "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +<<<<<<< HEAD "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" +======= + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "False\n" +>>>>>>> 21ac87d3 (Update regex notebook) ] } ], "source": [ "rule_yaml = \"\"\"---\n", +<<<<<<< HEAD "filter: 'data_stream.type: /\\\\/lo.*/' \n", " \n", +======= + "filter: 'data_stream.type: \".*lo.*\"' \n", + "regex_fields:\n", + " - \"data_stream.type\"\n", +>>>>>>> 21ac87d3 (Update regex notebook) "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -196,8 +285,28 @@ " overwrite_target: false\n", " delete_source_fields: false\n", "\"\"\"\n", +<<<<<<< HEAD "concat_with_rule(rule_yaml)" ] +======= + "\n", + "concat_with_rule(rule_yaml)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] +>>>>>>> 21ac87d3 (Update regex notebook) } ], "metadata": { From 5d4c20dc1ba1b0f4c19aff671c29e42a9102b202 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 11:16:29 +0100 Subject: [PATCH 16/23] Update regex notebook 2 --- .../notebooks/processor_examples/regex.ipynb | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 29867251c..7902f2bdc 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -150,14 +150,14 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", "False\n" ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \"/d.*lo.*/\"' \n", + "filter: 'data_stream.type: \"/.*lo.*/\"' \n", "regex_fields:\n", " - \"data_stream.type\"\n", "concatenator:\n", @@ -240,10 +240,11 @@ { ======= "cell_type": "code", - "execution_count": 20, + "execution_count": 38, "metadata": {}, "outputs": [ { +<<<<<<< HEAD "name": "stderr", "output_type": "stream", "text": [ @@ -252,21 +253,38 @@ }, { >>>>>>> 21ac87d3 (Update regex notebook) +======= +>>>>>>> 32bf3e59 (Update regex notebook 2) "name": "stdout", "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +<<<<<<< HEAD <<<<<<< HEAD "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" ======= "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +======= + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", +>>>>>>> 32bf3e59 (Update regex notebook 2) "False\n" >>>>>>> 21ac87d3 (Update regex notebook) ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", + "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", + "/tmp/ipykernel_3404/1194941343.py:1: SyntaxWarning: invalid escape sequence '\\/'\n", + " rule_yaml = \"\"\"---\n" + ] } ], "source": [ "rule_yaml = \"\"\"---\n", +<<<<<<< HEAD <<<<<<< HEAD "filter: 'data_stream.type: /\\\\/lo.*/' \n", " \n", @@ -275,6 +293,9 @@ "regex_fields:\n", " - \"data_stream.type\"\n", >>>>>>> 21ac87d3 (Update regex notebook) +======= + "filter: 'data_stream.type: /\\/.*lo.*/' \n", +>>>>>>> 32bf3e59 (Update regex notebook 2) "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -285,12 +306,16 @@ " overwrite_target: false\n", " delete_source_fields: false\n", "\"\"\"\n", +<<<<<<< HEAD <<<<<<< HEAD "concat_with_rule(rule_yaml)" ] ======= "\n", "concat_with_rule(rule_yaml)\n" +======= + "concat_with_rule(rule_yaml)" +>>>>>>> 32bf3e59 (Update regex notebook 2) ] }, { From 2e711d67f986f334cb374f02801149a1e54af7f7 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Wed, 18 Dec 2024 16:29:48 +0100 Subject: [PATCH 17/23] Update regex notebook 3 --- .../notebooks/processor_examples/regex.ipynb | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 7902f2bdc..8dd9da667 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -86,11 +86,14 @@ }, { "cell_type": "code", +<<<<<<< HEAD <<<<<<< HEAD "execution_count": 2, ======= "execution_count": 20, >>>>>>> 21ac87d3 (Update regex notebook) +======= + "execution_count": 53, "metadata": {}, "outputs": [ { @@ -104,13 +107,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", -<<<<<<< HEAD - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" -======= - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create', '_index': '/123log+/-windows-devopslab'}\n", "False\n" ->>>>>>> 21ac87d3 (Update regex notebook) ] } ], @@ -135,7 +134,8 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 78, +>>>>>>> 63a05f12 (Update regex notebook 3) "metadata": {}, "outputs": [ { @@ -150,14 +150,22 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", +<<<<<<< HEAD + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" +======= + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", "False\n" +>>>>>>> 21ac87d3 (Update regex notebook) ] } ], "source": [ "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: \"/.*lo.*/\"' \n", +<<<<<<< HEAD + "filter: 'data_stream.type: \".*lo.*\"'\n", +======= + "filter: 'data_stream.type: \"/logs/\"' \n", +>>>>>>> 63a05f12 (Update regex notebook 3) "regex_fields:\n", " - \"data_stream.type\"\n", "concatenator:\n", @@ -170,7 +178,7 @@ " overwrite_target: false\n", " delete_source_fields: false\n", "\"\"\"\n", - "\n", + "/\n", "concat_with_rule(rule_yaml)\n" ] }, @@ -183,11 +191,15 @@ }, { "cell_type": "code", +<<<<<<< HEAD <<<<<<< HEAD "execution_count": 3, ======= "execution_count": 31, >>>>>>> 21ac87d3 (Update regex notebook) +======= + "execution_count": 77, +>>>>>>> 63a05f12 (Update regex notebook 3) "metadata": {}, "outputs": [ { @@ -206,11 +218,15 @@ ], "source": [ "rule_yaml = \"\"\"---\n", +<<<<<<< HEAD <<<<<<< HEAD "filter: 'data_stream.type: /.*log.*/' \n", ======= "filter: 'data_stream.type: /.*lo.*/' \n", >>>>>>> 21ac87d3 (Update regex notebook) +======= + "filter: 'data_stream.type: /.*log.*/' \n", +>>>>>>> 63a05f12 (Update regex notebook 3) "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -240,7 +256,7 @@ { ======= "cell_type": "code", - "execution_count": 38, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -270,21 +286,12 @@ "False\n" >>>>>>> 21ac87d3 (Update regex notebook) ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", - "<>:1: SyntaxWarning: invalid escape sequence '\\/'\n", - "/tmp/ipykernel_3404/1194941343.py:1: SyntaxWarning: invalid escape sequence '\\/'\n", - " rule_yaml = \"\"\"---\n" - ] } ], "source": [ "rule_yaml = \"\"\"---\n", <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "filter: 'data_stream.type: /\\\\/lo.*/' \n", " \n", @@ -296,6 +303,10 @@ ======= "filter: 'data_stream.type: /\\/.*lo.*/' \n", >>>>>>> 32bf3e59 (Update regex notebook 2) +======= + "filter: 'data_stream.type: /\\\\/.*lo.*/' \n", + " \n", +>>>>>>> 63a05f12 (Update regex notebook 3) "concatenator:\n", " source_fields:\n", " - data_stream.type\n", From 91ba9253af37db4a7a3771bce6632a0d218760a6 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 19 Dec 2024 13:23:08 +0100 Subject: [PATCH 18/23] Cleaning up --- .../notebooks/processor_examples/regex.ipynb | 70 ++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 8dd9da667..0f47dda70 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -134,8 +134,21 @@ }, { "cell_type": "code", +<<<<<<< HEAD "execution_count": 78, >>>>>>> 63a05f12 (Update regex notebook 3) +======= + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Das ist meiner nach falsch und sollte nur mit \"\\/logs\\/\" funktionieren" + ] + }, + { + "cell_type": "code", + "execution_count": 79, +>>>>>>> ec595eb3 (Cleaning up) "metadata": {}, "outputs": [ { @@ -150,10 +163,14 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +<<<<<<< HEAD <<<<<<< HEAD "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" ======= "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +======= + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", +>>>>>>> ec595eb3 (Cleaning up) "False\n" >>>>>>> 21ac87d3 (Update regex notebook) ] @@ -241,6 +258,7 @@ ] }, { +<<<<<<< HEAD <<<<<<< HEAD "cell_type": "markdown", "metadata": {}, @@ -305,6 +323,22 @@ >>>>>>> 32bf3e59 (Update regex notebook 2) ======= "filter: 'data_stream.type: /\\\\/.*lo.*/' \n", +======= + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "### Fehlerwarnung mit nur einem Escape." + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "rule_yaml = \"\"\"---\n", + "filter: 'data_stream.type: /\\/lo.*/' \n", +>>>>>>> ec595eb3 (Cleaning up) " \n", >>>>>>> 63a05f12 (Update regex notebook 3) "concatenator:\n", @@ -330,11 +364,43 @@ ] }, { + "metadata": {}, "cell_type": "code", + "outputs": [], "execution_count": null, + "source": "### Zweimal Escapen ist richtig und funktioniert. Würde sich allerdings nicht eins zu eins in Opensearch kopieren lassen. " + }, + { + "cell_type": "code", + "execution_count": 87, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", + "False\n" + ] + } + ], + "source": [ + "rule_yaml = \"\"\"---\n", + "filter: 'data_stream.type: /\\\\/lo.*/' \n", + " \n", + "concatenator:\n", + " source_fields:\n", + " - data_stream.type\n", + " - data_stream.dataset\n", + " - data_stream.namespace\n", + " target_field: _index\n", + " separator: \"-\"\n", + " overwrite_target: false\n", + " delete_source_fields: false\n", + "\"\"\"\n", + "concat_with_rule(rule_yaml)" + ] }, { "cell_type": "code", From 468eb9e3fc9b3914dd7cdf6bfd632fec55efe98b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Zimmermann?= Date: Fri, 3 Jan 2025 13:41:41 +0100 Subject: [PATCH 19/23] fix import --- logprep/filter/lucene_filter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/logprep/filter/lucene_filter.py b/logprep/filter/lucene_filter.py index bcf5aa853..f2eb3a60f 100644 --- a/logprep/filter/lucene_filter.py +++ b/logprep/filter/lucene_filter.py @@ -107,6 +107,7 @@ Not, OrOperation, Phrase, + Regex, SearchField, Word, ) From 7c3dd139800296ec4dd9485c590c2570beef275e Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Tue, 7 Jan 2025 12:35:20 +0100 Subject: [PATCH 20/23] Improving Notebook 2. --- .../notebooks/processor_examples/regex.ipynb | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 0f47dda70..4d38c9588 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,8 @@ "processor_config = {\n", " \"myconcatenator\":{ \n", " \"type\": \"concatenator\",\n", - " \"rules\": [str(rule_path), \"/dev\"],\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", " }\n", " }\n", "\n", @@ -87,6 +88,7 @@ { "cell_type": "code", <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "execution_count": 2, ======= @@ -94,6 +96,9 @@ >>>>>>> 21ac87d3 (Update regex notebook) ======= "execution_count": 53, +======= + "execution_count": 2, +>>>>>>> 78a96b0d (Improving Notebook 2.) "metadata": {}, "outputs": [ { @@ -107,9 +112,14 @@ "name": "stdout", "output_type": "stream", "text": [ +<<<<<<< HEAD "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create'}\n", "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create', '_index': '/123log+/-windows-devopslab'}\n", "False\n" +======= + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" +>>>>>>> 78a96b0d (Improving Notebook 2.) ] } ], @@ -133,6 +143,7 @@ ] }, { +<<<<<<< HEAD "cell_type": "code", <<<<<<< HEAD "execution_count": 78, @@ -200,6 +211,8 @@ ] }, { +======= +>>>>>>> 78a96b0d (Improving Notebook 2.) "cell_type": "markdown", "metadata": {}, "source": [ @@ -209,6 +222,7 @@ { "cell_type": "code", <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "execution_count": 3, ======= @@ -217,6 +231,9 @@ ======= "execution_count": 77, >>>>>>> 63a05f12 (Update regex notebook 3) +======= + "execution_count": 3, +>>>>>>> 78a96b0d (Improving Notebook 2.) "metadata": {}, "outputs": [ { @@ -224,12 +241,14 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", -<<<<<<< HEAD "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" +<<<<<<< HEAD ======= "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", "False\n" >>>>>>> 21ac87d3 (Update regex notebook) +======= +>>>>>>> 78a96b0d (Improving Notebook 2.) ] } ], @@ -260,6 +279,9 @@ { <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 78a96b0d (Improving Notebook 2.) "cell_type": "markdown", "metadata": {}, "source": [ @@ -269,6 +291,7 @@ { "cell_type": "code", "execution_count": 4, +<<<<<<< HEAD "metadata": {}, "outputs": [ { @@ -373,6 +396,8 @@ { "cell_type": "code", "execution_count": 87, +======= +>>>>>>> 78a96b0d (Improving Notebook 2.) "metadata": {}, "outputs": [ { @@ -380,8 +405,12 @@ "output_type": "stream", "text": [ "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", +<<<<<<< HEAD "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", "False\n" +======= + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" +>>>>>>> 78a96b0d (Improving Notebook 2.) ] } ], @@ -401,6 +430,7 @@ "\"\"\"\n", "concat_with_rule(rule_yaml)" ] +<<<<<<< HEAD }, { "cell_type": "code", @@ -409,6 +439,8 @@ "outputs": [], "source": [] >>>>>>> 21ac87d3 (Update regex notebook) +======= +>>>>>>> 78a96b0d (Improving Notebook 2.) } ], "metadata": { From 8abfea6756fdebec20a050210ba054837797606d Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Tue, 7 Jan 2025 13:02:04 +0100 Subject: [PATCH 21/23] Improving Notebook 3. --- .../notebooks/processor_examples/regex.ipynb | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 4d38c9588..c9e3ccd6b 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -89,6 +89,7 @@ "cell_type": "code", <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "execution_count": 2, ======= @@ -123,6 +124,11 @@ ] } ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> 0a454895 (Improving Notebook 3.) "source": [ "rule_yaml = \"\"\"---\n", "filter: 'data_stream.type: \".*lo.*\"'\n", @@ -223,6 +229,7 @@ "cell_type": "code", <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "execution_count": 3, ======= @@ -252,6 +259,11 @@ ] } ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> 0a454895 (Improving Notebook 3.) "source": [ "rule_yaml = \"\"\"---\n", <<<<<<< HEAD @@ -290,6 +302,7 @@ }, { "cell_type": "code", +<<<<<<< HEAD "execution_count": 4, <<<<<<< HEAD "metadata": {}, @@ -414,6 +427,11 @@ ] } ], +======= + "execution_count": null, + "metadata": {}, + "outputs": [], +>>>>>>> 0a454895 (Improving Notebook 3.) "source": [ "rule_yaml = \"\"\"---\n", "filter: 'data_stream.type: /\\\\/lo.*/' \n", From 402e600bff7dd73c8b120765b385ce2a3748af66 Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Tue, 7 Jan 2025 13:23:19 +0100 Subject: [PATCH 22/23] Improving Notebook 3. --- .../notebooks/processor_examples/regex.ipynb | 294 ------------------ 1 file changed, 294 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index c9e3ccd6b..80b2b1a0c 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -87,48 +87,9 @@ }, { "cell_type": "code", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "execution_count": 2, -======= - "execution_count": 20, ->>>>>>> 21ac87d3 (Update regex notebook) -======= - "execution_count": 53, -======= - "execution_count": 2, ->>>>>>> 78a96b0d (Improving Notebook 2.) - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ -<<<<<<< HEAD - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/123log+/'}, '_op_type': 'create', '_index': '/123log+/-windows-devopslab'}\n", - "False\n" -======= - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" ->>>>>>> 78a96b0d (Improving Notebook 2.) - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> 0a454895 (Improving Notebook 3.) "source": [ "rule_yaml = \"\"\"---\n", "filter: 'data_stream.type: \".*lo.*\"'\n", @@ -149,76 +110,6 @@ ] }, { -<<<<<<< HEAD - "cell_type": "code", -<<<<<<< HEAD - "execution_count": 78, ->>>>>>> 63a05f12 (Update regex notebook 3) -======= - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## Das ist meiner nach falsch und sollte nur mit \"\\/logs\\/\" funktionieren" - ] - }, - { - "cell_type": "code", - "execution_count": 79, ->>>>>>> ec595eb3 (Cleaning up) - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", -<<<<<<< HEAD -<<<<<<< HEAD - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" -======= - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", -======= - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", ->>>>>>> ec595eb3 (Cleaning up) - "False\n" ->>>>>>> 21ac87d3 (Update regex notebook) - ] - } - ], - "source": [ - "rule_yaml = \"\"\"---\n", -<<<<<<< HEAD - "filter: 'data_stream.type: \".*lo.*\"'\n", -======= - "filter: 'data_stream.type: \"/logs/\"' \n", ->>>>>>> 63a05f12 (Update regex notebook 3) - "regex_fields:\n", - " - \"data_stream.type\"\n", - "concatenator:\n", - " source_fields:\n", - " - data_stream.type\n", - " - data_stream.dataset\n", - " - data_stream.namespace\n", - " target_field: _index\n", - " separator: \"-\"\n", - " overwrite_target: false\n", - " delete_source_fields: false\n", - "\"\"\"\n", - "/\n", - "concat_with_rule(rule_yaml)\n" - ] - }, - { -======= ->>>>>>> 78a96b0d (Improving Notebook 2.) "cell_type": "markdown", "metadata": {}, "source": [ @@ -227,54 +118,12 @@ }, { "cell_type": "code", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "execution_count": 3, -======= - "execution_count": 31, ->>>>>>> 21ac87d3 (Update regex notebook) -======= - "execution_count": 77, ->>>>>>> 63a05f12 (Update regex notebook 3) -======= - "execution_count": 3, ->>>>>>> 78a96b0d (Improving Notebook 2.) - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" -<<<<<<< HEAD -======= - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", - "False\n" ->>>>>>> 21ac87d3 (Update regex notebook) -======= ->>>>>>> 78a96b0d (Improving Notebook 2.) - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> 0a454895 (Improving Notebook 3.) "source": [ "rule_yaml = \"\"\"---\n", -<<<<<<< HEAD -<<<<<<< HEAD "filter: 'data_stream.type: /.*log.*/' \n", -======= - "filter: 'data_stream.type: /.*lo.*/' \n", ->>>>>>> 21ac87d3 (Update regex notebook) -======= - "filter: 'data_stream.type: /.*log.*/' \n", ->>>>>>> 63a05f12 (Update regex notebook 3) "concatenator:\n", " source_fields:\n", " - data_stream.type\n", @@ -289,11 +138,6 @@ ] }, { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 78a96b0d (Improving Notebook 2.) "cell_type": "markdown", "metadata": {}, "source": [ @@ -302,136 +146,9 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 4, -<<<<<<< HEAD - "metadata": {}, - "outputs": [ - { -======= - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { -<<<<<<< HEAD - "name": "stderr", - "output_type": "stream", - "text": [ - "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" - ] - }, - { ->>>>>>> 21ac87d3 (Update regex notebook) -======= ->>>>>>> 32bf3e59 (Update regex notebook 2) - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", -<<<<<<< HEAD -<<<<<<< HEAD - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" -======= - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", -======= - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", ->>>>>>> 32bf3e59 (Update regex notebook 2) - "False\n" ->>>>>>> 21ac87d3 (Update regex notebook) - ] - } - ], - "source": [ - "rule_yaml = \"\"\"---\n", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "filter: 'data_stream.type: /\\\\/lo.*/' \n", - " \n", -======= - "filter: 'data_stream.type: \".*lo.*\"' \n", - "regex_fields:\n", - " - \"data_stream.type\"\n", ->>>>>>> 21ac87d3 (Update regex notebook) -======= - "filter: 'data_stream.type: /\\/.*lo.*/' \n", ->>>>>>> 32bf3e59 (Update regex notebook 2) -======= - "filter: 'data_stream.type: /\\\\/.*lo.*/' \n", -======= - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": "### Fehlerwarnung mit nur einem Escape." - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": [ - "rule_yaml = \"\"\"---\n", - "filter: 'data_stream.type: /\\/lo.*/' \n", ->>>>>>> ec595eb3 (Cleaning up) - " \n", ->>>>>>> 63a05f12 (Update regex notebook 3) - "concatenator:\n", - " source_fields:\n", - " - data_stream.type\n", - " - data_stream.dataset\n", - " - data_stream.namespace\n", - " target_field: _index\n", - " separator: \"-\"\n", - " overwrite_target: false\n", - " delete_source_fields: false\n", - "\"\"\"\n", -<<<<<<< HEAD -<<<<<<< HEAD - "concat_with_rule(rule_yaml)" - ] -======= - "\n", - "concat_with_rule(rule_yaml)\n" -======= - "concat_with_rule(rule_yaml)" ->>>>>>> 32bf3e59 (Update regex notebook 2) - ] - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": "### Zweimal Escapen ist richtig und funktioniert. Würde sich allerdings nicht eins zu eins in Opensearch kopieren lassen. " - }, - { - "cell_type": "code", - "execution_count": 87, -======= ->>>>>>> 78a96b0d (Improving Notebook 2.) - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", -<<<<<<< HEAD - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n", - "False\n" -======= - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" ->>>>>>> 78a96b0d (Improving Notebook 2.) - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> 0a454895 (Improving Notebook 3.) "source": [ "rule_yaml = \"\"\"---\n", "filter: 'data_stream.type: /\\\\/lo.*/' \n", @@ -448,17 +165,6 @@ "\"\"\"\n", "concat_with_rule(rule_yaml)" ] -<<<<<<< HEAD - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] ->>>>>>> 21ac87d3 (Update regex notebook) -======= ->>>>>>> 78a96b0d (Improving Notebook 2.) } ], "metadata": { From dcf0d09d5b1017489d67ebcd1ecabb6d45021fca Mon Sep 17 00:00:00 2001 From: "MoessnerFabian(Group)" Date: Thu, 9 Jan 2025 10:58:02 +0100 Subject: [PATCH 23/23] Adjusting Notebook to new format. --- .../notebooks/processor_examples/regex.ipynb | 54 +++++++++++++++---- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/doc/source/development/notebooks/processor_examples/regex.ipynb b/doc/source/development/notebooks/processor_examples/regex.ipynb index 80b2b1a0c..ab109ca53 100644 --- a/doc/source/development/notebooks/processor_examples/regex.ipynb +++ b/doc/source/development/notebooks/processor_examples/regex.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -62,11 +62,12 @@ "processor_config = {\n", " \"myconcatenator\":{ \n", " \"type\": \"concatenator\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", + " \"rules\": [str(rule_path), \"/dev\"],\n", " }\n", " }\n", "\n", + "concatenator = Factory.create(processor_config)\n", + "\n", "def concat_with_rule(rule_yaml):\n", " mydocument = deepcopy(document)\n", " if rule_file.exists():\n", @@ -87,9 +88,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Deprecated]: regex_fields are no longer necessary. Use Lucene regex annotation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" + ] + } + ], "source": [ "rule_yaml = \"\"\"---\n", "filter: 'data_stream.type: \".*lo.*\"'\n", @@ -106,6 +123,7 @@ " delete_source_fields: false\n", "\"\"\"\n", "\n", + "\n", "concat_with_rule(rule_yaml)\n" ] }, @@ -118,9 +136,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" + ] + } + ], "source": [ "rule_yaml = \"\"\"---\n", "filter: 'data_stream.type: /.*log.*/' \n", @@ -146,9 +173,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': '/logs/'}, '_op_type': 'create', '_index': '/logs/-windows-devopslab'}\n" + ] + } + ], "source": [ "rule_yaml = \"\"\"---\n", "filter: 'data_stream.type: /\\\\/lo.*/' \n",