diff --git a/CHANGELOG.md b/CHANGELOG.md
index decc97519..91f8458c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@
 
 ### Features
 
+* new documentation part with security best practices which compiles to `user_manual/security_best_practices.html`
+* also comes with an Excel export of the given best practices as a checklist
+
 ### Improvements
 
 ### Bugfix
diff --git a/doc/source/_static/css/custom_theme.css b/doc/source/_static/css/custom_theme.css
index 4636697ae..0c85c2cfb 100644
--- a/doc/source/_static/css/custom_theme.css
+++ b/doc/source/_static/css/custom_theme.css
@@ -1,5 +1,9 @@
 @import url("theme.css");
 
+p {
+    margin: 0 0 10px;
+}
+
 .wy-nav-content {
     max-width: 100% !important;
 }
@@ -27,3 +31,29 @@
 .rst-content div[class^=highlight] div[class^=highlight],
 .rst-content pre.literal-block div[class^=highlight] {
     margin: 0 !important;
 }
+
+
+.security-best-practice {
+    background: #f3e9ff !important;
+}
+
+.security-best-practice .admonition-title {
+    background: #6d259d;
+}
+
+#security-best-practices .security-best-practice {
+    background: inherit !important;
+    padding: 0;
+}
+
+#security-best-practices .admonition-title {
+    display: none;
+}
+
+#security-best-practices .topic {
+    margin-top: 0;
+    font-weight: 700;
+    font-family: Roboto Slab, ff-tisa-web-pro, Georgia, Arial, sans-serif;
+    font-size: 150%;
+    padding: 0 0 15px 0;
+}
diff --git a/doc/source/_templates/defaults-renderer.tmpl b/doc/source/_templates/defaults-renderer.tmpl
index 2902a883f..e63bf33ec 100644
--- a/doc/source/_templates/defaults-renderer.tmpl
+++ b/doc/source/_templates/defaults-renderer.tmpl
@@ -3,5 +3,5 @@
 DEFAULTS:
 
 {% for key, value in data.DEFAULTS.items() %}
-- {{ key }}: {{ value }}
+- :code:`{{ key }}`: :code:`{{ value }}`
 {% endfor %}
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 71dbb3da0..e563def09 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -16,8 +16,10 @@
 import os
 import sys
 from datetime import date
+from pathlib import Path
 
 sys.path.insert(0, os.path.abspath("../.."))
+sys.path.append(os.path.abspath("./custom_extensions"))
 
 from importlib.metadata import version as get_versions
 
@@ -30,6 +32,12 @@ def skip_tests(app, what, name, obj, skip, options):
 
 def setup(app):
     app.connect("autodoc-skip-member", skip_tests)
+    # Needed to trick sphinx into believing that this file exists already, even though it is
+    # automatically generated by the security_best_practices extension. The path
+    # "_static/security-best-practices-check-list.xlsx" must match the reference in
+    # doc/source/user_manual/security_best_practices.rst. The filename is fixed as it is needed
+    # by the extension to reference the download.
+    Path(f"{app.srcdir}/_static/security-best-practices-check-list.xlsx").touch()
 
 
 # -- Project information -----------------------------------------------------
 
@@ -61,6 +69,7 @@
     "nbsphinx",
     "IPython.sphinxext.ipython_console_highlighting",
     "sphinx_copybutton",
+    "security_best_practices",
 ]
 
 extensions.append("sphinx.ext.todo")
diff --git a/doc/source/custom_extensions/security_best_practices.py b/doc/source/custom_extensions/security_best_practices.py
new file mode 100644
index 000000000..033f469f7
--- /dev/null
+++ b/doc/source/custom_extensions/security_best_practices.py
@@ -0,0 +1,229 @@
+"""
+Security Best Practices
+=======================
+
+Sphinx extension to enable and list security best practices.
+
+Derived from the original documentation:
+https://www.sphinx-doc.org/en/master/development/tutorials/todo.html
+
+Usage
+-----
+
+The extension enables two different rst directives. The first acts like a single note/admonition
+and lets you describe a current best practice somewhere in the documentation.
+An example would look like:
+
+.. code-block:: rst
+
+    .. security-best-practice::
+        :title: Example Best Practice
+        :location: configuration.example.param
+        :suggested-value: True
+
+        The example.param should always be set to true
+
+The options `location` and `suggested-value` are optional and are only used to fill the Excel
+checklist; they are not rendered in the actual sphinx documentation.
+
+The second directive collects all these admonitions and creates a list of them.
+It can simply be added to a file with the following snippet:
+
+.. code-block:: rst
+
+    .. security-best-practices-list::
+
+Lastly, the extension generates an Excel sheet with the best practices as a checklist.
+In order to expose it in the documentation you have to use the following resource link:
+
+.. code-block:: rst
+
+    :download:`Best Practice Check List <../_static/security-best-practices-check-list.xlsx>`
+
+Note that the filepath and name must match this exact example and that the sphinx config needs to
+create this file in the docs source directory.
+
+Known limitations
+-----------------
+
+At the moment it is not possible to add `:ref:` links to security best practice admonitions
+when the `security-best-practices-list` directive is used.
+When creating the list it is not yet possible to resolve the links, leading to an unknown pending
+xref exception.
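+
+Registration
+------------
+
+The extension is registered in :code:`conf.py`, mirroring the changes in this commit: the
+:code:`custom_extensions` directory is added to :code:`sys.path`, the extension is appended to
+the :code:`extensions` list and the download target is pre-created. A minimal sketch:
+
+.. code-block:: python
+
+    sys.path.append(os.path.abspath("./custom_extensions"))
+    extensions = [..., "security_best_practices"]
+    Path(f"{app.srcdir}/_static/security-best-practices-check-list.xlsx").touch()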
+"""
+
+import pandas as pd
+from docutils import nodes
+from docutils.parsers.rst import Directive, directives
+from openpyxl.styles import Alignment
+from sphinx.application import Sphinx
+from sphinx.locale import _
+from sphinx.util.docutils import SphinxDirective
+
+
+class SecurityBestPractice(nodes.Admonition, nodes.Element):
+    """Admonition for Security Best Practices"""
+
+    def __init__(self, *args, **kwargs):
+        super(SecurityBestPractice, self).__init__(*args, **kwargs)
+        self.attributes.update({"classes": ["security-best-practice"]})
+
+
+class SecurityBestPracticesLists(nodes.General, nodes.Element):
+    """Placeholder for a list of security best practices"""
+
+    pass
+
+
+def visit_best_practice_node(self, node):
+    self.visit_admonition(node)
+
+
+def depart_best_practice_node(self, node):
+    self.depart_admonition(node)
+
+
+class BestPracticeListDirective(Directive):
+    """Initializer for the Security Best Practices List"""
+
+    def run(self):
+        return [SecurityBestPracticesLists("")]
+
+
+class BestPracticeDirective(SphinxDirective):
+    """
+    Initializer for a Security Best Practice. The run method is triggered for every security
+    best practice admonition"""
+
+    has_content = True
+    option_spec = {
+        "title": directives.unchanged_required,
+        "location": directives.unchanged,
+        "suggested-value": directives.unchanged,
+    }
+
+    def run(self):
+        targetid = "sbp-%d" % self.env.new_serialno("sbp")  # sbp = security best practice
+        targetnode = nodes.target("", "", ids=[targetid])
+        title = "No title provided"
+        if "title" in self.options:
+            title = self.options["title"]
+        node = SecurityBestPractice("\n".join(self.content))
+        admonition_title = f"Security Best Practice - {title}"
+        node += nodes.title(_(admonition_title), _(admonition_title))
+        self.state.nested_parse(self.content, self.content_offset, node)
+        if not hasattr(self.env, "all_security_best_practices"):
+            self.env.all_security_best_practices = []
+        self.env.all_security_best_practices.append(
+            {
+                "docname": self.env.docname,
+                "lineno": self.lineno,
+                "best_practice": node.deepcopy(),
+                "target": targetnode,
+                "meta": {
+                    "title": title,
+                    "location": self.options.get("location", ""),
+                    "suggested-value": self.options.get("suggested-value", ""),
+                },
+            }
+        )
+        return [targetnode, node]
+
+
+def purge_best_practice(app, env, docname):
+    if not hasattr(env, "all_security_best_practices"):
+        return
+    env.all_security_best_practices = [
+        node for node in env.all_security_best_practices if node["docname"] != docname
+    ]
+
+
+def merge_best_practice(app, env, docnames, other):
+    if not hasattr(env, "all_security_best_practices"):
+        env.all_security_best_practices = []
+    if hasattr(other, "all_security_best_practices"):
+        env.all_security_best_practices.extend(other.all_security_best_practices)
+
+
+def process_nodes(app, doctree, fromdocname):
+    """
+    Builds a list of all security best practices with back references to the original
+    admonition.
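+
+    For every placeholder node inserted by the :code:`security-best-practices-list` directive the
+    collected admonitions are inserted, each preceded by a topic node carrying its title and
+    followed by a back reference to the original location. Afterwards the Excel checklist is
+    regenerated via :code:`create_xls_checklist`.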
+ """ + env = app.builder.env + if not hasattr(env, "all_security_best_practices"): + env.all_security_best_practices = [] + for node in doctree.findall(SecurityBestPracticesLists): + content = [] + for node_info in env.all_security_best_practices: + title = nodes.topic() + title += nodes.Text(node_info.get("meta").get("title")) + back_reference = create_back_reference(app, fromdocname, node_info) + content.extend((title, node_info["best_practice"], back_reference)) + node.replace_self(content) + create_xls_checklist(app, env) + + +def create_xls_checklist(app, env): + description = [] + for node in env.all_security_best_practices: + meta_info = node.get("meta") + text = node.get("best_practice").rawsource + description.append( + { + "Topic": meta_info.get("title"), + "Requirement": text, + "Configuration Location": meta_info.get("location"), + "Suggested Value": meta_info.get("suggested-value"), + "Is": "", + "Comment": "", + } + ) + dataframe = pd.DataFrame(description) + download_file_name = "security-best-practices-check-list" + download_file_obj = [env.dlfiles[key] for key in env.dlfiles if download_file_name in key][0] + download_file_path = download_file_obj[1] + full_file_path = f"{app.outdir}/_downloads/{download_file_path}" + writer = pd.ExcelWriter(full_file_path, engine="openpyxl") + dataframe.to_excel(writer, index=False, sheet_name="Security Best Practices") + worksheet = writer.sheets["Security Best Practices"] + column_width = {"A": 60, "B": 60, "C": 30, "D": 30, "E": 30, "F": 30, "G": 30} + for column, width in column_width.items(): + worksheet.column_dimensions[column].width = width + worksheet["B2"].alignment = Alignment(wrap_text=True) + writer.close() + + +def create_back_reference(app, fromdocname, node_info): + """Creates a sphinx paragraph node containing a reference to the original admonition.""" + back_reference = nodes.paragraph() + newnode = nodes.reference("", "") + reference_text = "Reference to original description" + innernode = nodes.emphasis(_(reference_text), _(reference_text)) + newnode["refdocname"] = node_info["docname"] + newnode["refuri"] = app.builder.get_relative_uri(fromdocname, node_info["docname"]) + newnode["refuri"] += "#" + node_info["target"]["refid"] + newnode.append(innernode) + back_reference += newnode + return back_reference + + +def setup(app: Sphinx): + """Initializer for the Security Best Practices Extension""" + app.add_node( + SecurityBestPractice, + html=(visit_best_practice_node, depart_best_practice_node), + latex=(visit_best_practice_node, depart_best_practice_node), + text=(visit_best_practice_node, depart_best_practice_node), + ) + app.add_directive("security-best-practice", BestPracticeDirective) + app.add_directive("security-best-practices-list", BestPracticeListDirective) + app.connect("doctree-resolved", process_nodes) + app.connect("env-purge-doc", purge_best_practice) + app.connect("env-merge-info", merge_best_practice) + + return { + "version": "0.1", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/doc/source/development/programaticly_start_logprep.rst b/doc/source/development/programaticly_start_logprep.rst index 646490110..4c1426ab7 100644 --- a/doc/source/development/programaticly_start_logprep.rst +++ b/doc/source/development/programaticly_start_logprep.rst @@ -1,7 +1,7 @@ Start Logprep programaticly =========================== -It is possible to make use of the Logprep :ref:`pipeline_config` in plain python, without any +It is possible to make use of the Logprep pipeline in plain 
python, without any input or output connectors or further configurations. If on the other hand you want to make use of the input connector preprocessors you have to at least use an input connector like the DummyInput. diff --git a/doc/source/user_manual/configuration/index.rst b/doc/source/user_manual/configuration/index.rst index 9ee760a78..c683d1b39 100644 --- a/doc/source/user_manual/configuration/index.rst +++ b/doc/source/user_manual/configuration/index.rst @@ -16,7 +16,6 @@ Configuration input output - pipeline processor rules getter diff --git a/doc/source/user_manual/configuration/input.rst b/doc/source/user_manual/configuration/input.rst index 0c9b02b38..c13dd18a1 100644 --- a/doc/source/user_manual/configuration/input.rst +++ b/doc/source/user_manual/configuration/input.rst @@ -4,6 +4,26 @@ Input ===== +.. security-best-practice:: + :title: Input Connectors + :location: config.input..type and config.input..preprocessing.hmac + :suggested-value: and + + It is advised to only use the :code:`ConfluentKafkaInput`, :code:`HttpConnector` or + :code:`FileInput` as input connectors in production environments. + The connectors :code:`DummyInput`, :code:`JsonInput` and :code:`JsonlInput` are mainly designed + for debugging purposes. + + Furthermore, it is suggested to enable the :code:`HMAC` preprocessor to ensure no tempering of + processed events. + + .. code:: yaml + + hmac: + target: + key: + output_field: HMAC + .. automodule:: logprep.connector.confluent_kafka.input .. autoclass:: logprep.connector.confluent_kafka.input.ConfluentKafkaInput.Config :members: @@ -31,7 +51,7 @@ Input :undoc-members: :inherited-members: :noindex: - + .. automodule:: logprep.connector.jsonl.input .. autoclass:: logprep.connector.jsonl.input.JsonlInput.Config :members: diff --git a/doc/source/user_manual/configuration/output.rst b/doc/source/user_manual/configuration/output.rst index 1c946f7aa..c9e5ae80a 100644 --- a/doc/source/user_manual/configuration/output.rst +++ b/doc/source/user_manual/configuration/output.rst @@ -6,12 +6,24 @@ Output It is possible to define multiple outputs as a dictionary of :code:`: `. If you define multiple outputs with the attribute :code:`default: true` then be aware, that -logprep only guaranties that one output has received data by calling the :code:`batch_finished_callback`. +logprep only guaranties that one output has received data by calling the +:code:`batch_finished_callback`. -We recommed to only use one default output and define other outputs only for storing custom extra data. +.. security-best-practice:: + :title: Output Connectors + :location: config.output..type + :suggested-value: + + Similar to the input connectors there is a list of available output connectors of which some + are only meant for debugging, namely: :code:`ConsoleOutput` and :code:`JsonlOutput`. + It is advised to not use these in production environments. + + When configuring multiple outputs it is also recommend to only use one default output and to + define other outputs only for storing custom extra data. + Otherwise it cannot be guaranteed that all events are safely stored. .. automodule:: logprep.connector.confluent_kafka.output -.. autoclass:: logprep.connector.confluent_kafka.input.ConfluentKafkaInput.Config +.. 
autoclass:: logprep.connector.confluent_kafka.output.ConfluentKafkaOutput.Config :members: :undoc-members: :inherited-members: diff --git a/doc/source/user_manual/configuration/pipeline.rst b/doc/source/user_manual/configuration/pipeline.rst deleted file mode 100644 index fdbc1e7bd..000000000 --- a/doc/source/user_manual/configuration/pipeline.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. _pipeline_config: - -========= -Pipelines -========= - -The pipeline is configured as a list of objects under the option `pipeline`. -The processors are being processed in the order given in `pipeline`. -The field `type` decides which processor will be created. -The descriptor of the object will be used in the log messages of the corresponding processor. -Due to this it is possible to attribute log messages to their corresponding processor even if multiple processors of the same type exist in the pipeline. - -Example -------- - -.. literalinclude:: /../../quickstart/exampledata/config/pipeline.yml - :language: yaml - :start-after: port: 8000 - :end-before: input: diff --git a/doc/source/user_manual/index.rst b/doc/source/user_manual/index.rst index bcf40898d..b8da91383 100644 --- a/doc/source/user_manual/index.rst +++ b/doc/source/user_manual/index.rst @@ -10,3 +10,4 @@ User Manual verification testing_rules configuration/index + security_best_practices diff --git a/doc/source/user_manual/introduction.rst b/doc/source/user_manual/introduction.rst index 3d7a5e446..f9e2a9b89 100644 --- a/doc/source/user_manual/introduction.rst +++ b/doc/source/user_manual/introduction.rst @@ -24,10 +24,6 @@ Pipelines A pipeline consists of a sequence of multiple processing steps (processors). The main idea is that each processor performs a simple task that is easy to carry out. -How are Pipelines Implemented? ------------------------------- - -This program generates a list of processors according to the configuration. Each incoming event is being processed by these processors in the configured order. The processors modify the event that is being `passed` through. Thus, the order of the processors is relevant. @@ -40,22 +36,4 @@ Therefore, results of a processor should not depend on other events. Processors ========== -.. autosummary:: - - logprep.processor.clusterer.processor - logprep.processor.datetime_extractor.processor - logprep.processor.deleter.processor - logprep.processor.domain_label_extractor.processor - logprep.processor.domain_resolver.processor - logprep.processor.dropper.processor - logprep.processor.generic_adder.processor - logprep.processor.generic_resolver.processor - logprep.processor.geoip_enricher.processor - logprep.processor.labeler.processor - logprep.processor.list_comparison.processor - logprep.processor.normalizer.processor - logprep.processor.pre_detector.processor - logprep.processor.pseudonymizer.processor - logprep.processor.selective_extractor.processor - logprep.processor.template_replacer.processor - logprep.processor.hyperscan_resolver.processor +A list of all available processors can be found under the configuration section :ref:`processors`. diff --git a/doc/source/user_manual/security_best_practices.rst b/doc/source/user_manual/security_best_practices.rst new file mode 100644 index 000000000..b9cd69e47 --- /dev/null +++ b/doc/source/user_manual/security_best_practices.rst @@ -0,0 +1,10 @@ +Security Best Practices +======================= + +Here you find a list of all security best practices that should be considered when running logprep +in a production environment. 
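+
+New entries are written directly at the documentation of the corresponding feature using the
+:code:`security-best-practice` directive of the custom sphinx extension. A minimal sketch (title
+and body are placeholders):
+
+.. code-block:: rst
+
+    .. security-best-practice::
+        :title: Example Best Practice
+
+        A short description of the best practice.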
+
+To compare your production environment against these best practices we provide a
+:download:`Best Practice Check List <../_static/security-best-practices-check-list.xlsx>` for your use.
+
+.. security-best-practices-list::
diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py
index c1cddfa1f..9508dce40 100644
--- a/logprep/abc/processor.py
+++ b/logprep/abc/processor.py
@@ -61,7 +61,7 @@ class Config(Component.Config):
             default=None, validator=[validators.optional(validators.instance_of(str))]
         )
         """Path to a JSON file with a valid rule tree configuration.
-        For string format see :ref:`getters`"""
+        For string format see :ref:`getters`."""
         apply_multiple_times: Optional[bool] = field(
             default=False, validator=[validators.optional(validators.instance_of(bool))]
         )
diff --git a/logprep/connector/confluent_kafka/input.py b/logprep/connector/confluent_kafka/input.py
index df31044b8..8cae34d0a 100644
--- a/logprep/connector/confluent_kafka/input.py
+++ b/logprep/connector/confluent_kafka/input.py
@@ -225,8 +225,8 @@ class Config(Input.Config):
         - bootstrap.servers (STRING): a comma separated list of kafka brokers
         - group.id (STRING): a unique identifier for the consumer group
 
-        For additional configuration options and their description see:
-        
+        For additional configuration options see the official
+        `librdkafka configuration `_.
 
     .. datatemplate:import-module:: logprep.connector.confluent_kafka.input
        :template: defaults-renderer.tmpl
diff --git a/logprep/connector/confluent_kafka/output.py b/logprep/connector/confluent_kafka/output.py
index 1b950ab01..daf03b71a 100644
--- a/logprep/connector/confluent_kafka/output.py
+++ b/logprep/connector/confluent_kafka/output.py
@@ -3,8 +3,7 @@
 ====================
 
 This section contains the connection settings for ConfluentKafka, the default
-index, the error index and a buffer size. Documents are sent in batches to Elasticsearch to reduce
-the amount of times connections are created.
+index, the error index and a buffer size.
 
 Example
 ^^^^^^^
@@ -144,7 +143,9 @@ class Config(Output.Config):
         """Confluent Kafka Output Config"""
 
         topic: str = field(validator=validators.instance_of(str))
+        """The topic to which the processed events should be written."""
         error_topic: str
+        """The topic to which events that could not be processed successfully should be written."""
         flush_timeout: float
         send_timeout: int = field(validator=validators.instance_of(int), default=0)
         kafka_config: Optional[dict] = field(
@@ -158,14 +159,14 @@ class Config(Output.Config):
             ],
             factory=dict,
         )
-        """ Kafka configuration for the kafka client. 
+        """ Kafka configuration for the kafka client.
         At minimum the following keys must be set:
-        
+
         - bootstrap.servers (STRING): a comma separated list of kafka brokers
-        
-        For additional configuration options and their description see: 
+
+        For additional configuration options and their description see:
         
-        
+
         .. datatemplate:import-module:: logprep.connector.confluent_kafka.output
            :template: defaults-renderer.tmpl
diff --git a/logprep/connector/elasticsearch/output.py b/logprep/connector/elasticsearch/output.py
index 4d2972ac9..ff75eb971 100644
--- a/logprep/connector/elasticsearch/output.py
+++ b/logprep/connector/elasticsearch/output.py
@@ -55,7 +55,14 @@ class ElasticsearchOutput(Output):
 
     @define(kw_only=True, slots=False)
     class Config(Output.Config):
-        """Elastic/Opensearch Output Config"""
+        """Elastic/Opensearch Output Config
+
+        .. security-best-practice::
+            :title: Output Connectors - ElasticsearchOutput
+
+            It is suggested to enable a secure message transfer by setting :code:`user`,
+            :code:`secret` and a valid :code:`ca_cert`.
+        """
 
         hosts: List[str] = field(
             validator=validators.deep_iterable(
@@ -80,7 +87,7 @@ class Config(Output.Config):
             converter=(lambda x: x * 10**6 if x else None),
             default=None,
         )
-        """(Optional) Maximum estimated size of a document in MB before discarding it if it causes 
+        """(Optional) Maximum estimated size of a document in MB before discarding it if it causes
         an error."""
         timeout: int = field(validator=validators.instance_of(int), default=500)
         """(Optional) Timeout for the connection (default is 500ms)."""
@@ -94,7 +101,7 @@ class Config(Output.Config):
         ca_cert: Optional[str] = field(validator=validators.instance_of(str), default="")
         """(Optional) Path to a SSL ca certificate to verify the ssl context."""
         flush_timeout: Optional[int] = field(validator=validators.instance_of(int), default=60)
-        """(Optional) Timout after :code:`message_backlog` is flushed if 
+        """(Optional) Timeout after :code:`message_backlog` is flushed if
         :code:`message_backlog_size` is not reached."""
 
     __slots__ = ["_message_backlog", "_size_error_pattern"]
diff --git a/logprep/connector/http/input.py b/logprep/connector/http/input.py
index fe56ab444..cea672a1a 100644
--- a/logprep/connector/http/input.py
+++ b/logprep/connector/http/input.py
@@ -323,7 +323,7 @@ class Config(Input.Config):
     )
     """Configure endpoint routes with a Mapping of a path to an endpoint. Possible endpoints
     are: :code:`json`, :code:`jsonl`, :code:`plaintext`.
-    
+
     .. autoclass:: logprep.connector.http.input.PlaintextHttpEndpoint
        :noindex:
     .. autoclass:: logprep.connector.http.input.JSONLHttpEndpoint
@@ -341,9 +341,15 @@ class Config(Input.Config):
     """
 
     collect_meta: str = field(validator=validators.instance_of(bool), default=True)
-    """Defines if metadata should be collected 
+    """Defines if metadata should be collected
     - :code:`True`: Collect metadata
     - :code:`False`: Won't collect metadata
+
+    .. security-best-practice::
+        :title: Input Connector - HttpConnector
+
+        It is suggested to enable the collection of metadata (:code:`collect_meta: True`) to
+        ensure transparency of the incoming events.
     """
 
     metafield_name: str = field(validator=validators.instance_of(str), default="@metadata")
diff --git a/logprep/connector/opensearch/output.py b/logprep/connector/opensearch/output.py
index f0d0bd0a9..76f26c67a 100644
--- a/logprep/connector/opensearch/output.py
+++ b/logprep/connector/opensearch/output.py
@@ -70,7 +70,15 @@ class OpensearchOutput(ElasticsearchOutput):
 
     @define(kw_only=True, slots=False)
     class Config(ElasticsearchOutput.Config):
-        """Config for OpensearchOutput."""
+        """
+        Config for OpensearchOutput.
+
+        .. security-best-practice::
+            :title: Output Connectors - OpensearchOutput
+
+            It is suggested to enable a secure message transfer by setting :code:`user`,
+            :code:`secret` and a valid :code:`ca_cert`.
+        """
 
         parallel_bulk: bool = field(default=True, validator=validators.instance_of(bool))
         """Configure if all events in the backlog should be send, in parallel, via multiple threads
diff --git a/logprep/connector/s3/output.py b/logprep/connector/s3/output.py
index adb3b3edc..a62fcc741 100644
--- a/logprep/connector/s3/output.py
+++ b/logprep/connector/s3/output.py
@@ -69,7 +69,15 @@ class S3Output(Output):
 
     @define(kw_only=True, slots=False)
     class Config(Output.Config):
-        """S3 Output Config"""
+        """
+        S3 Output Config
+
+        .. security-best-practice::
+            :title: Output Connectors - S3Output
+
+            It is suggested to activate SSL for a secure connection. In order to do that set
+            :code:`use_ssl` and the corresponding :code:`ca_cert`.
+        """
 
         endpoint_url: str = field(validator=validators.instance_of(str))
         """Address of s3 endpoint in the format SCHEMA:HOST:PORT."""
@@ -110,7 +118,7 @@ class Config(Output.Config):
         call_input_callback: Optional[bool] = field(
             validator=validators.instance_of(bool), default=True
         )
-        """The input callback is called after the maximum backlog size has been reached 
+        """The input callback is called after the maximum backlog size has been reached
         if this is set to True (optional)"""
 
     @define(kw_only=True)
diff --git a/logprep/processor/amides/processor.py b/logprep/processor/amides/processor.py
index 671347f67..63ed54c3d 100644
--- a/logprep/processor/amides/processor.py
+++ b/logprep/processor/amides/processor.py
@@ -15,12 +15,12 @@
 
 Overview of the AMIDES architecture.
 
-The machine learning components of AMIDES are trained using the current SIEM rule set and 
-historical benign events. Incoming events are transformed into feature vectors by the feature 
+The machine learning components of AMIDES are trained using the current SIEM rule set and
+historical benign events. Incoming events are transformed into feature vectors by the feature
 extraction component. During operation, features learned during the training phase will be re-used
 by the feature extraction component.
 
-Feature vectors are then passed to the Misuse Classification component which classifies events as 
-malicious or benign. In case of a malicious result, the feature vector is passed to the Rule 
+Feature vectors are then passed to the Misuse Classification component which classifies events as
+malicious or benign. In case of a malicious result, the feature vector is passed to the Rule
 Attribution component which generates a ranked list of SIEM rules potentially evaded by the event.
 Finally, results generated by the Rule Attribution component and conventional rule matching results
 can be correlated for alert generation.
@@ -32,10 +32,10 @@
 to create rules for other event types that provide process command lines, e.g.
 Process Creation events generated by Windows Security Auditing.
 
-Misuse classification is performed by the :code:`MisuseDetector` class. Instances of the 
-:code:`MisuseDetector` contain the model for misuse classification, which includes the trained 
-classifier instance, the corresponding feature extractor, and an additional scaler to transform 
-classifier results into the pre-defined output range between 0 and 1. The processor configuration 
+Misuse classification is performed by the :code:`MisuseDetector` class. Instances of the
+:code:`MisuseDetector` contain the model for misuse classification, which includes the trained
+classifier instance, the corresponding feature extractor, and an additional scaler to transform
+classifier results into the pre-defined output range between 0 and 1. The processor configuration
 parameter :code:`decision_threshold` is used to fine-tune the classification results produced by
 the misuse detector.
@@ -117,8 +117,16 @@ class Config(Processor.Config):
 
         num_rule_attributions: int = field(default=10, validator=validators.instance_of(int))
         """Number of rule attributions returned in case of a positive misuse detection result."""
         models_path: str = field(validator=validators.instance_of(str))
-        """Path or URI of the archive (.zip) containing the models used by the misuse detector
-        and the rule attributor."""
+        """
+        Path or URI of the archive (.zip) containing the models used by the misuse detector
+        and the rule attributor.
+
+        .. security-best-practice::
+            :title: Processor - Amides Model
+
+            Ensure that you only use models from trusted sources, as a model can be used to
+            inject Python code into the runtime.
+        """
 
     @define(kw_only=True)
     class Metrics(Processor.Metrics):
diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py
index 485a3b994..d85a941b6 100644
--- a/logprep/processor/domain_resolver/processor.py
+++ b/logprep/processor/domain_resolver/processor.py
@@ -15,7 +15,6 @@
         - tests/testdata/rules/specific/
         generic_rules:
         - tests/testdata/rules/generic/
-        hyperscan_db_path: tmp/path/scan.db
         tld_list: tmp/path/tld.dat
         timeout: 0.5
         max_cached_domains: 20000
diff --git a/logprep/processor/generic_adder/processor.py b/logprep/processor/generic_adder/processor.py
index a66034bd9..2e47652bb 100644
--- a/logprep/processor/generic_adder/processor.py
+++ b/logprep/processor/generic_adder/processor.py
@@ -19,7 +19,7 @@
         sql_config:
             user: example_user
             password: example_password
-            host: "127.0.0.1
+            host: "127.0.0.1"
            database: example_db
            table: example_table
            target_column: example_column
@@ -103,6 +103,12 @@ class Config(Processor.Config):
           (default: ./sql_update.lock).
         - `db_file_path` - Path to a file used to store the SQL table obtained by the generic adder
           (default: ./sql_db_table.json).
+
+        .. security-best-practice::
+            :title: Processor - GenericAdder
+
+            When using an SQL database to enrich events, ensure that the database is protected
+            with user credentials.
         """
 
     rule_class = GenericAdderRule
diff --git a/logprep/processor/labeler/processor.py b/logprep/processor/labeler/processor.py
index 8046b851a..fe27fbca8 100644
--- a/logprep/processor/labeler/processor.py
+++ b/logprep/processor/labeler/processor.py
@@ -2,18 +2,6 @@
 Labeler
 =======
 
-Labeling-Schema and validating Rules
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The validation of schemata and rules can be started separately by executing:
-
-.. code-block:: bash
-
-    PYTHONPATH="." python3 logprep/util/schema_and_rule_checker.py $LABELING_SCHEMA $RULES
-
-Where :code:`$LABELING_SCHEMA` is the path to a labeling schema file and :code:`$RULES` is the path
-to a directory with rule files.
-
 Processor Configuration
 ^^^^^^^^^^^^^^^^^^^^^^^
@@ -39,7 +27,7 @@
 """
 
 from logging import Logger
-from typing import List, Optional
+from typing import Optional
 
 from attr import define, field, validators
diff --git a/logprep/processor/list_comparison/rule.py b/logprep/processor/list_comparison/rule.py
index 2d920c877..9b7f1fb63 100644
--- a/logprep/processor/list_comparison/rule.py
+++ b/logprep/processor/list_comparison/rule.py
@@ -32,7 +32,7 @@
 
 .. note::
 
-    Currently it is not possible to check in more than one source_field per rule
+    Currently, it is not possible to check more than one :code:`source_field` per rule.
 
 .. autoclass:: logprep.processor.list_comparison.rule.ListComparisonRule.Config
    :members:
diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py
index 73ce37340..2ca907fad 100644
--- a/logprep/processor/pseudonymizer/processor.py
+++ b/logprep/processor/pseudonymizer/processor.py
@@ -5,6 +5,13 @@
 The `pseudonymizer` is a processor that pseudonymizes certain fields of log messages to ensure
 privacy regulations can be adhered to.
 
+.. security-best-practice::
+    :title: Processor - Pseudonymizer
+
+    The `pseudonymizer` works with two public keys for different roles.
+    It is suggested to ensure that two different keys are being used such that the separation of
+    the roles can be maintained.
+
 Processor Configuration
 ^^^^^^^^^^^^^^^^^^^^^^^
 .. code-block:: yaml
@@ -39,7 +46,7 @@
 from functools import cached_property, lru_cache
 from itertools import chain
 from logging import Logger
-from typing import List, Optional, Pattern
+from typing import Optional, Pattern
 from urllib.parse import parse_qs, urlencode, urlparse
 
 from attrs import define, field, validators
diff --git a/logprep/processor/requester/processor.py b/logprep/processor/requester/processor.py
index 87320146c..8860712e1 100644
--- a/logprep/processor/requester/processor.py
+++ b/logprep/processor/requester/processor.py
@@ -5,6 +5,16 @@
 A processor to invoke http requests. Can be used to enrich events from an external api or to
 trigger external systems by and with event field values.
 
+.. security-best-practice::
+    :title: Processor - Requester
+
+    As the `requester` can execute arbitrary http requests it is advised to execute requests
+    only against known and trusted endpoints and to protect the communication with a valid
+    SSL certificate. Do so by setting a certificate path with the option :code:`cert`.
+    To ensure that the communication is trusted it is also recommended to set either an
+    :code:`Authorization` header or a corresponding authentication with a username and
+    password, via :code:`auth`.
+
 Processor Configuration
 ^^^^^^^^^^^^^^^^^^^^^^^
 .. code-block:: yaml
diff --git a/logprep/util/configuration.py b/logprep/util/configuration.py
index 038d9d23b..37f8173bb 100644
--- a/logprep/util/configuration.py
+++ b/logprep/util/configuration.py
@@ -6,26 +6,26 @@
 You can pass multiple configuration files via valid file paths or urls.
 
 .. code-block:: bash
+   :caption: Valid Run Examples
 
     logprep run /different/path/file.yml
-
-or
-
-.. code-block:: bash
-
     logprep run http://url-to-our-yaml-file-or-api
+    logprep run http://api/v1/pipeline http://api/v1/addition_processor_pipline /path/to/conector.yaml
 
-or
-
-.. code-block:: bash
-
-    logprep run http://api/v1/pipeline http://api/v1/addition_processor_pipline /path/to/conector.yaml
+.. security-best-practice::
+    :title: Configuration - Combining multiple configuration files
+
+    Consider that when using multiple configuration files logprep will reject all configuration
+    files if one cannot be retrieved or is not valid.
+    If using multiple files ensure that all can be loaded safely and that all endpoints (if
+    using http resources) are accessible.
 
 Configuration File Structure
 ----------------------------
 
 .. code-block:: yaml
-    :caption: full configuration file example
+    :caption: Example of a complete configuration file
 
     version: config-1.0
     process_count: 2
@@ -129,8 +129,6 @@
 They contain settings for each separate processor and connector.
 Details for configuring connectors are described in :ref:`output` and :ref:`input` and for
 processors in :ref:`processors`.
-General information about the configuration of the pipeline can be found
-in :ref:`pipeline_config`.
 
 It is possible to use environment variables in all configuration and rule files in all places.
@@ -139,6 +137,16 @@
 :code:`CI_`. Lowercase variables are ignored. Forbidden variable names are:
 :code:`["LOGPREP_LIST"]`, as it is already used internally.
 
+.. security-best-practice::
+    :title: Configuration Environment Variables
+
+    As it is possible to replace all configuration options with environment variables it is
+    recommended to use these especially for sensitive information like usernames, passwords,
+    secrets or hash salts.
+    Examples where this could be useful would be the :code:`key` for the hmac calculation (see
+    `input` > `preprocessing`) or the :code:`user`/:code:`secret` for the elastic-/opensearch
+    connectors.
+
 The following config file will be valid by setting the given environment variables:
 
 .. code-block:: yaml
@@ -316,7 +324,28 @@ class Configuration:
     If configured the configuration will only be reloaded if the configuration version changes.
     If http errors occurs on configuration reload `config_refresh_interval` is set to a quarter
     of the current `config_refresh_interval` until a minimum of 5 seconds is reached.
-    Defaults to :code:`None`, which means that the configuration will not be refreshed."""
+    Defaults to :code:`None`, which means that the configuration will not be refreshed.
+
+    .. security-best-practice::
+        :title: Configuration Refresh Interval
+        :location: config.config_refresh_interval
+        :suggested-value: <= 300
+
+        The refresh interval for the configuration shouldn't be set too high in production
+        environments.
+        It is suggested to not set a value higher than :code:`300` (5 min).
+        That way configuration updates are propagated fairly quickly instead of once a day.
+
+        It should also be noted that a new configuration file will be read as long as it is a
+        valid config.
+        There is no further check to ensure credibility.
+
+        In case a new configuration could not be retrieved successfully and the
+        :code:`config_refresh_interval` is already reduced automatically to 5 seconds it should
+        be noted that this could lead to a blocking behavior or a significant reduction in
+        performance as logprep keeps retrying to reload the configuration.
+        Because of that, ensure that the configuration endpoint is always available.
+    """
 
     process_count: int = field(
         validator=[validators.instance_of(int), validators.ge(1)], default=1, eq=False
     )
@@ -334,13 +363,31 @@ class Configuration:
     logger: dict = field(
         validator=validators.instance_of(dict), default={"level": "INFO"}, eq=False
     )
-    """Logger configuration. Defaults to :code:`{"level": "INFO"}`."""
+    """Logger configuration. Defaults to :code:`{"level": "INFO"}`.
+
+    .. security-best-practice::
+        :title: Logprep Log-Level
+        :location: config.logger.level
+        :suggested-value: INFO
+
+        The loglevel of logprep should be set to :code:`"INFO"` in production environments, as
+        the :code:`"DEBUG"` level could expose sensitive events into the log.
+    """
     input: dict = field(validator=validators.instance_of(dict), factory=dict, eq=False)
-    """Input connector configuration. Defaults to :code:`{}`."""
+    """
+    Input connector configuration. Defaults to :code:`{}`.
+    For detailed configurations see :ref:`input`.
+    """
     output: dict = field(validator=validators.instance_of(dict), factory=dict, eq=False)
-    """Output connector configuration. Defaults to :code:`{}`."""
+    """
+    Output connector configuration. Defaults to :code:`{}`.
+    For detailed configurations see :ref:`output`.
+    """
     pipeline: list[dict] = field(validator=validators.instance_of(list), factory=list, eq=False)
-    """Pipeline configuration. Defaults to :code:`[]`."""
+    """
+    Pipeline configuration. Defaults to :code:`[]`.
+    See :ref:`processors` for a detailed overview on how to configure a pipeline.
+    """
     metrics: MetricsConfig = field(
         validator=validators.instance_of(MetricsConfig),
         factory=MetricsConfig,
diff --git a/logprep/util/getter.py b/logprep/util/getter.py
index 7948ca607..9b05756d7 100644
--- a/logprep/util/getter.py
+++ b/logprep/util/getter.py
@@ -101,9 +101,22 @@ class HttpGetter(Getter):
 
     * Simple http target: :code:`http://your.target/file.yml`
     * Simple https target: :code:`https://your.target/file.json`
 
+    .. security-best-practice::
+        :title: HttpGetter
+        :location: any http resource
+        :suggested-value: MTLSCredential or OAuth2PasswordFlowCredentials
+
+        If resources are loaded via HttpGetters it is recommended to
+
+        - use a credential file to securely manage authentication
+        - preferably use the :code:`MTLSCredentials` or :code:`OAuth2PasswordFlowCredentials`
+          (with client-auth)
+        - always use HTTPS connections, as HTTPS is not enforced by logprep
+        - consider that the HttpGetter does not support pagination. If the resource is provided
+          by an endpoint with pagination it could lead to a loss of data.
+
     .. automodule:: logprep.util.credentials
        :no-index:
-
     """
 
     _credentials_registry: dict[str, Credentials] = {}
diff --git a/pyproject.toml b/pyproject.toml
index 4a734a0a2..dbc972de1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -110,6 +110,7 @@ doc = [
     "sphinx-copybutton",
     "nbsphinx",
     "ipython",
+    "openpyxl"
 ]
 
 [project.urls]