fkie-cad · herrfeder · Apr 19, 2024 · Apr 5, 2024 · Apr 5, 2024 · Apr 8, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,9 @@
 
 ### Features
 
+  * new documentation part with security best practices which compiles to `user_manual/security/best_practices.html`
+    * also comes with Excel export functionality for the given best practices
+
 ### Improvements
 
 ### Bugfix

diff --git a/doc/source/_static/css/custom_theme.css b/doc/source/_static/css/custom_theme.css
@@ -1,5 +1,9 @@
 @import url("theme.css");
 
+p {
+    margin: 0 0 10px;
+}
+
 .wy-nav-content {
     max-width: 100% !important;
 }
@@ -27,3 +31,29 @@
 .rst-content div[class^=highlight] div[class^=highlight], .rst-content pre.literal-block div[class^=highlight] {
     margin: 0 !important;
 }
+
+
+.security-best-practice {
+    background: #f3e9ff !important;
+}
+
+.security-best-practice .admonition-title {
+    background: #6d259d;
+}
+
+#security-best-practices .security-best-practice {
+    background: inherit !important;
+    padding: 0;
+}
+
+#security-best-practices .admonition-title {
+    display: none;
+}
+
+#security-best-practices .topic {
+    margin-top: 0;
+    font-weight: 700;
+    font-family: Roboto Slab, ff-tisa-web-pro, Georgia, Arial, sans-serif;
+    font-size: 150%;
+    padding: 0 0 15px 0;
+}
diff --git a/doc/source/_templates/defaults-renderer.tmpl b/doc/source/_templates/defaults-renderer.tmpl
@@ -3,5 +3,5 @@
 DEFAULTS:
 
 {% for key, value in data.DEFAULTS.items() %}
-- {{ key }}: {{ value }}
+- :code:`{{ key }}`: :code:`{{ value }}`
 {% endfor %}
diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -16,8 +16,10 @@
 import os
 import sys
 from datetime import date
+from pathlib import Path
 
 sys.path.insert(0, os.path.abspath("../.."))
+sys.path.append(os.path.abspath("./custom_extensions"))
 
 from importlib.metadata import version as get_versions
 
@@ -30,6 +32,12 @@ def skip_tests(app, what, name, obj, skip, options):
 
 def setup(app):
     app.connect("autodoc-skip-member", skip_tests)
+    # Needed to trick sphinx into believing that this file exists already, even though it is
+    # automatically generated by the security_best_practices extension. The path
+    # "_static/security-best-practices-check-list.xlsx" must match the reference in the
+    # doc/source/user_manual/security_best_practices.rst. The filename is fixed as it is needed
+    # by the extension to reference the download
+    Path(f"{app.srcdir}/_static/security-best-practices-check-list.xlsx").touch()
 
 
 # -- Project information -----------------------------------------------------
@@ -61,6 +69,7 @@ def setup(app):
     "nbsphinx",
     "IPython.sphinxext.ipython_console_highlighting",
     "sphinx_copybutton",
+    "security_best_practices",
 ]
 
 extensions.append("sphinx.ext.todo")

diff --git a/doc/source/custom_extensions/security_best_practices.py b/doc/source/custom_extensions/security_best_practices.py
@@ -0,0 +1,229 @@
+"""
+Security Best Practices
+=======================
+
+Sphinx Extension to enable and list security best practices
+
+Derived from the original documentation:
+https://www.sphinx-doc.org/en/master/development/tutorials/todo.html
+
+Usage
+-----
+
+The extension enables two different rst directives, the first acts like a single note/admonition
+and lets you describe a current best practice somewhere in the documentation.
+An example would look like:
+
+.. code-block:: rst
+
+    .. security-best-practice::
+       :title: Example Best Practice
+       :location: configuration.example.param
+       :suggested-value: True
+
+       The example.param should always be set to true
+
+The options `location` and `suggested-value` are optional and are only used to fill the Excel
+checklist; they are not rendered in the actual sphinx documentation.
+
+The second directive collects all these admonitions and creates a list of them.
+It can be added to a file with the following snippet:
+
+.. code-block:: rst
+
+    .. security-best-practices-list::
+
+Lastly, the extension generates an Excel sheet with the best practices as a checklist.
+In order to expose it in the documentation, you have to use the following resource link:
+
+.. code-block:: rst
+
+    :download:`Best Practice Check List <../_static/security-best-practices-check-list.xlsx>`
+
+Note that the filepath and name must match this exact example and that the sphinx config needs to
+create this file in the docs source directory.
+
+Known limitations
+-----------------
+
+At the moment it is not possible to add `:ref:` links to security best practice admonitions
+when the `security-best-practices-list` directive is used.
+The links cannot yet be resolved while the list is created, which leads to an unknown pending
+xref exception.
+"""
+
+import pandas as pd
+from docutils import nodes
+from docutils.parsers.rst import Directive, directives
+from openpyxl.styles import Alignment
+from sphinx.application import Sphinx
+from sphinx.locale import _
+from sphinx.util.docutils import SphinxDirective
+
+
+class SecurityBestPractice(nodes.Admonition, nodes.Element):
+    """Admonition for Security Best Practices"""
+
+    def __init__(self, *args, **kwargs):
+        super(SecurityBestPractice, self).__init__(*args, **kwargs)
+        self.attributes.update({"classes": ["security-best-practice"]})
+
+
+class SecurityBestPracticesLists(nodes.General, nodes.Element):
+    """PlaceHolder for a List of Security Best Practices"""
+
+    pass
+
+
+def visit_best_practice_node(self, node):
+    self.visit_admonition(node)
+
+
+def depart_best_practice_node(self, node):
+    self.depart_admonition(node)
+
+
+class BestPracticeListDirective(Directive):
+    """Initializer for Security Best Practices List"""
+
+    def run(self):
+        return [SecurityBestPracticesLists("")]
+
+
+class BestPracticeDirective(SphinxDirective):
+    """
+    Initializer for Security Best Practice. Content of run method is triggered for every security
+    best practice admonition"""
+
+    has_content = True
+    option_spec = {
+        "title": directives.unchanged_required,
+        "location": directives.unchanged,
+        "suggested-value": directives.unchanged,
+    }
+
+    def run(self):
+        targetid = "sbp-%d" % self.env.new_serialno("sbp")  # sbp = security best practice
+        targetnode = nodes.target("", "", ids=[targetid])
+        title = "No title provided"
+        if "title" in self.options:
+            title = self.options["title"]
+        node = SecurityBestPractice("\n".join(self.content))
+        admonition_title = f"Security Best Practice - {title}"
+        node += nodes.title(_(admonition_title), _(admonition_title))
+        self.state.nested_parse(self.content, self.content_offset, node)
+        if not hasattr(self.env, "all_security_best_practices"):
+            self.env.all_security_best_practices = []
+        self.env.all_security_best_practices.append(
+            {
+                "docname": self.env.docname,
+                "lineno": self.lineno,
+                "best_practice": node.deepcopy(),
+                "target": targetnode,
+                "meta": {
+                    "title": title,
+                    "location": self.options.get("location", ""),
+                    "suggested-value": self.options.get("suggested-value", ""),
+                },
+            }
+        )
+        return [targetnode, node]
+
+
+def purge_best_practice(app, env, docname):
+    if not hasattr(env, "all_security_best_practices"):
+        return
+    env.all_security_best_practices = [
+        node for node in env.all_security_best_practices if node["docname"] != docname
+    ]
+
+
+def merge_best_practice(app, env, docnames, other):
+    if not hasattr(env, "all_security_best_practices"):
+        env.all_security_best_practices = []
+    if hasattr(other, "all_security_best_practices"):
+        env.all_security_best_practices.extend(other.all_security_best_practices)
+
+
+def process_nodes(app, doctree, fromdocname):
+    """
+    Builds a list of all security best practices with back references to the original
+    admonition.
+    """
+    env = app.builder.env
+    if not hasattr(env, "all_security_best_practices"):
+        env.all_security_best_practices = []
+    for node in doctree.findall(SecurityBestPracticesLists):
+        content = []
+        for node_info in env.all_security_best_practices:
+            title = nodes.topic()
+            title += nodes.Text(node_info.get("meta").get("title"))
+            back_reference = create_back_reference(app, fromdocname, node_info)
+            content.extend((title, node_info["best_practice"], back_reference))
+        node.replace_self(content)
+    create_xls_checklist(app, env)
+
+
+def create_xls_checklist(app, env):
+    description = []
+    for node in env.all_security_best_practices:
+        meta_info = node.get("meta")
+        text = node.get("best_practice").rawsource
+        description.append(
+            {
+                "Topic": meta_info.get("title"),
+                "Requirement": text,
+                "Configuration Location": meta_info.get("location"),
+                "Suggested Value": meta_info.get("suggested-value"),
+                "Is": "",
+                "Comment": "",
+            }
+        )
+    dataframe = pd.DataFrame(description)
+    download_file_name = "security-best-practices-check-list"
+    download_file_obj = [env.dlfiles[key] for key in env.dlfiles if download_file_name in key][0]
+    download_file_path = download_file_obj[1]
+    full_file_path = f"{app.outdir}/_downloads/{download_file_path}"
+    writer = pd.ExcelWriter(full_file_path, engine="openpyxl")
+    dataframe.to_excel(writer, index=False, sheet_name="Security Best Practices")
+    worksheet = writer.sheets["Security Best Practices"]
+    column_width = {"A": 60, "B": 60, "C": 30, "D": 30, "E": 30, "F": 30, "G": 30}
+    for column, width in column_width.items():
+        worksheet.column_dimensions[column].width = width
+    worksheet["B2"].alignment = Alignment(wrap_text=True)
+    writer.close()
+
+
+def create_back_reference(app, fromdocname, node_info):
+    """Creates a sphinx paragraph node containing a reference to the original admonition."""
+    back_reference = nodes.paragraph()
+    newnode = nodes.reference("", "")
+    reference_text = "Reference to original description"
+    innernode = nodes.emphasis(_(reference_text), _(reference_text))
+    newnode["refdocname"] = node_info["docname"]
+    newnode["refuri"] = app.builder.get_relative_uri(fromdocname, node_info["docname"])
+    newnode["refuri"] += "#" + node_info["target"]["refid"]
+    newnode.append(innernode)
+    back_reference += newnode
+    return back_reference
+
+
+def setup(app: Sphinx):
+    """Initializer for the Security Best Practices Extension"""
+    app.add_node(
+        SecurityBestPractice,
+        html=(visit_best_practice_node, depart_best_practice_node),
+        latex=(visit_best_practice_node, depart_best_practice_node),
+        text=(visit_best_practice_node, depart_best_practice_node),
+    )
+    app.add_directive("security-best-practice", BestPracticeDirective)
+    app.add_directive("security-best-practices-list", BestPracticeListDirective)
+    app.connect("doctree-resolved", process_nodes)
+    app.connect("env-purge-doc", purge_best_practice)
+    app.connect("env-merge-info", merge_best_practice)
+
+    return {
+        "version": "0.1",
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
diff --git a/doc/source/development/programaticly_start_logprep.rst b/doc/source/development/programaticly_start_logprep.rst
@@ -1,7 +1,7 @@
 Start Logprep programaticly
 ===========================
 
-It is possible to make use of the Logprep :ref:`pipeline_config` in plain python, without any
+It is possible to make use of the Logprep pipeline in plain python, without any
 input or output connectors or further configurations.
 If on the other hand you want to make use of the input connector preprocessors you have to at least
 use an input connector like the DummyInput.

diff --git a/doc/source/user_manual/configuration/index.rst b/doc/source/user_manual/configuration/index.rst
@@ -16,7 +16,6 @@ Configuration
 
    input
    output
-   pipeline
    processor
    rules
    getter

diff --git a/doc/source/user_manual/configuration/input.rst b/doc/source/user_manual/configuration/input.rst
@@ -4,6 +4,26 @@
 Input
 =====
 
+.. security-best-practice::
+   :title: Input Connectors
+   :location: config.input.<INPUT-Name>.type and config.input.<INPUT-Name>.preprocessing.hmac
+   :suggested-value: <ConfluentKafkaInput|HttpConnector|FileInput> and <hmac>
+
+   It is advised to only use the :code:`ConfluentKafkaInput`, :code:`HttpConnector` or
+   :code:`FileInput` as input connectors in production environments.
+   The connectors :code:`DummyInput`, :code:`JsonInput` and :code:`JsonlInput` are mainly designed
+   for debugging purposes.
+
+   Furthermore, it is suggested to enable the :code:`HMAC` preprocessor so that tampering with
+   processed events can be detected.
+
+   .. code:: yaml
+
+      hmac:
+         target: <RAW_MSG>
+         key: <SECRET>
+         output_field: HMAC
+
 .. automodule:: logprep.connector.confluent_kafka.input
 .. autoclass:: logprep.connector.confluent_kafka.input.ConfluentKafkaInput.Config
    :members:
@@ -31,7 +51,7 @@ Input
    :undoc-members:
    :inherited-members:
    :noindex:
-  
+
 .. automodule:: logprep.connector.jsonl.input
 .. autoclass:: logprep.connector.jsonl.input.JsonlInput.Config
    :members:

diff --git a/doc/source/user_manual/configuration/output.rst b/doc/source/user_manual/configuration/output.rst
@@ -6,12 +6,24 @@ Output
 
 It is possible to define multiple outputs as a dictionary of :code:`<output name>: <output config>`.
 If you define multiple outputs with the attribute :code:`default: true` then be aware, that
-logprep only guaranties that one output has received data by calling the :code:`batch_finished_callback`.
+logprep only guarantees that one output has received data by calling the
+:code:`batch_finished_callback`.
 
-We recommed to only use one default output and define other outputs only for storing custom extra data.
+.. security-best-practice::
+   :title: Output Connectors
+   :location: config.output.<Output-Name>.type
+   :suggested-value: <ConfluentKafkaOutput|ElasticsearchOutput|OpensearchOutput|S3Output>
+
+   Similar to the input connectors there is a list of available output connectors of which some
+   are only meant for debugging, namely: :code:`ConsoleOutput` and :code:`JsonlOutput`.
+   It is advised to not use these in production environments.
+
+   When configuring multiple outputs it is also recommended to only use one default output and to
+   define other outputs only for storing custom extra data.
+   Otherwise it cannot be guaranteed that all events are safely stored.
 
 .. automodule:: logprep.connector.confluent_kafka.output
-.. autoclass:: logprep.connector.confluent_kafka.input.ConfluentKafkaInput.Config
+.. autoclass:: logprep.connector.confluent_kafka.output.ConfluentKafkaOutput.Config
    :members:
    :undoc-members:
    :inherited-members:
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,7 +16,6 @@ Configuration @@
        input
        output
-       pipeline
        processor
        rules
        getter
@@ Expand Down @@