Skip to content

Commit

Permalink
add depseudo tools (#599)
Browse files Browse the repository at this point in the history
* add depseudo tools
* update changelog
---------

Co-authored-by: djkhl <[email protected]>
  • Loading branch information
ekneg54 and djkhl authored Jun 6, 2024
1 parent 28d7c54 commit b6027a9
Show file tree
Hide file tree
Showing 17 changed files with 433 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
* `timestamper` now writes `_timestamper_missing_field_warning` tag to event tags instead of `_timestamper_failure` in case of missing fields
* rename `--thread_count` parameter to `--thread-count` in http generator
* removed `--report` parameter and feature from http generator
* when using `extend_target_list` in the `field manager` the ordering of the given source fields is now preserved

### Features

* add UCL into the Quickstart Setup
* add logprep http output connector
* add pseudonymization tools to logprep -> see: `logprep pseudo --help`

### Improvements

Expand Down
44 changes: 44 additions & 0 deletions doc/source/user_manual/execution.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,47 @@ To find out more about the usage of the http event generator execute:
logprep generate http --help
Pseudonymization Tools
----------------------

Logprep provides tools to pseudonymize and depseudonymize values. This can be useful for testing
and debugging purposes. It can also be used to depseudonymize values that were pseudonymized by
Logprep's :code:`Pseudonymizer` processor.

These tools can be used to pseudonymize given strings using the same method as used in Logprep
and provide functionality to depseudonymize values using a pair of keys.

generate keys
^^^^^^^^^^^^^

.. code-block:: bash
logprep pseudo generate -f analyst 1024
logprep pseudo generate -f depseudo 2048
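This will generate four files (a :code:`.key` private key file and a :code:`.crt` public key file for each of the two names), which are used to pseudonymize in the next step.
The depseudo key has to be longer than the analyst key due to the hash padding involved in the procedure.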

* get help with :code:`logprep pseudo generate --help`

pseudonymize
^^^^^^^^^^^^

.. code-block:: bash
logprep pseudo pseudonymize analyst depseudo mystring
This will pseudonymize the provided string using the analyst and depseudo keys.
get help with :code:`logprep pseudo pseudonymize --help`

depseudonymize
^^^^^^^^^^^^^^

.. code-block:: bash
logprep pseudo depseudonymize analyst depseudo <output from above>
This will depseudonymize the provided string using the analyst and depseudo keys.

* get help with :code:`logprep pseudo depseudonymize --help`
6 changes: 2 additions & 4 deletions logprep/processor/pseudonymizer/encrypter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from Crypto.Cipher import AES, PKCS1_OAEP
from Crypto.PublicKey import RSA
from Crypto.Random import get_random_bytes

from logprep.util.getter import GetterFactory


Expand Down Expand Up @@ -39,10 +40,7 @@ def load_public_keys(self, keyfile_analyst: str, keyfile_depseudo: str):
pub_key_depseudo_str = GetterFactory.from_string(keyfile_depseudo).get()
self._pubkey_depseudo = RSA.import_key(pub_key_depseudo_str)

def encrypt(
self,
input_str: str,
) -> str:
def encrypt(self, input_str: str) -> str:
"""Encrypt a string using hybrid encryption.
The input string is encrypted with AES in CTR mode using a random
Expand Down
14 changes: 14 additions & 0 deletions logprep/run_logprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from logprep.util.configuration import Configuration, InvalidConfigurationError
from logprep.util.defaults import DEFAULT_LOG_CONFIG
from logprep.util.helper import get_versions_string, print_fcolor
from logprep.util.pseudo.commands import depseudonymize, generate_keys, pseudonymize
from logprep.util.rule_dry_runner import DryRunner

warnings.simplefilter("always", DeprecationWarning)
Expand Down Expand Up @@ -298,6 +299,19 @@ def print_config(configs: tuple[str], output) -> None:
print(config.as_yaml())


# Command group registered on the ``cli`` group (``cli`` is defined outside this excerpt).
# The function body is intentionally empty: click uses the docstring below as the
# help text of the group, so its wording is user-facing output.
@cli.group(short_help="pseudonymization toolbox")
def pseudo():
    """
    The pseudo command group offers a set of commands to
    generate keys, pseudonymize and depseudonymize
    """


# Attach the imported click commands to the group under their command line names.
pseudo.add_command(cmd=generate_keys.generate, name="generate")
pseudo.add_command(cmd=pseudonymize.pseudonymize, name="pseudonymize")
pseudo.add_command(cmd=depseudonymize.depseudonymize, name="depseudonymize")


def signal_handler(__: int, _) -> None:
"""Handle signals for stopping the runner and reloading the configuration."""
Runner.get_runner(Configuration()).stop()
Expand Down
Empty file added logprep/util/pseudo/__init__.py
Empty file.
Empty file.
21 changes: 21 additions & 0 deletions logprep/util/pseudo/commands/depseudonymize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Command line tool to depseudonymize a string using the given keys."""

import click

from logprep.util.pseudo.depseudonymizer.depseudonymizer import Depseudonymizer


@click.command()
@click.argument("analyst-key", type=str)
@click.argument("depseudo-key", type=str)
@click.argument("pseudo-string", type=str)
def depseudonymize(analyst_key: str, depseudo_key: str, pseudo_string: str):
    """depseudonymize a string using the given keys."""
    # NOTE: the docstring above is what click prints for ``--help``; keep it user-facing.

    def read_private_key(base_name: str) -> str:
        # The arguments are key names without extension; the private key is
        # expected in "<name>.key".
        with open(f"{base_name}.key", "r", encoding="utf8") as handle:
            return handle.read()

    # Decode the input first, then load the analyst key followed by the depseudo key.
    worker = Depseudonymizer(pseudo_string)
    analyst_pem = read_private_key(analyst_key)
    depseudo_pem = read_private_key(depseudo_key)
    worker.depseudo_key = depseudo_pem
    worker.analyst_key = analyst_pem
    print(worker.depseudonymize())
19 changes: 19 additions & 0 deletions logprep/util/pseudo/commands/generate_keys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import click

from logprep.util.pseudo.keygenerator import generate_rsa_key


@click.command()
@click.argument("key-length", default="1024", type=int)
@click.option("-f", "--file")
def generate(key_length: int, file: str):
    """Generate RSA keys for pseudonymization."""
    # NOTE: the docstring above is what click prints for ``--help``; keep it user-facing.
    private_pem, public_pem = generate_rsa_key.generate_keys(key_length=key_length)

    if file:
        # Persist the pair side by side: "<file>.key" holds the private key,
        # "<file>.crt" the public key.
        for suffix, pem in ((".key", private_pem), (".crt", public_pem)):
            with open(f"{file}{suffix}", "w", encoding="utf8") as key_file:
                key_file.write(pem.decode("utf8"))
        return

    # Without a target file both keys go to stdout, private key first.
    for pem in (private_pem, public_pem):
        print(pem.decode("utf8"))
17 changes: 17 additions & 0 deletions logprep/util/pseudo/commands/pseudonymize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import click

from logprep.processor.pseudonymizer.encrypter import DualPKCS1HybridEncrypter


@click.command()
@click.argument("analyst-key", type=str)
@click.argument("depseudo-key", type=str)
@click.argument("string", type=str)
def pseudonymize(analyst_key: str, depseudo_key: str, string: str):
    """pseudonymize a string using the given keys."""
    # NOTE: the docstring above is what click prints for ``--help``; keep it user-facing.
    encrypter = DualPKCS1HybridEncrypter()
    # The arguments are key names without extension; the public keys are
    # loaded from "<name>.crt".
    public_key_files = {
        "keyfile_analyst": f"{analyst_key}.crt",
        "keyfile_depseudo": f"{depseudo_key}.crt",
    }
    encrypter.load_public_keys(**public_key_files)
    pseudonym = encrypter.encrypt(string)
    print(pseudonym)
Empty file.
137 changes: 137 additions & 0 deletions logprep/util/pseudo/depseudonymizer/depseudonymizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""module to depseudonymize"""

import base64
from dataclasses import dataclass
from Crypto.PublicKey import RSA
from Crypto.Cipher.PKCS1_OAEP import PKCS1OAEP_Cipher
from Crypto.Cipher import AES, PKCS1_OAEP


class DepseudonymizeError(Exception):
    """Error raised by the depseudonymizer, e.g. when a required key is not set."""


@dataclass
class Depseudonymizer:
    """Depseudonymize a pseudonymized string with the analyst and depseudo keys.

    The base64 decoded input is read as three consecutive parts:

    1. the RSA encrypted session key,
    2. the nonce of the AES cipher (``NONCE_LENGTH`` bytes),
    3. the AES encrypted payload (all remaining bytes).

    Parameters
    ----------
    pseudonymized_string: str
        The base64 encoded pseudonymized string.
        Base64 decoding is done in the ``__post_init__`` method, afterwards the
        attribute holds the decoded bytes.
    """

    # Length in bytes of the encrypted session key that is assumed as long as no
    # depseudo key is set. It equals the modulus size of a 2048 bit RSA key.
    DEFAULT_ENCRYPTED_SESSION_KEY_LENGTH = 256

    # Length in bytes of the AES nonce stored directly behind the session key.
    NONCE_LENGTH = 8

    # the pseudonymized string: base64 text on construction, decoded bytes afterwards
    pseudonymized_string: str

    # private RSA keys as returned by ``RSA.import_key``; ``None`` until set via the
    # ``analyst_key`` / ``depseudo_key`` setters
    _analyst_key: "RSA.RsaKey | None" = None

    _depseudo_key: "RSA.RsaKey | None" = None

    def __post_init__(self) -> None:
        self.pseudonymized_string = base64.b64decode(self.pseudonymized_string)

    @property
    def _encrypted_session_key_length(self) -> int:
        """number of leading bytes that hold the encrypted session key

        The outer RSA layer is removed with the depseudo key and an RSA-OAEP
        ciphertext is as long as the RSA modulus. Therefore the length follows
        the depseudo key once it is set, instead of only supporting 2048 bit keys.

        Returns
        -------
        int
            the modulus size of the depseudo key in bytes or
            ``DEFAULT_ENCRYPTED_SESSION_KEY_LENGTH`` if no depseudo key is set
        """
        if self._depseudo_key is None:
            return self.DEFAULT_ENCRYPTED_SESSION_KEY_LENGTH
        return self._depseudo_key.size_in_bytes()

    @property
    def encrypted_session_key(self) -> bytes:
        """the encrypted session key

        Returns
        -------
        bytes
            the leading bytes of the pseudonymized_string, 256 bytes for a
            2048 bit depseudo key or if no depseudo key is set
        """
        return self.pseudonymized_string[: self._encrypted_session_key_length]

    @property
    def cipher_nonce(self) -> bytes:
        """the cipher nonce

        Returns
        -------
        bytes
            The ``NONCE_LENGTH`` (8) bytes after the encrypted session key
        """
        nonce_start = self._encrypted_session_key_length
        return self.pseudonymized_string[nonce_start : nonce_start + self.NONCE_LENGTH]

    @property
    def ciphertext(self) -> bytes:
        """the cipher text

        Returns
        -------
        bytes
            All bytes after the encrypted session key and the nonce
        """
        payload_start = self._encrypted_session_key_length + self.NONCE_LENGTH
        return self.pseudonymized_string[payload_start:]

    @property
    def depseudo_key(self) -> "RSA.RsaKey | None":
        """getter for depseudo_key

        Returns
        -------
        RSA.RsaKey | None
            the imported depseudo private key or None if it was not set
        """
        return self._depseudo_key

    @depseudo_key.setter
    def depseudo_key(self, depseudo_key: str) -> None:
        """setter for the depseudo_key

        imports the given key with ``RSA.import_key`` and saves the result
        in _depseudo_key

        Parameters
        ----------
        depseudo_key : str
            the depseudo private key
        """
        self._depseudo_key = RSA.import_key(depseudo_key)

    @property
    def analyst_key(self) -> "RSA.RsaKey | None":
        """getter for analyst_key

        Returns
        -------
        RSA.RsaKey | None
            the imported analyst private key or None if it was not set
        """
        return self._analyst_key

    @analyst_key.setter
    def analyst_key(self, analyst_key: str) -> None:
        """setter for the analyst_key

        imports the given key with ``RSA.import_key`` and saves the result
        in _analyst_key

        Parameters
        ----------
        analyst_key : str
            the analyst private key
        """
        self._analyst_key = RSA.import_key(analyst_key)

    def depseudonymize(self) -> str:
        """depseudonymizes after setting the depseudo and analyst keys

        Returns
        -------
        str
            the depseudonymized string

        Raises
        ------
        DepseudonymizeError
            if depseudo_key or analyst_key is not set
        """
        if self._depseudo_key is None:
            raise DepseudonymizeError("No depseudo key")
        if self._analyst_key is None:
            raise DepseudonymizeError("No analyst key")
        cipher_rsa_depseudo = PKCS1_OAEP.new(self._depseudo_key)
        cipher_rsa_analyst = PKCS1_OAEP.new(self._analyst_key)
        # The session key is wrapped twice: the depseudo key removes the outer
        # layer, the analyst key the inner one.
        depseudo_decrypted_session_key = cipher_rsa_depseudo.decrypt(self.encrypted_session_key)
        analyst_decrypted_session_key = cipher_rsa_analyst.decrypt(depseudo_decrypted_session_key)
        cipher_aes = AES.new(analyst_decrypted_session_key, AES.MODE_CTR, nonce=self.cipher_nonce)
        return cipher_aes.decrypt(self.ciphertext).decode("utf-8")
Empty file.
8 changes: 8 additions & 0 deletions logprep/util/pseudo/keygenerator/generate_rsa_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from Crypto.PublicKey import RSA


def generate_keys(key_length):
    """Generate a new RSA key pair.

    Parameters
    ----------
    key_length
        the length of the RSA key, passed on to ``RSA.generate``

    Returns
    -------
    tuple
        the exported private key followed by the exported public key
    """
    rsa_key = RSA.generate(key_length)
    private_export = rsa_key.exportKey()
    public_export = rsa_key.publickey().exportKey()
    return private_export, public_export
Empty file.
Loading

0 comments on commit b6027a9

Please sign in to comment.