sapcc · Paulchen5 · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/main.py b/main.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+
+# standard imports
+from datetime import datetime as dt
+
+# third party imports
+import click
+
+# first party imports
+from utilities import DataCleaner
+from utilities import QueryManager
+from utilities import Query
+from utilities import QuerySplitter
+from utilities.semaphore import ThreadManager
+
+
+@click.command()
+@click.option("-a", "--api-endpoint", default=None, required=True, help="api endpoint to query against")
+@click.option(
+    "-c",
+    "--cert",
+    default=None,
+    help="relative path to the certificate which is used to create the request",
+)
+@click.option(
+    "-t",
+    "--timeout",
+    default=30,
+    help="number of seconds the client will wait for the server to send a response",
+    show_default=True,
+    type=int,
+)
+@click.option(
+    "-k",
+    "--kwargs",
+    default=None,
+    help="parameters for the query; supported keys: target, params\ntarget > specifies a target behind the api endpoint\nparams > sets specific parameters for the query\n\tsupported parameters are:\n\t - 'query'\n\t - 'dedup'\n\t - 'partial_response'\n\t - 'step'\n\t - 'max_source_resolution'\n\t - 'engine'\n\t - 'analyze'",
+)
+@click.option("-p", "--directory-path", default=None, help="directory path in which the query results are stored")
+@click.option(
+    "-b",
+    "--threshold",
+    default=None,
+    help="Threshold which specifies when the data are interpolated by Thanos\nThis helps splitting the queries due to efficiency and resource optimization",
+    type=int,
+)
+# option for max long term storage
-# option for max long term storage
+# TODO: add possible option for max long term storage, currently fixed at 5y
-# option for max long term storage
+# TODO: add possible option for max long term storage, currently fixed at 5y
+def main(
+    api_endpoint: str = None,
+    cert: str = None,
+    timeout: int = 30,
+    kwargs: dict = None,
+    storage_path: str = None,
+    threshold: int = None,
+):
+    start = dt.now()
+    if kwargs is None:
+        kwargs = {}
+
+    tm = ThreadManager(12)
+    qm = QueryManager(cert=cert, timeout=timeout, storage_path=storage_path, threshold=threshold, thread_manager=tm)
+
+    query = Query(base_url=api_endpoint)
+
+    queries = QuerySplitter.split_by_treshold(QuerySplitter(), query=query, threshold=threshold)
+
+    query_uuids = [qm.add_query_queue() for i in range(len(queries))]
+
+    if not queries[0] is None:
+        qm.create_query_objects(query_queue_uuid=query_uuids[0], query=queries[0], separator=60 * 60 * 24)
+
+    if not queries[1] is None:
+        qm.create_query_objects(query_queue_uuid=query_uuids[1], query=queries[1], separator=60 * 60 * 24 * 90)
+
+    qm.create_environments()
+
+    for query_uuid in query_uuids:
+        if len(qm.queues[query_uuid].query_objects) == 0:
+            continue
+        qm.queues[query_uuid].schedule_queries()
+
+    tm.execute_all_threads()
+    end = dt.now()
+    print(f"Downloading data lastet: {(end - start)} seconds.")
+
+    start = dt.now()
+
+    max_index = 2
+    paths = [None for i in range(max_index)]
+    dc = DataCleaner()
+
+    for index in range(max_index):
+        query_uuid = query_uuids[index]
+
+        if not len(qm.queues[query_uuid].query_objects) == 0:
+            queue = qm.queues[query_uuid]
+            paths[index] = queue.path
+
+        if not paths[index] is None:
+            if index == 1:
+                dc.clear_query_results(path=paths[index], step=3600)
+                continue
+
+            dc.clear_query_results(path=paths[index], step=60)
+
+    end = dt.now()
+    print(f"Cleaning data lastet: {(end - start)} seconds.")
+
+
+if __name__ == "__main__":
+    main()
+    print("finished…")
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+click=>8,<9
+requests>=2,<3
+setuptools>=75
diff --git a/setup.py b/setup.py
@@ -0,0 +1,8 @@
+from setuptools import setup
+
+setup(
+    name="main",
+    version="0.1.0",
+    py_modules=["main", "utilities"],
+    install_requires=["click", "requests"],
+)
diff --git a/tests/Test_Filter.py b/tests/Test_Filter.py
@@ -0,0 +1,102 @@
+import unittest
+
+from utilities.data_filter import create_time_ranges, remove_state_from_timestamp_value
+
+# TODO consider outsorcing into corresponding
+
+
+class TestFilter(unittest.TestCase):
+    def test_remove_state_from_timestamp_value_with_normal_input(self):
+        data = [[0.0, "1"], [13.0, "1"]]
+
+        expected_result = [0.0, 13]
+        result = remove_state_from_timestamp_value(data=data)
+
+        self.assertEqual(result, expected_result)
+
+    def test_remove_state_from_timestamp_value_with_empty_input(self):
+        data = []
+
+        expected_result = []
+        result = remove_state_from_timestamp_value(data=data)
+
+        self.assertEqual(result, expected_result)
+
+    def test_remove_state_from_timestamp_value_with_wrong_inputs_1(self):
+        data = "ABAP"
+
+        with self.assertRaises(TypeError):
+            remove_state_from_timestamp_value(data=data)
+
+    def test_remove_state_from_timestamp_value_with_wrong_inputs_2(self):
+        data = ["ABAP"]
+
+        with self.assertRaises(TypeError):
+            remove_state_from_timestamp_value(data=data)
+
+    def test_remove_state_from_timestamp_value_with_wrong_inputs_3(self):
+        data = [["ABAP"]]
+
+        with self.assertRaises(TypeError):
+            remove_state_from_timestamp_value(data=data)
+
+    def test_create_time_ranges_with_normal_input(self):
+        data = [0, 5, 10, 15, 35, 50, 55, 60, 65, 67, 68, 69, 73, 78, 83, 88, 90]
+        step = 5
+
+        expected_result = [(0, 15), (35, 0), (50, 15), (67, 0), (68, 0), (69, 0), (73, 15), (90, 0)]
+
+        result = create_time_ranges(data=data, step=step)
+
+        self.assertEqual(result, expected_result)
+
+    def test_create_time_ranges_with_empty_input(self):
+        data = []
+        step = 5
+
+        expected_result = []
+
+        result = create_time_ranges(data=data, step=step)
+
+        self.assertEqual(result, expected_result)
+
+    def test_create_time_ranges_with_continous_input(self):
+        data = [0, 5, 10, 15, 20, 25]
+        step = 5
+
+        expected_result = [(0, 25)]
+
+        result = create_time_ranges(data=data, step=step)
+
+        self.assertEqual(result, expected_result)
+
+    def test_create_time_ranges_with_continous_double_input(self):
+        data = [0, 5, 10, 15, 20, 20, 25]
+        step = 5
+
+        expected_result = [(0, 25)]
+
+        result = create_time_ranges(data=data, step=step)
+
+        self.assertEqual(result, expected_result)
+
+    def test_create_time_ranges_with_one_input(self):
+        data = [77]
+        step = 5
+
+        expected_result = [(77, 0)]
+
+        result = create_time_ranges(data=data, step=step)
+
+        self.assertEqual(result, expected_result)
+
+    def test_create_time_ranges_with_wrong_input(self):
+        data = "ABAP"
+        step = 5
+
+        with self.assertRaises(TypeError):
+            create_time_ranges(data=data, step=step)
+
+
+if __name__ == "__main__":
+    unittest.main()