-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add query management functionality #1
base: main
Are you sure you want to change the base?
Changes from all commits
ce1a783
027a3d6
f44f3c0
4c71a79
60bff09
dd95146
ad0a9a2
cfe8a70
badebfa
8b1f68b
a09bbd3
8a7dc00
57ef645
1ae5e2d
d80026c
5c0da75
4f832b9
767a2e2
370e971
e9b6b55
7db3b4a
a3e6973
f7f2c05
7c09f97
6d6d510
b247dff
dbbe7a1
c8fa828
b950f36
ecf2c87
25324d1
950b354
36abc72
fcaf213
8fbbc08
86ac419
113679c
4c15db7
4470db2
577035d
86ace21
c942daf
5d24622
3205867
b9910da
6e40d61
3226b8a
e2a8caf
5fbacc5
e58ae4f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/#use-with-ide | ||
.pdm.toml | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,112 @@ | ||||||
#!/usr/bin/env python3 | ||||||
|
||||||
# standard imports | ||||||
from datetime import datetime as dt | ||||||
|
||||||
# third party imports | ||||||
import click | ||||||
|
||||||
# first party imports | ||||||
from utilities import DataCleaner | ||||||
from utilities import QueryManager | ||||||
from utilities import Query | ||||||
from utilities import QuerySplitter | ||||||
from utilities.semaphore import ThreadManager | ||||||
|
||||||
|
||||||
@click.command() | ||||||
@click.option("-a", "--api-endpoint", default=None, required=True, help="api endpoint to query against") | ||||||
@click.option( | ||||||
"-c", | ||||||
"--cert", | ||||||
default=None, | ||||||
help="relative path to the certificate which is used to create the request", | ||||||
) | ||||||
@click.option( | ||||||
"-t", | ||||||
"--timeout", | ||||||
default=30, | ||||||
help="number of seconds the client will wait for the server to send a response", | ||||||
show_default=True, | ||||||
type=int, | ||||||
) | ||||||
@click.option( | ||||||
"-k", | ||||||
"--kwargs", | ||||||
default=None, | ||||||
help="parameters for the query; supported keys: target, params\ntarget > specifies a target behind the api endpoint\nparams > sets specific parameters for the query\n\tsupported parameters are:\n\t - 'query'\n\t - 'dedup'\n\t - 'partial_response'\n\t - 'step'\n\t - 'max_source_resolution'\n\t - 'engine'\n\t - 'analyze'", | ||||||
) | ||||||
@click.option("-p", "--directory-path", default=None, help="directory path in which the query results are stored") | ||||||
@click.option( | ||||||
"-b", | ||||||
"--threshold", | ||||||
default=None, | ||||||
help="Threshold which specifies when the data are interpolated by Thanos\nThis helps splitting the queries due to efficiency and resource optimization", | ||||||
type=int, | ||||||
) | ||||||
# option for max long term storage | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. leftover? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then we might reshape this comment:
Suggested change
|
||||||
def main( | ||||||
api_endpoint: str = None, | ||||||
cert: str = None, | ||||||
timeout: int = 30, | ||||||
kwargs: dict = None, | ||||||
storage_path: str = None, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this does not add up with the click param. |
||||||
threshold: int = None, | ||||||
): | ||||||
start = dt.now() | ||||||
if kwargs is None: | ||||||
kwargs = {} | ||||||
|
||||||
tm = ThreadManager(12) | ||||||
Paulchen5 marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd make this configureable with a param. Alternatively this should be None. |
||||||
qm = QueryManager(cert=cert, timeout=timeout, storage_path=storage_path, threshold=threshold, thread_manager=tm) | ||||||
|
||||||
query = Query(base_url=api_endpoint) | ||||||
|
||||||
queries = QuerySplitter.split_by_treshold(QuerySplitter(), query=query, threshold=threshold) | ||||||
|
||||||
query_uuids = [qm.add_query_queue() for i in range(len(queries))] | ||||||
|
||||||
if not queries[0] is None: | ||||||
qm.create_query_objects(query_queue_uuid=query_uuids[0], query=queries[0], separator=60 * 60 * 24) | ||||||
|
||||||
if not queries[1] is None: | ||||||
qm.create_query_objects(query_queue_uuid=query_uuids[1], query=queries[1], separator=60 * 60 * 24 * 90) | ||||||
|
||||||
qm.create_environments() | ||||||
|
||||||
for query_uuid in query_uuids: | ||||||
if len(qm.queues[query_uuid].query_objects) == 0: | ||||||
continue | ||||||
qm.queues[query_uuid].schedule_queries() | ||||||
Comment on lines
+77
to
+80
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can be expressed as list comprehension. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [
qm.queues[query_uuid].schedule_queries()
for query_uuid in query_uuids
if not len(qm.queues[query_uuid].query_objects) == 0
] Produces the following linting note:
|
||||||
|
||||||
tm.execute_all_threads() | ||||||
end = dt.now() | ||||||
print(f"Downloading data lastet: {(end - start)} seconds.") | ||||||
|
||||||
start = dt.now() | ||||||
|
||||||
max_index = 2 | ||||||
paths = [None for i in range(max_index)] | ||||||
dc = DataCleaner() | ||||||
|
||||||
for index in range(max_index): | ||||||
query_uuid = query_uuids[index] | ||||||
|
||||||
if not len(qm.queues[query_uuid].query_objects) == 0: | ||||||
queue = qm.queues[query_uuid] | ||||||
paths[index] = queue.path | ||||||
|
||||||
if not paths[index] is None: | ||||||
if index == 1: | ||||||
dc.clear_query_results(path=paths[index], step=3600) | ||||||
continue | ||||||
|
||||||
dc.clear_query_results(path=paths[index], step=60) | ||||||
|
||||||
end = dt.now() | ||||||
print(f"Cleaning data lastet: {(end - start)} seconds.") | ||||||
|
||||||
|
||||||
if __name__ == "__main__": | ||||||
main() | ||||||
print("finished…") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
click=>8,<9 | ||
requests>=2,<3 | ||
setuptools>=75 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from setuptools import setup | ||
|
||
setup( | ||
name="main", | ||
version="0.1.0", | ||
py_modules=["main", "utilities"], | ||
install_requires=["click", "requests"], | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import unittest | ||
|
||
from utilities.data_filter import create_time_ranges, remove_state_from_timestamp_value | ||
|
||
# TODO consider outsorcing into corresponding | ||
|
||
|
||
class TestFilter(unittest.TestCase): | ||
def test_remove_state_from_timestamp_value_with_normal_input(self): | ||
data = [[0.0, "1"], [13.0, "1"]] | ||
|
||
expected_result = [0.0, 13] | ||
result = remove_state_from_timestamp_value(data=data) | ||
|
||
self.assertEqual(result, expected_result) | ||
|
||
def test_remove_state_from_timestamp_value_with_empty_input(self): | ||
data = [] | ||
|
||
expected_result = [] | ||
result = remove_state_from_timestamp_value(data=data) | ||
|
||
self.assertEqual(result, expected_result) | ||
|
||
def test_remove_state_from_timestamp_value_with_wrong_inputs_1(self): | ||
data = "ABAP" | ||
|
||
with self.assertRaises(TypeError): | ||
remove_state_from_timestamp_value(data=data) | ||
|
||
def test_remove_state_from_timestamp_value_with_wrong_inputs_2(self): | ||
data = ["ABAP"] | ||
|
||
with self.assertRaises(TypeError): | ||
remove_state_from_timestamp_value(data=data) | ||
|
||
def test_remove_state_from_timestamp_value_with_wrong_inputs_3(self): | ||
data = [["ABAP"]] | ||
|
||
with self.assertRaises(TypeError): | ||
remove_state_from_timestamp_value(data=data) | ||
|
||
def test_create_time_ranges_with_normal_input(self): | ||
data = [0, 5, 10, 15, 35, 50, 55, 60, 65, 67, 68, 69, 73, 78, 83, 88, 90] | ||
step = 5 | ||
|
||
expected_result = [(0, 15), (35, 0), (50, 15), (67, 0), (68, 0), (69, 0), (73, 15), (90, 0)] | ||
|
||
result = create_time_ranges(data=data, step=step) | ||
|
||
self.assertEqual(result, expected_result) | ||
|
||
def test_create_time_ranges_with_empty_input(self): | ||
data = [] | ||
step = 5 | ||
|
||
expected_result = [] | ||
|
||
result = create_time_ranges(data=data, step=step) | ||
|
||
self.assertEqual(result, expected_result) | ||
|
||
def test_create_time_ranges_with_continous_input(self): | ||
data = [0, 5, 10, 15, 20, 25] | ||
step = 5 | ||
|
||
expected_result = [(0, 25)] | ||
|
||
result = create_time_ranges(data=data, step=step) | ||
|
||
self.assertEqual(result, expected_result) | ||
|
||
def test_create_time_ranges_with_continous_double_input(self): | ||
data = [0, 5, 10, 15, 20, 20, 25] | ||
step = 5 | ||
|
||
expected_result = [(0, 25)] | ||
|
||
result = create_time_ranges(data=data, step=step) | ||
|
||
self.assertEqual(result, expected_result) | ||
|
||
def test_create_time_ranges_with_one_input(self): | ||
data = [77] | ||
step = 5 | ||
|
||
expected_result = [(77, 0)] | ||
|
||
result = create_time_ranges(data=data, step=step) | ||
|
||
self.assertEqual(result, expected_result) | ||
|
||
def test_create_time_ranges_with_wrong_input(self): | ||
data = "ABAP" | ||
step = 5 | ||
|
||
with self.assertRaises(TypeError): | ||
create_time_ranges(data=data, step=step) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it is missing the unit