Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve tests boot query peformance #281

Merged
merged 4 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/kernelCI_app/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
"tree/<str:commit_hash>/tests/", views.TreeTestsView.as_view(), name="treeTests"
),
path("build/<str:build_id>/status-count", views.BuildStatusCountView.as_view(), name="buildStatusCount"),
path("tree/<str:commit_hash>/full",
views.TreeDetailsSlow.as_view(), name="TreeDetailsSlow"),
WilsonNet marked this conversation as resolved.
Show resolved Hide resolved
path("build/<str:build_id>", views.BuildDetails.as_view(), name="buildDetails"),
path("build/<str:build_id>/tests", views.BuildTests.as_view(), name="buildTests"),
path("tests/test/<str:test_id>", views.TestDetails.as_view(), name="testDetails"),
Expand Down
45 changes: 43 additions & 2 deletions backend/kernelCI_app/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,50 @@
import json
from typing import Union
from django.utils import timezone
from datetime import timedelta
import re

DEFAULT_QUERY_TIME_INTERVAL = {'days': 7}


def toIntOrDefault(value, default):
try:
return int(value)
except ValueError:
return default


def extract_platform(misc_environment: Union[str, dict, None]):
parsedEnvMisc = None
if isinstance(misc_environment, dict):
parsedEnvMisc = misc_environment
elif misc_environment is None:
return "unknown"
else:
parsedEnvMisc = json.loads(misc_environment)
platform = parsedEnvMisc.get("platform")
if platform:
return platform
print("unknown platform in misc_environment", misc_environment)
return "unknown"


# TODO misc is not stable and should be used as a POC only
def extract_error_message(misc: Union[str, dict, None]):
parsedEnv = None
if misc is None:
return "unknown error"
elif isinstance(misc, dict):
parsedEnv = misc
else:
parsedEnv = json.loads(misc)
error_message = parsedEnv.get("error_msg")
if error_message:
return error_message
print("unknown error_msg in misc", misc)
return "unknown error"


def getQueryTimeInterval(**kwargs):
if not kwargs:
return timezone.now() - timedelta(**DEFAULT_QUERY_TIME_INTERVAL)
Expand Down Expand Up @@ -73,11 +112,13 @@ def create_filters_from_req(self, request):

def add_filter(self, field, value, comparison_op):
self.validate_comparison_op(comparison_op)
self.filters.append({'field': field, 'value': value, 'comparison_op': comparison_op})
self.filters.append({'field': field, 'value': value,
'comparison_op': comparison_op})

def validate_comparison_op(self, op):
if op not in self.comparison_ops.keys():
raise InvalidComparisonOP(f'Filter with invalid comparison operator `{op}` found`')
raise InvalidComparisonOP(
f'Filter with invalid comparison operator `{op}` found`')

def get_comparison_op(self, filter, op_type='orm'):
idx = self.comparison_op_type_idx[op_type]
Expand Down
259 changes: 259 additions & 0 deletions backend/kernelCI_app/views/treeDetailsSlowView.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
from django.http import JsonResponse, HttpResponseBadRequest
from django.views import View
from kernelCI_app.utils import (FilterParams, extract_error_message,
extract_platform, InvalidComparisonOP, getErrorResponseBody, toIntOrDefault)
from django.db import connection


class TreeDetailsSlow(View):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add a httpie request so we can easily test it

def __processFilters(self, request):
filterTestStatus = ["FAIL", "MISS", "PASS", "DONE", "ERROR", "SKIP"]
filterTestDurationMin, filterTestDurationMax = None, None
filterBootStatus = ["FAIL", "MISS", "PASS", "DONE", "ERROR", "SKIP"]
filterBootDurationMin, filterBootDurationMax = None, None

try:
filter_params = FilterParams(request)
for f in filter_params.filters:
field = f["field"]
value = f["value"]
operation = f["comparison_op"]
if field == "boot.status":
filterBootStatus = value
elif field == "boot.duration":
if operation == "lte":
filterBootDurationMax = toIntOrDefault(value, None)
else:
filterBootDurationMin = toIntOrDefault(value, None)
if field == "test.status":
filterTestStatus = value
elif field == "test.duration":
if operation == "lte":
filterTestDurationMax = toIntOrDefault(value, None)
else:
filterTestDurationMin = toIntOrDefault(value, None)
except InvalidComparisonOP as e:
return HttpResponseBadRequest(getErrorResponseBody(str(e)))
return (filterTestStatus, filterTestDurationMin, filterTestDurationMax, filterBootStatus,
filterBootDurationMin, filterBootDurationMax)

def get(self, request, commit_hash: str | None):
Copy link
Collaborator

@WilsonNet WilsonNet Sep 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are missing the filters, we want to get them in this PR or will it be a regression, we can do in a separated PR but targeting this PR and move this to draft if you think it is too much

origin_param = request.GET.get("origin")
git_url_param = request.GET.get("git_url")
git_branch_param = request.GET.get("git_branch")

(filterTestStatus, filterTestDurationMin, filterTestDurationMax, filterBootStatus,
filterBootDurationMin, filterBootDurationMax) = self.__processFilters(request)

query = """
SELECT
tests.build_id AS tests_build_id,
tests.id AS tests_id,
tests.origin AS tests_origin,
tests.environment_comment AS tests_enviroment_comment,
tests.environment_misc AS tests_enviroment_misc,
tests.path AS tests_path,
tests.comment AS tests_comment,
tests.log_url AS tests_log_url,
tests.status AS tests_status,
tests.waived AS tests_waived,
tests.start_time AS tests_start_time,
tests.duration AS tests_duration,
tests.number_value AS tests_number_value,
tests.misc AS tests_misc,
builds_filter.*
FROM
(
SELECT
builds.checkout_id AS builds_checkout_id,
builds.id AS builds_id,
builds.comment AS builds_comment,
builds.start_time AS builds_start_time,
builds.duration AS builds_duration,
builds.architecture AS builds_architecture,
builds.command AS builds_command,
builds.compiler AS builds_compiler,
builds.config_name AS builds_config_name,
builds.config_url AS builds_config_url,
builds.log_url AS builds_log_url,
builds.valid AS builds_valid,
tree_head.*
FROM
(
SELECT
checkouts.id AS checkout_id
FROM
checkouts
WHERE
checkouts.git_commit_hash = %(commit_hash)s AND
checkouts.git_repository_url = %(git_url_param)s AND
checkouts.git_repository_branch = %(git_branch_param)s AND
checkouts.origin = %(origin_param)s
) AS tree_head
LEFT JOIN builds
ON tree_head.checkout_id = builds.checkout_id
WHERE
builds.origin = %(origin_param)s
) AS builds_filter
LEFT JOIN tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these joins can be inner joins because we are looking for tests, we don't have business with checkouts that does not have builds and builds that does not have tests

ON builds_filter.builds_id = tests.build_id
WHERE
tests.origin = %(origin_param)s
"""
with connection.cursor() as cursor:
cursor.execute(query, {
"commit_hash": commit_hash,
"origin_param": origin_param,
"git_url_param": git_url_param,
"git_branch_param": git_branch_param,
})
rows = cursor.fetchall()

testHistory = []
testStatusSummary = {}
testConfigs = {}
testPlatformsWithErrors = set()
testFailReasons = {}
testArchSummary = {}
bootHistory = []
bootStatusSummary = {}
bootConfigs = {}
bootPlatformsFailing = set()
bootFailReasons = {}
bootArchSummary = {}

tempColumnDict = {
"tests_id": 1,
"tests_origin": 2,
"tests_enviroment_comment": 3,
"tests_enviroment_misc": 4,
"tests_path": 5,
"tests_comment": 6,
"tests_log_url": 7,
"tests_status": 8,
"tests_waived": 9,
"tests_start_time": 10,
"tests_duration": 11,
"tests_number_value": 12,
"tests_misc": 13,
"builds_checkout_id": 14,
"builds_id": 15,
"builds_comment": 16,
"builds_start_time": 17,
"builds_duration": 18,
"builds_architecture": 19,
"builds_command": 20,
"builds_compiler": 21,
"builds_config_name": 22,
"builds_config_url": 23,
"builds_log_url": 24,
"builds_valid": 25,
}
t = tempColumnDict
for r in rows:
path = r[t["tests_path"]]
testId = r[t["tests_id"]]
testStatus = r[t["tests_status"]]
testDuration = r[t["tests_duration"]]
buildConfig = r[t["builds_config_name"]]
buildArch = r[t["builds_architecture"]]
buildCompiler = r[t["builds_compiler"]]

testPlatform = extract_platform(r[t["tests_enviroment_misc"]])
testError = extract_error_message(r[t["tests_misc"]])

# Test history for boot and non boot
historyItem = {
"id": testId,
"status": testStatus,
"path": path,
"duration": testDuration,
"startTime": r[t["tests_start_time"]],
}
if path.startswith("boot"):
if testStatus not in filterBootStatus:
continue
if (
filterBootDurationMax is not None
and (testStatus is None or toIntOrDefault(testStatus, 0) > filterBootDurationMax)
) and (
filterBootDurationMin is not None
and (testStatus is None or toIntOrDefault(testStatus, 0) < filterBootDurationMin)
):
continue

bootHistory.append(historyItem)
bootStatusSummary[testStatus] = bootStatusSummary.get(
testStatus, 0) + 1

archKey = "%s-%s" % (buildArch, buildCompiler)
archSummary = bootArchSummary.get(archKey, {
"arch": buildArch,
"compiler": buildCompiler,
"status": {}
})
archSummary["status"][testStatus] = archSummary["status"].get(
testStatus, 0) + 1
bootArchSummary[archKey] = archSummary

configSummary = bootConfigs.get(buildConfig, {})
configSummary[testStatus] = configSummary.get(
testStatus, 0) + 1
bootConfigs[buildConfig] = configSummary

if testStatus == "ERROR" or testStatus == "FAIL" or testStatus == "MISS":
bootPlatformsFailing.add(testPlatform)
bootFailReasons[testError] = bootFailReasons.get(
testError, 0) + 1
else:
if testStatus not in filterTestStatus:
continue
if (
filterTestDurationMax is not None
and (testStatus is None or toIntOrDefault(testStatus, 0) > filterTestDurationMax)
) and (
filterTestDurationMin is not None
and (testStatus is None or toIntOrDefault(testStatus, 0) < filterTestDurationMin)
):
continue
testHistory.append(historyItem)
testStatusSummary[testStatus] = testStatusSummary.get(
testStatus, 0) + 1

archKey = "%s-%s" % (buildArch, buildCompiler)
archSummary = testArchSummary.get(archKey, {
"arch": buildArch,
"compiler": buildCompiler,
"status": {}
})
archSummary["status"][testStatus] = archSummary["status"].get(
testStatus, 0) + 1
testArchSummary[archKey] = archSummary

configSummary = testConfigs.get(buildConfig, {})
configSummary[testStatus] = configSummary.get(
testStatus, 0) + 1
testConfigs[buildConfig] = configSummary

if testStatus == "ERROR" or testStatus == "FAIL" or testStatus == "MISS":
testPlatformsWithErrors.add(testPlatform)
testFailReasons[testError] = testFailReasons.get(
testError, 0) + 1

return JsonResponse(
{
"bootArchSummary": list(bootArchSummary.values()),
"testArchSummary": list(testArchSummary.values()),
"bootFailReasons": bootFailReasons,
"testFailReasons": testFailReasons,
"testPlatformsWithErrors": list(testPlatformsWithErrors),
"bootPlatformsFailing": list(bootPlatformsFailing),
"testConfigs": testConfigs,
"bootConfigs": bootConfigs,
"testStatusSummary": testStatusSummary,
"bootStatusSummary": bootStatusSummary,
"bootHistory": bootHistory,
"testHistory": testHistory
},
safe=False
)
Loading
Loading