
feat: Backend for SDK metrics #5623

Status: Open · wants to merge 8 commits into base: main

Changes from all commits

210 changes: 121 additions & 89 deletions api/app_analytics/analytics_db_service.py
@@ -1,12 +1,14 @@
 from datetime import date, datetime, timedelta
+from logging import getLogger
 from typing import TYPE_CHECKING, List

 from dateutil.relativedelta import relativedelta
 from django.conf import settings
-from django.db.models import Sum
+from django.db.models import Q, Sum
 from django.utils import timezone
 from rest_framework.exceptions import NotFound

+from app_analytics import constants
 from app_analytics.dataclasses import FeatureEvaluationData, UsageData
 from app_analytics.influxdb_wrapper import (
     get_events_for_organisation,
@@ -17,86 +19,57 @@
 from app_analytics.influxdb_wrapper import (
     get_usage_data as get_usage_data_from_influxdb,
 )
+from app_analytics.mappers import map_annotated_api_usage_buckets_to_usage_data
 from app_analytics.models import (
     APIUsageBucket,
     FeatureEvaluationBucket,
     Resource,
 )
+from app_analytics.types import Labels, PeriodType
 from environments.models import Environment
 from features.models import Feature
-from organisations.models import Organisation
+from organisations.models import Organisation, OrganisationSubscriptionInformationCache

-from . import constants
-from .types import PERIOD_TYPE
+logger = getLogger(__name__)


 def get_usage_data(
     organisation: Organisation,
     environment_id: int | None = None,
     project_id: int | None = None,
-    period: PERIOD_TYPE | None = None,
+    period: PeriodType | None = None,
+    labels_filter: Labels | None = None,
 ) -> list[UsageData]:
-    now = timezone.now()
-    date_start = date_stop = None
-    sub_cache = getattr(organisation, "subscription_information_cache", None)
+    sub_cache = OrganisationSubscriptionInformationCache.objects.filter(
+        organisation=organisation
+    ).first()

-    is_subscription_valid = (
-        sub_cache is not None and sub_cache.is_billing_terms_dates_set()
-    )
+    date_start, date_stop = _get_start_date_and_stop_date_for_subscribed_organisation(
+        sub_cache=sub_cache,
+        period=period,
+    )

-    if period in (constants.CURRENT_BILLING_PERIOD, constants.PREVIOUS_BILLING_PERIOD):
-        if not is_subscription_valid:
-            raise NotFound("No billing periods found for this organisation.")
-
-    if TYPE_CHECKING:
-        assert sub_cache is not None
-
-    match period:
-        case constants.CURRENT_BILLING_PERIOD:
-            starts_at = sub_cache.current_billing_term_starts_at or now - timedelta(
-                days=30
-            )
-            month_delta = relativedelta(now, starts_at).months
-            date_start = relativedelta(months=month_delta) + starts_at
-            date_stop = now
-
-        case constants.PREVIOUS_BILLING_PERIOD:
-            starts_at = sub_cache.current_billing_term_starts_at or now - timedelta(
-                days=30
-            )
-            month_delta = relativedelta(now, starts_at).months - 1
-            month_delta += relativedelta(now, starts_at).years * 12
-            date_start = relativedelta(months=month_delta) + starts_at
-            date_stop = relativedelta(months=month_delta + 1) + starts_at
-
-        case constants.NINETY_DAY_PERIOD:
-            date_start = now - relativedelta(days=90)
-            date_stop = now
-
     if settings.USE_POSTGRES_FOR_ANALYTICS:
-        kwargs = {
-            "organisation": organisation,
-            "environment_id": environment_id,
-            "project_id": project_id,
-        }
-        if date_start:
-            assert date_stop
-            kwargs["date_start"] = date_start  # type: ignore[assignment]
-            kwargs["date_stop"] = date_stop  # type: ignore[assignment]
-
-        return get_usage_data_from_local_db(**kwargs)  # type: ignore[arg-type]
-
-    kwargs = {
-        "organisation_id": organisation.id,
-        "environment_id": environment_id,
-        "project_id": project_id,
-    }
+        return get_usage_data_from_local_db(
+            organisation=organisation,
+            environment_id=environment_id,
+            project_id=project_id,
+            date_start=date_start,
+            date_stop=date_stop,
+            labels_filter=labels_filter,
+        )

-    if date_start:
-        assert date_stop
-        kwargs["date_start"] = date_start  # type: ignore[assignment]
-        kwargs["date_stop"] = date_stop  # type: ignore[assignment]
+    if settings.INFLUXDB_TOKEN:
+        return get_usage_data_from_influxdb(
+            organisation_id=organisation.id,
+            environment_id=environment_id,
+            project_id=project_id,
+            date_start=date_start,
+            date_stop=date_stop,
+            labels_filter=labels_filter,
+        )

-    return get_usage_data_from_influxdb(**kwargs)  # type: ignore[arg-type]
+    logger.warning(constants.NO_ANALYTICS_DATABASE_CONFIGURED_WARNING)
+    return []


@@ -105,6 +78,7 @@ def get_usage_data_from_local_db(
     project_id: int | None = None,
     date_start: datetime | None = None,
     date_stop: datetime | None = None,
+    labels_filter: Labels | None = None,
 ) -> List[UsageData]:
     if date_start is None:
         date_start = timezone.now() - timedelta(days=30)
@@ -127,28 +101,20 @@
     if environment_id:
         qs = qs.filter(environment_id=environment_id)

+    if labels_filter:
+        qs = qs.filter(labels__contains=labels_filter)
+
     qs = (
         qs.filter(  # type: ignore[assignment]
             created_at__date__lte=date_stop,
             created_at__date__gt=date_start,
         )
         .order_by("created_at__date")
-        .values("created_at__date", "resource")
+        .values("created_at__date", "resource", "labels")
         .annotate(count=Sum("total_count"))
     )
-    data_by_day = {}
-    for row in qs:  # TODO Write proper mappers for this?
-        day = row["created_at__date"]
-        if day not in data_by_day:
-            data_by_day[day] = UsageData(day=day)
-        if column_name := Resource(row["resource"]).column_name:
-            setattr(
-                data_by_day[day],
-                column_name,
-                row["count"],
-            )
-
-    return data_by_day.values()  # type: ignore[return-value]
+    return map_annotated_api_usage_buckets_to_usage_data(qs)
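
A note on the new labels_filter path: Django's JSONField containment lookup (labels__contains) matches rows whose labels JSON includes every key/value pair of the given mapping, so buckets carrying extra labels still match. A minimal sketch of a call, with a hypothetical label key that is not taken from this PR:

# Hypothetical usage: restrict usage data to buckets labelled with a given
# SDK user agent; buckets carrying additional labels still match.
usage = get_usage_data_from_local_db(
    organisation=organisation,
    labels_filter={"user_agent": "python"},
)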


@@ -168,30 +134,50 @@ def get_total_events_count(organisation) -> int:  # type: ignore[no-untyped-def]


 def get_feature_evaluation_data(
-    feature: Feature, environment_id: int, period: int = 30
+    feature: Feature,
+    environment_id: int,
+    period: int = 30,
+    labels_filter: Labels | None = None,
 ) -> List[FeatureEvaluationData]:
     if settings.USE_POSTGRES_FOR_ANALYTICS:
         return get_feature_evaluation_data_from_local_db(
-            feature, environment_id, period
+            feature=feature,
+            environment_id=environment_id,
+            period=period,
+            labels_filter=labels_filter,
         )

+    if settings.INFLUXDB_TOKEN:
+        return get_feature_evaluation_data_from_influxdb(
+            feature_name=feature.name,
+            environment_id=environment_id,
+            period=f"{period}d",
+            labels_filter=labels_filter,
+        )
-    return get_feature_evaluation_data_from_influxdb(
-        feature_name=feature.name, environment_id=environment_id, period=f"{period}d"
-    )

+    logger.warning(constants.NO_ANALYTICS_DATABASE_CONFIGURED_WARNING)
+    return []


 def get_feature_evaluation_data_from_local_db(
-    feature: Feature, environment_id: int, period: int = 30
+    feature: Feature,
+    environment_id: int,
+    period: int = 30,
+    labels_filter: Labels | None = None,
 ) -> List[FeatureEvaluationData]:
+    filter = Q(
+        environment_id=environment_id,
+        bucket_size=constants.ANALYTICS_READ_BUCKET_SIZE,
+        feature_name=feature.name,
+        created_at__date__lte=timezone.now(),
+        created_at__date__gt=timezone.now() - timedelta(days=period),
+    )
+    if labels_filter:
+        filter &= Q(labels__contains=labels_filter)
     feature_evaluation_data = (
-        FeatureEvaluationBucket.objects.filter(
-            environment_id=environment_id,
-            bucket_size=constants.ANALYTICS_READ_BUCKET_SIZE,
-            feature_name=feature.name,
-            created_at__date__lte=timezone.now(),
-            created_at__date__gt=timezone.now() - timedelta(days=period),
-        )
+        FeatureEvaluationBucket.objects.filter(filter)
         .order_by("created_at__date")
-        .values("created_at__date", "feature_name", "environment_id")
+        .values("created_at__date", "feature_name", "environment_id", "labels")
         .annotate(count=Sum("total_count"))
     )
     usage_list = []
@@ -200,15 +186,61 @@ def get_feature_evaluation_data_from_local_db(
             FeatureEvaluationData(
                 day=data["created_at__date"],
                 count=data["count"],
+                labels=data["labels"],
             )
         )
     return usage_list


-def _get_environment_ids_for_org(organisation) -> List[int]:  # type: ignore[no-untyped-def]
+def _get_environment_ids_for_org(organisation: Organisation) -> list[int]:
     # We need to do this to prevent Django from generating a query that
     # references the environments and projects tables,
     # as they do not exist in the analytics database.
     return [
         e.id for e in Environment.objects.filter(project__organisation=organisation)
     ]
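
A note on the Q-object composition in get_feature_evaluation_data_from_local_db above: the base predicate is built as a single Q object, and the optional labels predicate is AND-ed in with &=. A standalone sketch of the pattern, using hypothetical values:

from django.db.models import Q

# Base predicate; & on Q objects combines the conditions with SQL AND.
predicate = Q(environment_id=42, feature_name="my_feature")  # hypothetical values

labels_filter = {"user_agent": "python"}  # hypothetical labels
if labels_filter:
    predicate &= Q(labels__contains=labels_filter)

# FeatureEvaluationBucket.objects.filter(predicate) then applies both conditions.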


+def _get_start_date_and_stop_date_for_subscribed_organisation(
+    sub_cache: OrganisationSubscriptionInformationCache | None,
+    period: PeriodType | None = None,
+) -> tuple[datetime | None, datetime | None]:
+    """
+    Populate start and stop date for the given period
+    from the organisation's subscription information.
+    """
+    if period in {constants.CURRENT_BILLING_PERIOD, constants.PREVIOUS_BILLING_PERIOD}:
+        if not (sub_cache and sub_cache.is_billing_terms_dates_set()):
+            raise NotFound("No billing periods found for this organisation.")
+
+        if TYPE_CHECKING:
+            assert sub_cache
Comment on lines +212 to +217

emyller (Contributor):

    Context aside, this feels a bit fragile. Can we make sure sub_cache is not None before this assert statement?

Author (Member):

    We actually are doing it by raising NotFound, but mypy is too dumb to understand that.

emyller (Contributor, Jun 24, 2025):

    It raises only if period is found in that set, plus if sub_cache.is_billing_terms_dates_set() evaluates to true, which from here feels like sub_cache could still be None at L216. Perhaps tweaking the logical expression could help mypy? This is a nit unless there's actual risk of a type/attribute error.

Author (Member):

    Nope, it raises if sub_cache is None. (sub_cache and sub_cache.is_billing_terms_dates_set()) evaluates to None, and not (sub_cache and sub_cache.is_billing_terms_dates_set()) evaluates to False.

Author (Member):

    Sorry, I'm slow today — will fix
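
For the record, the guard does raise when sub_cache is None: the expression (sub_cache and ...) short-circuits to None, and not None is True. A standalone sketch demonstrating this (not code from the PR):

sub_cache = None

# `None and <anything>` short-circuits to None without evaluating the
# right-hand side, so no AttributeError can occur here.
guard = not (sub_cache and sub_cache.is_billing_terms_dates_set())

print(guard)  # True -> the NotFound branch is taken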


+    now = timezone.now()
+
+    match period:
+        case constants.CURRENT_BILLING_PERIOD:
+            starts_at = sub_cache.current_billing_term_starts_at or now - timedelta(
+                days=30
+            )
+            month_delta = relativedelta(now, starts_at).months
+            date_start = relativedelta(months=month_delta) + starts_at
+            date_stop = now
+
+        case constants.PREVIOUS_BILLING_PERIOD:
+            starts_at = sub_cache.current_billing_term_starts_at or now - timedelta(
+                days=30
+            )
+            month_delta = relativedelta(now, starts_at).months - 1
+            month_delta += relativedelta(now, starts_at).years * 12
+            date_start = relativedelta(months=month_delta) + starts_at
+            date_stop = relativedelta(months=month_delta + 1) + starts_at
+
+        case constants.NINETY_DAY_PERIOD:
+            date_start = now - relativedelta(days=90)
+            date_stop = now
+
+        case _:
+            return None, None
+
+    return date_start, date_stop
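
The relativedelta arithmetic in the new helper is easiest to verify with concrete dates. A standalone sketch of the CURRENT_BILLING_PERIOD branch, using illustrative dates not taken from the PR:

from datetime import datetime

from dateutil.relativedelta import relativedelta

now = datetime(2025, 6, 24, 12, 0)  # illustrative "now"
starts_at = datetime(2025, 1, 10)  # illustrative billing term start

# Whole months elapsed since the term started (Jan 10 -> Jun 24 is 5 months).
month_delta = relativedelta(now, starts_at).months

# Shift the term start forward by that many whole months to get the start of
# the current monthly window inside the billing term.
date_start = relativedelta(months=month_delta) + starts_at

print(month_delta)  # 5
print(date_start)  # 2025-06-10 00:00:00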