Skip to content

Commit

Permalink
Add usage related stuff + fix popularity related stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Nov 10, 2023
1 parent d2911f7 commit 40bfd62
Show file tree
Hide file tree
Showing 24 changed files with 1,005 additions and 237 deletions.
3 changes: 2 additions & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,10 @@ omit = [
"src/offspot_metrics_backend/__about__.py",
"src/offspot_metrics_backend/main.py", # glue logic
"src/offspot_metrics_backend/business/processor.py", # glue logic
"src/offspot_metrics_backend/business/indicators/content_visit.py", # straightforward implementation, pointless to test
"src/offspot_metrics_backend/business/indicators/package.py", # straightforward implementation, pointless to test
"src/offspot_metrics_backend/business/indicators/shared_files.py", # straightforward implementation, pointless to test
"src/offspot_metrics_backend/business/indicators/uptime.py", # straightforward implementation, pointless to test
"src/offspot_metrics_backend/business/indicators/total_usage.py", # straightforward implementation, pointless to test
"src/offspot_metrics_backend/simulator.py", # developer tool
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pydantic import BaseModel, Field, ValidationError

from offspot_metrics_backend.business.input_generator import (
CommonInputGenerator,
EdupiInputGenerator,
FilesInputGenerator,
InputGenerator,
Expand Down Expand Up @@ -54,15 +55,23 @@ def __init__(self, config: ReverseProxyConfig) -> None:
self.generators: list[InputGenerator] = []
for file in config.files:
self.generators.append(
FilesInputGenerator(host=file.host, title=file.title)
FilesInputGenerator(host=file.host, package_title=file.title)
)
for zim in config.zims:
self.generators.append(
ZimInputGenerator(host=zim.host, zim_name=zim.zim_name, title=zim.title)
ZimInputGenerator(
host=zim.host, zim_name=zim.zim_name, package_title=zim.title
)
)
for app in config.apps:
if app.ident == "edupi.offspot.kiwix.org":
self.generators.append(EdupiInputGenerator(host=app.host))
self.generators.append(
EdupiInputGenerator(host=app.host, package_title=app.title),
)
else:
self.generators.append(
CommonInputGenerator(host=app.host, package_title=app.title),
)

def process(self, line: str) -> ProcessingResult:
"""Transform one Caddy log line into corresponding inputs"""
Expand Down
10 changes: 10 additions & 0 deletions backend/src/offspot_metrics_backend/business/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class TooWideUsageError(Exception):
    """Raised when a usage time range spans more minutes than expected"""


class WrongInputTypeError(Exception):
    """Raised when a received input is not of the expected type"""
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,4 @@ def process_input(self, input_: Input) -> None:
if not self.can_process_input(input_):
return
record = self.get_or_create_recorder(input_)
record.process_input(input_)
record.process_input(input_=input_)
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,42 @@
IntCounterRecorder,
Recorder,
)
from offspot_metrics_backend.business.inputs.content_visit import (
ContentHomeVisit as ContentHomeVisitInput,
from offspot_metrics_backend.business.inputs.input import Input
from offspot_metrics_backend.business.inputs.package import (
PackageHomeVisit as PackageHomeVisitInput,
)
from offspot_metrics_backend.business.inputs.content_visit import (
ContentItemVisit as ContentItemVisitInput,
from offspot_metrics_backend.business.inputs.package import (
PackageItemVisit as PackageItemVisitInput,
)
from offspot_metrics_backend.business.inputs.input import Input


class ContentHomeVisit(Indicator):
"""An indicator counting number of visit of a given content home page"""
class PackageHomeVisit(Indicator):
"""An indicator counting number of visit of a given package home page"""

unique_id = 1001

def can_process_input(self, input_: Input) -> bool:
return isinstance(input_, ContentHomeVisitInput)
return isinstance(input_, PackageHomeVisitInput)

def get_new_recorder(self) -> Recorder:
return IntCounterRecorder()

def get_dimensions_values(self, input_: Input) -> DimensionsValues:
input_ = cast(ContentHomeVisitInput, input_)
return DimensionsValues(input_.content, None, None)
input_ = cast(PackageHomeVisitInput, input_)
return DimensionsValues(input_.package_title, None, None)


class ContentItemVisit(Indicator):
"""An indicator counting number of visit of a given content object"""
class PackageItemVisit(Indicator):
"""An indicator counting number of visit of a given package item"""

unique_id = 1002

def can_process_input(self, input_: Input) -> bool:
return isinstance(input_, ContentItemVisitInput)
return isinstance(input_, PackageItemVisitInput)

def get_new_recorder(self) -> Recorder:
return IntCounterRecorder()

def get_dimensions_values(self, input_: Input) -> DimensionsValues:
input_ = cast(ContentItemVisitInput, input_)
return DimensionsValues(input_.content, input_.item, None)
input_ = cast(PackageItemVisitInput, input_)
return DimensionsValues(input_.package_title, input_.item_path, None)
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
import abc
import datetime
import math

from offspot_metrics_backend.business.inputs.input import Input
from offspot_metrics_backend.business.exceptions import (
TooWideUsageError,
WrongInputTypeError,
)
from offspot_metrics_backend.business.inputs.input import Input, TimedInput


def _get_isoformat_from_minutes(minutes: int) -> str:
    """Render a count of minutes since the Unix epoch as an ISO 8601 string

    The timestamp is converted with `datetime.datetime.fromtimestamp` without
    any timezone argument, hence the result is a naive datetime rendered in the
    local time of the machine running the code.
    """
    seconds_since_epoch = minutes * 60
    moment = datetime.datetime.fromtimestamp(seconds_since_epoch)
    return moment.isoformat()


class Recorder(abc.ABC):
Expand Down Expand Up @@ -53,5 +63,84 @@ def state(self) -> str:
return f"{self.counter}"

def restore_state(self, value: str):
"""Return a serialized representation of recorder internal state"""
"""Restore the recorder internal state from its serialized representation"""
self.counter = int(value)


class UsageRecorder(Recorder):
    """Recorder counting the number of minutes of activity using slots

    If any input is received during a slot (10 minutes), the slot is marked as
    active. The final value is the count of all active slots multiplied by the
    slot duration, capped to `max_active_intervals` slots.

    Slots are not aligned on the wall clock: they are aligned on the minute of
    the first input received, which is stored as the first item of
    `active_interval_starts`.
    """

    # duration of one slot, in minutes
    slot_duration = 10
    # maximum number of slots which can be counted as active
    max_active_intervals = 6
    # maximum number of minutes accepted between the first interval start and
    # any subsequent input; slightly wider than 6 slots of 10 minutes
    max_time_range = 62

    def __init__(self) -> None:
        # start minute (minutes since epoch) of every slot marked as active
        self.active_interval_starts: list[int] = []

    def process_input(
        self,
        input_: Input,
    ) -> None:
        """Processing an input consists in updating active starts list

        Raises:
            WrongInputTypeError: if the input is not a `TimedInput`
            TooWideUsageError: if the input occurs more than `max_time_range`
                minutes after the first active interval start
        """
        if not isinstance(input_, TimedInput):
            raise WrongInputTypeError(
                f"{UsageRecorder.__name__} recorder can only process "
                f"{TimedInput.__name__} inputs"
            )
        # NOTE(review): assumes `ts` is a datetime-like object exposing
        # `timestamp()` in seconds - confirm against TimedInput definition
        active_minute = math.floor(input_.ts.timestamp() / 60)

        if not self.active_interval_starts:
            # If there are no active intervals yet, use the current minute
            active_interval_start = active_minute
        else:
            first_start = self.active_interval_starts[0]

            # Check for TooWideUsageError
            time_range = active_minute - first_start
            if time_range > self.max_time_range:
                raise TooWideUsageError(
                    f"Time range is too big ({time_range} mins from"
                    f" {_get_isoformat_from_minutes(first_start)} to"
                    f" {_get_isoformat_from_minutes(active_minute)})"
                )

            if time_range > self.max_active_intervals * self.slot_duration:
                # When input happened just after the max active intervals,
                # attribute it to the last allowed interval
                active_interval_start = first_start + self.slot_duration * (
                    self.max_active_intervals - 1
                )
            else:
                # Calculate the start time of the interval containing the input,
                # aligned to slot_duration from the first interval start
                active_interval_start = (
                    active_minute - (active_minute - first_start) % self.slot_duration
                )

        # An interval start must never be recorded twice, otherwise the slot
        # would be counted twice in `value`. This matters for inputs attributed
        # to the last allowed interval above: their minute is outside of every
        # recorded interval, so the coverage check below does not detect that
        # the last interval is already active.
        if active_interval_start in self.active_interval_starts:
            return

        # Check if the input minute is already covered by an interval of the list
        # NOTE(review): the upper bound is inclusive, so an input happening
        # exactly `slot_duration` minutes after an interval start is attributed
        # to that interval - confirm this is intended
        if not any(
            interval_start <= active_minute <= interval_start + self.slot_duration
            for interval_start in self.active_interval_starts
        ):
            # Add the start time of the new interval
            self.active_interval_starts.append(active_interval_start)

    @property
    def value(self) -> int:
        """Retrieving the value consists in counting active slots

        The result is expressed in minutes and never exceeds
        `max_active_intervals * slot_duration`.
        """
        total_active_time = len(self.active_interval_starts) * self.slot_duration
        return min(total_active_time, self.max_active_intervals * self.slot_duration)

    @property
    def state(self) -> str:
        """Return a serialized representation of recorder internal state

        The state is the comma-separated list of active interval starts; it is
        an empty string when no interval is active yet.
        """
        return ",".join(str(start) for start in self.active_interval_starts)

    def restore_state(self, value: str):
        """Restore the recorder internal state from its serialized representation

        An empty string (state of a recorder without any active interval) is
        restored as an empty list of interval starts.
        """
        # `"".split(",")` returns `[""]`, hence empty items must be skipped to
        # avoid calling `int("")`
        self.active_interval_starts = [
            int(start) for start in value.split(",") if start
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import cast

from offspot_metrics_backend.business.indicators.dimensions import DimensionsValues
from offspot_metrics_backend.business.indicators.indicator import Indicator
from offspot_metrics_backend.business.indicators.recorder import (
Recorder,
UsageRecorder,
)
from offspot_metrics_backend.business.inputs.input import Input
from offspot_metrics_backend.business.inputs.package import PackageRequest


class TotalUsageOverall(Indicator):
    """Indicator measuring usage activity, all packages taken together"""

    unique_id = 1005

    def get_new_recorder(self) -> Recorder:
        """Create the usage recorder backing this indicator"""
        return UsageRecorder()

    def can_process_input(self, input_: Input) -> bool:
        """Tell whether the input is a package request, the only kind handled"""
        return isinstance(input_, PackageRequest)

    def get_dimensions_values(self, input_: Input) -> DimensionsValues:  # noqa: ARG002
        """Return dimensions with no value set, whatever the input received"""
        return DimensionsValues(None, None, None)


class TotalUsageByPackage(Indicator):
    """Indicator measuring usage activity, package by package"""

    unique_id = 1006

    def get_new_recorder(self) -> Recorder:
        """Create the usage recorder backing this indicator"""
        return UsageRecorder()

    def can_process_input(self, input_: Input) -> bool:
        """Tell whether the input is a package request, the only kind handled"""
        return isinstance(input_, PackageRequest)

    def get_dimensions_values(self, input_: Input) -> DimensionsValues:
        """Return dimensions whose first value is the package title of the input"""
        request = cast(PackageRequest, input_)
        return DimensionsValues(request.package_title, None, None)
75 changes: 52 additions & 23 deletions backend/src/offspot_metrics_backend/business/input_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

from pydantic.dataclasses import dataclass

from offspot_metrics_backend.business.inputs.content_visit import (
ContentHomeVisit,
ContentItemVisit,
)
from offspot_metrics_backend.business.inputs.input import Input
from offspot_metrics_backend.business.inputs.package import (
PackageHomeVisit,
PackageItemVisit,
PackageRequest,
)
from offspot_metrics_backend.business.inputs.shared_files import (
SharedFilesOperation,
SharedFilesOperationKind,
Expand Down Expand Up @@ -38,7 +39,7 @@ class ZimInputGenerator(InputGenerator):
"""A generator for zim packages"""

zim_name: str
title: str
package_title: str

zim_re = re.compile(r"^/content/(?P<zim_name>.+?)(?P<zim_path>/.*)?$")

Expand All @@ -55,53 +56,81 @@ def process(self, log: LogData) -> list[Input]:
return []

if zim_path is None or zim_path == "/":
return [ContentHomeVisit(content=self.title)]
else:
if log.content_type is None:
return []
return [
PackageHomeVisit(package_title=self.package_title),
PackageRequest(ts=log.ts, package_title=self.package_title),
]

if (
"html" in log.content_type
or "epub" in log.content_type
or "pdf" in log.content_type
):
return [ContentItemVisit(content=self.title, item=zim_path)]
else:
return []
if log.content_type is None:
return [PackageRequest(ts=log.ts, package_title=self.package_title)]

if (
"html" in log.content_type
or "epub" in log.content_type
or "pdf" in log.content_type
):
return [
PackageRequest(ts=log.ts, package_title=self.package_title),
PackageItemVisit(package_title=self.package_title, item_path=zim_path),
]
else:
return [PackageRequest(ts=log.ts, package_title=self.package_title)]


@dataclass
class EdupiInputGenerator(InputGenerator):
"""A specific generator for edupi package"""

package_title: str

def process(self, log: LogData) -> list[Input]:
"""Transform one log event identified as edupi into inputs"""
if (
log.method == "POST"
and log.status == HTTPStatus.CREATED
and log.uri == "/api/documents/"
):
return [SharedFilesOperation(kind=SharedFilesOperationKind.FILE_CREATED)]
return [
PackageRequest(ts=log.ts, package_title=self.package_title),
SharedFilesOperation(kind=SharedFilesOperationKind.FILE_CREATED),
]
elif (
log.method == "DELETE"
and log.status == HTTPStatus.NO_CONTENT
and log.uri.startswith("/api/documents/")
and len(log.uri) > len("/api/documents/")
):
return [SharedFilesOperation(kind=SharedFilesOperationKind.FILE_DELETED)]
return [
PackageRequest(ts=log.ts, package_title=self.package_title),
SharedFilesOperation(kind=SharedFilesOperationKind.FILE_DELETED),
]
else:
return []
return [PackageRequest(ts=log.ts, package_title=self.package_title)]


@dataclass
class FilesInputGenerator(InputGenerator):
"""A generator for file packages"""

title: str
package_title: str

def process(self, log: LogData) -> list[Input]:
"""Process a given log line and generate corresponding inputs"""
if log.uri == "/":
return [ContentHomeVisit(content=self.title)]
return [
PackageRequest(ts=log.ts, package_title=self.package_title),
PackageHomeVisit(package_title=self.package_title),
]
else:
return []
return [PackageRequest(ts=log.ts, package_title=self.package_title)]


@dataclass
class CommonInputGenerator(InputGenerator):
    """Generator producing only the inputs which do not depend on package type"""

    # title of the package, copied into every generated input
    package_title: str

    def process(self, log: LogData) -> list[Input]:
        """Turn one log entry into a single package request input"""
        request = PackageRequest(ts=log.ts, package_title=self.package_title)
        return [request]
Loading

0 comments on commit 40bfd62

Please sign in to comment.