Skip to content

Commit

Permalink
Corrected logic and naming around "build result activity".
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderMakarov committed Dec 19, 2023
1 parent 96e3e46 commit 04d5c78
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 33 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,15 @@ Everything is configured in [config.py](/activity_merger/config/config.py).
- [x] Find default coeffs and intersects to BAFinder based on LogisticRegression. Prepare way to retrain it.
- [x] Shift "day border" to 5AM and make it configurable.
- [x] Add Google Calendar importer.
- [ ] Implement BIFinder to search Jira ID-s in Jira, Windows, IDEA, VSCode, Browser activities.
- [x] Implement BIFinder to search Jira ID-s in Jira, Windows, IDEA, VSCode, Browser activities.
- [ ] Make (JiraId)BIFinder correct the description of the resulting activity.
- [ ] Make JiraIdBIFinder respect max duration.
- [ ] Fix Google Calendar importer authentication to work under Enterprise Workspace.
- [ ] Add into get_activities.py flag/logic to deploy only resulting "debug bucket".
- [ ] Add into get_activities.py ability to append result to Google Spreadsheet.
- [ ] Add into get_activities.py flag to print only result.
- [ ] Implement BIFinder to search "activities edges".
- [ ] Try to summarise result activity description with help of LLM.
- [ ] Find a way to mark some events as "source of description" and others as "source of interval".
Git/Jira events are good for description, Window/IDEA - for interval, Browser/Outlook - both.
- [ ] Add more features into FromCandidatesByLogisticRegressionBIFinder.
Expand Down
59 changes: 29 additions & 30 deletions activity_merger/domain/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,14 +453,14 @@ def _extend_events_from_activitybs(events: List, activitybs: ActivityByStrategy,


def build_result_activity(
ba_interval: intervaltree.Interval,
basic_interval: intervaltree.Interval,
candidates_tree: intervaltree.IntervalTree,
is_only_good_strategies_for_description: bool,
metrics: Metrics,
) -> Activity:
"""
Builds "result" activity.
:param ba_interval: "Base" activity interval.
:param basic_interval: "Base" interval to build activity on.
:param candidates_tree: Tree of candidates to add parts of overlapped activity-by-strategy-es into
"result" activity.
:param is_only_good_strategies_for_description: Flag to use for "result" activity description
Expand All @@ -469,16 +469,16 @@ def build_result_activity(
:return: "Result" activity.
"""
# Find all overlapping activities, even including those which were used for "basic interval".
overlapping_intervals = candidates_tree.overlap(ba_interval.begin, ba_interval.end)
overlapping_intervals = candidates_tree.overlap(basic_interval.begin, basic_interval.end)
LOG.info("Basic interval is overlapped by %d 'candidate' activities.", len(overlapping_intervals))
ra_duration = ba_interval.length().total_seconds()
ra_duration = basic_interval.length().total_seconds()
ra_events = []
overlapping_activities: List[ActivityByStrategy] = []
for interval in overlapping_intervals:
activitybs: ActivityByStrategy = interval.data
boundaries = activitybs.strategy.in_trustable_boundaries
# 1. If BA overlaps activity completely then just concatenate data from it into BA.
if ba_interval.contains_interval(interval):
if basic_interval.contains_interval(interval):
ra_events.extend(activitybs.events) # All events in interval for sure.
overlapping_activities.append(activitybs)
metrics.incr("activities placed completely inside basic interval", interval.length().total_seconds())
Expand All @@ -496,15 +496,15 @@ def build_result_activity(
)
continue
# 2. Handle case when BA overlaps the start of the activity.
if ba_interval.contains_point(interval.begin):
if basic_interval.contains_point(interval.begin):
if boundaries == IntervalBoundaries.START:
# If activity is `in_trustable_boundaries=start` then concatenate data from it into BA.
_extend_events_from_activitybs(ra_events, activitybs, ba_interval)
_extend_events_from_activitybs(ra_events, activitybs, basic_interval)
overlapping_activities.append(activitybs)
metrics.incr("activities absorbed by basic interval at the start", interval.length().total_seconds())
elif boundaries == IntervalBoundaries.DIM:
# Split activity and concatenate last part with BA. First part is not needed anyway.
split_activity = _cut_activity_end(activitybs, ba_interval.end)
split_activity = _cut_activity_end(activitybs, basic_interval.end)
ra_events.extend(split_activity.events) # All events in interval for sure.
overlapping_activities.append(split_activity)
metrics.incr("activities enhancing basic interval by the start", split_activity.duration())
Expand All @@ -515,16 +515,16 @@ def build_result_activity(
)
continue
# 3. Handle case when BA overlaps the end of activity.
if ba_interval.contains_point(interval.end):
if basic_interval.contains_point(interval.end):
if boundaries == IntervalBoundaries.END:
# If activity is `in_trustable_boundaries=end` then concatenate data from it into BA.
_extend_events_from_activitybs(ra_events, activitybs, ba_interval)
_extend_events_from_activitybs(ra_events, activitybs, basic_interval)
overlapping_activities.append(activitybs)
metrics.incr("activities absorbed by basic interval at the end", interval.length().total_seconds())
elif boundaries == IntervalBoundaries.DIM:
# If activity is `in_trustable_boundaries=whole` then split activity,
# and concatenate first part with BA.
split_activity = _cut_activity_start(activitybs, ba_interval.begin)
split_activity = _cut_activity_start(activitybs, basic_interval.begin)
ra_events.extend(split_activity.events) # All events in interval for sure.
overlapping_activities.append(split_activity)
metrics.incr("activities enhancing basic interval by the end", split_activity.duration())
Expand All @@ -536,8 +536,8 @@ def build_result_activity(
continue
# 4. Handle case when BA itself is placed inside activity.
if boundaries == IntervalBoundaries.DIM:
tmp = _cut_activity_start(activitybs, ba_interval.begin)
tmp = _cut_activity_end(tmp, ba_interval.end)
tmp = _cut_activity_start(activitybs, basic_interval.begin)
tmp = _cut_activity_end(tmp, basic_interval.end)
ra_events.extend(tmp.events) # All events in interval for sure.
overlapping_activities.append(tmp)
metrics.incr("activities enhancing basic interval by the middle", ra_duration)
Expand All @@ -550,8 +550,8 @@ def build_result_activity(
name = _build_activity_name(overlapping_activities, metrics, ra_duration, is_only_good_strategies_for_description)
metrics.incr("result activities", ra_duration)
return Activity(
ba_interval.begin,
ba_interval.end,
basic_interval.begin,
basic_interval.end,
ra_events,
name,
)
Expand Down Expand Up @@ -894,22 +894,21 @@ def convert_basic_interval_to_ra(
"base intervals without other candidates on interval",
(bi_interval.end - bi_interval.begin).total_seconds(),
)
# Find all overlapping activities and make new `result` activity (RA).
ra = build_result_activity(bi_interval, candidates_tree, self.is_only_good_strategies_for_description, metrics)
# Check RA doesn't overlaps with existing result activities at the end.
result_tree_overlapped_with_ra_end: Set[intervaltree.Interval] = result_tree.at(ra.end_time)
# If we had interval in `result_tree` when added RA then we need to search next gap.
# Note that `result_tree_overlapped_with_ra_end` may contain few intervals not in order.
for existing_interval in result_tree_overlapped_with_ra_end:
if existing_interval.begin < ra.end_time:
# Chop end of resulting activity if it overlaps with already existing iterval in `result_tree`.
ra.end_time = existing_interval.begin
ra.events = [x for x in ra.events if x.timestamp < ra.end_time]
metrics.incr(
"result activities shrinked because it overlaps by end with alredy existing",
(ra.end_time - existing_interval.begin).total_seconds(),
)
# Add RA into the result tree.
result_tree_overlapped_with_ra_end: Set[intervaltree.Interval] = result_tree.at(bi_interval.end)
if result_tree_overlapped_with_ra_end:
# If we had interval in `result_tree` when added RA then we need to search next gap.
# Note that `result_tree_overlapped_with_ra_end` may contain few intervals not in order.
for existing_interval in result_tree_overlapped_with_ra_end:
if existing_interval.begin < bi_interval.end:
# Chop end of resulting activity if it overlaps with already existing interval in `result_tree`.
bi_interval.end = existing_interval.begin
metrics.incr(
"base intervals shrinked because overlap by end with result activities",
(bi_interval.end - existing_interval.begin).total_seconds(),
)
# Make new `result` activity (RA) and add into the result tree.
ra = build_result_activity(bi_interval, candidates_tree, self.is_only_good_strategies_for_description, metrics)
add_activity_to_result_tree(
ra=ra,
result_tree=result_tree,
Expand Down
4 changes: 2 additions & 2 deletions activity_merger/domain/output_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ def to_str(
# Print resulting activities as is. Order is important here.
activities_string = "\n ".join(str(x) for x in self.activities)
total_duration = sum((x.duration for x in self.activities), start=datetime.timedelta()).total_seconds()
desc += "Assembled %d activities on %s:\n %s" % (
desc += "Assembled activities:\n %s\n---- Total %d activities on %s. ----" % (
activities_string,
len(self.activities),
seconds_to_timedelta(total_duration),
activities_string,
)
return desc

Expand Down

0 comments on commit 04d5c78

Please sign in to comment.