Skip to content

Commit

Permalink
Bundle Analysis: Add asset comparison and fetching PR files (#480)
Browse files Browse the repository at this point in the history
* initial commit

* add unit tests

* add in match by uuid and tests
  • Loading branch information
JerrySentry authored Jan 22, 2025
1 parent 09d69db commit e854f50
Show file tree
Hide file tree
Showing 4 changed files with 506 additions and 66 deletions.
114 changes: 82 additions & 32 deletions shared/bundle_analysis/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
BundleAnalysisReport,
BundleReport,
BundleRouteReport,
ModuleReport,
)
from shared.bundle_analysis.storage import BundleAnalysisReportLoader
from shared.django_apps.core.models import Repository
Expand Down Expand Up @@ -77,11 +78,66 @@ class AssetChange(BaseChange):
"""

asset_name: str
percentage_delta: float
size_base: int
size_head: int


AssetMatch = Tuple[Optional[AssetReport], Optional[AssetReport]]


class AssetComparison:
def __init__(
self,
base_asset_report: Optional[AssetReport] = None,
head_asset_report: Optional[AssetReport] = None,
):
self.base_asset_report = base_asset_report
self.head_asset_report = head_asset_report

@sentry_sdk.trace
def asset_change(self) -> AssetChange:
if self.base_asset_report is None:
return AssetChange(
asset_name=self.head_asset_report.name,
change_type=AssetChange.ChangeType.ADDED,
size_delta=self.head_asset_report.size,
percentage_delta=100,
size_base=0,
size_head=self.head_asset_report.size,
)
elif self.head_asset_report is None:
return AssetChange(
asset_name=self.base_asset_report.name,
change_type=AssetChange.ChangeType.REMOVED,
size_delta=-self.base_asset_report.size,
percentage_delta=-100.0,
size_base=self.base_asset_report.size,
size_head=0,
)
else:
size_delta = self.head_asset_report.size - self.base_asset_report.size
percentage_delta = round(
(size_delta / self.base_asset_report.size) * 100, 2
)
return AssetChange(
asset_name=self.head_asset_report.name,
change_type=AssetChange.ChangeType.CHANGED,
size_delta=size_delta,
percentage_delta=percentage_delta,
size_base=self.base_asset_report.size,
size_head=self.head_asset_report.size,
)

def contributing_modules(
self, pr_changed_files: Optional[List[str]] = None
) -> List[ModuleReport]:
asset_report = self.head_asset_report
if asset_report is None:
return []
return asset_report.modules(pr_changed_files)


class BundleComparison:
def __init__(
self, base_bundle_report: BundleReport, head_bundle_report: BundleReport
Expand All @@ -95,7 +151,7 @@ def total_size_delta(self) -> int:
return head_size - base_size

@sentry_sdk.trace
def asset_changes(self) -> List[AssetChange]:
def asset_comparisons(self) -> List[AssetComparison]:
# this groups assets by name
# there can be multiple assets with the same name and we
# need to try and match them across base and head reports
Expand All @@ -119,29 +175,10 @@ def asset_changes(self) -> List[AssetChange]:
if asset_name not in asset_names:
matches += self._match_assets(asset_reports, [])

changes = []
for base_asset_report, head_asset_report in matches:
if base_asset_report is None:
change = AssetChange(
asset_name=head_asset_report.name,
change_type=AssetChange.ChangeType.ADDED,
size_delta=head_asset_report.size,
)
elif head_asset_report is None:
change = AssetChange(
asset_name=base_asset_report.name,
change_type=AssetChange.ChangeType.REMOVED,
size_delta=-base_asset_report.size,
)
else:
change = AssetChange(
asset_name=head_asset_report.name,
change_type=AssetChange.ChangeType.CHANGED,
size_delta=head_asset_report.size - base_asset_report.size,
)
changes.append(change)

return changes
return [
AssetComparison(base_asset_report, head_asset_report)
for base_asset_report, head_asset_report in matches
]

def _match_assets(
self,
Expand All @@ -153,9 +190,11 @@ def _match_assets(
This method attempts to pick the most likely matching of assets between
base and head (so as to track their changes through time).
The current approach is fairly naive and just picks the asset with the
closest size. There are probably better ways of doing this that we can
improve upon in the future.
Current approach:
1. Pick asset with the same UUID. This means the base and head assets have either of:
- same hashed name
- same modules by name
2. Pick asset with the closest size
"""
n = max([len(base_asset_reports), len(head_asset_reports)])
matches: List[AssetMatch] = []
Expand All @@ -169,13 +208,24 @@ def _match_assets(
# no more base assets to match against
matches.append((None, head_asset_report))
else:
# try and find the most "similar" base asset
size_deltas = {
abs(head_asset_report.size - base_bundle.size): base_bundle
# 1. Pick asset with the same UUID
base_asset_report_uuids = {
base_bundle.uuid: base_bundle
for base_bundle in base_asset_reports
}
min_delta = min(size_deltas.keys())
base_asset_report = size_deltas[min_delta]
if head_asset_report.uuid in base_asset_report_uuids:
base_asset_report = base_asset_report_uuids[
head_asset_report.uuid
]

# 2. Pick asset with the closest size
else:
size_deltas = {
abs(head_asset_report.size - base_bundle.size): base_bundle
for base_bundle in base_asset_reports
}
min_delta = min(size_deltas.keys())
base_asset_report = size_deltas[min_delta]

matches.append((base_asset_report, head_asset_report))
base_asset_reports.remove(base_asset_report)
Expand Down
11 changes: 11 additions & 0 deletions shared/bundle_analysis/parsers/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import ijson
import sentry_sdk
from sqlalchemy import tuple_
from sqlalchemy.orm import Session as DbSession
from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound

Expand Down Expand Up @@ -160,6 +161,16 @@ def parse(self, path: str) -> Tuple[int, str]:
# but first we need to find the Asset by the hashed file name
dynamic_imports_list = self._parse_dynamic_imports()
if dynamic_imports_list:
self.db_session.execute(
DynamicImport.__table__.delete().where(
tuple_(DynamicImport.chunk_id, DynamicImport.asset_id).in_(
[
(item["chunk_id"], item["asset_id"])
for item in dynamic_imports_list
]
)
)
)
insert_dynamic_imports = DynamicImport.__table__.insert().values(
dynamic_imports_list
)
Expand Down
20 changes: 16 additions & 4 deletions shared/bundle_analysis/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,28 @@ def uuid(self) -> str:
def asset_type(self) -> AssetType:
return self.asset.asset_type

def modules(self) -> List[ModuleReport]:
def modules(
self, pr_changed_files: Optional[List[str]] = None
) -> List[ModuleReport]:
with get_db_session(self.db_path) as session:
modules = (
query = (
session.query(Module)
.join(Module.chunks)
.join(Chunk.assets)
.filter(Asset.id == self.asset.id)
.all()
)
return [ModuleReport(self.db_path, module) for module in modules]

# Apply additional filter if pr_changed_files is provided
if pr_changed_files:
# Normalize pr_changed_files to have './' prefix for relative paths
normalized_files = {
f"./{file}" if not file.startswith("./") else file
for file in pr_changed_files
}
query = query.filter(Module.name.in_(normalized_files))

modules = query.all()
return [ModuleReport(self.db_path, module) for module in modules]

def routes(self) -> Optional[List[str]]:
plugin_name = self.bundle_info.get("plugin_name")
Expand Down
Loading

0 comments on commit e854f50

Please sign in to comment.