Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DEV-6455] fix review first time not reviewd prs #3611

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -734,9 +734,14 @@ def _analyze(
**kwargs,
) -> np.ndarray:
result = np.full((len(min_times), len(facts)), self.nan, self.dtype)
result_mask = facts.notnull(
PullRequestFacts.f.first_comment_on_first_review,
) & facts.notnull(PullRequestFacts.f.first_review_request)
# facts were precomputed with first_comment_on_first_review field even though
# they have not real reviews
# extra filtering on last_review is then needed to get the mask where the metric is defined
result_mask = (
facts.notnull(PullRequestFacts.f.first_comment_on_first_review)
& facts.notnull(PullRequestFacts.f.first_review_request)
& facts.notnull(PullRequestFacts.f.last_review)
)
fc_on_fr = facts[PullRequestFacts.f.first_comment_on_first_review][result_mask]
fc_on_fr_in_range_mask = (min_times[:, None] <= fc_on_fr) & (fc_on_fr < max_times[:, None])
result_indexes = np.nonzero(result_mask)[0]
Expand Down
20 changes: 14 additions & 6 deletions server/athenian/api/internal/miners/github/pull_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -2903,27 +2903,29 @@ def __call__(self, pr: MinedPullRequest) -> PullRequestFacts:
)
if closed and first_review_exact and first_review_exact > closed:
first_review_exact = None
first_comment_on_first_review = first_review_exact or merged
if first_comment_on_first_review:
potential_first_comment_on_first_review = first_review_exact or merged
if potential_first_comment_on_first_review:
committed_dates = pr.commits[PullRequestCommit.committed_date.name]
try:
last_commit_before_first_review = committed_dates[
committed_dates <= first_comment_on_first_review
committed_dates <= potential_first_comment_on_first_review
].max()
except ValueError:
last_commit_before_first_review = None
if not (last_commit_before_first_review_own := bool(last_commit_before_first_review)):
last_commit_before_first_review = first_comment_on_first_review
last_commit_before_first_review = potential_first_comment_on_first_review
# force pushes that were lost
first_commit = nonemin(first_commit, last_commit_before_first_review)
last_commit = nonemax(last_commit, first_commit)
first_review_request_backup = nonemin(
nonemax(created, last_commit_before_first_review), first_comment_on_first_review,
nonemax(created, last_commit_before_first_review),
potential_first_comment_on_first_review,
)
else:
last_commit_before_first_review = None
last_commit_before_first_review_own = False
first_review_request_backup = None

first_review_request_exact = pr.review_requests.nonemin(
PullRequestReviewRequest.created_at.name,
)
Expand All @@ -2934,7 +2936,7 @@ def __call__(self, pr: MinedPullRequest) -> PullRequestFacts:
if (
first_review_request_backup
and first_review_request
and first_review_request > first_comment_on_first_review
and first_review_request > potential_first_comment_on_first_review
):
# we cannot request a review after we received a review
first_review_request = first_review_request_backup
Expand All @@ -2951,6 +2953,12 @@ def __call__(self, pr: MinedPullRequest) -> PullRequestFacts:
if first_review_request and not first_review_exact:
first_review_request = nonemax(first_review_request, last_commit)

# only PR with a review will have `first_comment_on_first_review` filled
if potential_first_comment_on_first_review and first_review_exact:
first_comment_on_first_review = potential_first_comment_on_first_review
else:
first_comment_on_first_review = None

if closed:
if first_review_request and first_review_request > closed:
first_review_request = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ class TestCalcMetricsPRs(BaseCalcMetricsPRsTest):
(PullRequestMetricID.PR_REJECTED, 3),
(PullRequestMetricID.PR_CLOSED, 51),
(PullRequestMetricID.PR_DONE, 22),
(PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, 51),
(PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT, 51),
(PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT_Q, 51),
(PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, 46),
(PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT, 46),
(PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT_Q, 46),
(PullRequestMetricID.PR_SIZE, 51),
(PullRequestMetricID.PR_DEPLOYMENT_TIME, 0), # because pdb is empty
],
Expand Down
72 changes: 19 additions & 53 deletions server/tests/controllers/test_histograms_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,17 +466,9 @@ async def test_calc_histogram_prs_ticks(client, headers):

async def test_calc_histogram_prs_groups(client, headers):
body = {
"for": [
{
"repositories": ["{1}"],
"repogroups": [[0], [0]],
},
],
"for": [{"repositories": ["{1}"], "repogroups": [[0], [0]]}],
"histograms": [
{
"metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME,
"scale": "log",
},
{"metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, "scale": "log"},
],
"date_from": "2017-10-13",
"date_to": "2018-01-23",
Expand All @@ -499,36 +491,16 @@ async def test_calc_histogram_prs_groups(client, headers):
"for": {"repositories": ["{1}"]},
"metric": "pr-wait-first-review-time",
"scale": "log",
"ticks": [
"60s",
"184s",
"569s",
"1752s",
"5398s",
"16624s",
"51201s",
"157688s",
"485648s",
],
"frequencies": [6, 5, 8, 5, 4, 14, 10, 2],
"interquartile": {"left": "790s", "right": "45167s"},
"ticks": ["60s", "216s", "783s", "2829s", "10221s", "36927s", "133408s", "481972s"],
"frequencies": [6, 6, 5, 6, 5, 8, 2],
"interquartile": {"left": "661s", "right": "37901s"},
}


async def test_calc_histogram_prs_lines(client, headers):
body = {
"for": [
{
"repositories": ["{1}"],
"lines": [0, 100, 100500],
},
],
"histograms": [
{
"metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME,
"scale": "log",
},
],
"for": [{"repositories": ["{1}"], "lines": [0, 100, 100500]}],
"histograms": [{"metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, "scale": "log"}],
"date_from": "2017-10-13",
"date_to": "2018-05-23",
"exclude_inactive": False,
Expand All @@ -551,27 +523,26 @@ async def test_calc_histogram_prs_lines(client, headers):
"scale": "log",
"ticks": [
"60s",
"195s",
"637s",
"2078s",
"6776s",
"22090s",
"72014s",
"234763s",
"765318s",
"226s",
"856s",
"3237s",
"12234s",
"46234s",
"174714s",
"660223s",
"2494902s",
],
"frequencies": [4, 8, 7, 8, 4, 22, 8, 6, 1],
"interquartile": {"left": "1762s", "right": "62368s"},
"frequencies": [5, 5, 6, 5, 7, 9, 5, 2],
"interquartile": {"left": "1347s", "right": "58215s"},
}

assert body[1] == {
"for": {"repositories": ["{1}"], "lines": [100, 100500]},
"metric": "pr-wait-first-review-time",
"scale": "log",
"ticks": ["60s", "273s", "1243s", "5661s", "25774s", "117343s", "534220s"],
"frequencies": [8, 4, 1, 4, 7, 2],
"interquartile": {"left": "60s", "right": "49999s"},
"frequencies": [8, 4, 1, 3, 6, 2],
"interquartile": {"left": "60s", "right": "44357s"},
}


Expand Down Expand Up @@ -650,12 +621,7 @@ async def test_calc_histogram_prs_logical(
"repositories": ["github.com/src-d/go-git/alpha", "github.com/src-d/go-git/beta"],
},
],
"histograms": [
{
"metric": PullRequestMetricID.PR_REVIEW_TIME,
"scale": "log",
},
],
"histograms": [{"metric": PullRequestMetricID.PR_REVIEW_TIME, "scale": "log"}],
"date_from": "2015-10-13",
"date_to": "2020-05-23",
"exclude_inactive": False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1094,7 +1094,7 @@ async def test_calc_pull_request_metrics_deep_filters(
],
dtype=object,
)
np.testing.assert_array_equal(np.array(metrics.tolist(), dtype=object), ground_truth)
assert_array_equal(np.array(metrics.tolist(), dtype=object), ground_truth)


def test_pull_request_metric_calculator_ensemble_accuracy(pr_samples):
Expand Down Expand Up @@ -2074,6 +2074,43 @@ def _mk_pr(cls, review_request: datetime, approved: Optional[datetime]) -> PullR
)


class TestWaitFirstReviewCalculator:
def test_no_first_review_request_exact(self) -> None:
quantiles = (0, 1)
min_times = dt64arr_us(dt(2022, 1, 1))
max_times = dt64arr_us(dt(2022, 2, 1))

calc = WaitFirstReviewTimeCalculator(quantiles=quantiles)

prs = [
PullRequestFactsFactory(
first_review_request=np.datetime64(datetime(2022, 1, 1)),
last_review=np.datetime64(datetime(2022, 1, 2)),
first_comment_on_first_review=np.datetime64(datetime(2022, 1, 2)),
),
PullRequestFactsFactory(
first_review_request=np.datetime64(datetime(2022, 1, 1)),
last_review=np.datetime64(datetime(2022, 1, 3)),
first_comment_on_first_review=np.datetime64(datetime(2022, 1, 3)),
),
PullRequestFactsFactory(
first_review_request=np.datetime64(datetime(2022, 1, 1)),
last_review=None,
first_comment_on_first_review=np.datetime64(datetime(2022, 1, 2)),
),
]
facts = df_from_structs(prs)
groups_mask = np.full((1, len(prs)), True, bool)

calc(facts, min_times, max_times, None, groups_mask)

assert_array_equal(
calc.peek[0], np.array([3600 * 24, 3600 * 24 * 2, "NaT"], dtype="timedelta64[s]"),
)
# only two PRs included in the average
assert calc.values[0][0].value == np.timedelta64(3600 * 36, "s")


class TestWaitFirstReviewTimeBelowThresholdRatio:
def test_base(self) -> None:
quantiles = (0, 1)
Expand Down
64 changes: 63 additions & 1 deletion server/tests/internal/miners/github/test_pull_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from tests.testutils.db import DBCleaner, models_insert
from tests.testutils.factory import metadata as md_factory
from tests.testutils.factory.common import DEFAULT_JIRA_ACCOUNT_ID, DEFAULT_MD_ACCOUNT_ID
from tests.testutils.factory.wizards import insert_repo, pr_models
from tests.testutils.time import dt


Expand Down Expand Up @@ -487,7 +488,8 @@ def validate_pull_request_facts(prmeta: Dict[str, Any], prt: PullRequestFacts):
assert prt.first_review_request <= prt.first_comment_on_first_review
else:
assert not prt.last_review
assert not prt.last_commit_before_first_review
if prt.reviews:
assert not prt.last_commit_before_first_review
if prt.first_review_request_exact:
assert prt.first_review_request
if prt.approved:
Expand Down Expand Up @@ -726,6 +728,66 @@ async def test_pr_facts_miner_empty_releases(
validate_pull_request_facts(*prt)


@with_defer
async def test_pr_facts_first_comment_first_review_not_reviewd(mdb_rw, pdb, rdb, sdb, pr_miner):
async with DBCleaner(mdb_rw) as mdb_cleaner:
repo = md_factory.RepositoryFactory(node_id=99, full_name="o/r")
await insert_repo(repo, mdb_cleaner, mdb_rw, sdb)

models = [
*pr_models(
99,
1,
1,
repository_full_name="o/r",
created_at=dt(2011, 1, 2),
closed_at=dt(2011, 1, 15),
merged_at=dt(2011, 1, 15),
user_login="user0",
merged_by_login="user0",
),
]
mdb_cleaner.add_models(*models)
await models_insert(mdb_rw, *models)

prefixer = await Prefixer.load((DEFAULT_MD_ACCOUNT_ID,), mdb_rw, None)
settings = Settings.from_account(1, prefixer, sdb, mdb_rw, None, None)
release_settings = await settings.list_release_matches()
branches, default_branches = await BranchMiner.load_branches(
{"o/r"}, prefixer, 1, (DEFAULT_MD_ACCOUNT_ID,), mdb_rw, None, None,
)

miner, _, _, _ = await pr_miner.mine(
date(2011, 1, 1),
date(2012, 1, 1),
dt(2011, 1, 1),
dt(2012, 1, 1),
{"o/r"},
{},
LabelFilter.empty(),
JIRAFilter.empty(),
True,
branches,
default_branches,
False,
release_settings,
LogicalRepositorySettings.empty(),
prefixer,
1,
(DEFAULT_MD_ACCOUNT_ID,),
mdb_rw,
pdb,
rdb,
None,
)
facts_miner = PullRequestFactsMiner({})
prs_facts = [facts_miner(mined_pr) for mined_pr in miner]
pr_facts = prs_facts[0]
assert pr_facts.first_review_request == np.datetime64(datetime(2011, 1, 15))
assert pr_facts.first_review_request_exact is None
assert pr_facts.first_comment_on_first_review is None


@with_defer
async def test_pr_mine_by_ids(
branches,
Expand Down