diff --git a/server/athenian/api/internal/features/github/pull_request_metrics.py b/server/athenian/api/internal/features/github/pull_request_metrics.py index 2032a4c0f..c6d285457 100644 --- a/server/athenian/api/internal/features/github/pull_request_metrics.py +++ b/server/athenian/api/internal/features/github/pull_request_metrics.py @@ -734,9 +734,14 @@ def _analyze( **kwargs, ) -> np.ndarray: result = np.full((len(min_times), len(facts)), self.nan, self.dtype) - result_mask = facts.notnull( - PullRequestFacts.f.first_comment_on_first_review, - ) & facts.notnull(PullRequestFacts.f.first_review_request) + # facts were precomputed with first_comment_on_first_review field even though + # they have not real reviews + # extra filtering on last_review is then needed to get the mask where the metric is defined + result_mask = ( + facts.notnull(PullRequestFacts.f.first_comment_on_first_review) + & facts.notnull(PullRequestFacts.f.first_review_request) + & facts.notnull(PullRequestFacts.f.last_review) + ) fc_on_fr = facts[PullRequestFacts.f.first_comment_on_first_review][result_mask] fc_on_fr_in_range_mask = (min_times[:, None] <= fc_on_fr) & (fc_on_fr < max_times[:, None]) result_indexes = np.nonzero(result_mask)[0] diff --git a/server/athenian/api/internal/miners/github/pull_request.py b/server/athenian/api/internal/miners/github/pull_request.py index d379abd54..1a2c96b9f 100644 --- a/server/athenian/api/internal/miners/github/pull_request.py +++ b/server/athenian/api/internal/miners/github/pull_request.py @@ -2903,27 +2903,29 @@ def __call__(self, pr: MinedPullRequest) -> PullRequestFacts: ) if closed and first_review_exact and first_review_exact > closed: first_review_exact = None - first_comment_on_first_review = first_review_exact or merged - if first_comment_on_first_review: + potential_first_comment_on_first_review = first_review_exact or merged + if potential_first_comment_on_first_review: committed_dates = pr.commits[PullRequestCommit.committed_date.name] try: last_commit_before_first_review = committed_dates[ - committed_dates <= first_comment_on_first_review + committed_dates <= potential_first_comment_on_first_review ].max() except ValueError: last_commit_before_first_review = None if not (last_commit_before_first_review_own := bool(last_commit_before_first_review)): - last_commit_before_first_review = first_comment_on_first_review + last_commit_before_first_review = potential_first_comment_on_first_review # force pushes that were lost first_commit = nonemin(first_commit, last_commit_before_first_review) last_commit = nonemax(last_commit, first_commit) first_review_request_backup = nonemin( - nonemax(created, last_commit_before_first_review), first_comment_on_first_review, + nonemax(created, last_commit_before_first_review), + potential_first_comment_on_first_review, ) else: last_commit_before_first_review = None last_commit_before_first_review_own = False first_review_request_backup = None + first_review_request_exact = pr.review_requests.nonemin( PullRequestReviewRequest.created_at.name, ) @@ -2934,7 +2936,7 @@ def __call__(self, pr: MinedPullRequest) -> PullRequestFacts: if ( first_review_request_backup and first_review_request - and first_review_request > first_comment_on_first_review + and first_review_request > potential_first_comment_on_first_review ): # we cannot request a review after we received a review first_review_request = first_review_request_backup @@ -2951,6 +2953,12 @@ def __call__(self, pr: MinedPullRequest) -> PullRequestFacts: if first_review_request and not first_review_exact: first_review_request = nonemax(first_review_request, last_commit) + # only PR with a review will have `first_comment_on_first_review` filled + if potential_first_comment_on_first_review and first_review_exact: + first_comment_on_first_review = potential_first_comment_on_first_review + else: + first_comment_on_first_review = None + if closed: if first_review_request and first_review_request > closed: first_review_request = None diff --git a/server/tests/controllers/metrics_controller/test_calc_metrics_prs.py b/server/tests/controllers/metrics_controller/test_calc_metrics_prs.py index 201d8c1fd..5683d9897 100644 --- a/server/tests/controllers/metrics_controller/test_calc_metrics_prs.py +++ b/server/tests/controllers/metrics_controller/test_calc_metrics_prs.py @@ -91,9 +91,9 @@ class TestCalcMetricsPRs(BaseCalcMetricsPRsTest): (PullRequestMetricID.PR_REJECTED, 3), (PullRequestMetricID.PR_CLOSED, 51), (PullRequestMetricID.PR_DONE, 22), - (PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, 51), - (PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT, 51), - (PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT_Q, 51), + (PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, 46), + (PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT, 46), + (PullRequestMetricID.PR_WAIT_FIRST_REVIEW_COUNT_Q, 46), (PullRequestMetricID.PR_SIZE, 51), (PullRequestMetricID.PR_DEPLOYMENT_TIME, 0), # because pdb is empty ], diff --git a/server/tests/controllers/test_histograms_controller.py b/server/tests/controllers/test_histograms_controller.py index 25f13c5f7..58719487f 100644 --- a/server/tests/controllers/test_histograms_controller.py +++ b/server/tests/controllers/test_histograms_controller.py @@ -466,17 +466,9 @@ async def test_calc_histogram_prs_ticks(client, headers): async def test_calc_histogram_prs_groups(client, headers): body = { - "for": [ - { - "repositories": ["{1}"], - "repogroups": [[0], [0]], - }, - ], + "for": [{"repositories": ["{1}"], "repogroups": [[0], [0]]}], "histograms": [ - { - "metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, - "scale": "log", - }, + {"metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, "scale": "log"}, ], "date_from": "2017-10-13", "date_to": "2018-01-23", @@ -499,36 +491,16 @@ async def test_calc_histogram_prs_groups(client, headers): "for": {"repositories": ["{1}"]}, "metric": "pr-wait-first-review-time", "scale": "log", - "ticks": [ - "60s", - "184s", - "569s", - "1752s", - "5398s", - "16624s", - "51201s", - "157688s", - "485648s", - ], - "frequencies": [6, 5, 8, 5, 4, 14, 10, 2], - "interquartile": {"left": "790s", "right": "45167s"}, + "ticks": ["60s", "216s", "783s", "2829s", "10221s", "36927s", "133408s", "481972s"], + "frequencies": [6, 6, 5, 6, 5, 8, 2], + "interquartile": {"left": "661s", "right": "37901s"}, } async def test_calc_histogram_prs_lines(client, headers): body = { - "for": [ - { - "repositories": ["{1}"], - "lines": [0, 100, 100500], - }, - ], - "histograms": [ - { - "metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, - "scale": "log", - }, - ], + "for": [{"repositories": ["{1}"], "lines": [0, 100, 100500]}], + "histograms": [{"metric": PullRequestMetricID.PR_WAIT_FIRST_REVIEW_TIME, "scale": "log"}], "date_from": "2017-10-13", "date_to": "2018-05-23", "exclude_inactive": False, @@ -551,18 +523,17 @@ async def test_calc_histogram_prs_lines(client, headers): "scale": "log", "ticks": [ "60s", - "195s", - "637s", - "2078s", - "6776s", - "22090s", - "72014s", - "234763s", - "765318s", + "226s", + "856s", + "3237s", + "12234s", + "46234s", + "174714s", + "660223s", "2494902s", ], - "frequencies": [4, 8, 7, 8, 4, 22, 8, 6, 1], - "interquartile": {"left": "1762s", "right": "62368s"}, + "frequencies": [5, 5, 6, 5, 7, 9, 5, 2], + "interquartile": {"left": "1347s", "right": "58215s"}, } assert body[1] == { @@ -570,8 +541,8 @@ async def test_calc_histogram_prs_lines(client, headers): "metric": "pr-wait-first-review-time", "scale": "log", "ticks": ["60s", "273s", "1243s", "5661s", "25774s", "117343s", "534220s"], - "frequencies": [8, 4, 1, 4, 7, 2], - "interquartile": {"left": "60s", "right": "49999s"}, + "frequencies": [8, 4, 1, 3, 6, 2], + "interquartile": {"left": "60s", "right": "44357s"}, } @@ -650,12 +621,7 @@ async def test_calc_histogram_prs_logical( "repositories": ["github.com/src-d/go-git/alpha", "github.com/src-d/go-git/beta"], }, ], - "histograms": [ - { - "metric": PullRequestMetricID.PR_REVIEW_TIME, - "scale": "log", - }, - ], + "histograms": [{"metric": PullRequestMetricID.PR_REVIEW_TIME, "scale": "log"}], "date_from": "2015-10-13", "date_to": "2020-05-23", "exclude_inactive": False, diff --git a/server/tests/internal/features/github/test_pull_request_metrics.py b/server/tests/internal/features/github/test_pull_request_metrics.py index 3ca2fb08a..6094b2445 100644 --- a/server/tests/internal/features/github/test_pull_request_metrics.py +++ b/server/tests/internal/features/github/test_pull_request_metrics.py @@ -1094,7 +1094,7 @@ async def test_calc_pull_request_metrics_deep_filters( ], dtype=object, ) - np.testing.assert_array_equal(np.array(metrics.tolist(), dtype=object), ground_truth) + assert_array_equal(np.array(metrics.tolist(), dtype=object), ground_truth) def test_pull_request_metric_calculator_ensemble_accuracy(pr_samples): @@ -2074,6 +2074,43 @@ def _mk_pr(cls, review_request: datetime, approved: Optional[datetime]) -> PullR ) +class TestWaitFirstReviewCalculator: + def test_no_first_review_request_exact(self) -> None: + quantiles = (0, 1) + min_times = dt64arr_us(dt(2022, 1, 1)) + max_times = dt64arr_us(dt(2022, 2, 1)) + + calc = WaitFirstReviewTimeCalculator(quantiles=quantiles) + + prs = [ + PullRequestFactsFactory( + first_review_request=np.datetime64(datetime(2022, 1, 1)), + last_review=np.datetime64(datetime(2022, 1, 2)), + first_comment_on_first_review=np.datetime64(datetime(2022, 1, 2)), + ), + PullRequestFactsFactory( + first_review_request=np.datetime64(datetime(2022, 1, 1)), + last_review=np.datetime64(datetime(2022, 1, 3)), + first_comment_on_first_review=np.datetime64(datetime(2022, 1, 3)), + ), + PullRequestFactsFactory( + first_review_request=np.datetime64(datetime(2022, 1, 1)), + last_review=None, + first_comment_on_first_review=np.datetime64(datetime(2022, 1, 2)), + ), + ] + facts = df_from_structs(prs) + groups_mask = np.full((1, len(prs)), True, bool) + + calc(facts, min_times, max_times, None, groups_mask) + + assert_array_equal( + calc.peek[0], np.array([3600 * 24, 3600 * 24 * 2, "NaT"], dtype="timedelta64[s]"), + ) + # only two PRs included in the average + assert calc.values[0][0].value == np.timedelta64(3600 * 36, "s") + + class TestWaitFirstReviewTimeBelowThresholdRatio: def test_base(self) -> None: quantiles = (0, 1) diff --git a/server/tests/internal/miners/github/test_pull_request.py b/server/tests/internal/miners/github/test_pull_request.py index 1805fc10a..099af33f4 100644 --- a/server/tests/internal/miners/github/test_pull_request.py +++ b/server/tests/internal/miners/github/test_pull_request.py @@ -64,6 +64,7 @@ from tests.testutils.db import DBCleaner, models_insert from tests.testutils.factory import metadata as md_factory from tests.testutils.factory.common import DEFAULT_JIRA_ACCOUNT_ID, DEFAULT_MD_ACCOUNT_ID +from tests.testutils.factory.wizards import insert_repo, pr_models from tests.testutils.time import dt @@ -487,7 +488,8 @@ def validate_pull_request_facts(prmeta: Dict[str, Any], prt: PullRequestFacts): assert prt.first_review_request <= prt.first_comment_on_first_review else: assert not prt.last_review - assert not prt.last_commit_before_first_review + if prt.reviews: + assert not prt.last_commit_before_first_review if prt.first_review_request_exact: assert prt.first_review_request if prt.approved: @@ -726,6 +728,66 @@ async def test_pr_facts_miner_empty_releases( validate_pull_request_facts(*prt) +@with_defer +async def test_pr_facts_first_comment_first_review_not_reviewd(mdb_rw, pdb, rdb, sdb, pr_miner): + async with DBCleaner(mdb_rw) as mdb_cleaner: + repo = md_factory.RepositoryFactory(node_id=99, full_name="o/r") + await insert_repo(repo, mdb_cleaner, mdb_rw, sdb) + + models = [ + *pr_models( + 99, + 1, + 1, + repository_full_name="o/r", + created_at=dt(2011, 1, 2), + closed_at=dt(2011, 1, 15), + merged_at=dt(2011, 1, 15), + user_login="user0", + merged_by_login="user0", + ), + ] + mdb_cleaner.add_models(*models) + await models_insert(mdb_rw, *models) + + prefixer = await Prefixer.load((DEFAULT_MD_ACCOUNT_ID,), mdb_rw, None) + settings = Settings.from_account(1, prefixer, sdb, mdb_rw, None, None) + release_settings = await settings.list_release_matches() + branches, default_branches = await BranchMiner.load_branches( + {"o/r"}, prefixer, 1, (DEFAULT_MD_ACCOUNT_ID,), mdb_rw, None, None, + ) + + miner, _, _, _ = await pr_miner.mine( + date(2011, 1, 1), + date(2012, 1, 1), + dt(2011, 1, 1), + dt(2012, 1, 1), + {"o/r"}, + {}, + LabelFilter.empty(), + JIRAFilter.empty(), + True, + branches, + default_branches, + False, + release_settings, + LogicalRepositorySettings.empty(), + prefixer, + 1, + (DEFAULT_MD_ACCOUNT_ID,), + mdb_rw, + pdb, + rdb, + None, + ) + facts_miner = PullRequestFactsMiner({}) + prs_facts = [facts_miner(mined_pr) for mined_pr in miner] + pr_facts = prs_facts[0] + assert pr_facts.first_review_request == np.datetime64(datetime(2011, 1, 15)) + assert pr_facts.first_review_request_exact is None + assert pr_facts.first_comment_on_first_review is None + + @with_defer async def test_pr_mine_by_ids( branches,