Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ch] Dr. CI to use CH Part 2 #5637

Merged
merged 4 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 6 additions & 22 deletions torchci/lib/commitUtils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import getRocksetClient from "lib/rockset";
import { queryClickhouse } from "./clickhouse";

const ELIGIBLE_COMMITS_FOR_SIMILAR_FAILURE_CHECK: { [sha: string]: boolean } =
{};
Expand All @@ -19,33 +19,17 @@ export async function isEligibleCommitForSimilarFailureCheck(
const query = `
SELECT DISTINCT
last_commit_sha,
merge_commit_sha,
_event_time
merge_commit_sha
FROM
commons.merges
default.merges
WHERE
(
last_commit_sha = : sha
last_commit_sha = {sha: String}
AND merge_commit_sha != ''
)
OR merge_commit_sha = : sha
OR merge_commit_sha = {sha: String}
`;

const rocksetClient = getRocksetClient();
const results = (
await rocksetClient.queries.query({
sql: {
query: query,
parameters: [
{
name: "sha",
type: "string",
value: sha,
},
],
},
})
).results;
const results = await queryClickhouse(query, { sha });

ELIGIBLE_COMMITS_FOR_SIMILAR_FAILURE_CHECK[sha] =
results !== undefined && results.length !== 0 ? true : false;
Expand Down
44 changes: 42 additions & 2 deletions torchci/lib/drciUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { fetchIssuesByLabelCH } from "lib/fetchIssuesByLabel";
import {
hasS3Log,
isFailureFromPrevMergeCommit,
isSameAuthor,
isSameFailure,
} from "lib/jobUtils";
import { MAX_SIZE, OLDEST_FIRST, querySimilarFailures } from "lib/searchUtils";
Expand All @@ -15,6 +14,11 @@ import _ from "lodash";
import { Octokit } from "octokit";
import { isDrCIEnabled, isPyTorchPyTorch, isTime0, TIME_0 } from "./bot/utils";
import { queryClickhouseSaved } from "./clickhouse";
// Import itself to ensure that mocks can be applied, see
// https://stackoverflow.com/questions/51900413/jest-mock-function-doesnt-work-while-it-was-called-in-the-other-function
// https://stackoverflow.com/questions/45111198/how-to-mock-functions-in-the-same-module-using-jest
import * as thisModule from "./drciUtils";
import { getAuthors } from "./getAuthors";
import { IssueData } from "./types";
dayjs.extend(utc);

Expand Down Expand Up @@ -344,7 +348,7 @@ export async function hasSimilarFailures(
isSameFailure(job, failure) &&
// Run this check last because it costs one query to query for the commit
// author of the failure
!(await isSameAuthor(job, failure)) &&
!(await thisModule.isSameAuthor(job, failure)) &&
foundSimilarFailure === undefined
) {
// Save the first similar failure (the one with the highest score) and continue
Expand Down Expand Up @@ -489,3 +493,39 @@ export async function getPRMergeCommits(
return acc;
}, new Map<number, string[]>());
}

export async function isSameAuthor(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved from other file due to client/server stuff

job: RecentWorkflowsData,
failure: RecentWorkflowsData
): Promise<boolean> {
const authors = await getAuthors([job, failure]);
// Extract the authors for each job
const jobAuthor =
job.head_sha in authors
? authors[job.head_sha]
: { email: "", commit_username: "", pr_username: "" };
const failureAuthor =
failure.head_sha in authors
? authors[failure.head_sha]
: { email: "", commit_username: "", pr_username: "" };

const isSameEmail =
jobAuthor.email !== "" &&
failureAuthor.email !== "" &&
jobAuthor.email === failureAuthor.email;
const isSameCommitUsername =
jobAuthor.commit_username !== "" &&
failureAuthor.commit_username !== "" &&
jobAuthor.commit_username === failureAuthor.commit_username;
const isSamePrUsername =
jobAuthor.pr_username !== "" &&
failureAuthor.pr_username !== "" &&
jobAuthor.pr_username === failureAuthor.pr_username;

// This function exists because we don't want to wrongly count similar failures
// from commits of the same author as flaky. Some common cases include:
// * ghstack
// * Draft commit
// * Cherry picking
return isSameEmail || isSameCommitUsername || isSamePrUsername;
}
86 changes: 11 additions & 75 deletions torchci/lib/getAuthors.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { RecentWorkflowsData } from "lib/types";
import _ from "lodash";
import getRocksetClient from "./rockset";
import { queryClickhouse } from "./clickhouse";

// NB: Surprisingly, jest cannot mock function in the same module so we need to
// keep this function here in its own module so that it can be mocked. See the
Expand All @@ -21,84 +21,20 @@ export async function getAuthors(jobs: RecentWorkflowsData[]): Promise<{
// events. We need both events because pull_request is when a PR is created while
// push is for a commit is pushed in PR or committed into trunk
const query = `
WITH email AS (
SELECT
w.head_commit.id AS sha,
w.head_commit.author.email
FROM
commons.workflow_run w
WHERE
ARRAY_CONTAINS(
SPLIT(: shas, ','),
w.head_commit.id
)
GROUP BY
sha,
email
),
commit_username AS (
SELECT
p.after AS sha,
p.head_commit.author.username
FROM
commons.push p
WHERE
ARRAY_CONTAINS(
SPLIT(: shas, ','),
p.after
)
GROUP BY
sha,
username
),
pr_username AS (
SELECT
pr.head.sha AS sha,
pr.user.login
FROM
commons.pull_request pr
WHERE
ARRAY_CONTAINS(
SPLIT(: shas, ','),
pr.head.sha
)
GROUP BY
sha,
login
)
SELECT
email.sha,
sha,
email,
IF(
commit_username.username IS NULL,
'', commit_username.username
) AS commit_username,
IF(
pr_username.login IS NULL, '', pr_username.login
) AS pr_username,
username AS commit_username,
login as pr_username
FROM
email
LEFT JOIN commit_username ON email.sha = commit_username.sha
LEFT JOIN pr_username ON email.sha = pr_username.sha
materialized_views.commit_authors final
where
sha in {shas: Array(String)}
`;

const rocksetClient = getRocksetClient();
const results = (
await rocksetClient.queries.query({
sql: {
query: query,
parameters: [
{
name: "shas",
type: "string",
value: _.map(jobs, (job: RecentWorkflowsData) => job.head_sha).join(
","
),
},
],
},
})
).results;
const results = await queryClickhouse(query, {
shas: _.map(jobs, (job: RecentWorkflowsData) => job.head_sha),
});

return results !== undefined ? _.keyBy(results, (record) => record.sha) : {};
return _.keyBy(results, (record) => record.sha);
}
37 changes: 0 additions & 37 deletions torchci/lib/jobUtils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import dayjs from "dayjs";
import { jaroWinkler } from "jaro-winkler-typescript";
import { getAuthors } from "lib/getAuthors";
import {
BasicJobData,
IssueData,
Expand Down Expand Up @@ -331,42 +330,6 @@ export async function backfillMissingLog(
return res.status === 200;
}

export async function isSameAuthor(
job: RecentWorkflowsData,
failure: RecentWorkflowsData
): Promise<boolean> {
const authors = await getAuthors([job, failure]);
// Extract the authors for each job
const jobAuthor =
job.head_sha in authors
? authors[job.head_sha]
: { email: "", commit_username: "", pr_username: "" };
const failureAuthor =
failure.head_sha in authors
? authors[failure.head_sha]
: { email: "", commit_username: "", pr_username: "" };

const isSameEmail =
jobAuthor.email !== "" &&
failureAuthor.email !== "" &&
jobAuthor.email === failureAuthor.email;
const isSameCommitUsername =
jobAuthor.commit_username !== "" &&
failureAuthor.commit_username !== "" &&
jobAuthor.commit_username === failureAuthor.commit_username;
const isSamePrUsername =
jobAuthor.pr_username !== "" &&
failureAuthor.pr_username !== "" &&
jobAuthor.pr_username === failureAuthor.pr_username;

// This function exists because we don't want to wrongly count similar failures
// from commits of the same author as flaky. Some common cases include:
// * ghstack
// * Draft commit
// * Cherry picking
return isSameEmail || isSameCommitUsername || isSamePrUsername;
}

export function isFailureFromPrevMergeCommit(
failure: RecentWorkflowsData,
mergeCommits: String[]
Expand Down
3 changes: 2 additions & 1 deletion torchci/test/drciUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { TIME_0 } from "lib/bot/utils";
import { JobData, RecentWorkflowsData } from "lib/types";
import nock from "nock";
import * as commitUtils from "../lib/commitUtils";
import * as drciUtils from "../lib/drciUtils";
import {
getSuppressedLabels,
hasSimilarFailures,
Expand Down Expand Up @@ -47,7 +48,7 @@ describe("Test various utils used by Dr.CI", () => {

const mock = jest.spyOn(searchUtils, "searchSimilarFailures");
mock.mockImplementation(() => Promise.resolve({ jobs: [] }));
const mockJobUtils = jest.spyOn(jobUtils, "isSameAuthor");
const mockJobUtils = jest.spyOn(drciUtils, "isSameAuthor");
mockJobUtils.mockImplementation(() => Promise.resolve(false));
const mockCommitUtils = jest.spyOn(
commitUtils,
Expand Down
2 changes: 1 addition & 1 deletion torchci/test/jobUtils.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { TIME_0 } from "lib/bot/utils";
import { isSameAuthor } from "lib/drciUtils";
import {
BasicJobData,
IssueData,
Expand All @@ -13,7 +14,6 @@ import {
isDisabledTestMentionedInPR,
isFailureFromPrevMergeCommit,
isRecentlyCloseDisabledTest,
isSameAuthor,
isSameContext,
isSameFailure,
removeCancelledJobAfterRetry,
Expand Down
Loading