diff --git a/package.json b/package.json index 2639f1b..df34fb4 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,7 @@ "bugs": { "url": "https://github.com/fensak-io/reng/issues" }, + "type": "module", "source": "src/index.ts", "main": "./dist/index.cjs", "module": "./dist/module.mjs", @@ -44,10 +45,15 @@ "@babel/preset-typescript": "^7.23.2", "@fensak-io/front-matter": "^1.0.0", "@octokit/rest": "^20.0.2", - "babel-preset-minify": "^0.5.2" + "babel-preset-minify": "^0.5.2", + "json5": "^2.2.3", + "microdiff": "^1.3.2", + "toml": "^3.0.0", + "yaml": "^2.3.3" }, "devDependencies": { "@jest/globals": "^29.7.0", + "@octokit/types": "^12.1.0", "@parcel/config-default": "2.9.3", "@parcel/packager-ts": "2.9.3", "@parcel/transformer-typescript-types": "2.9.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7567f78..41fb661 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,11 +23,26 @@ dependencies: babel-preset-minify: specifier: ^0.5.2 version: 0.5.2 + json5: + specifier: ^2.2.3 + version: 2.2.3 + microdiff: + specifier: ^1.3.2 + version: 1.3.2 + toml: + specifier: ^3.0.0 + version: 3.0.0 + yaml: + specifier: ^2.3.3 + version: 2.3.3 devDependencies: '@jest/globals': specifier: ^29.7.0 version: 29.7.0 + '@octokit/types': + specifier: ^12.1.0 + version: 12.1.0 '@parcel/config-default': specifier: 2.9.3 version: 2.9.3(@parcel/core@2.9.3)(typescript@5.2.2) @@ -1750,7 +1765,7 @@ packages: '@octokit/graphql': 7.0.2 '@octokit/request': 8.1.2 '@octokit/request-error': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 before-after-hook: 2.2.3 universal-user-agent: 6.0.0 @@ -1758,7 +1773,7 @@ packages: resolution: {integrity: sha512-hRlOKAovtINHQPYHZlfyFwaM8OyetxeoC81lAkBy34uLb8exrZB50SQdeW3EROqiY9G9yxQTpp5OHTV54QD+vA==} engines: {node: '>= 18'} dependencies: - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 is-plain-object: 5.0.0 universal-user-agent: 6.0.0 @@ -1767,11 +1782,11 @@ packages: engines: {node: '>= 18'} dependencies: '@octokit/request': 
8.1.2 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 universal-user-agent: 6.0.0 - /@octokit/openapi-types@19.0.0: - resolution: {integrity: sha512-PclQ6JGMTE9iUStpzMkwLCISFn/wDeRjkZFIKALpvJQNBGwDoYYi2fFvuHwssoQ1rXI5mfh6jgTgWuddeUzfWw==} + /@octokit/openapi-types@19.0.1: + resolution: {integrity: sha512-zC+73r2HIoRb9rWW5S3Y759hrpadlD5pNnya/QfZv0JZE7mvMu+FUa7nxHqTadi2hZc4BPZjJ8veDTuJnh8+8g==} /@octokit/plugin-paginate-rest@9.0.0(@octokit/core@5.0.1): resolution: {integrity: sha512-oIJzCpttmBTlEhBmRvb+b9rlnGpmFgDtZ0bB6nq39qIod6A5DP+7RkVLMOixIgRCYSHDTeayWqmiJ2SZ6xgfdw==} @@ -1780,7 +1795,7 @@ packages: '@octokit/core': '>=5' dependencies: '@octokit/core': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 /@octokit/plugin-request-log@4.0.0(@octokit/core@5.0.1): resolution: {integrity: sha512-2uJI1COtYCq8Z4yNSnM231TgH50bRkheQ9+aH8TnZanB6QilOnx8RMD2qsnamSOXtDj0ilxvevf5fGsBhBBzKA==} @@ -1798,7 +1813,7 @@ packages: '@octokit/core': '>=5' dependencies: '@octokit/core': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 dev: false /@octokit/plugin-retry@6.0.1(@octokit/core@5.0.1): @@ -1809,7 +1824,7 @@ packages: dependencies: '@octokit/core': 5.0.1 '@octokit/request-error': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 bottleneck: 2.19.5 dev: true @@ -1820,7 +1835,7 @@ packages: '@octokit/core': ^5.0.0 dependencies: '@octokit/core': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 bottleneck: 2.19.5 dev: true @@ -1828,7 +1843,7 @@ packages: resolution: {integrity: sha512-X7pnyTMV7MgtGmiXBwmO6M5kIPrntOXdyKZLigNfQWSEQzVxR4a4vo49vJjTWX70mPndj8KhfT4Dx+2Ng3vnBQ==} engines: {node: '>= 18'} dependencies: - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 deprecation: 2.3.1 once: 1.4.0 @@ -1838,7 +1853,7 @@ packages: dependencies: '@octokit/endpoint': 9.0.1 '@octokit/request-error': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 is-plain-object: 5.0.0 universal-user-agent: 6.0.0 @@ -1852,10 +1867,10 @@ packages: 
'@octokit/plugin-rest-endpoint-methods': 10.0.0(@octokit/core@5.0.1) dev: false - /@octokit/types@12.0.0: - resolution: {integrity: sha512-EzD434aHTFifGudYAygnFlS1Tl6KhbTynEWELQXIbTY8Msvb5nEqTZIm7sbPEt4mQYLZwu3zPKVdeIrw0g7ovg==} + /@octokit/types@12.1.0: + resolution: {integrity: sha512-JmjQr5ZbOnpnOLX5drI2O2I1N9suOYZAgINHXTlVVg4lRtUifMv2JssT+RhmNxQwXH153Pc8HaCMdTRkqI1oVQ==} dependencies: - '@octokit/openapi-types': 19.0.0 + '@octokit/openapi-types': 19.0.1 /@parcel/bundler-default@2.9.3(@parcel/core@2.9.3): resolution: {integrity: sha512-JjJK8dq39/UO/MWI/4SCbB1t/qgpQRFnFDetAAAezQ8oN++b24u1fkMDa/xqQGjbuPmGeTds5zxGgYs7id7PYg==} @@ -5711,6 +5726,10 @@ packages: engines: {node: '>= 8'} dev: true + /microdiff@1.3.2: + resolution: {integrity: sha512-pKy60S2febliZIbwdfEQKTtL5bLNxOyiRRmD400gueYl9XcHyNGxzHSlJWn9IMHwYXT0yohPYL08+bGozVk8cQ==} + dev: false + /micromatch@4.0.5: resolution: {integrity: sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==} engines: {node: '>=8.6'} diff --git a/src/engine/from_github.ts b/src/engine/from_github.ts deleted file mode 100644 index 64d53e1..0000000 --- a/src/engine/from_github.ts +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright (c) Fensak, LLC. -// SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 - -import * as nodecrypto from "crypto"; - -import { Octokit } from "@octokit/rest"; -import { - hasParsableFrontMatter, - extract as extractFrontMatter, -} from "@fensak-io/front-matter"; - -import { parseUnifiedDiff } from "./patch.ts"; -import { - ILinkedPR, - IChangeSetMetadata, - IPatch, - PatchOp, -} from "./patch_types.ts"; -import { SourcePlatform } from "./from.ts"; - -const crypto = nodecrypto.webcrypto; - -/** - * Represents a repository hosted on GitHub. - * @property owner The owner of the repository. - * @property name The name of the repository. 
- */ -export interface IGitHubRepository { - owner: string; - name: string; -} - -/** - * Represents the decoded patches for the Pull Request. This also includes a mapping from patch IDs to the URL to - * retrieve the file contents. - * @property patchList The list of file patches that are included in this PR. - * @property patchFetchMap A mapping from a URL hash to the URL to fetch the contents for the file. The URL hash is - * the sha256 hash of the URL with a random salt. - */ -export interface IGitHubPullRequestPatches { - metadata: IChangeSetMetadata; - patchList: IPatch[]; - patchFetchMap: Record; -} - -/** - * Pull in the changes contained in the Pull Request and create an IPatch array and a mapping from PR file IDs to the - * URL to fetch the contents. - * @param clt An authenticated or anonymous GitHub API client created from Octokit. - * @param repo The repository to pull the pull request changes from. - * @param prNum The number of the PR where the changes should be pulled from. - * @returns The list of patches that are contained in the Pull Request. 
- */ -export async function patchFromGitHubPullRequest( - clt: Octokit, - repo: IGitHubRepository, - prNum: number, -): Promise { - const { data: pullReq } = await clt.pulls.get({ - owner: repo.owner, - repo: repo.name, - pull_number: prNum, - }); - - const iter = clt.paginate.iterator(clt.pulls.listFiles, { - owner: repo.owner, - repo: repo.name, - pull_number: prNum, - headers: { - "X-GitHub-Api-Version": "2022-11-28", - }, - per_page: 100, - }); - - const a = new Uint8Array(8); - crypto.getRandomValues(a); - const fetchMapSalt = hexEncode(a); - - const out: IGitHubPullRequestPatches = { - metadata: { - sourceBranch: pullReq.head.ref, - targetBranch: pullReq.base.ref, - linkedPRs: await extractLinkedPRs( - clt, - repo.owner, - repo.name, - prNum, - pullReq.body, - ), - }, - patchList: [], - patchFetchMap: {}, - }; - for await (const { data: prFiles } of iter) { - for (const f of prFiles) { - const fContentsURL = new URL(f.contents_url); - const fContentsHash = await getGitHubPRFileID(fetchMapSalt, fContentsURL); - out.patchFetchMap[fContentsHash] = fContentsURL; - const fid = `${SourcePlatform.GitHub}:${fContentsHash}`; - - let op = PatchOp.Unknown; - switch (f.status) { - // This should never happen, so we throw an error - default: - throw new Error( - `unknown status for file ${f.filename} in PR ${prNum} of repo ${repo.owner}/${repo.name}: ${f.status}`, - ); - - // A rename is a delete and then an insert, so special case it - case "renamed": - if (!f.previous_filename) { - // This shouldn't happen because of the way the GitHub API works, so we throw an error. 
- throw new Error("previous filename not available for a rename"); - } - out.patchList.push({ - contentsID: fid, - path: f.previous_filename, - op: PatchOp.Delete, - // TODO: this requires pulling down the file contents - additions: 0, - deletions: 0, - diff: [], - }); - out.patchList.push({ - contentsID: fid, - path: f.filename, - op: PatchOp.Insert, - // TODO: this requires pulling down the file contents - additions: 0, - deletions: 0, - diff: [], - }); - continue; - - // The rest only needs to set the op - - case "added": - case "copied": // a copy is the same as a file insert. - op = PatchOp.Insert; - break; - case "removed": - op = PatchOp.Delete; - break; - case "changed": - case "modified": - op = PatchOp.Modified; - break; - } - out.patchList.push({ - contentsID: fid, - path: f.filename, - op: op, - additions: f.additions, - deletions: f.deletions, - diff: parseUnifiedDiff(f.patch || ""), - }); - } - } - return out; -} - -async function getGitHubPRFileID(salt: string, url: URL): Promise { - const toHash = `${salt}:${url}`; - const digest = await crypto.subtle.digest( - "SHA-256", - new TextEncoder().encode(toHash), - ); - return hexEncode(new Uint8Array(digest)); -} - -async function extractLinkedPRs( - clt: Octokit, - owner: string, - repo: string, - prNum: number, - prDescription: string | null, -): Promise { - interface IFrontMatterLinkedPR { - repo?: string; - prNum: number; - } - - interface IExpectedFrontMatter { - fensak: { - linked: IFrontMatterLinkedPR[]; - }; - } - - if (!prDescription || !hasParsableFrontMatter(prDescription)) { - return []; - } - - const fm = extractFrontMatter(prDescription); - if (!fm.attrs.fensak) { - return []; - } - if (!fm.attrs.fensak.linked) { - throw new TypeError( - `PR ${owner}/${repo}#${prNum} has front matter, but it is not in the expected format`, - ); - } - - const out: ILinkedPR[] = await Promise.all( - fm.attrs.fensak.linked.map(async (l): Promise => { - let outR = ""; - let r = repo; - if (l.repo) { - outR = 
l.repo; - r = l.repo; - } - const { data: pullReq } = await clt.pulls.get({ - owner: owner, - repo: r, - pull_number: l.prNum, - }); - - return { - repo: outR, - prNum: l.prNum, - isMerged: pullReq.merged, - isClosed: pullReq.state === "closed", - }; - }), - ); - return out; -} - -function hexEncode(hb: Uint8Array): string { - const hashArray = Array.from(hb); - const hashHex = hashArray - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - return hashHex; -} diff --git a/src/engine/index.ts b/src/engine/index.ts index 534fff0..9561db4 100644 --- a/src/engine/index.ts +++ b/src/engine/index.ts @@ -1,9 +1,12 @@ // Copyright (c) Fensak, LLC. // SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 +/** + * engine + * Contains the core implementation for the JavaScript based rules engine. + */ + export * from "./compile.ts"; -export * from "./from.ts"; -export * from "./from_github.ts"; export * from "./interpreter.ts"; export * from "./patch.ts"; export * from "./patch_types.ts"; diff --git a/src/engine/interpreter.test.ts b/src/engine/interpreter.test.ts index 57b6f7f..bfcee43 100644 --- a/src/engine/interpreter.test.ts +++ b/src/engine/interpreter.test.ts @@ -28,6 +28,7 @@ test("sanity check", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -51,6 +52,7 @@ test("sanity check old version", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -74,6 +76,7 @@ test("ES5 minify", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -99,6 +102,7 @@ test("ES6 support", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -129,6 +133,7 @@ function main(inp: IPatch[], metadata: IChangeSetMetadata) { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -281,6 +286,7 @@ test("XMLHTTPRequest not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], 
nullMeta, @@ -307,6 +313,7 @@ test("fetch is not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -331,6 +338,7 @@ test("process is not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -355,6 +363,7 @@ test("Deno is not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, diff --git a/src/engine/patch_types.ts b/src/engine/patch_types.ts index 9f2ad04..bb02e2a 100644 --- a/src/engine/patch_types.ts +++ b/src/engine/patch_types.ts @@ -1,6 +1,8 @@ // Copyright (c) Fensak, LLC. // SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 +import type { Difference } from "microdiff"; + /** * The operation on a line in a hunk of a patch. * @property Unknown Unknown operation. @@ -66,7 +68,10 @@ export enum PatchOp { * :. * @property path The relative path (from the root of the repo) to the file that was updated in the patch. * @property op The operation that was done on the file in the patch. + * @property additions The number of lines that were added in this patch. + * @property deletions The number of lines that were removed in this patch. * @property diff The list of diffs, organized into hunks. + * @property objectDiff If the file represents a parsable data file (e.g., json, yaml, toml), this will contain the object level diff. */ export interface IPatch { contentsID: string; @@ -75,6 +80,24 @@ export interface IPatch { additions: number; deletions: number; diff: IHunk[]; + objectDiff: IObjectDiff | null; +} + +/** + * Represents a diff of the object representation of a file. The specific diff returns a list of object patches that + * contains the keys that were added, removed, or updated. Note that the difference is only populated for updated + * objects - if the file was inserted or deleted, then the diff will be empty. + * @property previous The object representation of the data in the file before the change. 
+ * @property current The object representation of the data in the file after the change. + * @property diff The difference across the two objects. + */ +export interface IObjectDiff { + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + previous: any; + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + current: any; + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + diff: Difference[]; } /** diff --git a/src/index.ts b/src/index.ts index 9f2d740..bc533fb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,3 +2,4 @@ // SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 export * from "./engine/index.ts"; +export * from "./sourcer/index.ts"; diff --git a/src/engine/from.ts b/src/sourcer/from.ts similarity index 100% rename from src/engine/from.ts rename to src/sourcer/from.ts diff --git a/src/engine/from_github.test.ts b/src/sourcer/from_github.test.ts similarity index 82% rename from src/engine/from_github.test.ts rename to src/sourcer/from_github.test.ts index 8227088..b07e269 100644 --- a/src/engine/from_github.test.ts +++ b/src/sourcer/from_github.test.ts @@ -1,11 +1,12 @@ import { expect, test } from "@jest/globals"; import { Octokit } from "@octokit/rest"; +import { IPatch, PatchOp, LineOp, IObjectDiff } from "../engine/patch_types.ts"; + import { IGitHubRepository, patchFromGitHubPullRequest, } from "./from_github.ts"; -import { IPatch, PatchOp, LineOp } from "./patch_types.ts"; const maybeToken = process.env.GITHUB_TOKEN; let octokit: Octokit; @@ -30,11 +31,13 @@ test("a single file change from GitHub is parsed correctly", async () => { }); expect(patches.patchList.length).toEqual(1); + // Check top level patch const patch = patches.patchList[0]; expect(patch.path).toEqual("appversions.json"); expect(patch.op).toEqual(PatchOp.Modified); expect(patch.diff.length).toEqual(1); + // Check patch hunks const hunk = patch.diff[0]; expect(hunk.originalStart).toEqual(1); 
expect(hunk.originalLength).toEqual(5); @@ -67,6 +70,29 @@ test("a single file change from GitHub is parsed correctly", async () => { newText: "", }, ]); + + // Check object diffs + const maybeObjDiff = patch.objectDiff; + expect(maybeObjDiff).not.toBeNull(); + const objDiff = maybeObjDiff as IObjectDiff; + expect(objDiff.previous).toEqual({ + coreapp: "v0.1.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }); + expect(objDiff.current).toEqual({ + coreapp: "v0.1.0", + subapp: "v1.2.0", + logapp: "v100.1.0", + }); + expect(objDiff.diff).toEqual([ + { + type: "CHANGE", + path: ["subapp"], + value: "v1.2.0", + oldValue: "v1.1.0", + }, + ]); }); test("multiple file changes from GitHub is parsed correctly", async () => { @@ -145,6 +171,29 @@ test("multiple file changes from GitHub is parsed correctly", async () => { newText: "", }, ]); + const maybeJSONObjDiff = jsonPatch.objectDiff; + expect(maybeJSONObjDiff).not.toBeNull(); + const jsonObjDiff = maybeJSONObjDiff as IObjectDiff; + expect(jsonObjDiff).toEqual({ + previous: { + coreapp: "v0.1.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }, + current: { + coreapp: "v0.1.0", + subapp: "v1.2.0", + logapp: "v100.1.0", + }, + diff: [ + { + type: "CHANGE", + path: ["subapp"], + value: "v1.2.0", + oldValue: "v1.1.0", + }, + ], + }); // Check tfvars patch expect(tfvarsPatch.op).toEqual(PatchOp.Modified); @@ -171,6 +220,7 @@ test("multiple file changes from GitHub is parsed correctly", async () => { newText: "", }, ]); + expect(tfvarsPatch.objectDiff).toBeNull(); // Check toml patch expect(tomlPatch.op).toEqual(PatchOp.Modified); @@ -197,6 +247,29 @@ test("multiple file changes from GitHub is parsed correctly", async () => { newText: "", }, ]); + const maybeTOMLObjDiff = tomlPatch.objectDiff; + expect(maybeTOMLObjDiff).not.toBeNull(); + const tomlObjDiff = maybeTOMLObjDiff as IObjectDiff; + expect(tomlObjDiff).toEqual({ + previous: { + coreapp: "v0.1.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }, + current: { + coreapp: 
"v0.2.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }, + diff: [ + { + type: "CHANGE", + path: ["coreapp"], + value: "v0.2.0", + oldValue: "v0.1.0", + }, + ], + }); }); test("extracts linked PRs in front matter", async () => { diff --git a/src/sourcer/from_github.ts b/src/sourcer/from_github.ts new file mode 100644 index 0000000..af95810 --- /dev/null +++ b/src/sourcer/from_github.ts @@ -0,0 +1,401 @@ +// Copyright (c) Fensak, LLC. +// SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 + +import * as nodecrypto from "crypto"; +import YAML from "yaml"; +import toml from "toml"; +import JSON5 from "json5"; +import diff from "microdiff"; + +import { Octokit } from "@octokit/rest"; +import { Endpoints } from "@octokit/types"; +import { + hasParsableFrontMatter, + extract as extractFrontMatter, +} from "@fensak-io/front-matter"; + +import { parseUnifiedDiff } from "../engine/patch.ts"; +import { + ILinkedPR, + IChangeSetMetadata, + IPatch, + IObjectDiff, + PatchOp, +} from "../engine/patch_types.ts"; + +import { SourcePlatform } from "./from.ts"; + +const crypto = nodecrypto.webcrypto; + +// A type utility to unpack the element type from an array type +// See https://stackoverflow.com/questions/43537520/how-do-i-extract-a-type-from-an-array-in-typescript +type EleTypeUnpacked = T extends (infer U)[] ? U : T; + +type PRFile = EleTypeUnpacked< + Endpoints["GET /repos/{owner}/{repo}/pulls/{pull_number}/files"]["response"]["data"] +>; +type PullReq = + Endpoints["GET /repos/{owner}/{repo}/pulls/{pull_number}"]["response"]["data"]; + +/** + * Represents a repository hosted on GitHub. + * @property owner The owner of the repository. + * @property name The name of the repository. + */ +export interface IGitHubRepository { + owner: string; + name: string; +} + +/** + * Represents the decoded patches for the Pull Request. This also includes a mapping from patch IDs to the URL to + * retrieve the file contents. 
+ * @property patchList The list of file patches that are included in this PR. + * @property patchFetchMap A mapping from a URL hash to the URL to fetch the contents for the file. The URL hash is + * the sha256 hash of the URL with a random salt. + */ +export interface IGitHubPullRequestPatches { + metadata: IChangeSetMetadata; + patchList: IPatch[]; + patchFetchMap: Record; +} + +/** + * Pull in the changes contained in the Pull Request and create an IPatch array and a mapping from PR file IDs to the + * URL to fetch the contents. + * @param clt An authenticated or anonymous GitHub API client created from Octokit. + * @param repo The repository to pull the pull request changes from. + * @param prNum The number of the PR where the changes should be pulled from. + * @returns The list of patches that are contained in the Pull Request. + */ +export async function patchFromGitHubPullRequest( + clt: Octokit, + repo: IGitHubRepository, + prNum: number, +): Promise { + const { data: pullReq } = await clt.pulls.get({ + owner: repo.owner, + repo: repo.name, + pull_number: prNum, + }); + + const iter = clt.paginate.iterator(clt.pulls.listFiles, { + owner: repo.owner, + repo: repo.name, + pull_number: prNum, + headers: { + "X-GitHub-Api-Version": "2022-11-28", + }, + per_page: 100, + }); + + const a = new Uint8Array(8); + crypto.getRandomValues(a); + const fetchMapSalt = hexEncode(a); + + const out: IGitHubPullRequestPatches = { + metadata: { + sourceBranch: pullReq.head.ref, + targetBranch: pullReq.base.ref, + linkedPRs: await extractLinkedPRs( + clt, + repo.owner, + repo.name, + prNum, + pullReq.body, + ), + }, + patchList: [], + patchFetchMap: {}, + }; + for await (const { data: prFiles } of iter) { + for (const f of prFiles) { + const fContentsURL = new URL(f.contents_url); + const fContentsHash = await getGitHubPRFileID(fetchMapSalt, fContentsURL); + out.patchFetchMap[fContentsHash] = fContentsURL; + const patches = await getPatchesFromPRFile( + clt, + f, + 
fContentsHash, + pullReq, + `${repo.owner}/${repo.name}`, + ); + out.patchList.push(...patches); + } + } + return out; +} + +async function getPatchesFromPRFile( + clt: Octokit, + f: PRFile, + fContentsHash: string, + pullReq: PullReq, + + // The following is only needed for error messaging + repoName: string, +): Promise { + const fid = `${SourcePlatform.GitHub}:${fContentsHash}`; + + let op = PatchOp.Unknown; + switch (f.status) { + // This should never happen, so we throw an error + default: + throw new Error( + `unknown status for file ${f.filename} in PR ${pullReq.number} of repo ${repoName}: ${f.status}`, + ); + + // A rename is a delete and then an insert, so special case it + case "renamed": + if (!f.previous_filename) { + // This shouldn't happen because of the way the GitHub API works, so we throw an error. + throw new Error("previous filename not available for a rename"); + } + return [ + { + contentsID: fid, + path: f.previous_filename, + op: PatchOp.Delete, + // TODO: this requires pulling down the file contents + additions: 0, + deletions: 0, + diff: [], + objectDiff: null, + }, + { + contentsID: fid, + path: f.filename, + op: PatchOp.Insert, + // TODO: this requires pulling down the file contents + additions: 0, + deletions: 0, + diff: [], + objectDiff: null, + }, + ]; + + // The rest only needs to set the op + + case "added": + case "copied": // a copy is the same as a file insert. 
+ op = PatchOp.Insert; + break; + case "removed": + op = PatchOp.Delete; + break; + case "changed": + case "modified": + op = PatchOp.Modified; + break; + } + + return [ + { + contentsID: fid, + path: f.filename, + op: op, + additions: f.additions, + deletions: f.deletions, + diff: parseUnifiedDiff(f.patch || ""), + objectDiff: await getObjectDiff(clt, f, pullReq, op), + }, + ]; +} + +async function getGitHubPRFileID(salt: string, url: URL): Promise { + const toHash = `${salt}:${url}`; + const digest = await crypto.subtle.digest( + "SHA-256", + new TextEncoder().encode(toHash), + ); + return hexEncode(new Uint8Array(digest)); +} + +async function extractLinkedPRs( + clt: Octokit, + owner: string, + repo: string, + prNum: number, + prDescription: string | null, +): Promise { + interface IFrontMatterLinkedPR { + repo?: string; + prNum: number; + } + + interface IExpectedFrontMatter { + fensak: { + linked: IFrontMatterLinkedPR[]; + }; + } + + if (!prDescription || !hasParsableFrontMatter(prDescription)) { + return []; + } + + const fm = extractFrontMatter(prDescription); + if (!fm.attrs.fensak) { + return []; + } + if (!fm.attrs.fensak.linked) { + throw new TypeError( + `PR ${owner}/${repo}#${prNum} has front matter, but it is not in the expected format`, + ); + } + + const out: ILinkedPR[] = await Promise.all( + fm.attrs.fensak.linked.map(async (l): Promise => { + let outR = ""; + let r = repo; + if (l.repo) { + outR = l.repo; + r = l.repo; + } + const { data: pullReq } = await clt.pulls.get({ + owner: owner, + repo: r, + pull_number: l.prNum, + }); + + return { + repo: outR, + prNum: l.prNum, + isMerged: pullReq.merged, + isClosed: pullReq.state === "closed", + }; + }), + ); + return out; +} + +/** + * Returns a diff of the object representation of the PR file if it can be parsed as an object. This representation is + * more ergonomic to work with than the textual patch representation, as you can traverse the keys of the object to + * see which data has changed. 
+ * + * Currently we support pulling down the object representation for the following file types: + * - JSON + * - JSON5 + * - YAML + * - TOML + * + * Returns null if the file can not be turned into an object type. + */ +async function getObjectDiff( + clt: Octokit, + f: PRFile, + pullReq: PullReq, + op: PatchOp, + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any +): Promise { + // Get the file extension to determine the file type + const m = /(?:\.([^.]+))?$/.exec(f.filename); + if (m === null) { + return null; + } + const ext = m[1]; + + const supportedObjectExtensions = ["json", "json5", "yaml", "yml", "toml"]; + if (!supportedObjectExtensions.includes(ext)) { + return null; + } + + // At this point, we know the object can be parsed out of the file so start to pull down the contents. + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + let parser: (s: string) => any; + switch (ext) { + default: + // Throw error because this should never happen given the check for supportedObjectExtensions. 
+ throw new Error(`unsupported file extension ${ext} for ${f.filename}`); + + case "json": + parser = JSON.parse; + break; + + case "json5": + parser = JSON5.parse; + break; + + case "yaml": + case "yml": + parser = YAML.parse; + break; + + case "toml": + parser = toml.parse; + break; + } + + switch (op) { + default: + return null; + + case PatchOp.Insert: { + const curContents = await getPRFileContent(clt, f, pullReq, "head"); + const cur = parser(curContents); + return { + previous: null, + current: cur, + diff: [], + }; + } + + case PatchOp.Delete: { + const prevContents = await getPRFileContent(clt, f, pullReq, "base"); + const prev = parser(prevContents); + return { + previous: prev, + current: null, + diff: [], + }; + } + + case PatchOp.Modified: { + const prevContents = await getPRFileContent(clt, f, pullReq, "base"); + const prev = parser(prevContents); + const curContents = await getPRFileContent(clt, f, pullReq, "head"); + const cur = parser(curContents); + return { + previous: prev, + current: cur, + diff: diff(prev, cur), + }; + } + } +} + +async function getPRFileContent( + clt: Octokit, + f: PRFile, + pullReq: PullReq, + refSrc: "base" | "head", +): Promise { + let repoOwner = pullReq.base.repo.owner.login; + let repoName = pullReq.base.repo.name; + let ref = pullReq.base.ref; + if (refSrc === "head") { + const repo = pullReq.head.repo || pullReq.base.repo; + repoOwner = repo.owner.login; + repoName = repo.name; + ref = pullReq.head.ref; + } + + const { data: fileRep } = await clt.repos.getContent({ + owner: repoOwner, + repo: repoName, + path: f.filename, + ref: ref, + }); + if (Array.isArray(fileRep) || fileRep.type !== "file") { + throw new Error(`${f.filename} is not a file`); + } + return Buffer.from(fileRep.content, "base64").toString(); +} + +function hexEncode(hb: Uint8Array): string { + const hashArray = Array.from(hb); + const hashHex = hashArray + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + return hashHex; +} diff --git 
a/src/sourcer/index.ts b/src/sourcer/index.ts new file mode 100644 index 0000000..1e0380c --- /dev/null +++ b/src/sourcer/index.ts @@ -0,0 +1,10 @@ +// Copyright (c) Fensak, LLC. +// SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 + +/** + * sourcer + * Contains functions and utilities for sourcing patch information from different sources, such as GitHub and GitLab. + */ + +export * from "./from.ts"; +export * from "./from_github.ts";