From 744c253845ca16d94b475490488e60186b0f0708 Mon Sep 17 00:00:00 2001 From: Janpot <2109932+Janpot@users.noreply.github.com> Date: Thu, 23 Oct 2025 11:52:59 +0200 Subject: [PATCH 01/17] WIP --- packages/code-infra/package.json | 3 +- .../src/brokenLinksChecker/index.mjs | 544 ++++++++++++++++++ pnpm-lock.yaml | 240 ++------ 3 files changed, 602 insertions(+), 185 deletions(-) create mode 100644 packages/code-infra/src/brokenLinksChecker/index.mjs diff --git a/packages/code-infra/package.json b/packages/code-infra/package.json index 9fb7fc4d3..e3907189b 100644 --- a/packages/code-infra/package.json +++ b/packages/code-infra/package.json @@ -96,6 +96,7 @@ "globals": "^16.4.0", "globby": "^15.0.0", "minimatch": "^10.0.3", + "node-html-parser": "^7.0.1", "open": "^10.2.0", "postcss-styled-syntax": "^0.7.1", "regexp.escape": "^2.0.1", @@ -111,6 +112,7 @@ "typescript": "^5.0.0" }, "devDependencies": { + "@octokit/types": "^15.0.0", "@types/babel__core": "^7.20.5", "@types/babel__preset-env": "^7.10.0", "@types/env-ci": "^3.1.4", @@ -122,7 +124,6 @@ "@typescript-eslint/parser": "^8.45.0", "@typescript-eslint/rule-tester": "^8.45.0", "eslint": "^9.36.0", - "@octokit/types": "^15.0.0", "prettier": "^3.6.2", "typescript-eslint": "^8.45.0" }, diff --git a/packages/code-infra/src/brokenLinksChecker/index.mjs b/packages/code-infra/src/brokenLinksChecker/index.mjs new file mode 100644 index 000000000..5fb8f999d --- /dev/null +++ b/packages/code-infra/src/brokenLinksChecker/index.mjs @@ -0,0 +1,544 @@ +/* eslint-disable no-console */ +import { execaCommand } from 'execa'; +import timers from 'node:timers/promises'; +import { parse } from 'node-html-parser'; +import fs from 'node:fs/promises'; +import chalk from 'chalk'; +import { Transform } from 'node:stream'; + +const DEFAULT_CONCURRENCY = 4; + +/** + * @param {string} prefix - Text prepended to every emitted line + * @returns {Transform} Stream that re-emits its input line by line, each line prefixed and newline-terminated + */ +const prefixLines = (prefix) => { + let leftover = ''; + return new Transform({ + transform(chunk, enc, cb) { + const 
lines = (leftover + chunk.toString()).split(/\r?\n/); + leftover = /** @type {string} */ (lines.pop()); + this.push(lines.map((l) => `${prefix + l}\n`).join('')); + cb(); + }, + flush(cb) { + if (leftover) { + this.push(`${prefix + leftover}\n`); + } + cb(); + }, + }); +}; + +/** + * Maps pageUrl to ids of known targets on that page + * @typedef {Map<string, Set<string>>} LinkStructure + */ + +/** + * @typedef {Object} SerializedLinkStructure + * @property {Record<string, string[]>} targets - Target ids listed per page URL + */ + +/** + * @param {string | URL} url - URL to request + * @returns {Promise<Response>} The response; rejects with an Error when the response is not ok + */ +async function fetchUrl(url) { + const res = await fetch(url); + if (!res.ok) { + throw new Error(`Failed to fetch ${url}: [${res.status}] ${res.statusText}`); + } + return res; +} + +/** + * @param {string} url - URL to fetch repeatedly, waiting one second between attempts + * @param {number} timeout - Elapsed time in milliseconds after which a failed attempt throws instead of retrying + * @returns {Promise<void>} Resolves after the first successful fetch + */ +async function pollUrl(url, timeout) { + const start = Date.now(); + while (true) { + try { + // eslint-disable-next-line no-await-in-loop + await fetchUrl(url); + return; + } catch (/** @type {any} */ error) { + if (Date.now() - start > timeout) { + throw new Error(`Timeout waiting for ${url}: ${error.message}`, { cause: error }); + } + // eslint-disable-next-line no-await-in-loop + await timers.setTimeout(1000); + } + } +} + +/** + * @param {SerializedLinkStructure} data - Serialized structure whose `targets` record is converted + * @returns {LinkStructure} Map holding one Set of target ids per key of `data.targets` + */ +function deserializeLinkStructure(data) { + const linkStructure = new Map(); + for (const url of Object.keys(data.targets)) { + linkStructure.set(url, new Set(data.targets[url])); + } + return linkStructure; +} + +/** + * @typedef {Object} LinkTarget + */ + +/** + * @typedef {Object} PageData + * @property {string} url + * @property {number} status + * @property {Map<string, LinkTarget>} targets - Keys are the target ids that get serialized; value type presumed to be LinkTarget (TODO confirm) + */ + +/** + * @param {Map<string, PageData>} pages - Page data keyed by page URL; only the keys of each page's `targets` are written + * @param {string} outPath - Path of the JSON file to write + * @returns {Promise<void>} + */ +async function writePagesToFile(pages, outPath) { + /** @type {SerializedLinkStructure} */ + const fileContent = { targets: {} }; + for (const [url, pageData] of pages.entries()) { + fileContent.targets[url] = 
Array.from(pageData.targets.keys()); + } + await fs.writeFile(outPath, JSON.stringify(fileContent, null, 2), 'utf-8'); +} + +/** + * Polyfill for `node.computedName` available only in chrome v112+ + * @param {import('node-html-parser').HTMLElement | null} elm + * @param {import('node-html-parser').HTMLElement} ownerDocument + * @returns {string} + */ +function getAccessibleName(elm, ownerDocument) { + if (!elm) { + return ''; + } + + // 1. aria-label + const ariaLabel = elm.getAttribute('aria-label')?.trim(); + if (ariaLabel) { + return ariaLabel; + } + + // 2. aria-labelledby + const labelledby = elm.getAttribute('aria-labelledby'); + if (labelledby) { + const labels = []; + for (const id of labelledby.split(/\s+/)) { + const label = getAccessibleName(ownerDocument.getElementById(id), ownerDocument); + if (label) { + labels.push(label); + } + } + const label = labels.join(' ').trim(); + if (label) { + return label; + } + } + + // 3.