Skip to content

Commit

Permalink
Exempt some inaccessible links
Browse files Browse the repository at this point in the history
  • Loading branch information
Josh-Cena committed Aug 6, 2024
1 parent 29eac4e commit ec0cf57
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 91 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ data/*
sidebars
docs/*.js
*.txt
!config/*.txt
.env
7 changes: 7 additions & 0 deletions config/inaccessible-links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
https://codepen.io/
https://live.browserstack.com/dashboard
https://onlinelibrary.wiley.com/doi/{.*}
https://www.browserstack.com/{(users|accounts)/.*}
https://www.java.com/
https://www.openwebanalytics.com/
https://www.webpagetest.org/
33 changes: 22 additions & 11 deletions docs/warning-info.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ This rule checks for Markdown syntax that's supposed to turn into a `<dl>` but i
To fix this, check the source code. For example:

```diff
- Item1
- : Description1
- Item2
+ - : Description2

- Item1
- : Description1
- Item2
- : Description2
- - : Description3
+ Description3
- Item1
- : Description1
- Item2
+ - : Description2

- Item1
- : Description1
- Item2
- : Description2
- - : Description3
+ Description3
```

## Bad href
Expand Down Expand Up @@ -145,3 +145,14 @@ This rule reports pages that are unreachable via any in-page links (starting fro
## Unshortened bug link

This rule reports bug links where a known shortened version exists. For more information, see the [BCD lint rule](https://github.com/mdn/browser-compat-data/blob/main/lint/linter/test-links.ts).

In an editor, you can do the following regex replacements:

| Find | Replace |
| ----------------------------------------------------------------------------------------------------- | ------------------------- |
| `https?:\/\/bugzilla\.mozilla\.org\/show_bug\.cgi\?id=(\d+)` | `https://bugzil.la/$1` |
| `https?:\/\/(issues\.chromium\.org)\/issues\/(\d+)` | `https://crbug.com/$2` |
| `https?:\/\/(bugs\.chromium\.org\|code\.google\.com)\/p\/chromium\/issues\/detail\?id=(\d+)` | `https://crbug.com/$2` |
| `https?:\/\/(bugs\.chromium\.org\|code\.google\.com)\/p\/((?!chromium)\w+)\/issues\/detail\?id=(\d+)` | `https://crbug.com/$2/$3` |
| `https?:\/\/chromium\.googlesource\.com\/chromium\/src\/\+\/([\w\d]+)` | `https://crrev.com/$1` |
| `https?:\/\/bugs\.webkit\.org\/show_bug\.cgi\?id=(\d+)` | `https://webkit.org/b/$1` |
20 changes: 20 additions & 0 deletions src/server/config.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
import Path from "node:path";

export const CONTENT_ROOT = Path.resolve(process.env.CONTENT_ROOT ?? "../content");

/**
 * Reads a line-oriented config file from the repository's `config/` directory.
 *
 * The file is resolved relative to this module (`../../config/<path>`), read
 * as text and split into lines. Empty lines and lines that start with a space
 * are dropped; every other line is returned verbatim, in file order.
 *
 * @param path File name relative to the `config/` directory, e.g.
 *   `"no-page.txt"`.
 * @returns The retained lines of the file.
 */
export async function readConfig(path: string): Promise<string[]> {
  const configFile = Bun.file(
    Bun.fileURLToPath(import.meta.resolve(`../../config/${path}`))
  );
  const text = await configFile.text();
  return (
    text
      // Accept CRLF as well as LF: with a plain "\n" split, a checkout using
      // Windows line endings would leave a trailing "\r" on every entry, which
      // breaks exact-match lookups and `$`-anchored patterns built from them.
      .split(/\r?\n/)
      // NOTE(review): lines starting with a space are skipped, presumably so
      // they can serve as indented notes in the config files; confirm.
      .filter((line) => line !== "" && !line.startsWith(" "))
  );
}

/**
 * Checks whether `key` is listed in `config` and, if it is, records that the
 * entry has been used by setting its value to `true`.
 *
 * Keys that are not already present are never added to the map.
 *
 * @param config Map from config entry to a "has been used" flag.
 * @param key Entry to look up.
 * @returns `true` if `key` is present in `config`, `false` otherwise.
 */
export function configHas<T>(config: Map<T, boolean>, key: T) {
  const isListed = config.has(key);
  if (isListed) config.set(key, true);
  return isListed;
}
23 changes: 4 additions & 19 deletions src/server/create-graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,10 @@ import FS from "node:fs/promises";
import Path from "node:path";
import { $ } from "bun";
import { load } from "cheerio";
import { CONTENT_ROOT } from "./config.js";
import { CONTENT_ROOT, readConfig, configHas } from "./config.js";

const allowedCodeLinkTextRec = new Map(
(
await Bun.file(
Bun.fileURLToPath(
import.meta.resolve("../../config/allowed-code-link-text.txt")
)
).text()
)
.split("\n")
.filter((x) => x && !x.startsWith(" "))
.map((x) => [x, false])
(await readConfig("allowed-code-link-text.txt")).map((x) => [x, false])
);

const allowedSpacedCodeLink = [
Expand Down Expand Up @@ -194,21 +185,15 @@ graph.forEachNode((node) => {
if (
code.includes(" ") &&
!allowedSpacedCodeLink.some((re) => re.test(code)) &&
!(
allowedCodeLinkTextRec.has(code) &&
(allowedCodeLinkTextRec.set(code, true), true)
) &&
!configHas(allowedCodeLinkTextRec, code) &&
// Canvas tutorial uses example code in DL, not worth fixing
!node.id.includes("Canvas_API/Tutorial")
) {
report(node, "Code with space", code);
} else if (
code.includes("_") &&
!allowedUnderscoreCodeLink.some((re) => re.test(code)) &&
!(
allowedCodeLinkTextRec.has(code) &&
(allowedCodeLinkTextRec.set(code, true), true)
)
!configHas(allowedCodeLinkTextRec, code)
) {
report(node, "Code with underscore", code);
}
Expand Down
124 changes: 63 additions & 61 deletions src/server/process-warnings.ts
Original file line number Diff line number Diff line change
@@ -1,61 +1,62 @@
import warnings from "../../data/warnings.json" with { type: "json" };
import nodes from "../../data/nodes.json" with { type: "json" };
import { readConfig, configHas } from "./config.js";

const missingFeatures = new Set(
(
await Bun.file(
Bun.fileURLToPath(
import.meta.resolve("../../config/missing-features.txt")
)
).text()
)
.split("\n")
.filter((x) => x && !x.startsWith(" "))
.map((x) => {
// JS has no undocumented things
if (x.startsWith("javascript.")) return;
const [scope, interfac, member, ...rest] = x.split(".");
if (rest.length) {
console.error("Unexpected data:", x);
return;
}
if (!member && scope !== "api" && scope !== "webassembly") {
console.error("Unexpected data:", x);
return;
}
switch (scope) {
case "api":
if (!member) return `/en-US/docs/Web/API/${interfac}`;
return `/en-US/docs/Web/API/${interfac}/${member}`;
case "css":
return `/en-US/docs/Web/CSS/${member}`;
case "http":
if (interfac === "headers") {
return `/en-US/docs/Web/HTTP/Headers/${member}`;
}
break;
case "webdriver":
if (interfac === "commands") {
return `/en-US/docs/Web/WebDriver/Commands/${member}`;
}
break;
case "webassembly":
// Not structured enough
return;
}
(await readConfig("missing-features.txt")).map((x) => {
// JS has no undocumented things
if (x.startsWith("javascript.")) return;
const [scope, interfac, member, ...rest] = x.split(".");
if (rest.length) {
console.error("Unexpected data:", x);
return;
}
if (!member && scope !== "api" && scope !== "webassembly") {
console.error("Unexpected data:", x);
})
return;
}
switch (scope) {
case "api":
if (!member) return `/en-US/docs/Web/API/${interfac}`;
return `/en-US/docs/Web/API/${interfac}/${member}`;
case "css":
return `/en-US/docs/Web/CSS/${member}`;
case "http":
if (interfac === "headers") {
return `/en-US/docs/Web/HTTP/Headers/${member}`;
}
break;
case "webdriver":
if (interfac === "commands") {
return `/en-US/docs/Web/WebDriver/Commands/${member}`;
}
break;
case "webassembly":
// Not structured enough
return;
}
console.error("Unexpected data:", x);
})
);

const noPageRec = new Map(
(
await Bun.file(
Bun.fileURLToPath(import.meta.resolve("../../config/no-page.txt"))
).text()
)
.split("\n")
.filter((x) => x && !x.startsWith(" "))
.map((x) => [x, false])
const noPage = new Map(
(await readConfig("no-page.txt")).map((x) => [x, false])
);

// Maps each pattern from config/inaccessible-links.txt to a flag that starts
// as `false`. Each config line is compiled into a fully anchored RegExp: text
// outside braces is matched literally, while the text between `{` and `}` is
// passed through as raw regex source.
const knownInaccessibleLinks = new Map(
  (await readConfig("inaccessible-links.txt")).map((pattern) => {
    const regexSource = pattern
      // Splitting on a capturing group keeps the `{...}` portion in the
      // result, so even indices are literal text and odd indices are the
      // braced portion. The match is greedy: it spans from the first `{` to
      // the last `}` on the line.
      .split(/(\{.*\})/)
      .map((segment, index) =>
        index % 2 === 1
          ? // Braced portion: strip the surrounding `{` and `}`.
            segment.slice(1, -1)
          : // Literal portion: escape regex metacharacters.
            segment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
      )
      .join("");
    return [new RegExp(`^${regexSource}$`), false];
  })
);

for (const node of nodes) {
Expand All @@ -80,11 +81,7 @@ for (const node of nodes) {
} else if (id === "macros") {
if (d.explanation.endsWith("does not exist")) {
const url = d.explanation.replace(" does not exist", "");
if (missingFeatures.has(url)) return;
else if (noPageRec.has(url)) {
noPageRec.set(url, true);
return;
}
if (missingFeatures.has(url) || configHas(noPage, url)) return;
}
} else if (id === "images") {
if (
Expand Down Expand Up @@ -192,7 +189,7 @@ const bugLinkShorteners: [RegExp, string][] = [
];

for (const node of nodes) {
for (const link of node.data.links) {
visitLinks: for (const link of node.data.links) {
if (/^https:\/\/(jsfiddle\.net|codepen\.io|jsbin\.com)\/./.test(link)) {
report(node, "External sandbox link", link);
continue;
Expand Down Expand Up @@ -229,6 +226,7 @@ for (const node of nodes) {
"https://caniuse.com",
"https://chromestatus.com",
"https://chromium.googlesource.com",
"https://web.archive.org",
// YouTube uses query parameters, so there's no real 404
"https://www.youtube.com",
"https://youtu.be",
Expand All @@ -241,6 +239,12 @@ for (const node of nodes) {
) {
continue;
}
for (const [regex, _] of knownInaccessibleLinks) {
if (regex.test(link)) {
knownInaccessibleLinks.set(regex, true);
continue visitLinks;
}
}
if (link.startsWith("http")) {
const url = new URL(link);
url.hash = "";
Expand Down Expand Up @@ -350,9 +354,7 @@ for (const [nodeId, baseMessages] of warningList) {
(
x.message === "Missing href" ||
(x.message === "Broken link" &&
(missingFeatures.has(x.data[0]) ||
(noPageRec.has(x.data[0]) &&
(noPageRec.set(x.data[0], true), true))))
(missingFeatures.has(x.data[0]) || configHas(noPage, x.data[0])))
)
)
);
Expand All @@ -370,7 +372,7 @@ Bun.write("data/warnings-processed.json", JSON.stringify(tree, null, 2));

brokenAnchorsWriter.end();

for (const [url, used] of noPageRec) {
for (const [url, used] of noPage) {
if (!used) {
console.error(`${url} is no longer referenced`);
}
Expand Down

0 comments on commit ec0cf57

Please sign in to comment.