const fs = require('fs/promises')
const path = require('path')
const unified = require('unified')
const markdown = require('remark-parse')
const remarkToRehype = require('remark-rehype')
const raw = require('rehype-raw')
const visit = require('unist-util-visit')
const matter = require('gray-matter')
const GithubSlugger = require('github-slugger')

import type { Node, Data } from 'unist'

/**
 * This script validates internal links in /docs, including regular internal
 * links, hash links, and the source and related links found in document
 * metadata. It does not validate external links.
 *
 * 1. Collects all .mdx files listed in the config files.
 * 2. For each file, extracts the content, metadata, and heading slugs.
 * 3. Creates a document map to efficiently look up documents by slug.
 * 4. Traverses each document and:
 *    - Checks that each internal link points to an existing document.
 *    - Validates hash links (links starting with "#") against the list of
 *      headings in the current document.
 *    - Checks the source and related links found in the metadata of each
 *      document.
 * 5. Categorizes any broken links discovered during these checks and posts
 *    them as a comment on the PR.
 */

interface Document {
  body: string
  path: string
  slug: string
  headings: string[]
  sidebarDepth?: number
}

interface Errors {
  doc: Document
  link: string[]
  hash: string[]
  source: string[]
  related: string[]
}

type ErrorType = Exclude<keyof Errors, 'doc'>

/**
 * Source: the path to the .mdx file.
 *
 * Label: the text displayed in the sidebar.
 *
 * Slug: the route used to access the page in the browser.
 */
type RouteSchema = { source: string; label: string; slug: string }

type RouteFragment = Omit<RouteSchema, 'label'>

type FooterConfigSchema = Array<
  RouteSchema | { to: string; label: string; slug: string }
>

type ConfigSchema = Array<{
  title: string
  slug: string
  routes: Array<RouteSchema>
}>

type FailureFunction = (message: string) => void

const RELATIVE_PATH = '/'
const EXCLUDED_HASHES: string[] = []

const slugger = new GithubSlugger()

// Collect the routes of all .mdx files listed in the config files
async function getAllMdxFilePaths(basePath: string): Promise<RouteFragment[]> {
  const sidebarLearn: ConfigSchema = JSON.parse(
    await fs.readFile(path.join(basePath, 'config/sidebar-learn.json'), 'utf8')
  )
  const sidebarReference: ConfigSchema = JSON.parse(
    await fs.readFile(path.join(basePath, 'config/sidebar-reference.json'), 'utf8')
  )
  const footer: FooterConfigSchema = JSON.parse(
    await fs.readFile(path.join(basePath, 'config/sidebar-footer.json'), 'utf8')
  )

  const config = [
    ...sidebarLearn.map(group => ({ ...group, slug: path.join('learn', group.slug) })),
    ...sidebarReference.map(group => ({ ...group, slug: path.join('reference', group.slug) })),
  ]

  let allRoutes: RouteSchema[] = [
    { source: path.join(basePath, 'home.mdx'), slug: '', label: 'Homepage' },
  ]
  for (const group of config) {
    allRoutes = allRoutes.concat(
      group.routes.map(route => ({
        ...route,
        slug: path.join(group.slug, route.slug),
        source: path.join(basePath, route.source),
      }))
    )
  }
  footer.forEach(
    item =>
      'source' in item &&
      allRoutes.push({ ...item, source: path.join(basePath, item.source) })
  )

  return allRoutes
}
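
// For illustration only: a hypothetical `config/sidebar-learn.json` entry
// (shape inferred from ConfigSchema; the real files may differ):
//
//   [{ "title": "Getting started",
//      "slug": "getting-started",
//      "routes": [{ "source": "learn/getting_started/quick_start.mdx",
//                   "label": "Quick start",
//                   "slug": "quick_start" }] }]
//
// would yield a route with slug "learn/getting-started/quick_start" whose
// `source` resolves to the .mdx file under `basePath`.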

// Returns the slugs of all headings in a tree
function getHeadingsFromMarkdownTree(tree: Node<Data>): string[] {
  const headings: string[] = []
  slugger.reset()

  visit(tree, 'heading', (node: Node<Data>) => {
    let headingText = ''
    // Account for headings with inline code blocks by concatenating the
    // text values of all children of a heading node.
    visit(node, (child: any) => {
      if (child.value) {
        headingText += child.value
      }
    })
    headings.push(slugger.slug(headingText))
  })

  return headings
}
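
// For example, a heading like "## Using `path.join`" slugs to
// "using-pathjoin", and github-slugger de-duplicates repeats: a second
// "Overview" heading in the same document becomes "overview-1".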

// Create a processor to parse MDX content
const markdownProcessor = unified()
  .use(markdown)
  .use(remarkToRehype, { allowDangerousHTML: true })
  .use(raw)
  .use(function compiler() {
    // A compiler is required, and we only need the AST, so we can
    // just return it.
    // @ts-ignore
    this.Compiler = function treeCompiler(tree) {
      return tree
    }
  })
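
// Note: with this identity compiler, `markdownProcessor.process()` yields the
// hast tree instead of a string. This assumes a unified version that stores
// the compiler's return value on `file.contents` (newer unified releases put
// non-string results on `file.result` instead), which is why the traversal
// below reads `.contents`.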

// Use a Map for faster lookups
let documentMap: Map<string, Document>

// Create a map of documents, keyed by slug, with document content and
// metadata as values. The key shape varies between doc pages and error pages:
// error pages: `/docs/messages/example`
// doc pages: `api/example`
async function prepareDocumentMapEntry(
  route: RouteFragment,
  setFailed: FailureFunction
): Promise<[string, Document]> {
  try {
    const mdxContent = await fs.readFile(route.source, 'utf8')
    const { content, data } = matter(mdxContent)
    const tree = markdownProcessor.parse(content)
    const headings = getHeadingsFromMarkdownTree(tree)

    return [
      route.slug,
      { body: content, path: route.source, slug: route.slug, headings, ...data },
    ]
  } catch (error) {
    // Report `route.source` (not the whole route object) so the message names the file
    setFailed(`Error preparing document map for file ${route.source}: ${error}`)
    return ['', {} as Document]
  }
}
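
// For illustration only: a hypothetical file learn/getting_started/quick_start.mdx
// with front matter `title: Quick start` and one "## Installation" heading
// would produce (roughly) the entry:
//   'learn/getting-started/quick_start' => {
//     body: '...',
//     path: '<basePath>/learn/getting_started/quick_start.mdx',
//     slug: 'learn/getting-started/quick_start',
//     headings: ['installation'],
//     title: 'Quick start', // spread in from the gray-matter front matter
//   }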

// Check that an internal link points to an existing document
function validateInternalLink(errors: Errors, href: string): void {
  // "/docs/api/example#heading" -> ["/docs/api/example", "heading"]
  const [link, hash] = href.split('#')

  // Check if the doc page exists (map keys have no leading slashes)
  const foundPage = documentMap.get(link.replace(/^\/+/, ''))

  if (!foundPage) {
    errors.link.push(href)
  } else if (hash && !EXCLUDED_HASHES.includes(hash)) {
    // TODO: Check if this block is still needed
    // // Account for documents that pull their content from another document
    // const foundPageSource = foundPage.source
    //   ? documentMap.get(foundPage.source)
    //   : undefined

    // Check if the hash link points to an existing section within the document
    // const hashFound = (foundPageSource || foundPage).headings.includes(hash)
    const hashFound = foundPage.headings.includes(hash)

    if (!hashFound) {
      errors.hash.push(href)
    }
  }
}
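
// For example (using the hypothetical map entry above):
// "/learn/getting-started/quick_start" passes if that key is in the map;
// "/learn/getting-started/quick_start#installation" additionally requires
// "installation" among the document's heading slugs. Failures land in
// `errors.link` and `errors.hash` respectively.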

// Check that hash links point to existing sections within the same document
function validateHashLink(errors: Errors, href: string, doc: Document): void {
  const hashLink = href.replace('#', '')

  if (!EXCLUDED_HASHES.includes(hashLink) && !doc.headings.includes(hashLink)) {
    errors.hash.push(href)
  }
}

// Check that the source link points to an existing document
function validateSourceLinks(doc: Document, errors: Errors): void {
  if (doc.slug && !documentMap.get(doc.slug)) {
    errors.source.push(doc.path)
  }
}

// Traverse the document tree and validate links
function traverseTreeAndValidateLinks(
  tree: any,
  doc: Document,
  setFailed: FailureFunction
): Errors {
  const errors: Errors = {
    doc,
    link: [],
    hash: [],
    source: [],
    related: [],
  }

  try {
    visit(tree, (node: any) => {
      if (node.type === 'element' && node.tagName === 'a') {
        const href = node.properties.href

        if (!href) return

        if (href.startsWith(RELATIVE_PATH)) {
          validateInternalLink(errors, href)
        } else if (href.startsWith('#')) {
          validateHashLink(errors, href, doc)
        }
      }
    })

    validateSourceLinks(doc, errors)
  } catch (error) {
    setFailed('Error traversing tree: ' + error)
  }

  return errors
}

const formatTableRow = (
  link: string,
  errorType: ErrorType,
  docPath: string,
  sha?: string
) => {
  if (process.argv[2] === '--run-local-checker') {
    return `| ${link} | ${errorType} | /${docPath} | \n`
  }
  return `| ${link} | ${errorType} | [/${docPath}](https://github.com/meilisearch/documentation/blob/${sha}/${docPath}) | \n`
}
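
// For example, a broken link "/learn/example" found in a hypothetical
// docs/guide.mdx would render, in local mode, as the Markdown row:
//   | /learn/example | link | /docs/guide.mdx |
// and in CI mode the file cell becomes a permalink into the repo at `sha`.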

// Main function that triggers link validation across .mdx files
export async function validateAllInternalLinks(
  basePath: string,
  setFailed: FailureFunction,
  sha?: string,
  useComment?: (comment: string, errorsExist: boolean) => Promise<void>
): Promise<void> {
  try {
    const allMdxFilePaths = await getAllMdxFilePaths(basePath)

    documentMap = new Map(
      await Promise.all(
        allMdxFilePaths.map(route => prepareDocumentMapEntry(route, setFailed))
      )
    )

    const docProcessingPromises = allMdxFilePaths.map(async (route) => {
      const doc = documentMap.get(route.slug)
      if (doc) {
        const tree = (await markdownProcessor.process(doc.body)).contents
        return traverseTreeAndValidateLinks(tree, doc, setFailed)
      } else {
        return {
          doc: {} as Document,
          link: [],
          hash: [],
          source: [],
          related: [],
        } as Errors
      }
    })

    const allErrors = await Promise.all(docProcessingPromises)

    let errorsExist = false
    const errorRows: string[] = []

    const errorTypes: ErrorType[] = ['link', 'hash', 'source', 'related']
    allErrors.forEach((errors) => {
      const {
        doc: { path: docPath },
      } = errors

      errorTypes.forEach((errorType) => {
        if (errors[errorType].length > 0) {
          errorsExist = true
          errors[errorType].forEach((link) => {
            errorRows.push(formatTableRow(link, errorType, docPath, sha))
          })
        }
      })
    })

    const errorComment = [
      'Hi there :wave:\n\nIt looks like this PR introduces broken links to the docs, please take a moment to fix them before merging:\n\n| Broken link | Type | File | \n| ----------- | ----------- | ----------- | \n',
      ...errorRows,
      '\nThank you :pray:',
    ].join('')

    if (errorsExist) {
      await useComment?.(errorComment, errorsExist)
      const errorTableData = allErrors.flatMap((errors) => {
        const { doc } = errors

        return errorTypes.flatMap((errorType) =>
          errors[errorType].map((link) => ({
            docPath: doc.path,
            errorType,
            link,
          }))
        )
      })

      console.log('This PR introduces broken links to the docs:')
      console.table(errorTableData, ['link', 'errorType', 'docPath'])
    } else {
      await useComment?.('All broken links are now fixed, thank you!', errorsExist)
      console.log("This PR doesn't introduce any broken links to the docs. :D")
    }
  } catch (error) {
    setFailed('Error validating internal links: ' + error)
  }
}

if (process.argv[2] === '--run-local-checker') {
  validateAllInternalLinks('../../../', (message) => {
    throw new Error(message)
  })
}
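
// A minimal sketch of how a CI wrapper might call the exported function
// (hypothetical wiring; the module path and workflow details are assumptions):
//
//   import { validateAllInternalLinks } from './check-links'
//
//   await validateAllInternalLinks(
//     process.env.GITHUB_WORKSPACE ?? '.',                    // repo root
//     (message) => { process.exitCode = 1; console.error(message) },
//     process.env.GITHUB_SHA,                                 // for permalinks
//     async (comment) => { /* post `comment` to the PR via the GitHub API */ }
//   )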