Skip to content

Commit

Permalink
ci: add link checker
Browse files Browse the repository at this point in the history
  • Loading branch information
Cahllagerfeld committed Jun 7, 2024
1 parent 2cdc593 commit b4d0204
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 1 deletion.
33 changes: 33 additions & 0 deletions .github/workflows/check-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Workflow that runs the repository's link checker (scripts/check-links.sh).
name: Check Links

# Triggers: manual dispatch, pushes to main/future, and pull requests
# targeting main/future when they are opened, updated, or marked ready.
on:
workflow_dispatch:
push:
branches:
- main
- future
pull_request:
types: [opened, synchronize, ready_for_review]
branches:
- main
- future
concurrency:
# New commit on branch cancels running workflows of the same branch
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
check:
# Skip the job while a pull request is still a draft.
# NOTE(review): push and workflow_dispatch events carry no pull_request
# payload; confirm this expression still lets those runs through.
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@v4

# Node is required because check-links.sh invokes scripts/check-links.js.
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: "20"

# Collects the URLs and checks each one; a non-zero exit fails the job.
- name: Check Links
run: bash scripts/check-links.sh
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,6 @@ dist-ssr
/playwright/.cache/

.env*
!.env.example
!.env.example

urls.txt
50 changes: 50 additions & 0 deletions scripts/check-links.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import fs from "fs";
import { exit } from "process";

// Non-2xx statuses that are still treated as a passing link
// (401 Unauthorized, 403 Forbidden, 405 Method Not Allowed).
const allowedStatuses = [401, 403, 405];

// URLs to skip entirely; compared by exact string match against each line of urls.txt.
const ignoreList = [];

// Request headers sent with every fetch so the checker identifies itself.
const headers = new Headers({
  "User-Agent": "ZenMLURLBot/1.0 (+http://zenml.io/bot)"
});

// Both patterns are shared across every link and used only with `.test()`.
// They deliberately have NO `g` flag: a global regex keeps `lastIndex` between
// calls, so reusing it on different strings produces spurious `false` results.
const noIndexRegex = /content="noindex"/i;
// Dots are escaped so that only the literal host "docs.zenml.io" matches.
const docsRegex = /docs\.zenml\.io/i;

/**
 * Fetches every URL listed in `urls.txt` (one per line, blank lines ignored),
 * prints a colour-coded status line for each, and terminates the process with
 * exit code 1 if any link failed, 0 otherwise.
 *
 * A link fails when the request throws, when the response is neither `ok` nor
 * in `allowedStatuses`, or when the response body matches `noIndexRegex` and
 * the URL matches `docsRegex`. URLs present in `ignoreList` are skipped.
 *
 * @returns {Promise<void>} Never resolves normally; the process exits first.
 */
async function checkLink() {
  const urls = fs
    .readFileSync("urls.txt", "utf-8")
    .split("\n")
    .filter(Boolean);
  let failureSeen = false;

  // Links are checked one at a time, in file order.
  for (const url of urls) {
    if (ignoreList.includes(url)) {
      console.log("\x1b[33m", `Ignoring ${url}`);
      continue;
    }

    try {
      const res = await fetch(url, { method: "GET", headers });
      const body = await res.text();
      const statusLine = `[${res.status}] ${url}`;

      // A docs page carrying a noindex marker fails regardless of its status.
      // The docs pattern is only consulted when the marker was found.
      const markedNoIndex = noIndexRegex.test(body);
      const isNoIndexedDocsPage = markedNoIndex && docsRegex.test(url);

      const isAcceptable =
        !isNoIndexedDocsPage &&
        (res.ok || allowedStatuses.includes(res.status));

      if (isAcceptable) {
        console.log("\x1b[32m", statusLine);
      } else {
        console.log("\x1b[31m", statusLine);
        failureSeen = true;
      }
    } catch (err) {
      // Network-level failures (DNS, TLS, connection reset, ...) land here.
      console.error("\x1b[31m", `Error fetching ${url}: ${err}`);
      failureSeen = true;
    }
  }

  const exitCode = failureSeen ? 1 : 0;
  exit(exitCode);
}

// Entry point: run the checker; checkLink() sets the process exit code itself.
checkLink();
22 changes: 22 additions & 0 deletions scripts/check-links.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Collects every zenml.io URL referenced in the source tree, writes the
# de-duplicated list to urls.txt, and hands it to the Node.js link checker.
# Intended to be run from the repository root (all paths are relative).

# Define the file patterns to search for URLs
file_patterns=("*.json" "*.tsx" "*.ts")

# Define the output file for the URLs
output_file="urls.txt"

# Print the sorted, unique zenml.io URLs found in the given paths.
# Arguments: one or more files/directories to search recursively.
find_unique_urls() {
  # Build the grep options as an array so that each --include=<glob> stays a
  # single word and is never subject to word splitting or pathname expansion.
  local include_args=()
  local pattern
  for pattern in "${file_patterns[@]}"; do
    include_args+=("--include=${pattern}")
  done

  grep -E -o 'https?:\/\/([a-zA-Z0-9.-]*\.)?zenml\.io[^"'\''[:space:]]*' -r --no-filename "${include_args[@]}" "$@" | sort -u
}

# Find unique URLs in the "src" and "legacy" directories.
# find_unique_urls already emits sorted, unique lines, so no second sort is needed.
find_unique_urls src legacy > "$output_file"

# Run the link checker script; its exit status becomes the script's exit status.
node scripts/check-links.js

0 comments on commit b4d0204

Please sign in to comment.