# Manual Spellcheck Review & Approval #175
# Note: this file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review it, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow header: display name, trigger, and token permissions.
name: Manual Spellcheck Review & Approval

on:
  # Runs only when manually triggered
  workflow_dispatch:

permissions:
  # Needed to push changes after approval
  contents: write
jobs:
  # Job 1: run codespell over the repository, post-process its findings with
  # the project ignore list and context phrases, and publish the resulting
  # corrections.txt as a build artifact for a human to review.
  spellcheck_review:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Install Dependencies
        run: |
          pip install codespell
          # Quoted so the shell never treats [speedup] as a glob pattern.
          pip install "fuzzywuzzy[speedup]"
          pip install sentence-splitter

      - name: Verify Spellcheck Ignore List Exists
        run: |
          if [ ! -f .github/spellcheck-ignore.txt ]; then
            echo "❌ Error: spellcheck-ignore.txt not found!"
            exit 1
          fi

      - name: Run Spellcheck and Generate Corrections
        # The heredoc delimiter is quoted ('EOF') so the shell passes the
        # Python source through verbatim; with an unquoted delimiter the
        # backticks in the final print() were run as a shell command.
        run: |
          set -e  # Exit on error
          python3 <<'EOF'
          import re
          import os
          import subprocess
          from fuzzywuzzy import process
          from sentence_splitter import SentenceSplitter

          # Load ignore list with correct case preservation:
          # lowercase word -> word exactly as written in the ignore file.
          # Blank lines are skipped so no empty-string key is created.
          ignore_list = {}
          ignore_file = ".github/spellcheck-ignore.txt"
          if os.path.exists(ignore_file):
              with open(ignore_file, "r", encoding="utf-8") as f:
                  ignore_list = {
                      word.strip().lower(): word.strip()
                      for word in f
                      if word.strip()
                  }

          # Context phrases to prioritize in spellcheck corrections:
          # correct phrase -> known misspelled variants of that phrase.
          context_phrases = {
              "identity provider": ["identiy provider", "identify provider"],
              "access token": ["access toekn", "acess token"],
              "user authentication": ["user authentification", "user authenthication"],
              "API gateway": ["API getway", "API gatway"]
          }


          def is_inside_markdown_link(line, original):
              """Return True if `original` occurs in the (...) target of a Markdown link on `line`."""
              return bool(re.search(r'\[.*?\]\(.*' + re.escape(original) + r'.*\)', line))


          def is_in_title_or_hint(line):
              """Return True if `line` contains a `{% tab title=` or `{% hint style=` tag."""
              return bool(re.search(r'{% (tab title|hint style)=', line))


          def is_full_word_match(misspelled, correct):
              """Return True if the two words are equal ignoring case."""
              return misspelled.lower() == correct.lower()  # Ensure full word match


          # Cache of file path -> list of lines, so a file with many findings
          # is read from disk only once.
          _file_lines_cache = {}


          def read_file_lines(path):
              """Return the lines of `path`, reading each file at most once.

              Undecodable bytes are replaced rather than raising, so a single
              non-UTF-8 file cannot abort the whole run.
              """
              if path not in _file_lines_cache:
                  with open(path, "r", encoding="utf-8", errors="replace") as file:
                      _file_lines_cache[path] = file.readlines()
              return _file_lines_cache[path]


          # Define absolute path for corrections file
          repo_root = os.path.abspath(os.getcwd())
          corrections_dir = os.path.join(repo_root, ".github/corrections")
          corrections_path = os.path.join(corrections_dir, "corrections.txt")

          # Ensure corrections directory exists
          os.makedirs(corrections_dir, exist_ok=True)

          # Run codespell and capture output. check=False because the exit
          # status is not inspected here; only stdout is used.
          result = subprocess.run([
              "codespell",
              "--ignore-words=.github/spellcheck-ignore.txt",
              "--skip=.git,*.lock,*.json,*.yaml,*.yml,*.css,*.html",
              "--quiet-level=2"
          ], capture_output=True, text=True, check=False)

          spellcheck_output = result.stdout.strip()
          if not spellcheck_output:
              print("✅ No spelling corrections found. Exiting.")
              raise SystemExit(0)

          # Number of corrections actually written to the review file.
          corrections_written = 0

          # Save corrections for manual review
          with open(corrections_path, "w", encoding="utf-8") as f:
              for line in spellcheck_output.splitlines():
                  # Expected shape: "<path>:<line>: <original> ==> <suggestion>"
                  match = re.match(r"(.*):(\d+): (\S+) ==> (\S+)", line)
                  if not match:
                      continue
                  file_path, line_number, original, suggestion = match.groups()

                  # Read the full line for context (empty if out of range)
                  lines = read_file_lines(file_path)
                  line_index = int(line_number) - 1
                  context_line = lines[line_index].strip() if 0 <= line_index < len(lines) else ""

                  # ✅ Ignore words inside Markdown links
                  if is_inside_markdown_link(context_line, original):
                      continue

                  # ✅ Always enforce exact case of ignore words in valid regions
                  if original.lower() in ignore_list:
                      suggestion = ignore_list[original.lower()]  # Replace with exact capitalization
                  # ✅ Ensure 90% match rule only applies to full words
                  else:
                      # NOTE(review): this branch looks unreachable. It runs only when
                      # original.lower() is NOT an ignore-list key, yet is_full_word_match
                      # requires original.lower() to equal one of those (lowercase) keys.
                      # Kept as-is; confirm the intended fuzzy rule.
                      best_match, score = process.extractOne(original, ignore_list.keys()) if ignore_list else (None, 0)
                      if best_match and score >= 90 and is_full_word_match(original, best_match):
                          suggestion = ignore_list[best_match]  # Replace with exact capitalization from ignore list

                  # ✅ If a correction is an ignore word but capitalization is wrong, fix it
                  if suggestion.lower() in ignore_list and suggestion != ignore_list[suggestion.lower()]:
                      suggestion = ignore_list[suggestion.lower()]  # Enforce correct capitalization

                  # ✅ Replace entire phrase if a context match is found
                  for correct_phrase, wrong_variants in context_phrases.items():
                      for wrong_phrase in wrong_variants:
                          if wrong_phrase in context_line:
                              suggestion = correct_phrase  # Replace entire phrase

                  # ✅ Prevent punctuation modifications (the \S+ capture can pick
                  # up a trailing separator that the original word does not have)
                  if suggestion.endswith((",", ".", ";")) and not original.endswith((",", ".", ";")):
                      suggestion = suggestion.rstrip(",.;")

                  f.write(f"File: {file_path}, Line: {line_number}\n")
                  f.write(f"**Original:** {original}\n")
                  f.write(f"**Suggested:** {suggestion}\n")
                  f.write("Approve? (yes/no)\n\n")
                  corrections_written += 1

          print(f"✅ {corrections_written} corrections generated. Review in `{corrections_path}` and upload the approved version.")
          EOF

      - name: Upload Corrections for Review
        uses: actions/upload-artifact@v4
        with:
          name: corrections_review
          path: .github/corrections/corrections.txt

      - name: Wait for Review and Approval
        # Single quotes keep the backticks literal; in double quotes the shell
        # tried to execute `corrections.txt` as a command.
        # NOTE(review): this step only prints a message; nothing here pauses
        # the workflow for a human.
        run: |
          echo '⏳ Please review and approve the corrections. Re-upload `corrections.txt` when ready.'

  # Job 2: apply the reviewed corrections and open a pull request.
  # NOTE(review): this job does a fresh checkout and never downloads the
  # `corrections_review` artifact, so it presumably expects the reviewed
  # corrections.txt to already be committed to the repository — confirm.
  apply_approved_corrections:
    needs: spellcheck_review
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Debug List Corrections Directory
        run: |
          echo "📂 Listing .github/corrections/ directory contents:"
          ls -lah .github/corrections/

      - name: Debug Check File Path
        run: |
          echo "Checking if corrections.txt exists..."
          if [ -f "$GITHUB_WORKSPACE/.github/corrections/corrections.txt" ]; then
            echo "✅ Found corrections.txt at $GITHUB_WORKSPACE/.github/corrections/corrections.txt"
          else
            echo "❌ Error: Reviewed corrections file NOT found at $GITHUB_WORKSPACE/.github/corrections/corrections.txt!"
            exit 1
          fi

      - name: Apply Approved Corrections
        run: |
          python3 .github/scripts/apply_corrections.py || { echo "❌ Failed to apply corrections!"; exit 1; }

      - name: Commit and Push Changes
        env:
          # Exposed as GITHUB_TOKEN so the gh CLI authenticates with this secret.
          GITHUB_TOKEN: ${{ secrets.PAT_GITHUB_ACTIONS }}
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          # Nothing to commit if applying the corrections changed no tracked file.
          if git diff --quiet; then
            echo "✅ No changes detected after approval. Exiting."
            exit 0
          fi
          git checkout -b spellcheck-approved-fixes
          git add .
          git commit -m "✅ Spellcheck & Grammar Fixes (Manually Approved)"
          git push origin spellcheck-approved-fixes
          gh pr create --base main --head spellcheck-approved-fixes \
            --title "Spellcheck & Grammar Fixes (Approved)" \
            --body "This PR contains manually approved spellcheck and grammar fixes."