-
Notifications
You must be signed in to change notification settings - Fork 15
138 lines (112 loc) · 5.6 KB
/
spellcheck.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
name: Manual Spellcheck & AI Grammar Correction with Interactive Review

# Manually triggered workflow that scans text and source files, proposes
# ignore-list casing fixes and LanguageTool grammar fixes, and uploads the
# proposals as a Markdown review artifact. It does not modify the repository.
on:
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write

jobs:
  spellcheck_review:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      # LanguageTool runs on the JVM, so a JRE is installed alongside the
      # Python packages. JAVA_HOME is deliberately not exported here: an
      # `export` only lasts for this step's shell, and the previously
      # hard-coded java-11 path is not guaranteed to match what
      # `default-jre` installs.
      - name: Install Dependencies
        run: |
          sudo apt update && sudo apt install -y default-jre
          pip install codespell 'fuzzywuzzy[speedup]' sentence-splitter --upgrade lxml
          pip install git+https://github.com/jxmorris12/language_tool_python.git

      # Fail fast with a clear message instead of a Python traceback later.
      - name: Verify Ignore List Exists
        run: |
          if [ ! -f .github/spellcheck-ignore.txt ]; then
            echo "Error: spellcheck-ignore.txt not found!" && exit 1
          fi

      # The heredoc delimiter is quoted ('EOF') so bash passes the Python
      # source through verbatim. Unquoted, the backticks in the regexes and
      # f-strings below would be executed as command substitutions.
      - name: Run Spellcheck and Generate Review File
        run: |
          set -e
          python3 <<'EOF'
          import os
          import re

          import language_tool_python

          IGNORE_FILE = ".github/spellcheck-ignore.txt"
          REVIEW_FILE = "spellcheck_review.md"
          CHECKED_EXTENSIONS = (".md", ".txt", ".py", ".js", ".java", ".cpp", ".ts")

          # Load the ignore list. Keys are lower-cased for case-insensitive
          # lookup; values keep the casing written in the file.
          ignore_list = {}
          with open(IGNORE_FILE, "r", encoding="utf-8") as f:
              for line in f:
                  word = line.strip()
                  if word:  # blank lines would otherwise register "" as a word
                      ignore_list[word.lower()] = word

          # Start LanguageTool. If it cannot start, continue with the
          # ignore-list pass only rather than failing the whole job.
          try:
              tool = language_tool_python.LanguageTool('en-US')
          except Exception as exc:
              print("Error: LanguageTool failed to initialize. Skipping grammar check.")
              print(f"Reason: {exc}")
              tool = None


          def is_comment(line, inside_code_block, inside_block_comment):
              """Advance the scanner state by one line.

              Returns a 3-tuple:
              (inside_code_block, inside_block_comment, is_comment_line),
              where is_comment_line is only ever True for a line inside a
              fenced code block that starts with '#', '//' or '*'.
              """
              # A ``` fence toggles the code-block state.
              if re.match(r'^\s*```', line):
                  return not inside_code_block, inside_block_comment, False
              if inside_code_block:
                  starts_like_comment = bool(re.match(r'^\s*(#|//|\*)', line))
                  return inside_code_block, inside_block_comment, starts_like_comment
              opened = line.rfind("/*")
              closed = line.rfind("*/")
              # The last opener is still open unless a closer follows it, so
              # a one-line "/* ... */" does not leave the scanner stuck
              # inside a block comment.
              if opened != -1 and closed < opened + 2:
                  return inside_code_block, True, False
              if closed != -1:
                  return inside_code_block, False, False
              return inside_code_block, inside_block_comment, False


          def is_code_or_url(line):
              """Return True if the line contains a URL or inline `code`."""
              return bool(re.search(r"https?://\S+|`.*?`|www\.\S+", line))


          def apply_spellcheck(sentence):
              """Rewrite ignore-list words to their listed casing.

              Tokens are replaced in place so the whitespace between them is
              preserved; otherwise any line with double spaces or tabs would
              be reported as a change.
              """
              return re.sub(
                  r'\S+',
                  lambda m: ignore_list.get(m.group().lower(), m.group()),
                  sentence,
              )


          def apply_grammar(sentence):
              """Apply the first LanguageTool suggestion for each match.

              Returns the sentence unchanged when LanguageTool is unavailable
              or the check raises.
              """
              if not tool:
                  return sentence
              try:
                  matches = tool.check(sentence)
              except Exception:
                  return sentence

              corrections = []
              for match in matches:
                  if not match.replacements:
                      continue
                  # The attribute name is assumed to differ between library
                  # releases (camelCase vs snake_case); accept either.
                  length = getattr(match, "errorLength", None)
                  if length is None:
                      length = getattr(match, "error_length", 0)
                  flagged = sentence[match.offset:match.offset + length]
                  # Skip empty spans and anything the ignore list covers.
                  if not flagged or flagged.lower() in ignore_list:
                      continue
                  corrections.append((match.offset, length, match.replacements[0]))

              # Apply right-to-left so earlier offsets stay valid, and skip
              # any match that overlaps an edit already applied.
              boundary = len(sentence)
              for offset, length, replacement in sorted(
                      corrections, key=lambda c: c[0], reverse=True):
                  if offset + length > boundary:
                      continue
                  sentence = sentence[:offset] + replacement + sentence[offset + length:]
                  boundary = offset
              return sentence


          # Walk the repository and collect (path, original, suggestion).
          changes = []
          for root, dirs, files in os.walk("."):
              # Never descend into git's internal storage.
              dirs[:] = [d for d in dirs if d != ".git"]
              for file in files:
                  if not file.endswith(CHECKED_EXTENSIONS):
                      continue
                  path = os.path.join(root, file)
                  try:
                      with open(path, "r", encoding="utf-8") as handle:
                          lines = handle.readlines()
                  except (UnicodeDecodeError, OSError) as exc:
                      # One unreadable file should not abort the whole review.
                      print(f"Warning: skipping {path}: {exc}")
                      continue

                  inside_code, inside_block_comment = False, False
                  for line in lines:
                      orig = line.strip()
                      # The flag must not be named `is_comment`: rebinding
                      # that name would replace the function with a bool and
                      # break the next iteration.
                      inside_code, inside_block_comment, comment_line = is_comment(
                          line, inside_code, inside_block_comment)

                      # Skip code (except comment lines inside fences), block
                      # comments, blank lines, and lines with URLs/inline code.
                      if ((inside_code and not comment_line)
                              or inside_block_comment
                              or not orig
                              or is_code_or_url(orig)):
                          continue

                      fixed = apply_spellcheck(orig)
                      corrected = apply_grammar(fixed)

                      # Tidy punctuation only when the grammar pass changed
                      # the line, so ellipses and ".." in untouched text are
                      # not rewritten.
                      if corrected != fixed:
                          corrected = (corrected.replace("..", ".")
                                       .replace(",.", ".")
                                       .replace(" ,", ","))

                      if corrected != orig:
                          changes.append((path, orig, corrected))

          # Write the review file only when there is something to review.
          if changes:
              with open(REVIEW_FILE, "w", encoding="utf-8") as f:
                  f.write("# Spellcheck & Grammar Fixes Review\n\n")
                  for i, (file, original, corrected) in enumerate(changes):
                      f.write(f"### Change {i+1}\n")
                      f.write(f"**File:** `{file}`\n")
                      f.write(f"**Original:** `{original}`\n")
                      f.write(f"**Suggested:** `{corrected}`\n")
                      f.write("Approve? (yes/no)\n\n")
          else:
              print("No spelling or grammar changes suggested.")
          EOF

      - name: Upload Review File
        uses: actions/upload-artifact@v4
        with:
          name: spellcheck-review
          path: spellcheck_review.md