Update spelling-and-grammar.yml

gravitee-io · Feb 25, 2025 · 637bd76 · 637bd76
1 parent ca5e93f
commit 637bd76
Showing 1 changed file with 46 additions and 9 deletions.
diff --git a/.github/workflows/spelling-and-grammar.yml b/.github/workflows/spelling-and-grammar.yml
@@ -35,36 +35,62 @@ jobs:
           python3 <<EOF
           import re, os, language_tool_python
 
-          ignore_list = {line.strip().lower(): line.strip() for line in open(".github/spellcheck-ignore.txt", "r", encoding="utf-8")}
-          tool = language_tool_python.LanguageTool('en-US')
+          # Load the ignore list
+          ignore_list = {}
+          with open(".github/spellcheck-ignore.txt", "r", encoding="utf-8") as f:
+              for line in f:
+                  word = line.strip()
+                  ignore_list[word.lower()] = word  # Store lowercase -> correct-case
 
+          # Load LanguageTool
+          try:
+              tool = language_tool_python.LanguageTool('en-US')
+          except Exception:
+              print("Error: LanguageTool failed to initialize. Skipping grammar check.")
+              tool = None
+
+          # Function to track code-fence and block-comment state and flag comment lines inside code blocks
           def is_comment(line, inside_code_block, inside_block_comment):
+              """Advance the code-fence / block-comment state by one line.
+
+              Returns a 3-tuple (inside_code_block, inside_block_comment, is_comment_line).
+              The third value is only ever True for a line inside a fenced code block
+              that starts, after optional whitespace, with #, // or an asterisk.
+              """
+              # A fence line toggles the code-block state and is never reported as a comment
               if re.match(r'^\s*```', line):  
                   return not inside_code_block, inside_block_comment, False
               if inside_code_block:
-                  return inside_code_block, inside_block_comment, bool(re.match(r'^\s*(#|//|\*/|\*\*)', line))
-              if re.search(r'/\*', line):
+                  # NOTE(review): the \*\* alternative is redundant; \* already matches any line starting with an asterisk
+                  return inside_code_block, inside_block_comment, bool(re.match(r'^\s*(#|//|\*|\*\*)', line))
+              if re.search(r'/\*', line):  # Start of multi-line block comment
+                  # NOTE(review): this test runs before the */ test, so a one-line /* ... */ comment
+                  # leaves the state True until a later line contains */
                   return inside_code_block, True, False
-              if re.search(r'\*/', line):
+              if re.search(r'\*/', line):  # End of multi-line block comment
                   return inside_code_block, False, False
+              # Any other line leaves both states as they were
               return inside_code_block, inside_block_comment, False
 
+          # Function to check if a line contains a URL or an inline code span (backticks)
           def is_code_or_url(line):
               """Report whether line holds an http(s) URL, a backtick-delimited span, or a www. address."""
               hit = re.search(r'https?://\S+|`.*?`|www\.\S+', line)
               return hit is not None
 
+          # Function to restore the listed casing of words found in the ignore list
           def apply_spellcheck(sentence):
-              return " ".join([ignore_list.get(word.lower(), word) for word in sentence.split()])
+              words = sentence.split()
+              return " ".join([ignore_list.get(word.lower(), word) for word in words])
 
+          # Function to apply grammar corrections safely
           def apply_grammar(sentence):
+              if not tool:
+                  return sentence
               try:
                   matches = tool.check(sentence)
               except Exception:
                   return sentence
-              corrections = [(m.offset, m.context, m.replacements[0]) for m in matches if m.replacements and m.context.lower() not in ignore_list]
+              corrections = []
+              for match in matches:
+                  if match.replacements and match.context.lower() not in ignore_list:
+                      corrections.append((match.offset, match.context, match.replacements[0]))
+
+              # Apply corrections in reverse order to avoid offset issues
               for offset, original, replacement in sorted(corrections, key=lambda x: -x[0]):
-                  sentence = sentence[:offset] + replacement + sentence[offset + len(original):]
+                  if offset + len(original) <= len(sentence):
+                      sentence = sentence[:offset] + replacement + sentence[offset + len(original):]
+
               return sentence
 
+          # Process each file
           for root, _, files in os.walk("."):
               for file in files:
                   if file.endswith((".md", ".txt", ".py", ".js", ".java", ".cpp", ".ts")):
@@ -75,11 +101,22 @@ jobs:
                           for line in lines:
                               orig = line.strip()
                               inside_code, inside_block_comment, is_comment = is_comment(line, inside_code, inside_block_comment)
+
+                              # Skip grammar correction for code but apply to comments
                               if inside_code and not is_comment or inside_block_comment or not orig or is_code_or_url(orig):
                                   f.write(line)
                                   continue
+
+                              # Apply spellcheck
                               fixed = apply_spellcheck(orig)
-                              f.write(apply_grammar(fixed) + "\n")
+
+                              # Apply grammar correction
+                              corrected = apply_grammar(fixed)
+
+                              # Prevent punctuation issues
+                              corrected = corrected.replace("..", ".").replace(",.", ".").replace(" ,", ",")
+
+                              f.write(corrected + "\n")
           EOF
 
       - name: Create PR if Changes Exist