From c922e11aab768403eb1c9b3497a655c918f5666a Mon Sep 17 00:00:00 2001 From: Ludwig Kent <124366668+Gavin-WangSC@users.noreply.github.com> Date: Thu, 10 Apr 2025 23:29:52 +0800 Subject: [PATCH 1/5] feat: Added LaTeX error modification using deepseek-reasoner --- writer.py | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 160 insertions(+), 7 deletions(-) diff --git a/writer.py b/writer.py index b8fe6f5..3526ee0 100644 --- a/writer.py +++ b/writer.py @@ -7,6 +7,9 @@ import os import glob import yaml +import re +import subprocess +import tempfile path_to = f'src/content/blog/{datetime.datetime.now().strftime("%Y-%m-%d")}' @@ -84,25 +87,164 @@ def outline(topic): def write_from_outline(outline): global deepseek, existing_posts_text return generate([ - {"role": "system", "content": "你是一位专业技术博客作者。在写作时请遵循以下中文排版规范:1) 中文与英文、数字之间需要有空格;2) 中文标点与英文、数字之间不加空格;3) 使用全角中文标点;4) 专有名词大小写正确;5) 英文、数字使用半角字符;6) 使用直角引号「」。"}, - {"role": "user", "content": f"{outline}\n\n根据这个提纲中关于技术知识的部分,写出一篇技术博客文章。文章中避免出现图片,避免使用列表。每一段出现的代码都进行较为详细的解读。在讲述内容时尽量使用段落的语言,语言风格可以略偏专业,但保持清晰。使用Markdown(要求符合Common Markdown规范)输出,使用LaTeX公式(注意:数学的开闭定界符前后不能有字母或数字字符。像x$a + b = c$或$a + b = c$1将无法渲染为数学公式(所有$会被渲染为$);但x $\\infty$ 1和($\\infty$)会正常渲染),标题尽量只用一级标题 `#` 和二级标题 `##`,不要用分割线。请遵循中文排版规范,确保中英文之间有空格,使用正确的标点符号。直接输出正文。"} + {"role": "system", "content": "你是一位专业技术博客作者。在写作时请遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符;使用直角引号「」。"}, + {"role": "user", "content": f"{outline}\n\n根据这个提纲中关于技术知识的部分,写出一篇技术博客文章。文章中避免出现图片,不能使用任何列表。每一段出现的代码都进行较为详细的解读。在讲述内容时尽量使用段落的语言,语言风格可以略偏专业,但保持清晰。使用Markdown(要求符合Common Markdown规范)输出,使用LaTeX公式(注意:数学的开闭定界符前后不能有字母或数字字符。像x$a + b = c$或$a + b = c$1将无法渲染为数学公式(所有$会被渲染为$);但x $\\infty$ 1和($\\infty$)会正常渲染),标题尽量只用一级标题 `#` 和二级标题 `##`,不要用分割线。请遵循中文排版规范,使用正确的标点符号。直接输出正文。"} ], deepseek, "deepseek-reasoner") def summary(article): global deepseek return generate([ - {"role": "system", "content": "你是一个技术博客简介写作者,简介不一定需要涵盖文章的全部内容,能起到一定的提示作用即可。直接输出简介。遵循以下中文排版规范:1) 中文与英文、数字之间需要有空格;2) 中文标点与英文、数字之间不加空格;3) 使用全角中文标点;4) 专有名词大小写正确;5) 英文、数字使用半角字符。注意简介被作为副标题使用,不是一句句子,不要以句号结尾。"}, + {"role": "system", "content": "你是一个技术博客简介写作者,简介不一定需要涵盖文章的全部内容,能起到一定的提示作用即可。直接输出简介。遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符。注意简介被作为副标题使用,不是一句句子,不要以句号结尾。"}, {"role": "user", "content": f"给这篇文章写一个15字的简短介绍:\n\n{article}"} ], deepseek, "deepseek-chat") +# LaTeX error handling +def remove_latex_comments(latex_str: str) -> str: + lines = latex_str.splitlines() + cleaned_lines = [] + for line in lines: + m = re.search(r'(? 
<!\\)%', line)
+        if m:
+            line = line[:m.start()]
+        cleaned_lines.append(line)
+    return "\n".join(cleaned_lines)
+
+def check_balanced_braces(latex_str: str) -> (bool, list):
+    stack = []
+    errors = []
+    for index, char in enumerate(latex_str):
+        if char == '{':
+            stack.append(index)
+        elif char == '}':
+            if not stack:
+                errors.append(f"位置 {index}: 右大括号 '}}' 没有对应的左大括号")
+            else:
+                stack.pop()
+    if stack:
+        for pos in stack:
+            errors.append(f"位置 {pos}: 左大括号 '{{' 没有对应的右大括号")
+    return (len(errors) == 0), errors
+
+def check_environment_matching(latex_str: str) -> (bool, list):
+    errors = []
+    env_stack = []
+    pattern = re.compile(r'\\(begin|end)\s*{([^}]+)}')
+    for m in pattern.finditer(latex_str):
+        cmd = m.group(1)
+        env = m.group(2).strip()
+        pos = m.start()
+        if cmd == "begin":
+            env_stack.append((env, pos))
+        else:  # cmd == "end"
+            if not env_stack:
+                errors.append(f"位置 {pos}: \\end{{{env}}} 没有对应的 \\begin")
+            else:
+                last_env, last_pos = env_stack.pop()
+                if last_env != env:
+                    errors.append(f"位置 {last_pos} 的 \\begin{{{last_env}}} 与位置 {pos} 的 \\end{{{env}}} 不匹配")
+    if env_stack:
+        for env, pos in env_stack:
+            errors.append(f"位置 {pos}: \\begin{{{env}}} 没有对应的 \\end")
+    return (len(errors) == 0), errors
+
+def run_static_checks(latex_snippet: str) -> list:
+    cleaned = remove_latex_comments(latex_snippet)
+    errors = []
+    ok_braces, brace_errors = check_balanced_braces(cleaned)
+    ok_env, env_errors = check_environment_matching(cleaned)
+    if not ok_braces:
+        errors.extend(["大括号错误: " + err for err in brace_errors])
+    if not ok_env:
+        errors.extend(["环境匹配错误: " + err for err in env_errors])
+    return errors
+
+def check_with_pdflatex(latex_snippet: str) -> list:
+    """
+    call pdflatex for compilation checking and return the error messages detected in the compilation log.
+    """
+    template = r"""
+\documentclass{article}
+\usepackage{amsmath}
+\begin{document}
+%s
+\end{document}
+    """ % latex_snippet
+
+    errors = []
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        tex_file = os.path.join(tmpdirname, "temp.tex")
+        with open(tex_file, "w", encoding="utf-8") as f:
+            f.write(template)
+        try:
+            proc = subprocess.run(
+                ["pdflatex", "-interaction=nonstopmode", tex_file],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                cwd=tmpdirname, timeout=15
+            )
+            output = proc.stdout.decode("utf-8") + proc.stderr.decode("utf-8")
+            for line in output.splitlines():
+                if line.startswith("!"):
+                    errors.append(line.strip())
+            if proc.returncode != 0 and not errors:
+                errors.append("pdflatex 返回非 0 错误码,编译可能存在问题。")
+        except Exception as e:
+            errors.append(f"调用 pdflatex 编译时出错: {e}")
+    return errors
+
+def extract_latex_segments(markdown_text: str) -> list:
+    """
+    extract latex segments from markdown
+    """
+    segments = []
+    block_pattern = re.compile(r'\$\$([\s\S]+?)\$\$', re.MULTILINE)
+    segments.extend(block_pattern.findall(markdown_text))
+    inline_pattern = re.compile(r'(?<!\$)\$(?!\$)([^\$\n]+?)(?<!\$)\$(?!\$)')
+    segments.extend(inline_pattern.findall(markdown_text))
+    return segments
+
+def latex_errors(markdown_text: str) ->
dict: + segments = extract_latex_segments(markdown_text) + report = {} + for idx, seg in enumerate(segments): + seg = seg.strip() + static_errors = run_static_checks(seg) + pdflatex_errors = check_with_pdflatex(seg) + report[f"公式段 {idx+1}"] = { + "原始内容": seg, + "静态检测错误": static_errors, + "pdflatex 检测错误": pdflatex_errors + } + return report + +def modify_latex(markdown_text: str, error): + global deepseek + return generate([ + {"role": "system", "content": "你是LaTeX校验员。以下是一段Markdown文本,其中的LaTeX代码有错误,请基于报错修正。同时文本要遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符。直接在输出中输出文本内容。"}, + {"role": "user", "content": f"<原文>\n{markdown_text}\n\n\n<报错>\n{error}\n"} + ], deepseek, "deepseek-reasoner") + +is_latin = lambda ch: '\u0000' <= ch <= '\u007F' or '\u00A0' <= ch <= '\u024F' +is_nonspace_latin = lambda ch: is_latin(ch) and not ch.isspace() and not ch in """*()[]{}"'/-@#""" +is_nonpunct_cjk = lambda ch: not is_latin(ch) and ch not in "·!¥…()—【】、;:‘’“”,。《》?「」" + +def beautify_string(text): + res = "" + for idx in range(len(text)): + if idx and ( + (is_nonspace_latin(text[idx]) and is_nonpunct_cjk(text[idx - 1])) or + (is_nonspace_latin(text[idx - 1]) and is_nonpunct_cjk(text[idx])) + ): res += " " + res += text[idx] + return res + start = time.time() print(" Generating topic:") -topic = extract_topic(topics_text) +topic = beautify_string(extract_topic(topics_text)) print(f" Determined topic: {topic}; time spent {time.time() - start:.1f} s") start = time.time() print(" Generating outline:") -outline_result = outline(topic) +outline_result = beautify_string(outline(topic)) print(f" Determined outline: time spent {time.time() - start:.1f} s") start = time.time() @@ -110,9 +252,20 @@ def summary(article): article = write_from_outline(outline_result) print(f" Article written: time spent {time.time() - start:.1f} s") +if latex_errors(article): + print(" latex_errors exist") + start = time.time() + article = modify_latex(article, latex_errors(article)) + print(f" LaTeX errors fixed: time spent {time.time() - start:.1f} s") + +start = time.time() +article = beautify_string(article) +print(f" Article beautified: time spent {time.time() - start:.1f} s") + + start = time.time() print(" Generating summary:") -summary_result = summary(article) +summary_result = beautify_string(summary(article)) print(f" Decided Summary: {summary_result}; time spent {time.time() - start:.1f} s") lines = iter(article.split("\n")) @@ -146,4 +299,4 @@ def summary(article): with open(f"{path_to}/index.md", "w", encoding="utf-8") as f: f.write(markdown_file) -print(f" Composed article: {path_to}/index.md") +print(f" Composed article: {path_to}/index.md") \ No newline at end of file From 69ee4f35bd98ab8ef833cbc4091ecc264be76312 Mon Sep 17 00:00:00 2001 From: Ludwig Kent <124366668+Gavin-WangSC@users.noreply.github.com> Date: Fri, 11 Apr 2025 20:39:09 +0800 Subject: [PATCH 2/5] . 
--- .github/workflows/auto-writer.yml | 2 +- writer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/auto-writer.yml b/.github/workflows/auto-writer.yml index 194bc5e..adac53f 100644 --- a/.github/workflows/auto-writer.yml +++ b/.github/workflows/auto-writer.yml @@ -14,7 +14,7 @@ jobs: uses: actions/checkout@v4 - name: Install Python Dependencies - run: pip install openai bs4 requests pyyaml + run: pip install openai bs4 requests pyyaml tempfile subprocess - name: Compose New Article env: diff --git a/writer.py b/writer.py index 3526ee0..b890be9 100644 --- a/writer.py +++ b/writer.py @@ -98,7 +98,7 @@ def summary(article): {"role": "user", "content": f"给这篇文章写一个15字的简短介绍:\n\n{article}"} ], deepseek, "deepseek-chat") -# LaTeX error handling +# LaTeX error handling def remove_latex_comments(latex_str: str) -> str: lines = latex_str.splitlines() cleaned_lines = [] From 35675ab5c878be16b7e82c42bcbaca7893dd2780 Mon Sep 17 00:00:00 2001 From: Ludwig Kent <124366668+Gavin-WangSC@users.noreply.github.com> Date: Fri, 11 Apr 2025 21:23:37 +0800 Subject: [PATCH 3/5] . --- writer.py | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 1 deletion(-) diff --git a/writer.py b/writer.py index a772b17..b890be9 100644 --- a/writer.py +++ b/writer.py @@ -98,6 +98,145 @@ def summary(article): {"role": "user", "content": f"给这篇文章写一个15字的简短介绍:\n\n{article}"} ], deepseek, "deepseek-chat") +# LaTeX error handling +def remove_latex_comments(latex_str: str) -> str: + lines = latex_str.splitlines() + cleaned_lines = [] + for line in lines: + m = re.search(r'(? (bool, list): + stack = [] + errors = [] + for index, char in enumerate(latex_str): + if char == '{': + stack.append(index) + elif char == '}': + if not stack: + errors.append(f"位置 {index}: 右大括号 '}}' 没有对应的左大括号") + else: + stack.pop() + if stack: + for pos in stack: + errors.append(f"位置 {pos}: 左大括号 '{{' 没有对应的右大括号") + return (len(errors) == 0), errors + +def check_environment_matching(latex_str: str) -> (bool, list): + errors = [] + env_stack = [] + pattern = re.compile(r'\\(begin|end)\s*{([^}]+)}') + for m in pattern.finditer(latex_str): + cmd = m.group(1) + env = m.group(2).strip() + pos = m.start() + if cmd == "begin": + env_stack.append((env, pos)) + else: # cmd == "end" + if not env_stack: + errors.append(f"位置 {pos}: \\end{{{env}}} 没有对应的 \\begin") + else: + last_env, last_pos = env_stack.pop() + if last_env != env: + errors.append(f"位置 {last_pos} 的 \\begin{{{last_env}}} 与位置 {pos} 的 \\end{{{env}}} 不匹配") + if env_stack: + for env, pos in env_stack: + errors.append(f"位置 {pos}: \\begin{{{env}}} 没有对应的 \\end") + return (len(errors) == 0), errors + +def run_static_checks(latex_snippet: str) -> list: + cleaned = remove_latex_comments(latex_snippet) + errors = [] + ok_braces, brace_errors = check_balanced_braces(cleaned) + ok_env, env_errors = check_environment_matching(cleaned) + if not ok_braces: + errors.extend(["大括号错误: " + err for err in brace_errors]) + if not ok_env: + errors.extend(["环境匹配错误: " + err for err in env_errors]) + return errors + +def check_with_pdflatex(latex_snippet: str) -> list: + """ + call pdflatex for compilation checking and return the error messages detected in the compilation log. 
+ """ + template = r""" +\documentclass{article} +\usepackage{amsmath} +\begin{document} +%s +\end{document} + """ % latex_snippet + + errors = [] + with tempfile.TemporaryDirectory() as tmpdirname: + tex_file = os.path.join(tmpdirname, "temp.tex") + with open(tex_file, "w", encoding="utf-8") as f: + f.write(template) + try: + proc = subprocess.run( + ["pdflatex", "-interaction=nonstopmode", tex_file], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=tmpdirname, timeout=15 + ) + output = proc.stdout.decode("utf-8") + proc.stderr.decode("utf-8") + for line in output.splitlines(): + if line.startswith("!"): + errors.append(line.strip()) + if proc.returncode != 0 and not errors: + errors.append("pdflatex 返回非 0 错误码,编译可能存在问题。") + except Exception as e: + errors.append(f"调用 pdflatex 编译时出错: {e}") + return errors + +def extract_latex_segments(markdown_text: str) -> list: + """ + extract latex segments from markdown + """ + segments = [] + block_pattern = re.compile(r'\$\$([\s\S]+?)\$\$', re.MULTILINE) + segments.extend(block_pattern.findall(markdown_text)) + inline_pattern = re.compile(r'(? dict: + segments = extract_latex_segments(markdown_text) + report = {} + for idx, seg in enumerate(segments): + seg = seg.strip() + static_errors = run_static_checks(seg) + pdflatex_errors = check_with_pdflatex(seg) + report[f"公式段 {idx+1}"] = { + "原始内容": seg, + "静态检测错误": static_errors, + "pdflatex 检测错误": pdflatex_errors + } + return report + +def modify_latex(markdown_text: str, error): + global deepseek + return generate([ + {"role": "system", "content": "你是LaTeX校验员。以下是一段Markdown文本,其中的LaTeX代码有错误,请基于报错修正。同时文本要遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符。直接在输出中输出文本内容。"}, + {"role": "user", "content": f"<原文>\n{markdown_text}\n\n\n<报错>\n{error}\n"} + ], deepseek, "deepseek-reasoner") + +is_latin = lambda ch: '\u0000' <= ch <= '\u007F' or '\u00A0' <= ch <= '\u024F' +is_nonspace_latin = lambda ch: is_latin(ch) and not ch.isspace() and not ch in """*()[]{}"'/-@#""" +is_nonpunct_cjk = lambda ch: not is_latin(ch) and ch not in "·!¥…()—【】、;:‘’“”,。《》?「」" + +def beautify_string(text): + res = "" + for idx in range(len(text)): + if idx and ( + (is_nonspace_latin(text[idx]) and is_nonpunct_cjk(text[idx - 1])) or + (is_nonspace_latin(text[idx - 1]) and is_nonpunct_cjk(text[idx])) + ): res += " " + res += text[idx] + return res + start = time.time() print(" Generating topic:") topic = beautify_string(extract_topic(topics_text)) @@ -110,7 +249,7 @@ def summary(article): start = time.time() print(" Generating article:") -article = beautify_string(write_from_outline(outline_result)) +article = write_from_outline(outline_result) print(f" Article written: time spent {time.time() - start:.1f} s") if latex_errors(article): From 8c517c08d136e19db0cd2d1df0bb8c7245798242 Mon Sep 17 00:00:00 2001 From: Ludwig Kent <124366668+Gavin-WangSC@users.noreply.github.com> Date: Mon, 5 May 2025 01:24:03 +0800 Subject: [PATCH 4/5] feat: extended the latex checks; changed the modificatioin process in to a while loop; only put LaTeX errors in the prompt when requesting a rewriting (the error prompts haven't been tested yet, they are generated by llms); isolated LaTeX when beautifying; --- writer.py | 310 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 184 insertions(+), 126 deletions(-) diff --git a/writer.py b/writer.py index b890be9..3ebc1b2 100644 --- a/writer.py +++ b/writer.py @@ -8,8 +8,7 @@ import glob import yaml import re -import subprocess -import tempfile +from typing import List, Tuple, Dict path_to = 
f'src/content/blog/{datetime.datetime.now().strftime("%Y-%m-%d")}' @@ -99,143 +98,201 @@ def summary(article): ], deepseek, "deepseek-chat") # LaTeX error handling -def remove_latex_comments(latex_str: str) -> str: - lines = latex_str.splitlines() - cleaned_lines = [] - for line in lines: - m = re.search(r'(? (bool, list): - stack = [] - errors = [] - for index, char in enumerate(latex_str): - if char == '{': - stack.append(index) - elif char == '}': +def extract_latex_segments(markdown_text: str) -> List[Tuple[str, int, int]]: + segments: List[Tuple[str,int,int]] = [] + block_pattern = re.compile(r'(\$\$[\s\S]+?\$\$)', re.DOTALL) + for m in block_pattern.finditer(markdown_text): + segments.append((m.group(1), m.start(), m.end())) + + inline_pattern = re.compile(r'(? List[str]: + errors: List[str] = [] + + # 命令后多余空格 (忽略 \tt, \it, \bf) + for m in re.finditer(r"\\([a-zA-Z]+)(\s+)", latex_str): + cmd = m.group(1) + if cmd not in ('tt', 'it', 'bf'): + errors.append(f"命令 '\\{cmd}' 后跟有空格,建议去掉空格。") + + # 引用前多余空格,建议用 '~' + if re.search(r"\s+\\ref\{", latex_str): + errors.append("'\\ref' 前有空格,应使用 '~\\ref{...}' 保持断开。") + + # 省略号 '...' 而非 \dots 或 \ldots + if re.search(r'(? (bool, list): - errors = [] - env_stack = [] - pattern = re.compile(r'\\(begin|end)\s*{([^}]+)}') - for m in pattern.finditer(latex_str): - cmd = m.group(1) - env = m.group(2).strip() + for pos in stack: + errors.append(f"位置 {pos}: 多余 '{{' 。") + + # \begin / \end 匹配(修正 \end raw-string 报错) + env_stack: List[Tuple[str, int]] = [] + for m in re.finditer(r"\\(begin|end)\s*\{([^}]+)\}", latex_str): + cmd, env = m.group(1), m.group(2) pos = m.start() - if cmd == "begin": + if cmd == 'begin': env_stack.append((env, pos)) - else: # cmd == "end" - if not env_stack: - errors.append(f"位置 {pos}: \\end{{{env}}} 没有对应的 \\begin") + else: # cmd == 'end' + if not env_stack or env_stack[-1][0] != env: + # 注意这里用双反斜杠来正确表示 '\end' + errors.append(f"位置 {pos}: '\\end{{{env}}}' 无匹配或顺序错误。") else: - last_env, last_pos = env_stack.pop() - if last_env != env: - errors.append(f"位置 {last_pos} 的 \\begin{{{last_env}}} 与位置 {pos} 的 \\end{{{env}}} 不匹配") - if env_stack: - for env, pos in env_stack: - errors.append(f"位置 {pos}: \\begin{{{env}}} 没有对应的 \\end") - return (len(errors) == 0), errors - -def run_static_checks(latex_snippet: str) -> list: - cleaned = remove_latex_comments(latex_snippet) - errors = [] - ok_braces, brace_errors = check_balanced_braces(cleaned) - ok_env, env_errors = check_environment_matching(cleaned) - if not ok_braces: - errors.extend(["大括号错误: " + err for err in brace_errors]) - if not ok_env: - errors.extend(["环境匹配错误: " + err for err in env_errors]) - return errors + env_stack.pop() + # 剩余未闭合的 begin + for env, pos in env_stack: + errors.append(f"位置 {pos}: '\\begin{{{env}}}' 未关闭。") -def check_with_pdflatex(latex_snippet: str) -> list: - """ - call pdflatex for compilation checking and return the error messages detected in the compilation log. 
- """ - template = r""" -\documentclass{article} -\usepackage{amsmath} -\begin{document} -%s -\end{document} - """ % latex_snippet - - errors = [] - with tempfile.TemporaryDirectory() as tmpdirname: - tex_file = os.path.join(tmpdirname, "temp.tex") - with open(tex_file, "w", encoding="utf-8") as f: - f.write(template) - try: - proc = subprocess.run( - ["pdflatex", "-interaction=nonstopmode", tex_file], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=tmpdirname, timeout=15 - ) - output = proc.stdout.decode("utf-8") + proc.stderr.decode("utf-8") - for line in output.splitlines(): - if line.startswith("!"): - errors.append(line.strip()) - if proc.returncode != 0 and not errors: - errors.append("pdflatex 返回非 0 错误码,编译可能存在问题。") - except Exception as e: - errors.append(f"调用 pdflatex 编译时出错: {e}") - return errors + # 括号前多余空格 + if re.search(r"\s+\(", latex_str): + errors.append("左括号 '(' 前有空格,应去除。") -def extract_latex_segments(markdown_text: str) -> list: - """ - extract latex segments from markdown - """ - segments = [] - block_pattern = re.compile(r'\$\$([\s\S]+?)\$\$', re.MULTILINE) - segments.extend(block_pattern.findall(markdown_text)) - inline_pattern = re.compile(r'(? dict: - segments = extract_latex_segments(markdown_text) + return errors + +def latex_errors(markdown_text: str) -> Dict[Tuple[str, int], List[str]]: report = {} - for idx, seg in enumerate(segments): - seg = seg.strip() - static_errors = run_static_checks(seg) - pdflatex_errors = check_with_pdflatex(seg) - report[f"公式段 {idx+1}"] = { - "原始内容": seg, - "静态检测错误": static_errors, - "pdflatex 检测错误": pdflatex_errors - } + for seg, start_idx, _ in extract_latex_segments(markdown_text): + errs = latex_checks(seg) + if errs: + report[(seg, start_idx)] = errs return report -def modify_latex(markdown_text: str, error): - global deepseek - return generate([ - {"role": "system", "content": "你是LaTeX校验员。以下是一段Markdown文本,其中的LaTeX代码有错误,请基于报错修正。同时文本要遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符。直接在输出中输出文本内容。"}, - {"role": "user", "content": f"<原文>\n{markdown_text}\n\n\n<报错>\n{error}\n"} - ], deepseek, "deepseek-reasoner") +def modify_latex(markdown_text: str, error_report: Dict[Tuple[str,int], List[str]]) -> str: + """ + 遍历 error_report,按 start_idx 从大到小替换, + 保证后面的替换不影响前面的 start_idx。 + """ + corrected = markdown_text + items = sorted(error_report.items(), key=lambda x: x[0][1], reverse=True) + + for (seg, start_idx), errs in items: + end_idx = start_idx + len(seg) + context = corrected[max(0, start_idx-50): end_idx+50] + user_msg = ( + f"修正此 LaTeX 片段(包含 $ 定界符):\n{seg}\n\n" + "检测到错误:\n- " + "\n- ".join(errs) + + "\n\n上下文:\n" + context + + "\n\n请只返回修正后的完整片段,不要添加其它标记。" + ) + fixed = generate([ + {"role":"system","content":"你是 LaTeX 专家,负责修正以下代码:"}, + {"role":"user","content":user_msg} + ], deepseek, "deepseek-reasoner").strip() + + # 去掉```,如果不小心生成了 + if fixed.startswith("```") and fixed.endswith("```"): + fixed = "\n".join(fixed.splitlines()[1:-1]).strip() + + # 给重新生成的丢失的加上 $/$$,如果ds忘记了 + if not fixed.startswith('$'): + if seg.startswith('$$') and seg.endswith('$$'): + fixed = '$$' + fixed + '$$' + elif seg.startswith('$') and seg.endswith('$'): + fixed = '$' + fixed + '$' + + # 最终替换 + corrected = corrected[:start_idx] + fixed + corrected[end_idx:] + + return corrected is_latin = lambda ch: '\u0000' <= ch <= '\u007F' or '\u00A0' <= ch <= '\u024F' is_nonspace_latin = lambda ch: is_latin(ch) and not ch.isspace() and not ch in """*()[]{}"'/-@#""" is_nonpunct_cjk = lambda ch: not is_latin(ch) and ch not in "·!¥…()—【】、;:‘’“”,。《》?「」" -def 
beautify_string(text): - res = "" - for idx in range(len(text)): - if idx and ( - (is_nonspace_latin(text[idx]) and is_nonpunct_cjk(text[idx - 1])) or - (is_nonspace_latin(text[idx - 1]) and is_nonpunct_cjk(text[idx])) - ): res += " " - res += text[idx] - return res +# beautify的时候跳过 LaTeX +def beautify_string(text: str) -> str: + segments = extract_latex_segments(text) + segments.sort(key=lambda x: x[1]) + + result_parts = [] + last_end = 0 + + for seg_content, seg_start, seg_end in segments: + non_latex_part = text[last_end:seg_start] + processed_part = "" + for i, char in enumerate(non_latex_part): + if i > 0 and ( + (is_nonspace_latin(char) and is_nonpunct_cjk(non_latex_part[i-1])) or + (is_nonspace_latin(non_latex_part[i-1]) and is_nonpunct_cjk(char)) + ): + processed_part += " " + processed_part += char + result_parts.append(processed_part) + + result_parts.append(seg_content) + last_end = seg_end + + final_part = text[last_end:] + processed_final_part = "" + for i, char in enumerate(final_part): + if i > 0 and ( + (is_nonspace_latin(char) and is_nonpunct_cjk(final_part[i-1])) or + (is_nonspace_latin(final_part[i-1]) and is_nonpunct_cjk(char)) + ): + processed_final_part += " " + processed_final_part += char + result_parts.append(processed_final_part) + + return "".join(result_parts) start = time.time() print(" Generating topic:") @@ -252,11 +309,12 @@ def beautify_string(text): article = write_from_outline(outline_result) print(f" Article written: time spent {time.time() - start:.1f} s") -if latex_errors(article): - print(" latex_errors exist") - start = time.time() +start = time.time() +while latex_errors(article): + print("latex_errors still exist") article = modify_latex(article, latex_errors(article)) - print(f" LaTeX errors fixed: time spent {time.time() - start:.1f} s") + +print(f" LaTeX errors fixed: time spent {time.time() - start:.1f} s") start = time.time() article = beautify_string(article) From f9991d074ecead4eaa4cde181b8c6b43b1970b11 Mon Sep 17 00:00:00 2001 From: Ludwig Kent <124366668+Gavin-WangSC@users.noreply.github.com> Date: Mon, 5 May 2025 09:33:07 +0800 Subject: [PATCH 5/5] unimportant dependencies --- .github/workflows/auto-writer.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/auto-writer.yml b/.github/workflows/auto-writer.yml index adac53f..194bc5e 100644 --- a/.github/workflows/auto-writer.yml +++ b/.github/workflows/auto-writer.yml @@ -14,7 +14,7 @@ jobs: uses: actions/checkout@v4 - name: Install Python Dependencies - run: pip install openai bs4 requests pyyaml tempfile subprocess + run: pip install openai bs4 requests pyyaml - name: Compose New Article env: