From a230b4ac5b5023c479e5ebe2edec95d90ca71c03 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Mon, 1 Jul 2024 14:00:10 +0800
Subject: [PATCH 01/13] refactor: project

---
 .gitignore         | 36 -----------------
 index.html         | 13 -------
 package.json       | 15 --------
 public/vite.svg    |  1 -
 src/counter.ts     |  9 -----
 src/main.ts        | 24 ------------
 src/style.css      | 96 ----------------------------------------------
 src/typescript.svg |  1 -
 src/vite-env.d.ts  |  1 -
 tsconfig.json      | 23 -----------
 10 files changed, 219 deletions(-)
 delete mode 100755 .gitignore
 delete mode 100644 index.html
 delete mode 100644 package.json
 delete mode 100644 public/vite.svg
 delete mode 100644 src/counter.ts
 delete mode 100644 src/main.ts
 delete mode 100644 src/style.css
 delete mode 100644 src/typescript.svg
 delete mode 100644 src/vite-env.d.ts
 delete mode 100644 tsconfig.json
diff --git a/.gitignore b/.gitignore
deleted file mode 100755
index 826b026a..00000000
--- a/.gitignore
+++ /dev/null
@@ -1,36 +0,0 @@
-public
-.cache
-node_modules
-*DS_Store
-*.env
-
-.idea/
-
-yarn-error.log
-.vscode
-
-__generated__/
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-pnpm-debug.log*
-lerna-debug.log*
-
-node_modules
-dist
-dist-ssr
-*.local
-
-# Editor directories and files
-.vscode/*
-!.vscode/extensions.json
-.idea
-.DS_Store
-*.suo
-*.ntvs*
-*.njsproj
-*.sln
-*.sw?
diff --git a/index.html b/index.html
deleted file mode 100644
index 44a93350..00000000
--- a/index.html
+++ /dev/null
@@ -1,13 +0,0 @@
-<!doctype html>
-<html lang="en">
-  <head>
-    <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Vite + TS</title>
-  </head>
-  <body>
-    <div id="app"></div>
-    <script type="module" src="/src/main.ts"></script>
-  </body>
-</html>
diff --git a/package.json b/package.json
deleted file mode 100644
index 07c6a13a..00000000
--- a/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "feedback-sys",
-  "private": true,
-  "version": "0.0.0",
-  "type": "module",
-  "scripts": {
-    "dev": "vite",
-    "build": "tsc && vite build",
-    "preview": "vite preview"
-  },
-  "devDependencies": {
-    "typescript": "^5.2.2",
-    "vite": "^5.2.0"
-  }
-}
diff --git a/public/vite.svg b/public/vite.svg
deleted file mode 100644
index e7b8dfb1..00000000
--- a/public/vite.svg
+++ /dev/null
@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>
\ No newline at end of file
diff --git a/src/counter.ts b/src/counter.ts
deleted file mode 100644
index 09e5afd2..00000000
--- a/src/counter.ts
+++ /dev/null
@@ -1,9 +0,0 @@
-export function setupCounter(element: HTMLButtonElement) {
-  let counter = 0
-  const setCounter = (count: number) => {
-    counter = count
-    element.innerHTML = `count is ${counter}`
-  }
-  element.addEventListener('click', () => setCounter(counter + 1))
-  setCounter(0)
-}
diff --git a/src/main.ts b/src/main.ts
deleted file mode 100644
index 791547b0..00000000
--- a/src/main.ts
+++ /dev/null
@@ -1,24 +0,0 @@
-import './style.css'
-import typescriptLogo from './typescript.svg'
-import viteLogo from '/vite.svg'
-import { setupCounter } from './counter.ts'
-
-document.querySelector<HTMLDivElement>('#app')!.innerHTML = `
-  <div>
-    <a href="https://vitejs.dev" target="_blank">
-      <img src="${viteLogo}" class="logo" alt="Vite logo" />
-    </a>
-    <a href="https://www.typescriptlang.org/" target="_blank">
-      <img src="${typescriptLogo}" class="logo vanilla" alt="TypeScript logo" />
-    </a>
-    <h1>Vite + TypeScript</h1>
-    <div class="card">
-      <button id="counter" type="button"></button>
-    </div>
-    <p class="read-the-docs">
-      Click on the Vite and TypeScript logos to learn more
-    </p>
-  </div>
-`
-
-setupCounter(document.querySelector<HTMLButtonElement>('#counter')!)
diff --git a/src/style.css b/src/style.css
deleted file mode 100644
index f9c73502..00000000
--- a/src/style.css
+++ /dev/null
@@ -1,96 +0,0 @@
-:root {
-  font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
-  line-height: 1.5;
-  font-weight: 400;
-
-  color-scheme: light dark;
-  color: rgba(255, 255, 255, 0.87);
-  background-color: #242424;
-
-  font-synthesis: none;
-  text-rendering: optimizeLegibility;
-  -webkit-font-smoothing: antialiased;
-  -moz-osx-font-smoothing: grayscale;
-}
-
-a {
-  font-weight: 500;
-  color: #646cff;
-  text-decoration: inherit;
-}
-a:hover {
-  color: #535bf2;
-}
-
-body {
-  margin: 0;
-  display: flex;
-  place-items: center;
-  min-width: 320px;
-  min-height: 100vh;
-}
-
-h1 {
-  font-size: 3.2em;
-  line-height: 1.1;
-}
-
-#app {
-  max-width: 1280px;
-  margin: 0 auto;
-  padding: 2rem;
-  text-align: center;
-}
-
-.logo {
-  height: 6em;
-  padding: 1.5em;
-  will-change: filter;
-  transition: filter 300ms;
-}
-.logo:hover {
-  filter: drop-shadow(0 0 2em #646cffaa);
-}
-.logo.vanilla:hover {
-  filter: drop-shadow(0 0 2em #3178c6aa);
-}
-
-.card {
-  padding: 2em;
-}
-
-.read-the-docs {
-  color: #888;
-}
-
-button {
-  border-radius: 8px;
-  border: 1px solid transparent;
-  padding: 0.6em 1.2em;
-  font-size: 1em;
-  font-weight: 500;
-  font-family: inherit;
-  background-color: #1a1a1a;
-  cursor: pointer;
-  transition: border-color 0.25s;
-}
-button:hover {
-  border-color: #646cff;
-}
-button:focus,
-button:focus-visible {
-  outline: 4px auto -webkit-focus-ring-color;
-}
-
-@media (prefers-color-scheme: light) {
-  :root {
-    color: #213547;
-    background-color: #ffffff;
-  }
-  a:hover {
-    color: #747bff;
-  }
-  button {
-    background-color: #f9f9f9;
-  }
-}
diff --git a/src/typescript.svg b/src/typescript.svg
deleted file mode 100644
index d91c910c..00000000
--- a/src/typescript.svg
+++ /dev/null
@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="32" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path fill="#007ACC" d="M0 128v128h256V0H0z"></path><path fill="#FFF" d="m56.612 128.85l-.081 10.483h33.32v94.68h23.568v-94.68h33.321v-10.28c0-5.69-.122-10.444-.284-10.566c-.122-.162-20.4-.244-44.983-.203l-44.74.122l-.121 10.443Zm149.955-10.742c6.501 1.625 11.459 4.51 16.01 9.224c2.357 2.52 5.851 7.111 6.136 8.208c.08.325-11.053 7.802-17.798 11.988c-.244.162-1.22-.894-2.317-2.52c-3.291-4.795-6.745-6.867-12.028-7.233c-7.76-.528-12.759 3.535-12.718 10.321c0 1.992.284 3.17 1.097 4.795c1.707 3.536 4.876 5.649 14.832 9.956c18.326 7.883 26.168 13.084 31.045 20.48c5.445 8.249 6.664 21.415 2.966 31.208c-4.063 10.646-14.14 17.879-28.323 20.276c-4.388.772-14.79.65-19.504-.203c-10.28-1.828-20.033-6.908-26.047-13.572c-2.357-2.6-6.949-9.387-6.664-9.874c.122-.163 1.178-.813 2.356-1.504c1.138-.65 5.446-3.129 9.509-5.485l7.355-4.267l1.544 2.276c2.154 3.29 6.867 7.801 9.712 9.305c8.167 4.307 19.383 3.698 24.909-1.26c2.357-2.153 3.332-4.388 3.332-7.68c0-2.966-.366-4.266-1.91-6.501c-1.99-2.845-6.054-5.242-17.595-10.24c-13.206-5.69-18.895-9.224-24.096-14.832c-3.007-3.25-5.852-8.452-7.03-12.8c-.975-3.617-1.22-12.678-.447-16.335c2.723-12.76 12.353-21.659 26.25-24.3c4.51-.853 14.994-.528 19.424.569Z"></path></svg>
\ No newline at end of file
diff --git a/src/vite-env.d.ts b/src/vite-env.d.ts
deleted file mode 100644
index 11f02fe2..00000000
--- a/src/vite-env.d.ts
+++ /dev/null
@@ -1 +0,0 @@
-/// <reference types="vite/client" />
diff --git a/tsconfig.json b/tsconfig.json
deleted file mode 100644
index 75abdef2..00000000
--- a/tsconfig.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "ES2020",
-    "useDefineForClassFields": true,
-    "module": "ESNext",
-    "lib": ["ES2020", "DOM", "DOM.Iterable"],
-    "skipLibCheck": true,
-
-    /* Bundler mode */
-    "moduleResolution": "bundler",
-    "allowImportingTsExtensions": true,
-    "resolveJsonModule": true,
-    "isolatedModules": true,
-    "noEmit": true,
-
-    /* Linting */
-    "strict": true,
-    "noUnusedLocals": true,
-    "noUnusedParameters": true,
-    "noFallthroughCasesInSwitch": true
-  },
-  "include": ["src"]
-}

From 8f6caebbb3033080830a3be1220d75d9071ab99e Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Tue, 2 Jul 2024 01:08:39 +0800
Subject: [PATCH 02/13] feat: the extension

---
 compiler-plugin/.gitignore                    | 162 ++++++++++++++++++
 compiler-plugin/pyproject.toml                |  18 ++
 .../__init__.py                               |   0
 .../extension.py                              |  87 ++++++++++
 4 files changed, 267 insertions(+)
 create mode 100644 compiler-plugin/.gitignore
 create mode 100644 compiler-plugin/pyproject.toml
 create mode 100644 compiler-plugin/src/python_markdown_mark_words_compiler_plugin/__init__.py
 create mode 100644 compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py

diff --git a/compiler-plugin/.gitignore b/compiler-plugin/.gitignore
new file mode 100644
index 00000000..82f92755
--- /dev/null
+++ b/compiler-plugin/.gitignore
@@ -0,0 +1,162 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/compiler-plugin/pyproject.toml b/compiler-plugin/pyproject.toml
new file mode 100644
index 00000000..0b619304
--- /dev/null
+++ b/compiler-plugin/pyproject.toml
@@ -0,0 +1,18 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "python_markdown_mark_words_compiler_plugin"
+version = "0.0.1"
+authors = [
+    {name = "HikariLan", email = "hikarilan@minecraft.kim"},
+]
+description = "A Python-Markdown compiler plugin that put markdown words offset to the output HTML."
+license = {text = "Apache-2.0"}
+dependencies = [
+  "markdown"  
+]
+
+[project.entry-points."markdown.extensions"]
+mark-words = "python_markdown_mark_words_compiler_plugin.extension:MarkWordsExtension"
\ No newline at end of file
diff --git a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/__init__.py b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
new file mode 100644
index 00000000..7d670c79
--- /dev/null
+++ b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
@@ -0,0 +1,87 @@
+from markdown import Extension
+from markdown.preprocessors import Preprocessor
+from markdown.blockprocessors import BlockProcessor
+import xml.etree.ElementTree as etree
+
+MARK_PREVENT_RECURSION = "\t\t\t\r\r\rMARK_PREVENT_RECURSION\r\r\r\t\t\t"
+
+class MarkWordsExtension(Extension):
+    def extendMarkdown(self, md):
+        meta = {
+            "document_offsets": []
+        }
+        md.preprocessors.register(CalculateDocumentOffsetPreprocessor(md, meta),
+                                   'capture_document', 
+                                   1000) # Highest priority is required because we need to calc words offset from original document
+        md.parser.blockprocessors.register(MarkWordsBlockProcessor(md.parser, meta), 
+                                           'mark_words',
+                                           100) # high priority, usually larger than every other block processor
+
+class CalculateDocumentOffsetPreprocessor(Preprocessor):
+    def __init__(self, md, meta):
+        super(CalculateDocumentOffsetPreprocessor, self).__init__(md)
+        self.meta = meta
+    
+    def run(self, lines):
+        offset = 0
+        for line_num, line in enumerate(lines):
+            # Skip empty lines
+            if len(line) == 0:
+                offset += 1
+                continue
+            # store the line and offset
+            self.meta["document_offsets"].append((line, offset, offset + len(line)))
+            ## plus 1 is for the newline character (\n), use the CRLF file is unknown behavior
+            offset += (len(line) + 1)
+        return lines
+
+
+class MarkWordsBlockProcessor(BlockProcessor):
+    def __init__(self, parser, meta):
+        super(MarkWordsBlockProcessor, self).__init__(parser)
+        self.meta = meta
+    
+    def test(self, parent, block):
+        ## Test if there is any line in the block
+        for line in [line for (line, _, _) in self.meta["document_offsets"]]:
+            if line in block:
+                return True
+        return False
+    
+    def run(self, parent: etree.Element, blocks):
+        block = blocks[0]
+        
+        ## If the first block is handled, remove the marker and return, so that other block processors can process it
+        if MARK_PREVENT_RECURSION in blocks[0]:
+            blocks[0] = blocks[0].replace(MARK_PREVENT_RECURSION, "")
+            return False
+        
+        start = None
+        end = None
+        # Search for the block fragment in the document_offsets
+        for (line, offset, end_offset) in self.meta["document_offsets"]:
+            # If found one
+            if line in block:
+                # If none yet set, set the start offset
+                if start is None:
+                    start = offset
+                    end = end_offset
+                # Or, continuing searching for the end offset until the end of the block
+                else:
+                    end = end_offset
+            # If end is not found but new line not in block, reset the search and restart from the next line
+            elif end is None:
+                start = None
+                continue
+            # If both start and end are both set and no continuously block found, break the loop
+            else:
+                break
+        # If both start and end are found, store the result
+        if start is not None and end is not None:
+            blocks.pop(0)
+            ## append MARK_PREVENT_RECURSION to tail of the block to prevent recursion, we don't use a handled flaglist because we don't know if there's some same block in the document
+            self.parser.parseBlocks(parent, [block + MARK_PREVENT_RECURSION])
+            parent[-1].set("data-original-document-start", str(start))
+            parent[-1].set("data-original-document-end", str(end))
+            return True
+        return False
\ No newline at end of file

From 8554d797bd22acb74de74b8bf57254cd626e93ba Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Tue, 2 Jul 2024 01:09:36 +0800
Subject: [PATCH 03/13] test: basic test cases

---
 compiler-plugin/test/__main__.py | 51 ++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 compiler-plugin/test/__main__.py

diff --git a/compiler-plugin/test/__main__.py b/compiler-plugin/test/__main__.py
new file mode 100644
index 00000000..80bc2f3c
--- /dev/null
+++ b/compiler-plugin/test/__main__.py
@@ -0,0 +1,51 @@
+import markdown
+from html.parser import HTMLParser
+
+test_cases = {
+    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent vel nulla ac diam dignissim congue ut sed ligula. Pellentesque aliquet ante sit amet risus iaculis, eget tincidunt nibh volutpat. Etiam non pulvinar enim. Mauris viverra augue urna, non aliquam ligula sodales in. Duis mattis ligula pretium dui bibendum, nec tincidunt neque placerat. Pellentesque eu est malesuada, dictum nulla quis, facilisis lectus. Fusce tempor mi ac tellus dictum porta. Cras venenatis pulvinar turpis. Suspendisse consequat nulla suscipit sagittis pretium.": (0, 544),
+    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna. Quisque nisl nisi, fermentum at justo quis, varius aliquet lorem. Ut fringilla vel purus et fermentum. Mauris ac lacinia nisi, sed ultricies dolor. Nunc ut augue quis eros iaculis tempor vel eu erat. Vestibulum efficitur porta justo. Fusce cursus magna dui, eget posuere neque tristique id. Suspendisse varius mauris arcu, nec congue metus efficitur in. Etiam ac pretium justo. Proin non ante faucibus, mattis mi et, consectetur sapien. Proin feugiat commodo euismod.": (546, 1131),
+    "Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit. Nam vel consequat magna, ac dictum velit. Quisque non cursus enim, at ullamcorper massa. Integer quam mauris, scelerisque eu luctus et, facilisis nec ante. Proin feugiat vehicula felis at ornare. Maecenas est risus, tempus sit amet fermentum vel, sagittis in tellus. Integer ultrices velit at nulla tincidunt cursus. Curabitur non nunc in erat imperdiet imperdiet id sed felis. Quisque euismod velit a mi pellentesque, sit amet molestie eros dignissim. Morbi tincidunt dui vitae orci viverra, vitae gravida sapien semper. Pellentesque viverra a turpis blandit ornare. Quisque tincidunt quam a est facilisis, a fringilla augue sollicitudin. Pellentesque et eros sed arcu placerat sollicitudin. Donec diam eros, auctor non risus eu, interdum interdum mi.": (1133, 1971)
+}
+
+test_document = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent vel nulla ac diam dignissim congue ut sed ligula. Pellentesque aliquet ante sit amet risus iaculis, eget tincidunt nibh volutpat. Etiam non pulvinar enim. Mauris viverra augue urna, non aliquam ligula sodales in. Duis mattis ligula pretium dui bibendum, nec tincidunt neque placerat. Pellentesque eu est malesuada, dictum nulla quis, facilisis lectus. Fusce tempor mi ac tellus dictum porta. Cras venenatis pulvinar turpis. Suspendisse consequat nulla suscipit sagittis pretium.
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna. Quisque nisl nisi, fermentum at justo quis, varius aliquet lorem. Ut fringilla vel purus et fermentum. Mauris ac lacinia nisi, sed ultricies dolor. Nunc ut augue quis eros iaculis tempor vel eu erat. Vestibulum efficitur porta justo. Fusce cursus magna dui, eget posuere neque tristique id. Suspendisse varius mauris arcu, nec congue metus efficitur in. Etiam ac pretium justo. Proin non ante faucibus, mattis mi et, consectetur sapien. Proin feugiat commodo euismod.
+
+Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit. Nam vel consequat magna, ac dictum velit. Quisque non cursus enim, at ullamcorper massa. Integer quam mauris, scelerisque eu luctus et, facilisis nec ante. Proin feugiat vehicula felis at ornare. Maecenas est risus, tempus sit amet fermentum vel, sagittis in tellus. Integer ultrices velit at nulla tincidunt cursus. Curabitur non nunc in erat imperdiet imperdiet id sed felis. Quisque euismod velit a mi pellentesque, sit amet molestie eros dignissim. Morbi tincidunt dui vitae orci viverra, vitae gravida sapien semper. Pellentesque viverra a turpis blandit ornare. Quisque tincidunt quam a est facilisis, a fringilla augue sollicitudin. Pellentesque et eros sed arcu placerat sollicitudin. Donec diam eros, auctor non risus eu, interdum interdum mi."""
+
+html = markdown.markdown(test_document, extensions=['mark-words'])
+
+class Tester(HTMLParser):
+    start = None
+    end = None
+    data = None
+    
+    def handle_starttag(self, tag, attrs):
+        for attr in attrs:
+            if attr[0] == "data-original-document-start":
+                self.start = int(attr[1])
+            if attr[0] == "data-original-document-end":
+                self.end = int(attr[1])
+        
+    def handle_data(self, data):
+        self.data = data
+        if(self.start is not None and self.end is not None and self.data is not None):
+            self._test()
+            self._reset()
+        
+    def _test(self):
+        if self.start is None or self.end is None or self.data is None:
+            raise AssertionError("Missing data")
+        case = test_cases[self.data]
+        print(f"Testing block offset ({self.start}, {self.end}) == {case}")
+        if self.start != case[0] or self.end != case[1]:
+            raise AssertionError(f"Block offset test failed, expected ({case[0]}, {case[1]}), got ({self.start}, {self.end})")
+    
+    def _reset(self):
+        self.start = None
+        self.end = None
+        self.data = None
+
+Tester().feed(html)
+
+print("All tests passed!")
\ No newline at end of file

From 68f175d195cd5e76fa6fb0538e37233992bb5fa8 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Tue, 2 Jul 2024 15:39:07 +0800
Subject: [PATCH 04/13] fix: offset mismatch with consecutive identical fragments

---
 .../extension.py                              | 33 +++++++++++++++----
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
index 7d670c79..8bf7d416 100644
--- a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
+++ b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
@@ -8,7 +8,8 @@
 class MarkWordsExtension(Extension):
     def extendMarkdown(self, md):
         meta = {
-            "document_offsets": []
+            "document_offsets": [],
+            "used_document_offsets": {}
         }
         md.preprocessors.register(CalculateDocumentOffsetPreprocessor(md, meta),
                                    'capture_document', 
@@ -17,6 +18,9 @@ def extendMarkdown(self, md):
                                            'mark_words',
                                            100) # high priority, usually larger than every other block processor
 
+'''
+A preprocessor to calculate the offset of each line in the document
+'''
 class CalculateDocumentOffsetPreprocessor(Preprocessor):
     def __init__(self, md, meta):
         super(CalculateDocumentOffsetPreprocessor, self).__init__(md)
@@ -30,12 +34,16 @@ def run(self, lines):
                 offset += 1
                 continue
             # store the line and offset
-            self.meta["document_offsets"].append((line, offset, offset + len(line)))
+            store = (line, offset, offset + len(line))
+            self.meta["document_offsets"].append(store)
+            self.meta["used_document_offsets"][store] = False
             ## plus 1 is for the newline character (\n), use the CRLF file is unknown behavior
             offset += (len(line) + 1)
         return lines
 
-
+'''
+A block processor to mark the words in the document and inject the offset of the block to the HTML element
+'''
 class MarkWordsBlockProcessor(BlockProcessor):
     def __init__(self, parser, meta):
         super(MarkWordsBlockProcessor, self).__init__(parser)
@@ -51,17 +59,25 @@ def test(self, parent, block):
     def run(self, parent: etree.Element, blocks):
         block = blocks[0]
         
-        ## If the first block is handled, remove the marker and return, so that other block processors can process it
+        # If the first block is handled, remove the marker and return, so that other block processors can process it
         if MARK_PREVENT_RECURSION in blocks[0]:
             blocks[0] = blocks[0].replace(MARK_PREVENT_RECURSION, "")
             return False
         
         start = None
         end = None
+        used = {}
         # Search for the block fragment in the document_offsets
-        for (line, offset, end_offset) in self.meta["document_offsets"]:
+        for store in self.meta["document_offsets"]:
+            # If already used, skip
+            if(self.meta["used_document_offsets"][store]):
+                continue
+            (line, offset, end_offset) = store
             # If found one
             if line in block:
+                # If the line already scanned (usually some lines with same content in different place), skip
+                if line in [line for (line, _, _) in used.keys()]:
+                    continue
                 # If none yet set, set the start offset
                 if start is None:
                     start = offset
@@ -69,9 +85,13 @@ def run(self, parent: etree.Element, blocks):
                 # Or, continuing searching for the end offset until the end of the block
                 else:
                     end = end_offset
+                # Mark the fragment as used
+                used[store] = True
             # If end is not found but new line not in block, reset the search and restart from the next line
             elif end is None:
                 start = None
+                # Clear the used list
+                used = {}
                 continue
             # If both start and end are both set and no continuously block found, break the loop
             else:
@@ -79,7 +99,8 @@ def run(self, parent: etree.Element, blocks):
         # If both start and end are found, store the result
         if start is not None and end is not None:
             blocks.pop(0)
-            ## append MARK_PREVENT_RECURSION to tail of the block to prevent recursion, we don't use a handled flaglist because we don't know if there's some same block in the document
+            self.meta["used_document_offsets"].update(used)
+            # append MARK_PREVENT_RECURSION to tail of the block to prevent recursion, we don't use a handled flaglist because we don't know if there's some same block in the document
             self.parser.parseBlocks(parent, [block + MARK_PREVENT_RECURSION])
             parent[-1].set("data-original-document-start", str(start))
             parent[-1].set("data-original-document-end", str(end))

From fab6d144ddabde36790a0f245e983ffc4a675739 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Tue, 2 Jul 2024 22:38:51 +0800
Subject: [PATCH 05/13] refactor: switch to rye as build system

---
 compiler-plugin/.gitignore            | 160 +-------------------------
 compiler-plugin/.python-version       |   1 +
 compiler-plugin/pyproject.toml        |  34 ++++--
 compiler-plugin/requirements-dev.lock |  13 +++
 compiler-plugin/requirements.lock     |  13 +++
 5 files changed, 53 insertions(+), 168 deletions(-)
 create mode 100644 compiler-plugin/.python-version
 create mode 100644 compiler-plugin/requirements-dev.lock
 create mode 100644 compiler-plugin/requirements.lock

diff --git a/compiler-plugin/.gitignore b/compiler-plugin/.gitignore
index 82f92755..ae8554de 100644
--- a/compiler-plugin/.gitignore
+++ b/compiler-plugin/.gitignore
@@ -1,162 +1,10 @@
-# Byte-compiled / optimized / DLL files
+# python generated files
 __pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
+*.py[oc]
 build/
-develop-eggs/
 dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
 wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
+*.egg-info
 
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
-.pdm.toml
-.pdm-python
-.pdm-build/
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
+# venv
 .venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
diff --git a/compiler-plugin/.python-version b/compiler-plugin/.python-version
new file mode 100644
index 00000000..871f80a3
--- /dev/null
+++ b/compiler-plugin/.python-version
@@ -0,0 +1 @@
+3.12.3
diff --git a/compiler-plugin/pyproject.toml b/compiler-plugin/pyproject.toml
index 0b619304..39b6ba65 100644
--- a/compiler-plugin/pyproject.toml
+++ b/compiler-plugin/pyproject.toml
@@ -1,18 +1,28 @@
-[build-system]
-requires = ["setuptools"]
-build-backend = "setuptools.build_meta"
-
 [project]
 name = "python_markdown_mark_words_compiler_plugin"
 version = "0.0.1"
-authors = [
-    {name = "HikariLan", email = "hikarilan@minecraft.kim"},
-]
 description = "A Python-Markdown compiler plugin that put markdown words offset to the output HTML."
-license = {text = "Apache-2.0"}
-dependencies = [
-  "markdown"  
-]
+authors = [{ name = "HikariLan", email = "hikarilan@minecraft.kim" }]
+license = { text = "Apache-2.0" }
+dependencies = ["markdown>=3.6"]
+requires-python = ">= 3.8"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.rye]
+managed = true
+dev-dependencies = []
+
+[tool.hatch.metadata]
+allow-direct-references = true
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/python_markdown_mark_words_compiler_plugin"]
+
+[tool.rye.scripts]
+test = "python ./test"
 
 [project.entry-points."markdown.extensions"]
-mark-words = "python_markdown_mark_words_compiler_plugin.extension:MarkWordsExtension"
\ No newline at end of file
+mark-words = "python_markdown_mark_words_compiler_plugin.extension:MarkWordsExtension"
diff --git a/compiler-plugin/requirements-dev.lock b/compiler-plugin/requirements-dev.lock
new file mode 100644
index 00000000..39d92f66
--- /dev/null
+++ b/compiler-plugin/requirements-dev.lock
@@ -0,0 +1,13 @@
+# generated by rye
+# use `rye lock` or `rye sync` to update this lockfile
+#
+# last locked with the following flags:
+#   pre: false
+#   features: []
+#   all-features: false
+#   with-sources: false
+#   generate-hashes: false
+
+-e file:.
+markdown==3.6
+    # via python-markdown-mark-words-compiler-plugin
diff --git a/compiler-plugin/requirements.lock b/compiler-plugin/requirements.lock
new file mode 100644
index 00000000..39d92f66
--- /dev/null
+++ b/compiler-plugin/requirements.lock
@@ -0,0 +1,13 @@
+# generated by rye
+# use `rye lock` or `rye sync` to update this lockfile
+#
+# last locked with the following flags:
+#   pre: false
+#   features: []
+#   all-features: false
+#   with-sources: false
+#   generate-hashes: false
+
+-e file:.
+markdown==3.6
+    # via python-markdown-mark-words-compiler-plugin

From 460072be5172d3dc6f4eb55715ed78c02fb1609f Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Tue, 2 Jul 2024 22:39:20 +0800
Subject: [PATCH 06/13] fmt: format code

---
 .../extension.py                              | 50 ++++++++++---------
 compiler-plugin/test/__main__.py              | 35 +++++++++----
 2 files changed, 50 insertions(+), 35 deletions(-)

diff --git a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
index 8bf7d416..8495d7a3 100644
--- a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
+++ b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
@@ -5,27 +5,27 @@
 
 MARK_PREVENT_RECURSION = "\t\t\t\r\r\rMARK_PREVENT_RECURSION\r\r\r\t\t\t"
 
+
 class MarkWordsExtension(Extension):
     def extendMarkdown(self, md):
-        meta = {
-            "document_offsets": [],
-            "used_document_offsets": {}
-        }
-        md.preprocessors.register(CalculateDocumentOffsetPreprocessor(md, meta),
-                                   'capture_document', 
-                                   1000) # Highest priority is required because we need to calc words offset from original document
-        md.parser.blockprocessors.register(MarkWordsBlockProcessor(md.parser, meta), 
-                                           'mark_words',
-                                           100) # high priority, usually larger than every other block processor
+        meta = {"document_offsets": [], "used_document_offsets": {}}
+        md.preprocessors.register(
+            CalculateDocumentOffsetPreprocessor(md, meta), "capture_document", 1000
+        )  # Highest priority is required because we need to calc words offset from original document
+        md.parser.blockprocessors.register(
+            MarkWordsBlockProcessor(md.parser, meta), "mark_words", 100
+        )  # high priority, usually larger than every other block processor
+
 
-'''
-A preprocessor to calculate the offset of each line in the document
-'''
 class CalculateDocumentOffsetPreprocessor(Preprocessor):
+    """
+    A preprocessor to calculate the offset of each line in the document
+    """
+
     def __init__(self, md, meta):
         super(CalculateDocumentOffsetPreprocessor, self).__init__(md)
         self.meta = meta
-    
+
     def run(self, lines):
         offset = 0
         for line_num, line in enumerate(lines):
@@ -38,39 +38,41 @@ def run(self, lines):
             self.meta["document_offsets"].append(store)
             self.meta["used_document_offsets"][store] = False
             ## plus 1 is for the newline character (\n), use the CRLF file is unknown behavior
-            offset += (len(line) + 1)
+            offset += len(line) + 1
         return lines
 
-'''
-A block processor to mark the words in the document and inject the offset of the block to the HTML element
-'''
+
 class MarkWordsBlockProcessor(BlockProcessor):
+    """
+    A block processor to mark the words in the document and inject the offset of the block to the HTML element
+    """
+
     def __init__(self, parser, meta):
         super(MarkWordsBlockProcessor, self).__init__(parser)
         self.meta = meta
-    
+
     def test(self, parent, block):
         ## Test if there is any line in the block
         for line in [line for (line, _, _) in self.meta["document_offsets"]]:
             if line in block:
                 return True
         return False
-    
+
     def run(self, parent: etree.Element, blocks):
         block = blocks[0]
-        
+
         # If the first block is handled, remove the marker and return, so that other block processors can process it
         if MARK_PREVENT_RECURSION in blocks[0]:
             blocks[0] = blocks[0].replace(MARK_PREVENT_RECURSION, "")
             return False
-        
+
         start = None
         end = None
         used = {}
         # Search for the block fragment in the document_offsets
         for store in self.meta["document_offsets"]:
             # If already used, skip
-            if(self.meta["used_document_offsets"][store]):
+            if self.meta["used_document_offsets"][store]:
                 continue
             (line, offset, end_offset) = store
             # If found one
@@ -105,4 +107,4 @@ def run(self, parent: etree.Element, blocks):
             parent[-1].set("data-original-document-start", str(start))
             parent[-1].set("data-original-document-end", str(end))
             return True
-        return False
\ No newline at end of file
+        return False
diff --git a/compiler-plugin/test/__main__.py b/compiler-plugin/test/__main__.py
index 80bc2f3c..161e767a 100644
--- a/compiler-plugin/test/__main__.py
+++ b/compiler-plugin/test/__main__.py
@@ -2,9 +2,18 @@
 from html.parser import HTMLParser
 
 test_cases = {
-    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent vel nulla ac diam dignissim congue ut sed ligula. Pellentesque aliquet ante sit amet risus iaculis, eget tincidunt nibh volutpat. Etiam non pulvinar enim. Mauris viverra augue urna, non aliquam ligula sodales in. Duis mattis ligula pretium dui bibendum, nec tincidunt neque placerat. Pellentesque eu est malesuada, dictum nulla quis, facilisis lectus. Fusce tempor mi ac tellus dictum porta. Cras venenatis pulvinar turpis. Suspendisse consequat nulla suscipit sagittis pretium.": (0, 544),
-    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna. Quisque nisl nisi, fermentum at justo quis, varius aliquet lorem. Ut fringilla vel purus et fermentum. Mauris ac lacinia nisi, sed ultricies dolor. Nunc ut augue quis eros iaculis tempor vel eu erat. Vestibulum efficitur porta justo. Fusce cursus magna dui, eget posuere neque tristique id. Suspendisse varius mauris arcu, nec congue metus efficitur in. Etiam ac pretium justo. Proin non ante faucibus, mattis mi et, consectetur sapien. Proin feugiat commodo euismod.": (546, 1131),
-    "Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit. Nam vel consequat magna, ac dictum velit. Quisque non cursus enim, at ullamcorper massa. Integer quam mauris, scelerisque eu luctus et, facilisis nec ante. Proin feugiat vehicula felis at ornare. Maecenas est risus, tempus sit amet fermentum vel, sagittis in tellus. Integer ultrices velit at nulla tincidunt cursus. Curabitur non nunc in erat imperdiet imperdiet id sed felis. Quisque euismod velit a mi pellentesque, sit amet molestie eros dignissim. Morbi tincidunt dui vitae orci viverra, vitae gravida sapien semper. Pellentesque viverra a turpis blandit ornare. Quisque tincidunt quam a est facilisis, a fringilla augue sollicitudin. Pellentesque et eros sed arcu placerat sollicitudin. Donec diam eros, auctor non risus eu, interdum interdum mi.": (1133, 1971)
+    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent vel nulla ac diam dignissim congue ut sed ligula. Pellentesque aliquet ante sit amet risus iaculis, eget tincidunt nibh volutpat. Etiam non pulvinar enim. Mauris viverra augue urna, non aliquam ligula sodales in. Duis mattis ligula pretium dui bibendum, nec tincidunt neque placerat. Pellentesque eu est malesuada, dictum nulla quis, facilisis lectus. Fusce tempor mi ac tellus dictum porta. Cras venenatis pulvinar turpis. Suspendisse consequat nulla suscipit sagittis pretium.": (
+        0,
+        544,
+    ),
+    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna. Quisque nisl nisi, fermentum at justo quis, varius aliquet lorem. Ut fringilla vel purus et fermentum. Mauris ac lacinia nisi, sed ultricies dolor. Nunc ut augue quis eros iaculis tempor vel eu erat. Vestibulum efficitur porta justo. Fusce cursus magna dui, eget posuere neque tristique id. Suspendisse varius mauris arcu, nec congue metus efficitur in. Etiam ac pretium justo. Proin non ante faucibus, mattis mi et, consectetur sapien. Proin feugiat commodo euismod.": (
+        546,
+        1131,
+    ),
+    "Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit. Nam vel consequat magna, ac dictum velit. Quisque non cursus enim, at ullamcorper massa. Integer quam mauris, scelerisque eu luctus et, facilisis nec ante. Proin feugiat vehicula felis at ornare. Maecenas est risus, tempus sit amet fermentum vel, sagittis in tellus. Integer ultrices velit at nulla tincidunt cursus. Curabitur non nunc in erat imperdiet imperdiet id sed felis. Quisque euismod velit a mi pellentesque, sit amet molestie eros dignissim. Morbi tincidunt dui vitae orci viverra, vitae gravida sapien semper. Pellentesque viverra a turpis blandit ornare. Quisque tincidunt quam a est facilisis, a fringilla augue sollicitudin. Pellentesque et eros sed arcu placerat sollicitudin. Donec diam eros, auctor non risus eu, interdum interdum mi.": (
+        1133,
+        1971,
+    ),
 }
 
 test_document = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent vel nulla ac diam dignissim congue ut sed ligula. Pellentesque aliquet ante sit amet risus iaculis, eget tincidunt nibh volutpat. Etiam non pulvinar enim. Mauris viverra augue urna, non aliquam ligula sodales in. Duis mattis ligula pretium dui bibendum, nec tincidunt neque placerat. Pellentesque eu est malesuada, dictum nulla quis, facilisis lectus. Fusce tempor mi ac tellus dictum porta. Cras venenatis pulvinar turpis. Suspendisse consequat nulla suscipit sagittis pretium.
@@ -13,39 +22,43 @@
 
 Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit. Nam vel consequat magna, ac dictum velit. Quisque non cursus enim, at ullamcorper massa. Integer quam mauris, scelerisque eu luctus et, facilisis nec ante. Proin feugiat vehicula felis at ornare. Maecenas est risus, tempus sit amet fermentum vel, sagittis in tellus. Integer ultrices velit at nulla tincidunt cursus. Curabitur non nunc in erat imperdiet imperdiet id sed felis. Quisque euismod velit a mi pellentesque, sit amet molestie eros dignissim. Morbi tincidunt dui vitae orci viverra, vitae gravida sapien semper. Pellentesque viverra a turpis blandit ornare. Quisque tincidunt quam a est facilisis, a fringilla augue sollicitudin. Pellentesque et eros sed arcu placerat sollicitudin. Donec diam eros, auctor non risus eu, interdum interdum mi."""
 
-html = markdown.markdown(test_document, extensions=['mark-words'])
+html = markdown.markdown(test_document, extensions=["mark-words"])
+
 
 class Tester(HTMLParser):
     start = None
     end = None
     data = None
-    
+
     def handle_starttag(self, tag, attrs):
         for attr in attrs:
             if attr[0] == "data-original-document-start":
                 self.start = int(attr[1])
             if attr[0] == "data-original-document-end":
                 self.end = int(attr[1])
-        
+
     def handle_data(self, data):
         self.data = data
-        if(self.start is not None and self.end is not None and self.data is not None):
+        if self.start is not None and self.end is not None and self.data is not None:
             self._test()
             self._reset()
-        
+
     def _test(self):
         if self.start is None or self.end is None or self.data is None:
             raise AssertionError("Missing data")
         case = test_cases[self.data]
         print(f"Testing block offset ({self.start}, {self.end}) == {case}")
         if self.start != case[0] or self.end != case[1]:
-            raise AssertionError(f"Block offset test failed, expected ({case[0]}, {case[1]}), got ({self.start}, {self.end})")
-    
+            raise AssertionError(
+                f"Block offset test failed, expected ({case[0]}, {case[1]}), got ({self.start}, {self.end})"
+            )
+
     def _reset(self):
         self.start = None
         self.end = None
         self.data = None
 
+
 Tester().feed(html)
 
-print("All tests passed!")
\ No newline at end of file
+print("All tests passed!")

From e37062d77975e9ab8ab2e9fdec53da091f857eae Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Wed, 3 Jul 2024 00:49:54 +0800
Subject: [PATCH 07/13] test: refactor test cases

---
 .../extension.py                              |   4 +-
 compiler-plugin/test/__main__.py              | 142 ++++++++++++------
 2 files changed, 102 insertions(+), 44 deletions(-)

diff --git a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
index 8495d7a3..fa53f525 100644
--- a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
+++ b/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
@@ -37,7 +37,7 @@ def run(self, lines):
             store = (line, offset, offset + len(line))
             self.meta["document_offsets"].append(store)
             self.meta["used_document_offsets"][store] = False
-            ## plus 1 is for the newline character (\n), use the CRLF file is unknown behavior
+            # plus 1 is for the newline character (\n), use the CRLF file is unknown behavior
             offset += len(line) + 1
         return lines
 
@@ -52,7 +52,7 @@ def __init__(self, parser, meta):
         self.meta = meta
 
     def test(self, parent, block):
-        ## Test if there is any line in the block
+        # Test if there is any line in the block
         for line in [line for (line, _, _) in self.meta["document_offsets"]]:
             if line in block:
                 return True
diff --git a/compiler-plugin/test/__main__.py b/compiler-plugin/test/__main__.py
index 161e767a..0c8c277b 100644
--- a/compiler-plugin/test/__main__.py
+++ b/compiler-plugin/test/__main__.py
@@ -1,64 +1,122 @@
+import textwrap
+import unittest
 import markdown
 from html.parser import HTMLParser
 
-test_cases = {
-    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent vel nulla ac diam dignissim congue ut sed ligula. Pellentesque aliquet ante sit amet risus iaculis, eget tincidunt nibh volutpat. Etiam non pulvinar enim. Mauris viverra augue urna, non aliquam ligula sodales in. Duis mattis ligula pretium dui bibendum, nec tincidunt neque placerat. Pellentesque eu est malesuada, dictum nulla quis, facilisis lectus. Fusce tempor mi ac tellus dictum porta. Cras venenatis pulvinar turpis. Suspendisse consequat nulla suscipit sagittis pretium.": (
-        0,
-        544,
-    ),
-    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna. Quisque nisl nisi, fermentum at justo quis, varius aliquet lorem. Ut fringilla vel purus et fermentum. Mauris ac lacinia nisi, sed ultricies dolor. Nunc ut augue quis eros iaculis tempor vel eu erat. Vestibulum efficitur porta justo. Fusce cursus magna dui, eget posuere neque tristique id. Suspendisse varius mauris arcu, nec congue metus efficitur in. Etiam ac pretium justo. Proin non ante faucibus, mattis mi et, consectetur sapien. Proin feugiat commodo euismod.": (
-        546,
-        1131,
-    ),
-    "Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit. Nam vel consequat magna, ac dictum velit. Quisque non cursus enim, at ullamcorper massa. Integer quam mauris, scelerisque eu luctus et, facilisis nec ante. Proin feugiat vehicula felis at ornare. Maecenas est risus, tempus sit amet fermentum vel, sagittis in tellus. Integer ultrices velit at nulla tincidunt cursus. Curabitur non nunc in erat imperdiet imperdiet id sed felis. Quisque euismod velit a mi pellentesque, sit amet molestie eros dignissim. Morbi tincidunt dui vitae orci viverra, vitae gravida sapien semper. Pellentesque viverra a turpis blandit ornare. Quisque tincidunt quam a est facilisis, a fringilla augue sollicitudin. Pellentesque et eros sed arcu placerat sollicitudin. Donec diam eros, auctor non risus eu, interdum interdum mi.": (
-        1133,
-        1971,
-    ),
-}
 
-test_document = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent vel nulla ac diam dignissim congue ut sed ligula. Pellentesque aliquet ante sit amet risus iaculis, eget tincidunt nibh volutpat. Etiam non pulvinar enim. Mauris viverra augue urna, non aliquam ligula sodales in. Duis mattis ligula pretium dui bibendum, nec tincidunt neque placerat. Pellentesque eu est malesuada, dictum nulla quis, facilisis lectus. Fusce tempor mi ac tellus dictum porta. Cras venenatis pulvinar turpis. Suspendisse consequat nulla suscipit sagittis pretium.
+class Tester:
+    def __init__(self, case, test_case: unittest.TestCase):
+        self.case = case
+        self.result = markdown.markdown(
+            self.case["document"], extensions=["mark-words"]
+        )
+        self.test_case = test_case
 
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna. Quisque nisl nisi, fermentum at justo quis, varius aliquet lorem. Ut fringilla vel purus et fermentum. Mauris ac lacinia nisi, sed ultricies dolor. Nunc ut augue quis eros iaculis tempor vel eu erat. Vestibulum efficitur porta justo. Fusce cursus magna dui, eget posuere neque tristique id. Suspendisse varius mauris arcu, nec congue metus efficitur in. Etiam ac pretium justo. Proin non ante faucibus, mattis mi et, consectetur sapien. Proin feugiat commodo euismod.
+    def test(self):
+        ParserTester(self.case, self.test_case).feed(self.result)
 
-Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit. Nam vel consequat magna, ac dictum velit. Quisque non cursus enim, at ullamcorper massa. Integer quam mauris, scelerisque eu luctus et, facilisis nec ante. Proin feugiat vehicula felis at ornare. Maecenas est risus, tempus sit amet fermentum vel, sagittis in tellus. Integer ultrices velit at nulla tincidunt cursus. Curabitur non nunc in erat imperdiet imperdiet id sed felis. Quisque euismod velit a mi pellentesque, sit amet molestie eros dignissim. Morbi tincidunt dui vitae orci viverra, vitae gravida sapien semper. Pellentesque viverra a turpis blandit ornare. Quisque tincidunt quam a est facilisis, a fringilla augue sollicitudin. Pellentesque et eros sed arcu placerat sollicitudin. Donec diam eros, auctor non risus eu, interdum interdum mi."""
 
-html = markdown.markdown(test_document, extensions=["mark-words"])
+class ParserTester(HTMLParser):
+    tag = None
+    text = None
+    offset_start = None
+    offset_end = None
 
-
-class Tester(HTMLParser):
-    start = None
-    end = None
-    data = None
+    def __init__(self, case, test_case: unittest.TestCase):
+        super().__init__()
+        self.test_case = test_case
+        self.case = case
+        self.idx = 0
 
     def handle_starttag(self, tag, attrs):
+        self.tag = tag
         for attr in attrs:
             if attr[0] == "data-original-document-start":
-                self.start = int(attr[1])
+                self.offset_start = int(attr[1])
             if attr[0] == "data-original-document-end":
-                self.end = int(attr[1])
+                self.offset_end = int(attr[1])
 
     def handle_data(self, data):
-        self.data = data
-        if self.start is not None and self.end is not None and self.data is not None:
-            self._test()
-            self._reset()
+        self.text = data
+
+    def handle_endtag(self, tag):
+        self._test()
+        self._reset()
 
     def _test(self):
-        if self.start is None or self.end is None or self.data is None:
-            raise AssertionError("Missing data")
-        case = test_cases[self.data]
-        print(f"Testing block offset ({self.start}, {self.end}) == {case}")
-        if self.start != case[0] or self.end != case[1]:
-            raise AssertionError(
-                f"Block offset test failed, expected ({case[0]}, {case[1]}), got ({self.start}, {self.end})"
-            )
+        self.test_case.assertEqual(
+            self.tag,
+            self.case["expected"][self.idx]["tag"],
+            msg="Tag mismatch in index " + str(self.idx),
+        )
+        self.test_case.assertEqual(
+            self.text,
+            self.case["expected"][self.idx]["text"],
+            msg="Text mismatch in index " + str(self.idx),
+        )
+        self.test_case.assertEqual(
+            self.offset_start,
+            self.case["expected"][self.idx]["offset"][0],
+            msg="Offset start mismatch in index " + str(self.idx),
+        )
+        self.test_case.assertEqual(
+            self.offset_end,
+            self.case["expected"][self.idx]["offset"][1],
+            msg="Offset end mismatch in index " + str(self.idx),
+        )
+        self.idx += 1
 
     def _reset(self):
-        self.start = None
-        self.end = None
-        self.data = None
+        self.tag = None
+        self.text = None
+        self.offset_start = None
+        self.offset_end = None
+
+
+class TestParser(unittest.TestCase):
+    def test_normal(self):
+        case = {
+            "document": textwrap.dedent("""\
+                    # Lorem ipsum
+                    
+                    Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna.
+                    
+                    ## Morbi neque lectus
+                    
+                    Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit."""),
+            "expected": [
+                {"tag": "h1", "text": "Lorem ipsum", "offset": (0, 13)},
+                {
+                    "tag": "p",
+                    "text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna.",
+                    "offset": (15, 132),
+                },
+                {"tag": "h2", "text": "Morbi neque lectus", "offset": (134, 155)},
+                {
+                    "tag": "p",
+                    "text": "Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit.",
+                    "offset": (157, 242),
+                },
+            ],
+        }
+        Tester(case, self).test()
+
+    def test_empty(self):
+        case = {
+            "document": "",
+            "expected": [],
+        }
+        Tester(case, self).test()
 
+    def test_single(self):
+        case = {
+            "document": "Lorem ipsum",
+            "expected": [
+                {"tag": "p", "text": "Lorem ipsum", "offset": (0, 11)},
+            ],
+        }
+        Tester(case, self).test()
 
-Tester().feed(html)
 
-print("All tests passed!")
+if __name__ == "__main__":
+    unittest.main()

From 81a32f7b4247819c9721615440a67d337f6b9570 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Wed, 3 Jul 2024 13:49:35 +0800
Subject: [PATCH 08/13] refactor: rename to
 python_markdown_document_offsets_injection_extension

---
 {compiler-plugin => python-markdown-extension}/.gitignore | 0
 .../.python-version                                       | 0
 .../pyproject.toml                                        | 6 +++---
 .../requirements-dev.lock                                 | 2 +-
 .../requirements.lock                                     | 2 +-
 .../__init__.py                                           | 0
 .../extension.py                                          | 8 ++++----
 .../test/__main__.py                                      | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)
 rename {compiler-plugin => python-markdown-extension}/.gitignore (100%)
 rename {compiler-plugin => python-markdown-extension}/.python-version (100%)
 rename {compiler-plugin => python-markdown-extension}/pyproject.toml (71%)
 rename {compiler-plugin => python-markdown-extension}/requirements-dev.lock (79%)
 rename {compiler-plugin => python-markdown-extension}/requirements.lock (79%)
 rename {compiler-plugin/src/python_markdown_mark_words_compiler_plugin => python-markdown-extension/src/python_markdown_document_offsets_injection_extension}/__init__.py (100%)
 rename {compiler-plugin/src/python_markdown_mark_words_compiler_plugin => python-markdown-extension/src/python_markdown_document_offsets_injection_extension}/extension.py (94%)
 rename {compiler-plugin => python-markdown-extension}/test/__main__.py (97%)

diff --git a/compiler-plugin/.gitignore b/python-markdown-extension/.gitignore
similarity index 100%
rename from compiler-plugin/.gitignore
rename to python-markdown-extension/.gitignore
diff --git a/compiler-plugin/.python-version b/python-markdown-extension/.python-version
similarity index 100%
rename from compiler-plugin/.python-version
rename to python-markdown-extension/.python-version
diff --git a/compiler-plugin/pyproject.toml b/python-markdown-extension/pyproject.toml
similarity index 71%
rename from compiler-plugin/pyproject.toml
rename to python-markdown-extension/pyproject.toml
index 39b6ba65..d3bf8d22 100644
--- a/compiler-plugin/pyproject.toml
+++ b/python-markdown-extension/pyproject.toml
@@ -1,5 +1,5 @@
 [project]
-name = "python_markdown_mark_words_compiler_plugin"
+name = "python_markdown_document_offsets_injection_extension"
 version = "0.0.1"
 description = "A Python-Markdown compiler plugin that put markdown words offset to the output HTML."
 authors = [{ name = "HikariLan", email = "hikarilan@minecraft.kim" }]
@@ -19,10 +19,10 @@ dev-dependencies = []
 allow-direct-references = true
 
 [tool.hatch.build.targets.wheel]
-packages = ["src/python_markdown_mark_words_compiler_plugin"]
+packages = ["src/python_markdown_document_offsets_injection_extension"]
 
 [tool.rye.scripts]
 test = "python ./test"
 
 [project.entry-points."markdown.extensions"]
-mark-words = "python_markdown_mark_words_compiler_plugin.extension:MarkWordsExtension"
+document-offsets-injection = "python_markdown_document_offsets_injection_extension.extension:MainExtension"
diff --git a/compiler-plugin/requirements-dev.lock b/python-markdown-extension/requirements-dev.lock
similarity index 79%
rename from compiler-plugin/requirements-dev.lock
rename to python-markdown-extension/requirements-dev.lock
index 39d92f66..2e8b89cd 100644
--- a/compiler-plugin/requirements-dev.lock
+++ b/python-markdown-extension/requirements-dev.lock
@@ -10,4 +10,4 @@
 
 -e file:.
 markdown==3.6
-    # via python-markdown-mark-words-compiler-plugin
+    # via python-markdown-document-offsets-injection-extension
diff --git a/compiler-plugin/requirements.lock b/python-markdown-extension/requirements.lock
similarity index 79%
rename from compiler-plugin/requirements.lock
rename to python-markdown-extension/requirements.lock
index 39d92f66..2e8b89cd 100644
--- a/compiler-plugin/requirements.lock
+++ b/python-markdown-extension/requirements.lock
@@ -10,4 +10,4 @@
 
 -e file:.
 markdown==3.6
-    # via python-markdown-mark-words-compiler-plugin
+    # via python-markdown-document-offsets-injection-extension
diff --git a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/__init__.py b/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/__init__.py
similarity index 100%
rename from compiler-plugin/src/python_markdown_mark_words_compiler_plugin/__init__.py
rename to python-markdown-extension/src/python_markdown_document_offsets_injection_extension/__init__.py
diff --git a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py b/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
similarity index 94%
rename from compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
rename to python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
index fa53f525..bd45c504 100644
--- a/compiler-plugin/src/python_markdown_mark_words_compiler_plugin/extension.py
+++ b/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
@@ -6,14 +6,14 @@
 MARK_PREVENT_RECURSION = "\t\t\t\r\r\rMARK_PREVENT_RECURSION\r\r\r\t\t\t"
 
 
-class MarkWordsExtension(Extension):
+class MainExtension(Extension):
     def extendMarkdown(self, md):
         meta = {"document_offsets": [], "used_document_offsets": {}}
         md.preprocessors.register(
             CalculateDocumentOffsetPreprocessor(md, meta), "capture_document", 1000
         )  # Highest priority is required because we need to calc words offset from original document
         md.parser.blockprocessors.register(
-            MarkWordsBlockProcessor(md.parser, meta), "mark_words", 100
+            OffsetsInjectionBlockProcessor(md.parser, meta), "mark_words", 100
         )  # high priority, usually larger than every other block processor
 
 
@@ -42,13 +42,13 @@ def run(self, lines):
         return lines
 
 
-class MarkWordsBlockProcessor(BlockProcessor):
+class OffsetsInjectionBlockProcessor(BlockProcessor):
     """
     A block processor to mark the words in the document and inject the offset of the block to the HTML element
     """
 
     def __init__(self, parser, meta):
-        super(MarkWordsBlockProcessor, self).__init__(parser)
+        super(OffsetsInjectionBlockProcessor, self).__init__(parser)
         self.meta = meta
 
     def test(self, parent, block):
diff --git a/compiler-plugin/test/__main__.py b/python-markdown-extension/test/__main__.py
similarity index 97%
rename from compiler-plugin/test/__main__.py
rename to python-markdown-extension/test/__main__.py
index 0c8c277b..da98c3de 100644
--- a/compiler-plugin/test/__main__.py
+++ b/python-markdown-extension/test/__main__.py
@@ -8,7 +8,7 @@ class Tester:
     def __init__(self, case, test_case: unittest.TestCase):
         self.case = case
         self.result = markdown.markdown(
-            self.case["document"], extensions=["mark-words"]
+            self.case["document"], extensions=["document-offsets-injection"]
         )
         self.test_case = test_case
 

From dac617697d9d9c85fb623e476c57c0be3be44eb0 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Wed, 3 Jul 2024 16:48:54 +0800
Subject: [PATCH 09/13] ci: format and test pr

ci: update env

ci: refactor

ci: working dir

ci: install deps

ci: set working dir
---
 ...mat-and-test-python-markdown-extension.yml | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 .github/workflows/check-format-and-test-python-markdown-extension.yml

diff --git a/.github/workflows/check-format-and-test-python-markdown-extension.yml b/.github/workflows/check-format-and-test-python-markdown-extension.yml
new file mode 100644
index 00000000..df6737de
--- /dev/null
+++ b/.github/workflows/check-format-and-test-python-markdown-extension.yml
@@ -0,0 +1,46 @@
+name: Check PR Format and Test for python-markdown-extension
+
+on:
+  pull_request:
+    branches:
+      - master
+    paths:
+      - python-markdown-extension/**
+
+jobs:
+  check-format:
+    name: Check PR Format
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./python-markdown-extension
+    steps:
+      - uses: actions/checkout@v4
+        name: Checkout Repo
+      - uses: eifinger/setup-rye@v3
+        name: Setup Rye
+        with:
+          enable-cache: true
+          working-directory: python-markdown-extension
+      - run: rye sync
+        name: Install Dependencies
+      - run: rye fmt --check
+        name: Check Format
+  test:
+    name: Test PR
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./python-markdown-extension
+    steps:
+      - uses: actions/checkout@v4
+        name: Checkout Repo
+      - uses: eifinger/setup-rye@v3
+        name: Setup Rye
+        with:
+          enable-cache: true
+          working-directory: python-markdown-extension
+      - run: rye sync
+        name: Install Dependencies
+      - run: rye run test
+        name: Run Tests

From cc1b2347fd06fc0b201202b3c662f8916af418b6 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Wed, 3 Jul 2024 23:28:14 +0800
Subject: [PATCH 10/13] chore: type hint and more comment

---
 .../extension.py                              | 41 +++++++++++--------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py b/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
index bd45c504..1885e110 100644
--- a/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
+++ b/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
@@ -1,14 +1,15 @@
-from markdown import Extension
+from markdown import Extension, Markdown
 from markdown.preprocessors import Preprocessor
 from markdown.blockprocessors import BlockProcessor
+from markdown.blockparser import BlockParser
 import xml.etree.ElementTree as etree
 
-MARK_PREVENT_RECURSION = "\t\t\t\r\r\rMARK_PREVENT_RECURSION\r\r\r\t\t\t"
+MARK_PREVENT_RECURSION: str = "\t\t\t\r\r\rMARK_PREVENT_RECURSION\r\r\r\t\t\t"
 
 
 class MainExtension(Extension):
-    def extendMarkdown(self, md):
-        meta = {"document_offsets": [], "used_document_offsets": {}}
+    def extendMarkdown(self, md: Markdown):
+        meta: dict = {"document_offsets": [], "used_document_offsets": {}}
         md.preprocessors.register(
             CalculateDocumentOffsetPreprocessor(md, meta), "capture_document", 1000
         )  # Highest priority is required because we need to calc words offset from original document
@@ -22,19 +23,19 @@ class CalculateDocumentOffsetPreprocessor(Preprocessor):
     A preprocessor to calculate the offset of each line in the document
     """
 
-    def __init__(self, md, meta):
+    def __init__(self, md: Markdown, meta: dict):
         super(CalculateDocumentOffsetPreprocessor, self).__init__(md)
         self.meta = meta
 
-    def run(self, lines):
-        offset = 0
-        for line_num, line in enumerate(lines):
+    def run(self, lines: list[str]) -> list[str]:
+        offset: int = 0
+        for line in lines:
             # Skip empty lines
             if len(line) == 0:
                 offset += 1
                 continue
             # store the line and offset
-            store = (line, offset, offset + len(line))
+            store: tuple[str, int, int] = (line, offset, offset + len(line))
             self.meta["document_offsets"].append(store)
             self.meta["used_document_offsets"][store] = False
             # plus 1 is for the newline character (\n), use the CRLF file is unknown behavior
@@ -47,28 +48,36 @@ class OffsetsInjectionBlockProcessor(BlockProcessor):
     A block processor to mark the words in the document and inject the offset of the block to the HTML element
     """
 
-    def __init__(self, parser, meta):
+    def __init__(self, parser: BlockParser, meta: dict):
         super(OffsetsInjectionBlockProcessor, self).__init__(parser)
         self.meta = meta
 
-    def test(self, parent, block):
+    def test(self, _, block) -> bool:
         # Test if there is any line in the block
         for line in [line for (line, _, _) in self.meta["document_offsets"]]:
             if line in block:
                 return True
         return False
 
-    def run(self, parent: etree.Element, blocks):
-        block = blocks[0]
+    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
+        """
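+        Inject document offsets into the HTML elements so that later processing can use them to locate positions in the original document. The current algorithm is:
+        1. Find the first block in the document that contains text.
+        2. Locate that block in the document: iterate over every line of the document to find all lines contained in the block, and use the start and end offsets of those lines to determine the block's position.
+        3. Inject the block's start and end offsets into the HTML element: the block is first parsed recursively, then the offsets are set on the last HTML element that was generated.
+        Because the block is captured by this processor again during the recursive parse, MARK_PREVENT_RECURSION is appended to the end of the block to avoid infinite recursion; when this processor sees the block again, it skips the block and removes the marker.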
+        """
+
+        block: str = blocks[0]
 
         # If the first block is handled, remove the marker and return, so that other block processors can process it
         if MARK_PREVENT_RECURSION in blocks[0]:
             blocks[0] = blocks[0].replace(MARK_PREVENT_RECURSION, "")
             return False
 
-        start = None
-        end = None
-        used = {}
+        start: int | None = None
+        end: int | None = None
+        used: dict[tuple[str, int, int], bool] = {}
         # Search for the block fragment in the document_offsets
         for store in self.meta["document_offsets"]:
             # If already used, skip

From 70c02485d45b12b84bd71ac679012a1d18e0c20a Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Thu, 4 Jul 2024 01:44:08 +0800
Subject: [PATCH 11/13] test: complete test environment

---
 python-markdown-extension/pyproject.toml      |   9 +-
 .../requirements-dev.lock                     |   5 +
 python-markdown-extension/test/__main__.py    | 291 ++++++++++++++++--
 3 files changed, 283 insertions(+), 22 deletions(-)

diff --git a/python-markdown-extension/pyproject.toml b/python-markdown-extension/pyproject.toml
index d3bf8d22..b641660a 100644
--- a/python-markdown-extension/pyproject.toml
+++ b/python-markdown-extension/pyproject.toml
@@ -4,7 +4,9 @@ version = "0.0.1"
 description = "A Python-Markdown compiler plugin that put markdown words offset to the output HTML."
 authors = [{ name = "HikariLan", email = "hikarilan@minecraft.kim" }]
 license = { text = "Apache-2.0" }
-dependencies = ["markdown>=3.6"]
+dependencies = [
+    "markdown>=3.6",
+]
 requires-python = ">= 3.8"
 
 [build-system]
@@ -13,7 +15,10 @@ build-backend = "hatchling.build"
 
 [tool.rye]
 managed = true
-dev-dependencies = []
+dev-dependencies = [
+    "pygments>=2.18.0",
+    "pymdown-extensions>=10.8.1",
+]
 
 [tool.hatch.metadata]
 allow-direct-references = true
diff --git a/python-markdown-extension/requirements-dev.lock b/python-markdown-extension/requirements-dev.lock
index 2e8b89cd..a33eba1b 100644
--- a/python-markdown-extension/requirements-dev.lock
+++ b/python-markdown-extension/requirements-dev.lock
@@ -10,4 +10,9 @@
 
 -e file:.
 markdown==3.6
+    # via pymdown-extensions
     # via python-markdown-document-offsets-injection-extension
+pygments==2.18.0
+pymdown-extensions==10.8.1
+pyyaml==6.0.1
+    # via pymdown-extensions
diff --git a/python-markdown-extension/test/__main__.py b/python-markdown-extension/test/__main__.py
index da98c3de..eb15716d 100644
--- a/python-markdown-extension/test/__main__.py
+++ b/python-markdown-extension/test/__main__.py
@@ -3,22 +3,127 @@
 import markdown
 from html.parser import HTMLParser
 
+from pymdownx.emoji import to_svg
+from pymdownx.slugs import uslugify
+from pymdownx.arithmatex import fence_mathjax_format
+
 
 class Tester:
     def __init__(self, case, test_case: unittest.TestCase):
         self.case = case
+        """
+        @see: https://github.com/OI-wiki/OI-wiki/blob/65983038c40716dd0644778fe7875e91c9043618/mkdocs.yml#L586
+        
+        # Extensions
+        markdown_extensions:
+          - admonition
+          - def_list
+          - footnotes
+          - meta
+          - toc:
+              permalink: ""
+              slugify: !!python/name:pymdownx.slugs.uslugify
+          - pymdownx.arithmatex:
+              generic: true
+          - pymdownx.caret
+          - pymdownx.critic
+          - pymdownx.details
+          - pymdownx.emoji:
+              emoji_generator: !!python/name:pymdownx.emoji.to_svg
+          - pymdownx.highlight:
+              linenums: true
+          - pymdownx.inlinehilite
+          - pymdownx.keys
+          - pymdownx.magiclink
+          - pymdownx.mark
+          - pymdownx.snippets:
+              check_paths: true
+          - pymdownx.progressbar
+          - pymdownx.smartsymbols
+          - pymdownx.superfences:
+              custom_fences:
+                - name: math
+                  class: arithmatex
+                  format: !!python/name:pymdownx.arithmatex.fence_mathjax_format
+          - pymdownx.tasklist:
+              custom_checkbox: true
+          - pymdownx.tilde
+          - pymdownx.tabbed:
+              alternate_style: true
+        """
         self.result = markdown.markdown(
-            self.case["document"], extensions=["document-offsets-injection"]
+            self.case["document"],
+            extensions=[
+                "document-offsets-injection",
+                "admonition",
+                "def_list",
+                "footnotes",
+                "meta",
+                "toc",
+                "pymdownx.arithmatex",
+                "pymdownx.caret",
+                "pymdownx.critic",
+                "pymdownx.details",
+                "pymdownx.emoji",
+                "pymdownx.highlight",
+                "pymdownx.inlinehilite",
+                "pymdownx.keys",
+                "pymdownx.magiclink",
+                "pymdownx.mark",
+                "pymdownx.snippets",
+                "pymdownx.progressbar",
+                "pymdownx.smartsymbols",
+                "pymdownx.superfences",
+                "pymdownx.tasklist",
+                "pymdownx.tilde",
+                "pymdownx.tabbed",
+            ],
+            extension_configs={
+                "toc": {
+                    "permalink": "",
+                    "slugify": uslugify,
+                },
+                "pymdownx.arithmatex": {
+                    "generic": True,
+                },
+                "pymdownx.emoji": {
+                    "emoji_generator": to_svg,
+                },
+                "pymdownx.highlight": {
+                    "linenums": True,
+                },
+                "pymdownx.snippets": {
+                    "check_paths": True,
+                },
+                "pymdownx.superfences": {
+                    "custom_fences": [
+                        {
+                            "name": "math",
+                            "class": "arithmatex",
+                            "format": fence_mathjax_format,
+                        },
+                    ],
+                },
+                "pymdownx.tasklist": {
+                    "custom_checkbox": True,
+                },
+                "pymdownx.tabbed": {
+                    "alternate_style": True,
+                },
+            },
         )
+        print(self.case["document"])
+        print(self.result)
         self.test_case = test_case
 
     def test(self):
-        ParserTester(self.case, self.test_case).feed(self.result)
+        tester = ParserTester(self.case, self.test_case)
+        tester.feed(self.result)
+        tester.check_integrity()
 
 
 class ParserTester(HTMLParser):
     tag = None
-    text = None
     offset_start = None
     offset_end = None
 
@@ -29,17 +134,23 @@ def __init__(self, case, test_case: unittest.TestCase):
         self.idx = 0
 
     def handle_starttag(self, tag, attrs):
-        self.tag = tag
+        start = None
+        end = None
         for attr in attrs:
             if attr[0] == "data-original-document-start":
-                self.offset_start = int(attr[1])
+                start = int(attr[1])
             if attr[0] == "data-original-document-end":
-                self.offset_end = int(attr[1])
-
-    def handle_data(self, data):
-        self.text = data
+                end = int(attr[1])
+        if start is not None and end is not None:
+            self.tag = tag
+            self.offset_start = start
+            self.offset_end = end
 
     def handle_endtag(self, tag):
+        if self.tag != tag:
+            return  # ignore nested tags
+        if self.idx == len(self.case["expected"]):
+            return  # ignore extra tags
         self._test()
         self._reset()
 
@@ -49,11 +160,6 @@ def _test(self):
             self.case["expected"][self.idx]["tag"],
             msg="Tag mismatch in index " + str(self.idx),
         )
-        self.test_case.assertEqual(
-            self.text,
-            self.case["expected"][self.idx]["text"],
-            msg="Text mismatch in index " + str(self.idx),
-        )
         self.test_case.assertEqual(
             self.offset_start,
             self.case["expected"][self.idx]["offset"][0],
@@ -68,10 +174,16 @@ def _test(self):
 
     def _reset(self):
         self.tag = None
-        self.text = None
         self.offset_start = None
         self.offset_end = None
 
+    def check_integrity(self):
+        self.test_case.assertEqual(
+            self.idx,
+            len(self.case["expected"]),
+            msg="Not all tags were found",
+        )
+
 
 class TestParser(unittest.TestCase):
     def test_normal(self):
@@ -85,16 +197,14 @@ def test_normal(self):
                     
                     Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit."""),
             "expected": [
-                {"tag": "h1", "text": "Lorem ipsum", "offset": (0, 13)},
+                {"tag": "h1", "offset": (0, 13)},
                 {
                     "tag": "p",
-                    "text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna.",
                     "offset": (15, 132),
                 },
-                {"tag": "h2", "text": "Morbi neque lectus", "offset": (134, 155)},
+                {"tag": "h2", "offset": (134, 155)},
                 {
                     "tag": "p",
-                    "text": "Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit.",
                     "offset": (157, 242),
                 },
             ],
@@ -112,7 +222,148 @@ def test_single(self):
         case = {
             "document": "Lorem ipsum",
             "expected": [
-                {"tag": "p", "text": "Lorem ipsum", "offset": (0, 11)},
+                {"tag": "p", "offset": (0, 11)},
+            ],
+        }
+        Tester(case, self).test()
+
+    def test_oi_wiki_index(self):
+        case = {
+            "document": textwrap.dedent("""\
+                disqus:
+                pagetime:
+                title: OI Wiki
+
+                ## 欢迎来到 **OI Wiki**！[![GitHub watchers](https://img.shields.io/github/watchers/OI-wiki/OI-wiki.svg?style=social&label=Watch)](https://github.com/OI-wiki/OI-wiki)  [![GitHub stars](https://img.shields.io/github/stars/OI-wiki/OI-wiki.svg?style=social&label=Stars)](https://github.com/OI-wiki/OI-wiki)
+
+                [![Word Art](images/wordArt.webp)](https://github.com/OI-wiki/OI-wiki)
+
+                **OI**（Olympiad in Informatics，信息学奥林匹克竞赛）在中国起源于 1984 年，是五大高中学科竞赛之一。
+
+                **ICPC**（International Collegiate Programming Contest，国际大学生程序设计竞赛）由 ICPC 基金会（ICPC Foundation）举办，是最具影响力的大学生计算机竞赛。由于以前 ACM 赞助这个竞赛，也有很多人习惯叫它 ACM 竞赛。
+
+                **OI Wiki** 致力于成为一个免费开放且持续更新的 **编程竞赛（competitive programming）** 知识整合站点，大家可以在这里获取与竞赛相关的、有趣又实用的知识。我们为大家准备了竞赛中的基础知识、常见题型、解题思路以及常用工具等内容，帮助大家更快速深入地学习编程竞赛中涉及到的知识。
+
+                本项目受 [CTF Wiki](https://ctf-wiki.org/) 的启发，在编写过程中参考了诸多资料，在此一并致谢。
+
+                <div align="center">
+                <a href="https://www.netlify.com/" target="_blank" style="margin-left: 60px;"><img style="height: 40px; " src="images/netlify.png"></a>
+                </div>
+
+                <script>
+                  // #758
+                  document.getElementsByClassName('md-nav__title')[1].click()
+                </script>"""),
+            "expected": [
+                {
+                    "tag": "h2",
+                    "offset": (34, 332),
+                },
+                {
+                    "tag": "p",
+                    "offset": (334, 404),
+                },
+                {
+                    "tag": "p",
+                    "offset": (406, 473),
+                },
+                {
+                    "tag": "p",
+                    "offset": (475, 620),
+                },
+                {
+                    "tag": "p",
+                    "offset": (622, 778),
+                },
+                {
+                    "tag": "p",
+                    "offset": (780, 844),
+                },
+                # there's a div tag and a script tag in the document, and they will not be considered.
+            ],
+        }
+        Tester(case, self).test()
+
+    def test_oi_wiki_search_dfs(self):
+        case = {
+            # I HATE TEXT BLOCKS
+            "document": textwrap.dedent("""\
+                ## 引入
+                
+                DFS 为图论中的概念，详见 [DFS（图论）](../graph/dfs.md) 页面。在 **搜索算法** 中，该词常常指利用递归函数方便地实现暴力枚举的算法，与图论中的 DFS 算法有一定相似之处，但并不完全相同。
+                
+                ## 解释
+                
+                考虑这个例子：
+                
+                ???+ note "例题"
+                    把正整数 $n$ 分解为 $3$ 个不同的正整数，如 $6=1+2+3$，排在后面的数必须大于等于前面的数，输出所有方案。
+                
+                对于这个问题，如果不知道搜索，应该怎么办呢？
+                
+                当然是三重循环，参考代码如下：
+                
+                ???+ note "实现"
+                    === "C++"
+                        ```cpp
+                        for (int i = 1; i <= n; ++i)
+                          for (int j = i; j <= n; ++j)
+                            for (int k = j; k <= n; ++k)
+                              if (i + j + k == n) printf("%d = %d + %d + %d\\n", n, i, j, k);
+                        ```
+                    
+                    === "Python"
+                        ```python
+                        for i in range(1, n + 1):
+                            for j in range(i, n + 1):
+                                for k in range(j, n + 1):
+                                    if i + j + k == n:
+                                        print("%d = %d + %d + %d" % (n, i, j, k))
+                        ```
+                    
+                    === "Java"
+                        ```Java
+                        for (int i = 1; i < n + 1; i++) {
+                            for (int j = i; j < n + 1; j++) {
+                                for (int k = j; k < n + 1; k++) {
+                                    if (i + j + k == n) System.out.printf("%d = %d + %d + %d%n", n, i, j, k);
+                                }
+                            }
+                        }
+                        ```
+                
+                那如果是分解成四个整数呢？再加一重循环？"""),
+            "expected": [
+                {
+                    "tag": "h2",
+                    "offset": (0, 5),
+                },
+                {
+                    "tag": "p",
+                    "offset": (7, 117),
+                },
+                {
+                    "tag": "h2",
+                    "offset": (119, 124),
+                },
+                {
+                    "tag": "p",
+                    "offset": (126, 133),
+                },
+                # <details class="note" open="open"> has been ignored
+                {
+                    "tag": "p",
+                    "offset": (217, 239),
+                },
+                {
+                    "tag": "p",
+                    "offset": (241, 256),
+                },
+                # <details class="note" open="open"> has been ignored
+                {
+                    "tag": "p",
+                    "offset": (1094, 1114),
+                },
             ],
         }
         Tester(case, self).test()

From e357c0496e4325456d6a60b038558a50a9f503d3 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Fri, 5 Jul 2024 00:30:01 +0800
Subject: [PATCH 12/13] chore: ignore .idea

---
 python-markdown-extension/.gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python-markdown-extension/.gitignore b/python-markdown-extension/.gitignore
index ae8554de..bf07c5cb 100644
--- a/python-markdown-extension/.gitignore
+++ b/python-markdown-extension/.gitignore
@@ -8,3 +8,5 @@ wheels/
 
 # venv
 .venv
+
+.idea/
\ No newline at end of file

From 5bc32d424cd39cd47f242ca520ec49792a8f1752 Mon Sep 17 00:00:00 2001
From: HikariLan <shaokeyibb@126.com>
Date: Fri, 5 Jul 2024 00:30:53 +0800
Subject: [PATCH 13/13] refactor: heuristics marking

---
 .../extension.py                              | 134 +++++++++++++++++-
 python-markdown-extension/test/__main__.py    |  42 +++---
 2 files changed, 153 insertions(+), 23 deletions(-)

diff --git a/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py b/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
index 1885e110..a2aa3b5b 100644
--- a/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
+++ b/python-markdown-extension/src/python_markdown_document_offsets_injection_extension/extension.py
@@ -1,3 +1,4 @@
+import re
 from markdown import Extension, Markdown
 from markdown.preprocessors import Preprocessor
 from markdown.blockprocessors import BlockProcessor
@@ -6,15 +7,29 @@
 
 MARK_PREVENT_RECURSION: str = "\t\t\t\r\r\rMARK_PREVENT_RECURSION\r\r\r\t\t\t"
 
+MARK_CONTINUE: str = "\t\t\t\r\r\rMARK_CONTINUE\r\r\r\t\t\t"
+
+# @see: markdown.util.HTML_PLACEHOLDER_RE
+# PYTHON_MARKDOWN_HTML_PLACEHOLDER_RE: re.Pattern[str] = re.compile(
+#     "\u0002wzxhzdk:%s\u0003" % r"([0-9]+)"
+# )
+
 
 class MainExtension(Extension):
     def extendMarkdown(self, md: Markdown):
-        meta: dict = {"document_offsets": [], "used_document_offsets": {}}
+        meta: dict = {
+            "document_offsets": [],
+            "used_document_offsets": {},
+            "last_parent": None,
+        }
         md.preprocessors.register(
             CalculateDocumentOffsetPreprocessor(md, meta), "capture_document", 1000
         )  # Highest priority is required because we need to calc words offset from original document
+        md.preprocessors.register(
+            FixDocumentOffsetPreprocessor(md, meta), "fix_document", 0
+        )  # Lowest priority is required because we need to fix the offsets after all other preprocessors have run
         md.parser.blockprocessors.register(
-            OffsetsInjectionBlockProcessor(md.parser, meta), "mark_words", 100
+            OffsetsInjectionBlockProcessor(md.parser, meta), "mark_words", 200
         )  # high priority, usually larger than every other block processor
 
 
@@ -32,6 +47,9 @@ def run(self, lines: list[str]) -> list[str]:
         for line in lines:
             # Skip empty lines
             if len(line) == 0:
+                store: tuple[str, int, int] = (line, offset, offset + 1)
+                self.meta["document_offsets"].append(store)
+                self.meta["used_document_offsets"][store] = False
                 offset += 1
                 continue
             # store the line and offset
@@ -43,6 +61,102 @@ def run(self, lines: list[str]) -> list[str]:
         return lines
 
 
+class FixDocumentOffsetPreprocessor(Preprocessor):
+    """
+    A preprocessor to fix the offset of each line after the 3rd party extension processed the document
+    """
+
+    def __init__(self, md: Markdown, meta: dict):
+        super(FixDocumentOffsetPreprocessor, self).__init__(md)
+        self.meta = meta
+
+    def run(self, lines: list[str]) -> list[str]:
+        document_offsets: list[tuple[str, int, int]] = self.meta["document_offsets"]
+
+        # End index (exclusive) into document_offsets of the last successful match
+        last_success_match_end: int = 0
+        num_lines: int = 0
+        num_document_offsets: int = 0
+        while num_document_offsets < len(document_offsets) and num_lines < len(lines):
+            line = lines[num_lines]
+            document_offset: tuple[str, int, int] = document_offsets[
+                num_document_offsets
+            ]
+
+            # Exact match
+            if document_offset[0] == line:
+                # Match this line
+                self.match(line, num_document_offsets, num_document_offsets + 1)
+                # If there is a gap since the last successful match, assign the skipped original offsets to the previous line
+                if num_document_offsets > last_success_match_end and num_lines > 0:
+                    self.match(
+                        lines[num_lines - 1],
+                        last_success_match_end,
+                        num_document_offsets,
+                    )
+                last_success_match_end = num_document_offsets + 1
+                num_lines += 1
+                num_document_offsets += 1
+            # No exact match: look for this line among the remaining original document offsets
+            else:
+                remain: list[str] = [
+                    line for line, _, _ in document_offsets[num_document_offsets:]
+                ]
+                # If such a line exists
+                if line in remain:
+                    # Find the first matching position and match this line to it
+                    idx = remain.index(line) + num_document_offsets
+                    self.match(line, idx, idx + 1)
+                    # If there is a gap since the last successful match, assign the skipped original offsets to the previous line
+                    if idx > last_success_match_end and num_lines > 0:
+                        self.match(lines[num_lines - 1], last_success_match_end, idx)
+                    last_success_match_end = idx + 1
+                    num_lines += 1
+                    num_document_offsets = idx + 1
+                # No matching position found: move on to the next line
+                else:
+                    num_lines += 1
+
+        # If the lines are exhausted but some original offsets remain unmatched, assign the remaining offsets to the last processed line
+        if last_success_match_end < len(document_offsets):
+            self.match(
+                lines[num_lines - 1], last_success_match_end, len(document_offsets)
+            )
+
+        return lines
+
+    def match(
+        self,
+        matched_line: str,
+        num_document_offsets_start: int,
+        num_document_offsets_end: int,
+    ):
+        """
+        Assign a single matched line to multiple original document offset entries, over the index range [num_document_offsets_start, num_document_offsets_end)
+        """
+        document_offsets: list[tuple[str, int, int]] = self.meta["document_offsets"]
+        used_document_offsets: dict[tuple[str, int, int], bool] = self.meta[
+            "used_document_offsets"
+        ]
+        for i in range(num_document_offsets_start, num_document_offsets_end):
+            document_offset = document_offsets[i]
+            # The first offset entry in the range is set to the matched line; all others are set to MARK_CONTINUE
+            if i == num_document_offsets_start:
+                document_offsets[i] = (
+                    matched_line,
+                    document_offset[1],
+                    document_offset[2],
+                )
+            else:
+                document_offsets[i] = (
+                    MARK_CONTINUE,
+                    document_offset[1],
+                    document_offset[2],
+                )
+            del used_document_offsets[document_offset]
+            used_document_offsets[document_offsets[i]] = False
+
+
 class OffsetsInjectionBlockProcessor(BlockProcessor):
     """
     A block processor to mark the words in the document and inject the offset of the block to the HTML element
@@ -80,10 +194,18 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         used: dict[tuple[str, int, int], bool] = {}
         # Search for the block fragment in the document_offsets
         for store in self.meta["document_offsets"]:
+            # Skip empty lines
+            if len(store[0]) == 0:
+                continue
             # If already used, skip
             if self.meta["used_document_offsets"][store]:
                 continue
             (line, offset, end_offset) = store
+            # If a MARK_CONTINUE marker is encountered, treat it as contiguous with the lines before it
+            if line == MARK_CONTINUE:
+                end = end_offset
+                used[store] = True
+                continue
             # If found one
             if line in block:
                 # If the line already scanned (usually some lines with same content in different place), skip
@@ -111,9 +233,15 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         if start is not None and end is not None:
             blocks.pop(0)
             self.meta["used_document_offsets"].update(used)
-            # append MARK_PREVENT_RECURSION to tail of the block to prevent recursion, we don't use a handled flaglist because we don't know if there's some same block in the document
+            # append MARK_PREVENT_RECURSION to tail of the block to prevent recursion, we don't use a handled
+            # flaglist because we don't know if there's some same block in the document
             self.parser.parseBlocks(parent, [block + MARK_PREVENT_RECURSION])
+            # Several blocks may render into the same element: then only extend its end offset and keep the first start
+            if self.meta["last_parent"] == parent[-1]:
+                parent[-1].set("data-original-document-end", str(end))
+                return True
             parent[-1].set("data-original-document-start", str(start))
             parent[-1].set("data-original-document-end", str(end))
+            self.meta["last_parent"] = parent[-1]
             return True
         return False
diff --git a/python-markdown-extension/test/__main__.py b/python-markdown-extension/test/__main__.py
index eb15716d..93f2eb00 100644
--- a/python-markdown-extension/test/__main__.py
+++ b/python-markdown-extension/test/__main__.py
@@ -112,8 +112,6 @@ def __init__(self, case, test_case: unittest.TestCase):
                 },
             },
         )
-        print(self.case["document"])
-        print(self.result)
         self.test_case = test_case
 
     def test(self):
@@ -190,11 +188,11 @@ def test_normal(self):
         case = {
             "document": textwrap.dedent("""\
                     # Lorem ipsum
-                    
+
                     Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin sed lacus vitae neque vestibulum porttitor id et urna.
-                    
+
                     ## Morbi neque lectus
-                    
+
                     Morbi neque lectus, faucibus a mattis at, aliquam quis est. Maecenas sed luctus elit."""),
             "expected": [
                 {"tag": "h1", "offset": (0, 13)},
@@ -277,32 +275,30 @@ def test_oi_wiki_index(self):
                 },
                 {
                     "tag": "p",
-                    "offset": (780, 844),
+                    "offset": (780, 1101),  # FIXME: Correct one is (780, 844)
                 },
-                # there's a div tag and a script tag in the document, and they will not be considered.
             ],
         }
         Tester(case, self).test()
 
     def test_oi_wiki_search_dfs(self):
         case = {
-            # I HATE TEXT BLOCKS
             "document": textwrap.dedent("""\
                 ## 引入
-                
+
                 DFS 为图论中的概念，详见 [DFS（图论）](../graph/dfs.md) 页面。在 **搜索算法** 中，该词常常指利用递归函数方便地实现暴力枚举的算法，与图论中的 DFS 算法有一定相似之处，但并不完全相同。
-                
+
                 ## 解释
-                
+
                 考虑这个例子：
-                
+
                 ???+ note "例题"
                     把正整数 $n$ 分解为 $3$ 个不同的正整数，如 $6=1+2+3$，排在后面的数必须大于等于前面的数，输出所有方案。
-                
+
                 对于这个问题，如果不知道搜索，应该怎么办呢？
-                
+
                 当然是三重循环，参考代码如下：
-                
+
                 ???+ note "实现"
                     === "C++"
                         ```cpp
@@ -311,7 +307,7 @@ def test_oi_wiki_search_dfs(self):
                             for (int k = j; k <= n; ++k)
                               if (i + j + k == n) printf("%d = %d + %d + %d\\n", n, i, j, k);
                         ```
-                    
+
                     === "Python"
                         ```python
                         for i in range(1, n + 1):
@@ -320,7 +316,7 @@ def test_oi_wiki_search_dfs(self):
                                     if i + j + k == n:
                                         print("%d = %d + %d + %d" % (n, i, j, k))
                         ```
-                    
+
                     === "Java"
                         ```Java
                         for (int i = 1; i < n + 1; i++) {
@@ -331,7 +327,7 @@ def test_oi_wiki_search_dfs(self):
                             }
                         }
                         ```
-                
+
                 那如果是分解成四个整数呢？再加一重循环？"""),
             "expected": [
                 {
@@ -350,7 +346,10 @@ def test_oi_wiki_search_dfs(self):
                     "tag": "p",
                     "offset": (126, 133),
                 },
-                # <details class="note" open="open"> has been ignored
+                {
+                    "tag": "details",
+                    "offset": (135, 215),
+                },
                 {
                     "tag": "p",
                     "offset": (217, 239),
@@ -359,7 +358,10 @@ def test_oi_wiki_search_dfs(self):
                     "tag": "p",
                     "offset": (241, 256),
                 },
-                # <details class="note" open="open"> has been ignored
+                {
+                    "tag": "details",
+                    "offset": (258, 1092),
+                },
                 {
                     "tag": "p",
                     "offset": (1094, 1114),