55"""
66
77import logging
8- from abc import ABC , abstractmethod
98from collections .abc import Callable
109from pathlib import Path
1110
12- from .base_models import DocumentData
11+ from .base_models import DocumentParser
1312
1413logger = logging .getLogger (__name__ )
1514
1615# 全局解析器注册表
17- PARSER_REGISTRY : dict [str , type [' DocumentParser' ]] = {}
16+ PARSER_REGISTRY : dict [str , type [DocumentParser ]] = {}
1817
1918
20- class DocumentParser (ABC ):
21- """文档解析器基类"""
22-
23- @abstractmethod
24- async def parse (self , file_path : str ) -> DocumentData :
25- """解析文档"""
26- pass
27-
28-
29- def register_parser (suffixes : list [str ]) -> Callable [[type ['DocumentParser' ]], type ['DocumentParser' ]]:
19+ def register_parser (suffixes : list [str ]) -> Callable [[type [DocumentParser ]], type [DocumentParser ]]:
3020 """
3121 解析器注册装饰器
3222
@@ -41,7 +31,7 @@ def register_parser(suffixes: list[str]) -> Callable[[type['DocumentParser']], t
4131 class DocxDocumentParser(DocumentParser):
4232 ...
4333 """
44- def decorator (cls : type [' DocumentParser' ]) -> type [' DocumentParser' ]:
34+ def decorator (cls : type [DocumentParser ]) -> type [DocumentParser ]:
4535 # 验证类是否继承自 DocumentParser
4636 if not issubclass (cls , DocumentParser ):
4737 raise TypeError (f"解析器类 { cls .__name__ } 必须继承自 DocumentParser" )
@@ -59,7 +49,7 @@ def decorator(cls: type['DocumentParser']) -> type['DocumentParser']:
5949 return decorator
6050
6151
62- def get_parser (file_path : str ) -> ' DocumentParser' | None :
52+ def get_parser (file_path : str ) -> DocumentParser | None :
6353 """
6454 根据文件路径获取合适的解析器实例
6555
@@ -83,22 +73,6 @@ def get_parser(file_path: str) -> 'DocumentParser' | None:
8373 logger .error (f"创建解析器实例失败: { parser_class .__name__ } , 错误: { e } " )
8474 return None
8575
86-
87- def can_parse (file_path : str ) -> bool :
88- """
89- 检查文件是否可以被解析
90-
91- Args:
92- file_path: 文件路径
93-
94- Returns:
95- bool: 是否支持该文件格式
96- """
97- file = Path (file_path )
98- suffix = file .suffix .lower ()
99- return suffix in PARSER_REGISTRY
100-
101-
10276def get_supported_formats () -> list [str ]:
10377 """
10478 获取所有支持的文件格式
@@ -109,7 +83,7 @@ def get_supported_formats() -> list[str]:
10983 return list (PARSER_REGISTRY .keys ())
11084
11185
112- def get_parser_class (suffix : str ) -> type [' DocumentParser' ] | None :
86+ def get_parser_class (suffix : str ) -> type [DocumentParser ] | None :
11387 """
11488 根据文件扩展名获取解析器类
11589
0 commit comments