From 644085760bcb9a46a40df8e75b719f80872c0681 Mon Sep 17 00:00:00 2001 From: myhloli Date: Mon, 21 Oct 2024 17:16:42 +0800 Subject: [PATCH 1/2] fix(ocr_mkcontent): expand para_to_standard_format_v2 to handle list and index blocks - Modified the condition to include List and Index block types - This change enhances the function's capability to process different paragraph types --- magic_pdf/dict2md/ocr_mkcontent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magic_pdf/dict2md/ocr_mkcontent.py b/magic_pdf/dict2md/ocr_mkcontent.py index 716a328..ad1de34 100644 --- a/magic_pdf/dict2md/ocr_mkcontent.py +++ b/magic_pdf/dict2md/ocr_mkcontent.py @@ -162,7 +162,7 @@ def merge_para_with_text(para_block): def para_to_standard_format_v2(para_block, img_buket_path, page_idx, drop_reason=None): para_type = para_block['type'] para_content = {} - if para_type == BlockType.Text: + if para_type in [BlockType.Text, BlockType.List, BlockType.Index]: para_content = { 'type': 'text', 'text': merge_para_with_text(para_block), From 790691d63cf2f1ed761f88a47917de38419a8426 Mon Sep 17 00:00:00 2001 From: myhloli Date: Wed, 23 Oct 2024 10:34:42 +0800 Subject: [PATCH 2/2] =?UTF-8?q?update=EF=BC=9Aupdate=20config=20json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- magic-pdf.template.json | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/magic-pdf.template.json b/magic-pdf.template.json index fcb9995..487bbf0 100644 --- a/magic-pdf.template.json +++ b/magic-pdf.template.json @@ -6,9 +6,18 @@ "models-dir":"/tmp/models", "layoutreader-model-dir":"/tmp/layoutreader", "device-mode":"cpu", + "layout-config": { + "model": "doclayout_yolo" + }, + "formula-config": { + "mfd_model": "yolo_v8_mfd", + "mfr_model": "unimernet_small", + "enable": true + }, "table-config": { - "model": "TableMaster", - "is_table_recog_enable": false, + "model": "tablemaster", + "enable": false, "max_time": 400 - } + }, 
"config_version": "1.0.0" } \ No newline at end of file