From a71db7031475b70ddb71e66b1c14e292826bc5d0 Mon Sep 17 00:00:00 2001
From: myhloli
Date: Tue, 8 Oct 2024 11:35:04 +0800
Subject: [PATCH] feat: add arXiv paper link to header and adjust PDF parsing logic

- Add arXiv paper link to the header template for easy access to the latest research paper.
- Modify the PDF parsing logic to handle edge cases more accurately, particularly in determining the number of lines in a block based on its height.
---
 magic_pdf/pdf_parse_union_core_v2.py | 2 +-
 projects/gradio_app/header.html | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/magic_pdf/pdf_parse_union_core_v2.py b/magic_pdf/pdf_parse_union_core_v2.py
index ec5905e0..eee1c04a 100644
--- a/magic_pdf/pdf_parse_union_core_v2.py
+++ b/magic_pdf/pdf_parse_union_core_v2.py
@@ -191,7 +191,7 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
     # 如果block高度小于n行正文,则直接返回block的bbox
     if line_height*3 < block_height:
         if block_height > page_h*0.25 and page_w*0.5 > block_weight > page_w*0.25: # 可能是双列结构,可以切细点
-            lines = int(block_height/line_height)
+            lines = int(block_height/line_height)+1
         else:
             # 如果block的宽度超过0.4页面宽度,则将block分成3行
             if block_weight > page_w*0.4:
diff --git a/projects/gradio_app/header.html b/projects/gradio_app/header.html
index d047f5df..99ec40ea 100644
--- a/projects/gradio_app/header.html
+++ b/projects/gradio_app/header.html
@@ -90,6 +90,16 @@
+
+
+
+
+
+
+ Paper
+
+