diff --git a/magic_pdf/libs/clean_memory.py b/magic_pdf/libs/clean_memory.py new file mode 100644 index 00000000..6bfc174f --- /dev/null +++ b/magic_pdf/libs/clean_memory.py @@ -0,0 +1,10 @@ +# Copyright (c) Opendatalab. All rights reserved. +import torch +import gc + + +def clean_memory(): + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + gc.collect() \ No newline at end of file diff --git a/magic_pdf/model/pdf_extract_kit.py b/magic_pdf/model/pdf_extract_kit.py index 6c5b9d18..f0bc468d 100644 --- a/magic_pdf/model/pdf_extract_kit.py +++ b/magic_pdf/model/pdf_extract_kit.py @@ -3,6 +3,7 @@ import time from magic_pdf.libs.Constants import * +from magic_pdf.libs.clean_memory import clean_memory from magic_pdf.model.model_list import AtomicModel os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新 @@ -330,6 +331,8 @@ def __call__(self, image): elif int(res['category_id']) in [5]: table_res_list.append(res) + clean_memory() + # ocr识别 if self.apply_ocr: ocr_start = time.time() diff --git a/magic_pdf/pdf_parse_union_core_v2.py b/magic_pdf/pdf_parse_union_core_v2.py index 60bb92e5..fdcab016 100644 --- a/magic_pdf/pdf_parse_union_core_v2.py +++ b/magic_pdf/pdf_parse_union_core_v2.py @@ -7,6 +7,7 @@ import torch +from magic_pdf.libs.clean_memory import clean_memory from magic_pdf.libs.commons import fitz, get_delta_time from magic_pdf.libs.convert_utils import dict_to_list from magic_pdf.libs.drop_reason import DropReason @@ -304,14 +305,6 @@ def parse_page_core(pdf_docs, magic_model, page_id, pdf_bytes_md5, imageWriter, return page_info -def clean_memory(): - import gc - if torch.cuda.is_available(): - torch.cuda.empty_cache() - torch.cuda.ipc_collect() - gc.collect() - - def pdf_parse_union(pdf_bytes, model_list, imageWriter,