From fb9949c44f4227beaa3f3526a34ee74acb80e196 Mon Sep 17 00:00:00 2001
From: myhloli
Date: Tue, 8 Oct 2024 14:52:09 +0800
Subject: [PATCH] perf(pdf_extract_kit): conditional memory cleanup based on GPU capacity

- Introduce a conditional memory cleanup step in the PDF extraction process
- Assess available GPU memory before deciding to perform memory cleanup
- Log the time taken for garbage collection when it occurs
- This optimization helps to balance performance and resource utilization
---
 magic_pdf/model/pdf_extract_kit.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/magic_pdf/model/pdf_extract_kit.py b/magic_pdf/model/pdf_extract_kit.py
index 30dd688a..1235a0a8 100644
--- a/magic_pdf/model/pdf_extract_kit.py
+++ b/magic_pdf/model/pdf_extract_kit.py
@@ -314,7 +314,8 @@ def __call__(self, image):
         mfr_res = []
         for mf_img in dataloader:
             mf_img = mf_img.to(self.device)
-            output = self.mfr_model.generate({'image': mf_img})
+            with torch.no_grad():
+                output = self.mfr_model.generate({'image': mf_img})
             mfr_res.extend(output['pred_str'])
         for res, latex in zip(latex_filling_list, mfr_res):
             res['latex'] = latex_rm_whitespace(latex)
@@ -336,7 +337,14 @@ def __call__(self, image):
             elif int(res['category_id']) in [5]:
                 table_res_list.append(res)
 
-        clean_memory()
+        if torch.cuda.is_available():
+            properties = torch.cuda.get_device_properties(self.device)
+            total_memory = properties.total_memory / (1024 ** 3)  # convert bytes to GB
+            if total_memory <= 8:
+                gc_start = time.time()
+                clean_memory()
+                gc_time = round(time.time() - gc_start, 2)
+                logger.info(f"gc time: {gc_time}")
 
         # OCR recognition
         if self.apply_ocr:
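
For context, the gating logic added above can be exercised in isolation with the sketch below. It is a minimal approximation, not the project's actual module layout: `maybe_clean_memory` is a hypothetical wrapper introduced here for illustration, the inlined `clean_memory` is a stand-in for magic_pdf's helper of the same name rather than an import of it, and loguru's `logger` is assumed because the patched file logs through a `logger` object.

import gc
import time

import torch
from loguru import logger


def clean_memory() -> None:
    """Stand-in for magic_pdf's clean_memory helper: free CUDA caches and run GC."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
    gc.collect()


def maybe_clean_memory(device: str = "cuda") -> None:
    """Mirror the patched branch: clean up only on GPUs with at most 8 GB of total memory."""
    if not torch.cuda.is_available():
        return
    properties = torch.cuda.get_device_properties(device)
    total_memory_gb = properties.total_memory / (1024 ** 3)  # convert bytes to GB
    if total_memory_gb <= 8:
        gc_start = time.time()
        clean_memory()
        gc_time = round(time.time() - gc_start, 2)
        logger.info(f"gc time: {gc_time}")

On cards with more than 8 GB of total memory the wrapper returns immediately, which is the intent of the patch: skip the per-call cache flush and garbage collection where memory pressure is unlikely, and only pay (and log) that cost on small-memory GPUs.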