Skip to content

Commit

Permalink
perf(pdf_extract_kit): conditional memory cleanup based on GPU capacity
Browse files Browse the repository at this point in the history
- Introduce a conditional memory cleanup step in the PDF extraction process
- Assess available GPU memory before deciding to perform memory cleanup
- Log the time taken for garbage collection when it occurs
- This optimization helps to balance performance and resource utilization
  • Loading branch information
myhloli committed Oct 8, 2024
1 parent a71db70 commit fb9949c
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions magic_pdf/model/pdf_extract_kit.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ def __call__(self, image):
mfr_res = []
for mf_img in dataloader:
mf_img = mf_img.to(self.device)
output = self.mfr_model.generate({'image': mf_img})
with torch.no_grad():
output = self.mfr_model.generate({'image': mf_img})
mfr_res.extend(output['pred_str'])
for res, latex in zip(latex_filling_list, mfr_res):
res['latex'] = latex_rm_whitespace(latex)
Expand All @@ -336,7 +337,14 @@ def __call__(self, image):
elif int(res['category_id']) in [5]:
table_res_list.append(res)

clean_memory()
if torch.cuda.is_available():
properties = torch.cuda.get_device_properties(self.device)
total_memory = properties.total_memory / (1024 ** 3) # 将字节转换为 GB
if total_memory <= 8:
gc_start = time.time()
clean_memory()
gc_time = round(time.time() - gc_start, 2)
logger.info(f"gc time: {gc_time}")

# ocr识别
if self.apply_ocr:
Expand Down

0 comments on commit fb9949c

Please sign in to comment.