Skip to content

Commit

Permalink
fix: fix pdf image processor
Browse files (browse the repository at this point in the history)
  • Loading branch information
zmh-program committed Apr 25, 2024
1 parent f24427e commit f0eec8a
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions handlers/pdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from io import BytesIO

-from fastapi import File, UploadFile
+from fastapi import UploadFile
import fitz

from config import PDF_MAX_IMAGES
Expand All @@ -12,7 +12,7 @@ def is_pdf(filename: str) -> bool:
return filename.endswith(".pdf")


-async def process(file: UploadFile = File(...)) -> str:
+async def process(file: UploadFile, model: str) -> str:
filename = file.filename.replace(" ", "_").replace(".", "_")
doc = fitz.open("pdf", file.file.read()) # read the file from memory
stack = []
Expand Down Expand Up @@ -41,7 +41,7 @@ async def process(file: UploadFile = File(...)) -> str:

# create a file-like object for the image
image_file = UploadFile(io, filename=image_name)
-stack.append(await process_image(image_file))
+stack.append(await process_image(image_file, model))

print(f"[pdf] extracted image: {image_name} (page: {page.number}, cursor: {cursor}, max: {PDF_MAX_IMAGES})")

Expand Down
2 changes: 1 addition & 1 deletion handlers/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async def process_file(file: UploadFile = File(...), model: str = "") -> str:

filename = file.filename.lower()
if pdf.is_pdf(filename):
-return await pdf.process(file)
+return await pdf.process(file, model)
elif word.is_docx(filename):
return word.process(file)
elif ppt.is_pptx(filename):
Expand Down

0 comments on commit f0eec8a

Please sign in to comment.