Skip to content

Commit

Permalink
✨ Fix on_download and on_pdf
Browse files (browse the repository at this point in the history)
  • Loading branch information
asim-shrestha committed Mar 7, 2024
1 parent 3468845 commit 94b7a46
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
6 changes: 3 additions & 3 deletions harambe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ async def capture_download(

res = await asyncio.gather(
*[
o.on_download(download.suggested_filename, content)
o.on_download(download.url, download.suggested_filename, content)
for o in self._observers
]
)
Expand All @@ -189,9 +189,9 @@ async def capture_pdf(
from the observer to transform to a usable URL
"""
pdf_content = await self.page.pdf()
file_name = f"{self.page.url}-screen.pdf"
file_name = f"reworkd_page_snapshot.pdf"
res = await asyncio.gather(
*[o.on_download(file_name, pdf_content) for o in self._observers]
*[o.on_download(self.page.url, file_name, pdf_content) for o in self._observers]
)
return res[0]

Expand Down
7 changes: 4 additions & 3 deletions harambe/observer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import abstractmethod
from typing import Any, Dict, List, Protocol, Tuple, runtime_checkable, TypedDict
from urllib.parse import quote

from harambe.tracker import FileDataTracker
from harambe.types import URL, Context, Stage
Expand Down Expand Up @@ -31,7 +32,7 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None:
async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta":
print(f"Downloading file: {filename}") # TODO: use logger
return {
"url": f"{download_url}/{filename}",
"url": f"{download_url}/{quote(filename)}",
"filename": filename,
}

Expand All @@ -48,7 +49,7 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None:

async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta":
data = {
"url": f"{download_url}/{filename}",
"url": f"{download_url}/{quote(filename)}",
"filename": filename,
}
self._tracker.save_data(data)
Expand All @@ -69,7 +70,7 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None:

async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta":
data = {
"url": f"{download_url}/{filename}",
"url": f"{download_url}/{quote(filename)}",
"filename": filename,
}
self._files.append((filename, content))
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "harambe-sdk"
version = "0.8.2"
version = "0.8.3"
description = "Data extraction SDK for Playwright 🐒🍌"
authors = ["awtkns <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit 94b7a46

Please sign in to comment.