dev(narugo): pixhost

deepghs · Sep 5, 2024 · 97659e6 · 97659e6
1 parent 98d99bc
commit 97659e6
Show file tree

Hide file tree

Showing 3 changed files with 87 additions and 0 deletions.
diff --git a/netdriveurls/drives/__init__.py b/netdriveurls/drives/__init__.py
@@ -11,4 +11,5 @@
 from .jpg5su import JPG5SuFileDownloadSession, get_direct_url_for_jpg5su, JPG5SuAlbumDownloadSession, \
     get_file_urls_for_jpg5su, get_og_image_url
 from .mediafire import MediaFireLinkInvalidError, MediaFireDownloadSession, get_direct_url_and_filename_for_mediafire
+from .pixhost import PixHostGalleryDownloadSession, PixHostShowDownloadSession, get_direct_url_for_pixhost
 from .saint2 import Saint2EmbedDownloadSession, get_direct_url_for_saint2
diff --git a/netdriveurls/drives/dispatch.py b/netdriveurls/drives/dispatch.py
@@ -8,6 +8,7 @@
 from .ibb import IbbFileDownloadSession
 from .jpg5su import JPG5SuFileDownloadSession, JPG5SuAlbumDownloadSession
 from .mediafire import MediaFireDownloadSession
+from .pixhost import PixHostShowDownloadSession, PixHostGalleryDownloadSession
 from .saint2 import Saint2EmbedDownloadSession
 from ..resolve import resolve_url
 
@@ -30,6 +31,8 @@ def register_net_drive(net_drive_cls: Type[NetDriveDownloadSession]):
 register_net_drive(Saint2EmbedDownloadSession)
 register_net_drive(BunkrImageDownloadSession)
 register_net_drive(BunkrAlbumDownloadSession)
+register_net_drive(PixHostGalleryDownloadSession)
+register_net_drive(PixHostShowDownloadSession)
 
 
 def from_url(url: str) -> Union[NetDriveDownloadSession, StandaloneFileNetDriveDownloadSession]:

diff --git a/netdriveurls/drives/pixhost.py b/netdriveurls/drives/pixhost.py
@@ -0,0 +1,83 @@
+import glob
+import os.path
+import zipfile
+from typing import Optional
+from urllib.parse import urljoin
+
+import requests
+from hbutils.system import urlsplit, TemporaryDirectory
+from pyquery import PyQuery as pq
+from urlobject import URLObject
+
+from .base import StandaloneFileNetDriveDownloadSession, NetDriveDownloadSession, ResourceInvalidError
+from ..utils import get_requests_session, download_file
+
+
+def get_direct_url_for_pixhost(url: str, session: Optional[requests.Session] = None):
+    """
+    Fetch a pixhost page and return the absolute URL of the image it displays.
+
+    The image is located through the ``.image img#image`` selector; its ``src`` attribute
+    is resolved against the final URL of the response.
+
+    :param url: URL of the pixhost page.
+    :param session: Requests session to use. When falsy, one is created
+        with ``get_requests_session()``.
+    :return: Absolute URL of the image.
+    :raises ResourceInvalidError: If the page has no matching image element with a ``src``.
+    """
+    if not session:
+        session = get_requests_session()
+    response = session.get(url)
+    response.raise_for_status()
+
+    image_src = pq(response.text)('.image img#image').attr('src')
+    if not image_src:
+        raise ResourceInvalidError(f'Invalid pixhost url - {url!r}.')
+    # The src may be relative, so resolve it against the URL we actually ended up on.
+    return urljoin(response.url, image_src)
+
+
+class PixHostGalleryDownloadSession(NetDriveDownloadSession):
+    """
+    Download session for a pixhost gallery page (``/gallery/<id>``).
+
+    The gallery is fetched as one zip archive from the gallery's ``download`` path
+    and unpacked into the destination directory.
+    """
+
+    def __init__(self, url: str):
+        """
+        :param url: URL of the pixhost gallery page.
+        """
+        NetDriveDownloadSession.__init__(self)
+        self.page_url = url
+
+    def _get_gallery_id(self) -> str:
+        """
+        Return the gallery id, i.e. the path segment right after ``gallery``.
+
+        :raises ResourceInvalidError: If the URL has no (or an empty) gallery id.
+        """
+        # ``is_valid_url`` only checks the ``gallery`` segment, so a URL without an id
+        # can still reach this point; slicing avoids a bare IndexError for it.
+        id_segments = urlsplit(self.page_url).path_segments[2:3]
+        if not id_segments or not id_segments[0]:
+            raise ResourceInvalidError(f'Invalid pixhost gallery url - {self.page_url!r}.')
+        return id_segments[0]
+
+    def _get_resource_id(self) -> str:
+        """
+        Build the resource id as ``pixhost_gallery_<gallery id>``.
+
+        :raises ResourceInvalidError: If the URL has no gallery id.
+        """
+        return f'pixhost_gallery_{self._get_gallery_id()}'
+
+    def download_to_directory(self, dst_dir: str):
+        """
+        Download the gallery archive and extract its content into ``dst_dir``.
+
+        :param dst_dir: Directory the archive members are extracted into.
+        :raises ResourceInvalidError: If the URL has no gallery id, or no ``*.zip`` file
+            shows up after the download.
+        """
+        download_url = str(URLObject(self.page_url).with_path(f'/gallery/{self._get_gallery_id()}/download'))
+        with TemporaryDirectory() as td:
+            download_file(download_url, output_directory=td)
+            # Escape the directory part so glob metacharacters in the temp path
+            # are matched literally; only the ``*.zip`` suffix is a pattern.
+            zip_files = sorted(glob.glob(os.path.join(glob.escape(td), '*.zip')))
+            if not zip_files:
+                raise ResourceInvalidError(
+                    f'No zip archive downloaded for pixhost gallery - {self.page_url!r}.')
+            with zipfile.ZipFile(zip_files[0], 'r') as zf:
+                zf.extractall(dst_dir)
+
+    @classmethod
+    def from_url(cls, url: str):
+        """
+        Create a session for the given gallery URL.
+        """
+        return cls(url)
+
+    @classmethod
+    def is_valid_url(cls, url: str) -> bool:
+        """
+        Tell whether ``url`` points to a gallery on ``pixhost.to`` (or a subdomain of it),
+        i.e. its first path segment is ``gallery``.
+        """
+        split = urlsplit(url)
+        return tuple(split.host.split('.')[-2:]) == ('pixhost', 'to') and \
+            tuple(split.path_segments[1:2]) == ('gallery',)
+
+
+class PixHostShowDownloadSession(StandaloneFileNetDriveDownloadSession):
+    """
+    Download session for a single pixhost image page (``/show/...``).
+    """
+
+    def __init__(self, url):
+        """
+        :param url: URL of the pixhost ``show`` page.
+        """
+        StandaloneFileNetDriveDownloadSession.__init__(self)
+        self.page_url = url
+
+    def _get_resource_id(self) -> str:
+        """
+        Build the resource id from every path segment after ``show``, joined with ``_``
+        and prefixed with ``pixhost_show_``.
+        """
+        show_segments = urlsplit(self.page_url).path_segments[2:]
+        return 'pixhost_show_' + '_'.join(show_segments)
+
+    def download_to_directory(self, dst_dir: str):
+        """
+        Resolve the direct image URL of the page and download it into ``dst_dir``.
+
+        The file is named ``<resource_id><ext>``, where ``ext`` is the lower-cased
+        extension of the remote filename.
+
+        :param dst_dir: Directory the image is saved into.
+        """
+        # One session is shared by the page lookup and the actual file download.
+        session = get_requests_session()
+        direct_url = get_direct_url_for_pixhost(self.page_url, session=session)
+        remote_name = urlsplit(direct_url).filename.lower()
+        extension = os.path.splitext(remote_name)[1]
+        target_path = os.path.join(dst_dir, f'{self.resource_id}{extension}')
+        download_file(direct_url, filename=target_path, session=session)
+
+    @classmethod
+    def from_url(cls, url: str):
+        """
+        Create a session for the given ``show`` page URL.
+        """
+        return cls(url)
+
+    @classmethod
+    def is_valid_url(cls, url: str) -> bool:
+        """
+        Tell whether ``url`` points to a ``show`` page on ``pixhost.to`` (or a subdomain of it),
+        i.e. its first path segment is ``show``.
+        """
+        parts = urlsplit(url)
+        if tuple(parts.host.split('.')[-2:]) != ('pixhost', 'to'):
+            return False
+        return tuple(parts.path_segments[1:2]) == ('show',)