diff --git a/netdriveurls/drives/__init__.py b/netdriveurls/drives/__init__.py
index 92b6c98..818ab41 100644
--- a/netdriveurls/drives/__init__.py
+++ b/netdriveurls/drives/__init__.py
@@ -11,4 +11,5 @@
 from .jpg5su import JPG5SuFileDownloadSession, get_direct_url_for_jpg5su, JPG5SuAlbumDownloadSession, \
     get_file_urls_for_jpg5su, get_og_image_url
 from .mediafire import MediaFireLinkInvalidError, MediaFireDownloadSession, get_direct_url_and_filename_for_mediafire
+from .pixhost import PixHostGalleryDownloadSession, PixHostShowDownloadSession, get_direct_url_for_pixhost
 from .saint2 import Saint2EmbedDownloadSession, get_direct_url_for_saint2
diff --git a/netdriveurls/drives/dispatch.py b/netdriveurls/drives/dispatch.py
index e8730d9..7d47287 100644
--- a/netdriveurls/drives/dispatch.py
+++ b/netdriveurls/drives/dispatch.py
@@ -8,6 +8,7 @@
 from .ibb import IbbFileDownloadSession
 from .jpg5su import JPG5SuFileDownloadSession, JPG5SuAlbumDownloadSession
 from .mediafire import MediaFireDownloadSession
+from .pixhost import PixHostShowDownloadSession, PixHostGalleryDownloadSession
 from .saint2 import Saint2EmbedDownloadSession
 from ..resolve import resolve_url
 
@@ -30,6 +31,8 @@ def register_net_drive(net_drive_cls: Type[NetDriveDownloadSession]):
 register_net_drive(Saint2EmbedDownloadSession)
 register_net_drive(BunkrImageDownloadSession)
 register_net_drive(BunkrAlbumDownloadSession)
+register_net_drive(PixHostGalleryDownloadSession)
+register_net_drive(PixHostShowDownloadSession)
 
 
 def from_url(url: str) -> Union[NetDriveDownloadSession, StandaloneFileNetDriveDownloadSession]:
diff --git a/netdriveurls/drives/pixhost.py b/netdriveurls/drives/pixhost.py
new file mode 100644
index 0000000..d8a0c04
--- /dev/null
+++ b/netdriveurls/drives/pixhost.py
@@ -0,0 +1,148 @@
+import glob
+import os.path
+import zipfile
+from typing import Optional
+from urllib.parse import urljoin
+
+import requests
+from hbutils.system import urlsplit, TemporaryDirectory
+from pyquery import PyQuery as pq
+from urlobject import URLObject
+
+from .base import StandaloneFileNetDriveDownloadSession, NetDriveDownloadSession, ResourceInvalidError
+from ..utils import get_requests_session, download_file
+
+
+def get_direct_url_for_pixhost(url: str, session: Optional[requests.Session] = None) -> str:
+    """
+    Resolve the direct image URL behind a pixhost page.
+
+    The page is fetched and the ``src`` attribute of its ``.image img#image``
+    element is joined against the URL of the response.
+
+    :param url: URL of the pixhost page.
+    :param session: Session used for the request. When omitted (or falsy),
+        one is obtained from ``get_requests_session()``.
+    :return: Absolute URL of the image.
+    :raises ResourceInvalidError: If no ``src`` is found on the page.
+    """
+    session = session or get_requests_session()
+    resp = session.get(url)
+    resp.raise_for_status()
+
+    page = pq(resp.text)
+    relurl = page('.image img#image').attr('src')
+    if not relurl:
+        raise ResourceInvalidError(f'Invalid pixhost url - {url!r}.')
+    return urljoin(resp.url, relurl)
+
+
+class PixHostGalleryDownloadSession(NetDriveDownloadSession):
+    """
+    Download session for pixhost gallery pages (``/gallery/<id>`` URLs).
+
+    The gallery is fetched as a zip archive from ``/gallery/<id>/download``
+    and unpacked into the destination directory.
+    """
+
+    def __init__(self, url: str):
+        NetDriveDownloadSession.__init__(self)
+        self.page_url = url
+
+    def _get_gallery_id(self) -> str:
+        """
+        Return the gallery id, i.e. the path segment right after ``gallery``.
+
+        :raises ResourceInvalidError: If the URL carries no gallery id.
+        """
+        # Slice rather than index: a URL that stops at ``/gallery`` then gives
+        # an empty string here instead of an unexplained IndexError.
+        gallery_id = ''.join(urlsplit(self.page_url).path_segments[2:3])
+        if not gallery_id:
+            raise ResourceInvalidError(f'Invalid pixhost gallery url - {self.page_url!r}.')
+        return gallery_id
+
+    def _get_resource_id(self) -> str:
+        """Return the resource id, ``pixhost_gallery_<gallery id>``."""
+        return f'pixhost_gallery_{self._get_gallery_id()}'
+
+    def download_to_directory(self, dst_dir: str):
+        """
+        Download the gallery archive and extract it into ``dst_dir``.
+
+        :param dst_dir: Directory the archive members are extracted to.
+        :raises ResourceInvalidError: If the URL carries no gallery id, or the
+            download did not produce a ``*.zip`` file.
+        """
+        gallery_path = '/'.join(['', 'gallery', self._get_gallery_id(), 'download'])
+        download_url = str(URLObject(self.page_url).with_path(gallery_path))
+        with TemporaryDirectory() as td:
+            download_file(download_url, output_directory=td)
+            zip_files = glob.glob(os.path.join(td, '*.zip'))
+            if not zip_files:
+                # e.g. the server answered with something other than the archive
+                raise ResourceInvalidError(f'No zip archive downloaded for pixhost gallery - {self.page_url!r}.')
+            with zipfile.ZipFile(zip_files[0], 'r') as zf:
+                zf.extractall(dst_dir)
+
+    @classmethod
+    def from_url(cls, url: str):
+        """Create a session for ``url``."""
+        return cls(url)
+
+    @classmethod
+    def is_valid_url(cls, url: str) -> bool:
+        """
+        Check whether ``url`` is a pixhost gallery URL.
+
+        :return: ``True`` when the host ends with ``pixhost.to`` (last two
+            dot-separated labels) and ``path_segments[1]`` is ``gallery``.
+        """
+        split = urlsplit(url)
+        return tuple(split.host.split('.')[-2:]) == ('pixhost', 'to') and \
+            tuple(split.path_segments[1:2]) == ('gallery',)
+
+
+class PixHostShowDownloadSession(StandaloneFileNetDriveDownloadSession):
+    """Download session for single-image pixhost pages (``/show/...`` URLs)."""
+
+    def __init__(self, url: str):
+        StandaloneFileNetDriveDownloadSession.__init__(self)
+        self.page_url = url
+
+    def _get_resource_id(self) -> str:
+        """Return ``pixhost_show_`` followed by the path segments after ``show``, joined by ``_``."""
+        split = urlsplit(self.page_url)
+        return f'pixhost_show_{"_".join(split.path_segments[2:])}'
+
+    def download_to_directory(self, dst_dir: str):
+        """
+        Download the image into ``dst_dir`` as ``<resource_id><ext>``.
+
+        ``<ext>`` is the lower-cased extension of the direct URL's filename.
+
+        :param dst_dir: Directory the image file is written to.
+        :raises ResourceInvalidError: If the page yields no direct image URL.
+        """
+        session = get_requests_session()
+        url = get_direct_url_for_pixhost(self.page_url, session=session)
+        _, ext = os.path.splitext(urlsplit(url).filename.lower())
+        dst_file = os.path.join(dst_dir, f'{self.resource_id}{ext}')
+        download_file(url, filename=dst_file, session=session)
+
+    @classmethod
+    def from_url(cls, url: str):
+        """Create a session for ``url``."""
+        return cls(url)
+
+    @classmethod
+    def is_valid_url(cls, url: str) -> bool:
+        """
+        Check whether ``url`` is a pixhost single-image URL.
+
+        :return: ``True`` when the host ends with ``pixhost.to`` (last two
+            dot-separated labels) and ``path_segments[1]`` is ``show``.
+        """
+        split = urlsplit(url)
+        return tuple(split.host.split('.')[-2:]) == ('pixhost', 'to') and \
+            tuple(split.path_segments[1:2]) == ('show',)