
Commit

^q^
KurtBestor committed Jul 11, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent ed1b70d commit 9a978e4
Showing 7 changed files with 101 additions and 63 deletions.
7 changes: 4 additions & 3 deletions src/extractor/afreeca_downloader.py
@@ -28,6 +28,10 @@ class Downloader_afreeca(Downloader):
     single = True
     display_name = 'AfreecaTV'
 
+    @classmethod
+    def fix_url(cls, url):
+        return url.rstrip(' /')
+
     def read(self):
         session = Session()
         video = get_video(self.url, session, self.cw)
@@ -52,9 +56,6 @@ def _get_stream(url_m3u8):
 @try_n(8)
 def get_video(url, session, cw):
     print_ = get_print(cw)
-    while url.strip().endswith('/'):
-        url = url[:-1]
-
     html = downloader.read_html(url, session=session)
     if "document.location.href='https://login." in html:
         raise errors.LoginRequired()
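For reference, the new fix_url hook lets the framework normalize URLs before the extractor runs, replacing the trailing-slash loop deleted from get_video. A minimal standalone sketch (the example URL is illustrative, not from the commit):

def fix_url(url):
    # rstrip(' /') drops any run of trailing spaces and slashes in one call,
    # where the old while-loop peeled off one '/' per iteration
    return url.rstrip(' /')

assert fix_url('https://vod.afreecatv.com/player/12345/ ') == 'https://vod.afreecatv.com/player/12345'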
28 changes: 10 additions & 18 deletions src/extractor/navertoon_downloader.py
@@ -1,8 +1,3 @@
-# uncompyle6 version 3.5.0
-# Python bytecode 2.7 (62211)
-# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
-# Embedded file name: navertoon_downloader.pyo
-# Compiled at: 2019-10-03 10:19:35
 import downloader
 from utils import Soup, urljoin, Downloader, LazyUrl, get_imgs_already, clean_title, get_ext, get_print
 from constants import try_n
@@ -47,9 +42,14 @@ class Downloader_navertoon(Downloader):
     display_name = 'Naver Webtoon'
 
     def init(self):
-        self.url = get_main(self.url)
         self.__info, _ = get_pages(self.url, self.cw)
 
+    @classmethod
+    def fix_url(cls, url):
+        url = re.sub(r'[?&]page=[0-9]+', '', re.sub(r'[?&]no=[0-9]+', '', url)).replace('m.comic.naver.', 'comic.naver.')
+        url = url.replace('detail.nhn', 'list.nhn').replace('/detail?', '/list?')
+        return url.rstrip('#')
+
     @property
     def name(self):
         id = self.__info.id
@@ -70,14 +70,6 @@ def read(self):
         self.title = self.name
 
 
-def get_main(url):
-    url_main = re.sub('[?&]page=[0-9]+', '', re.sub('[?&]no=[0-9]+', '', url)).replace('detail.nhn', 'list.nhn').replace('m.comic.naver.', 'comic.naver.')
-    while url_main.endswith('#'):
-        url_main = url_main[:-1]
-
-    return url_main
-
-
 def set_no(url, p):
     if '&no=' not in url:
         url = url + ('&no={}').format(p)
@@ -101,7 +93,7 @@ def set_page(url, p):
 @try_n(4)
 def get_pages(url, cw=None):
     print_ = get_print(cw)
-    url = get_main(url).replace('comic.naver.', 'm.comic.naver.')
+    url = Downloader_navertoon.fix_url(url).replace('comic.naver.', 'm.comic.naver.')
     id = get_id(url)
     print('id:', id)
     print(url)
@@ -119,7 +111,7 @@ def get_pages(url, cw=None):

         raise Exception(title)
 
-    print('artist:', artist)
+    print_('artist: {}'.format(artist))
     title = soup.find('meta', {'property': 'og:title'}).attrs['content']
     pages = []
     nos = set()
@@ -134,7 +126,7 @@ def get_pages(url, cw=None):
         view = soup.findAll('ul', class_='section_episode_list')[(-1)]
         for lst in view.findAll('li'):
             url_page = urljoin(url, lst.find('a').attrs['href'])
-            if 'detail.nhn' not in url_page.lower():
+            if 'detail.nhn' not in url_page.lower() and 'detail?' not in url_page.lower(): #3540
                 continue
             print_('url_page: {}'.format(url_page))
             text = lst.find('strong', class_='title').find('span', class_='name').text.strip()
@@ -160,7 +152,7 @@ def get_pages(url, cw=None):
 @page_selector.register('navertoon')
 @try_n(4)
 def f(url):
-    url = get_main(url)
+    url = Downloader_navertoon.fix_url(url)
     info, pages = get_pages(url)
     return pages
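Taken together, the new classmethod canonicalizes any mobile or episode URL to the desktop list page, so get_main is no longer needed. A self-contained sketch of the same logic using the standard-library re module (the input URL below is illustrative):

import re

def fix_url(url):
    # drop the episode ('no') and paging query parameters, force the desktop host
    url = re.sub(r'[?&]page=[0-9]+', '', re.sub(r'[?&]no=[0-9]+', '', url)).replace('m.comic.naver.', 'comic.naver.')
    # map the episode view to the episode-list view
    url = url.replace('detail.nhn', 'list.nhn').replace('/detail?', '/list?')
    return url.rstrip('#')

print(fix_url('https://m.comic.naver.com/webtoon/detail.nhn?titleId=123456&no=7&page=2#'))
# -> https://comic.naver.com/webtoon/list.nhn?titleId=123456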

15 changes: 11 additions & 4 deletions src/extractor/pixiv_downloader.py
@@ -19,6 +19,7 @@
 from locker import lock
 import threading
 from ratelimit import limits, sleep_and_retry
+##import asyncio
 FORCE_LOGIN = True
 LIMIT = 48
 for header in ['pixiv_illust', 'pixiv_bmk', 'pixiv_search', 'pixiv_following', 'pixiv_following_r18']:
@@ -60,10 +61,16 @@ def key_id(cls, url):
         return url.replace('://www.', '://').replace('/en/', '/')
 
     def read(self):
-        info = get_info(self.url, self.cw)
-        for img in info['imgs']:
-            self.urls.append(img.url)
-        self.title = clean_title(info['title'])
+##        loop = asyncio.new_event_loop()
+##        asyncio.set_event_loop(loop)
+        try:
+            info = get_info(self.url, self.cw)
+            for img in info['imgs']:
+                self.urls.append(img.url)
+            self.title = clean_title(info['title'])
+        finally:
+##            loop.close()
+            pass
 
 
 class PixivAPIError(errors.LoginRequired): pass
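The read() body is now wrapped in try/finally so that, if the commented-out asyncio lines are ever enabled, the event loop is closed even when get_info raises. A sketch of the loop lifecycle those comments anticipate; fetch_info is a hypothetical stand-in for get_info:

import asyncio

def read_with_loop(fetch_info):
    # a fresh event loop per call, as the commented-out lines suggest
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return fetch_info()
    finally:
        # finally guarantees cleanup even if fetch_info raises
        loop.close()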
73 changes: 48 additions & 25 deletions src/extractor/pornhub_downloader.py
@@ -14,6 +14,7 @@
 import utils
 from m3u8_tools import playlist2stream, M3u8_stream
 import ytdl
+import errors
 
 
 
@@ -53,6 +54,7 @@ class Video(object):
     thumb = None
 
     def __init__(self, url, cw, session):
+        url = Downloader_pornhub.fix_url(url)
         self.url = LazyUrl(url, self.get, self)
         self.cw = cw
         self.session = session
@@ -68,11 +70,22 @@ def get(self, url):
             return self._url
 
         id_ = re.find(r'viewkey=(\w+)', url, re.IGNORECASE) or \
-              re.find(r'/embed/(\w+)', url, re.IGNORECASE)
-        print('id: {}'.format(id_))
+              re.find(r'/embed/(\w+)', url, re.IGNORECASE, err='no id')
+        print_('id: {}'.format(id_))
         if 'viewkey=' not in url.lower() and '/gif/' not in url.lower():
             url = urljoin(url, '/view_video.php?viewkey={}'.format(id_))
-        html = downloader.read_html(url, session=session)
 
+        url_test = url.replace('pornhubpremium.com', 'pornhub.com')
+        try:
+            html = downloader.read_html(url_test, session=session)
+            soup = Soup(html)
+            if soup.find('div', id='lockedPlayer'):
+                print_('Locked player')
+                raise Exception('Locked player')
+            url = url_test
+        except: #3511
+            url = url.replace('pornhub.com', 'pornhubpremium.com')
+            html = downloader.read_html(url, session=session)
+
         soup = Soup(html)
         soup = fix_soup(soup, url, session, cw)
@@ -173,22 +186,30 @@ class Downloader_pornhub(Downloader):
     type = 'pornhub'
     single = True
     strip_header = False
-    URLS = ['pornhub.com', 'pornhubpremium.com']
+    URLS = ['pornhub.com', 'pornhubpremium.com', 'pornhubthbh7ap3u.onion']
 
     def init(self):
         self.session = Session() # 1791
-        if 'pornhub_gif_' in self.url:
-            self.url = 'https://www.pornhub.com/gif/{}'.format(
-                self.url.replace('pornhub_gif_', ''))
-        elif 'pornhub_album_' in self.url:
-            self.url = 'https://www.pornhub.com/album/{}'.format(
-                self.url.replace('pornhub_album_', ''))
-        elif 'pornhub_' in self.url:
-            self.url = 'https://www.pornhub.com/view_video.php?viewkey={}'\
-                .format(self.url.replace('pornhub_', ''))
         if 'pornhubpremium.com' in self.url.lower() and\
            not is_login(self.session, self.cw):
-            return self.Invalid('[Pornhub] Login cookies required')
+            raise errors.LoginRequired()
+
+    @classmethod
+    def fix_url(cls, url):
+        if 'pornhub_gif_' in url:
+            url = 'https://www.pornhub.com/gif/{}'.format(
+                url.replace('pornhub_gif_', ''))
+        elif 'pornhub_album_' in url:
+            url = 'https://www.pornhub.com/album/{}'.format(
+                url.replace('pornhub_album_', ''))
+        elif 'pornhub_' in url:
+            url = 'https://www.pornhub.com/view_video.php?viewkey={}'\
+                .format(url.replace('pornhub_', ''))
+        if '/authenticate/goToLoggedIn' in url:
+            qs = utils.query_url(url)
+            url = urljoin(url, qs['url'][0])
+        url = url.replace('pornhubthbh7ap3u.onion', 'pornhub.com')
+        return url
 
     @classmethod
     def key_id(cls, url):
@@ -359,8 +380,10 @@ def get_videos(url, cw=None):

     session = Session()
 
+    domain = utils.domain(url)
+
     if mode in ['pornstar']:
-        url_main = 'https://www.pornhub.com/{}/{}'.format(mode, username)
+        url_main = 'https://{}/{}/{}'.format(domain, mode, username)
         html = downloader.read_html(url_main, session=session)
         soup = Soup(html)
         soup = fix_soup(soup, url_main, session, cw)
@@ -414,41 +437,41 @@ def get_videos(url, cw=None):
         try:
             if mode in ['users', 'model']:
                 if mode == 'users':
-                    url_api = 'https://www.pornhub.com/users/{}/videos/public/'\
-                              'ajax?o=mr&page={}'.format(username, p)
+                    url_api = 'https://{}/users/{}/videos/public/'\
+                              'ajax?o=mr&page={}'.format(domain, username, p)
                 elif mode == 'model':
-                    url_api = 'https://www.pornhub.com/model/{}/videos/upload/'\
-                              'ajax?o=mr&page={}'.format(username, p)
+                    url_api = 'https://{}/model/{}/videos/upload/'\
+                              'ajax?o=mr&page={}'.format(domain, username, p)
                 r = session.post(url_api)
                 soup = Soup(r.text)
                 if soup.find('h1'):
                     print('break: h1')
                     break
             elif mode in ['pornstar']:
                 if free:
-                    url_api = 'https://www.pornhub.com/{}/{}/videos/upload'\
-                              '?page={}'.format(mode, username, p)
+                    url_api = 'https://{}/{}/{}/videos/upload'\
+                              '?page={}'.format(domain, mode, username, p)
                     soup = downloader.read_soup(url_api, session=session)
                     soup = fix_soup(soup, url_api, session, cw)
                     soup = soup.find('div', class_='videoUList')
                 else:
-                    url_api = 'https://www.pornhub.com/{}/{}?page={}'.format(mode, username, p)
+                    url_api = 'https://{}/{}/{}?page={}'.format(domain, mode, username, p)
                     soup = downloader.read_soup(url_api, session=session)
                     soup = fix_soup(soup, url_api, session, cw)
                     soup = soup.find('ul', class_='pornstarsVideos')
             elif mode in ['channels']:
-                url_api = 'https://www.pornhub.com/{}/{}/videos?page={}'.format(mode, username, p)
+                url_api = 'https://{}/{}/{}/videos?page={}'.format(domain, mode, username, p)
                 soup = downloader.read_soup(url_api, session=session)
                 soup = fix_soup(soup, url_api, session, cw)
                 try:
                     soup = soup.find('div', {'id': 'channelsBody'}).find('div', class_='rightSide')
                 except:
                     break
             elif mode in ['playlist']:
-                #url_api = 'https://www.pornhub.com/playlist/viewChunked?id={}&offset={}&itemsPerPage=40'.format(username, len(hrefs))
+                #url_api = 'https://{}/playlist/viewChunked?id={}&offset={}&itemsPerPage=40'.format(domain, username, len(hrefs))
                 if token is None:
                     raise Exception('no token')
-                url_api = 'https://www.pornhub.com/playlist/viewChunked?id={}&token={}&page={}'.format(username, token, p)
+                url_api = 'https://{}/playlist/viewChunked?id={}&token={}&page={}'.format(domain, username, token, p)
                 soup = downloader.read_soup(url_api, session=session)
             else:
                 raise NotImplementedError(mode)
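The new fix_url consolidates the internal-id expansion that previously lived in init and adds two rules: unwrapping /authenticate/goToLoggedIn redirect URLs and mapping the onion mirror to the clearnet domain. A standalone sketch, with urllib.parse standing in for the project's utils.query_url helper (assumed here to behave like parse_qs):

from urllib.parse import urljoin, urlparse, parse_qs

def fix_url(url):
    # expand the downloader's internal id forms into full URLs
    if 'pornhub_gif_' in url:
        url = 'https://www.pornhub.com/gif/{}'.format(url.replace('pornhub_gif_', ''))
    elif 'pornhub_album_' in url:
        url = 'https://www.pornhub.com/album/{}'.format(url.replace('pornhub_album_', ''))
    elif 'pornhub_' in url:
        url = 'https://www.pornhub.com/view_video.php?viewkey={}'.format(url.replace('pornhub_', ''))
    # unwrap login-redirect URLs to their 'url' query parameter
    if '/authenticate/goToLoggedIn' in url:
        qs = parse_qs(urlparse(url).query)
        url = urljoin(url, qs['url'][0])
    # treat the onion mirror as the clearnet site
    return url.replace('pornhubthbh7ap3u.onion', 'pornhub.com')

assert fix_url('pornhub_abc123') == 'https://www.pornhub.com/view_video.php?viewkey=abc123'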
20 changes: 12 additions & 8 deletions src/extractor/twitter_downloader.py
@@ -50,7 +50,7 @@ class Downloader_twitter(Downloader):
     def init(self):
         self.session = get_session()
         #self.url = fix_url(self.url)
-        self.artist, self.username = get_artist_username(self.url, self.session)
+        self.artist, self.username = get_artist_username(self.url, self.session, self.cw)
         if self.username == 'home':
             raise Exception('No username: home')

@@ -97,8 +97,9 @@ def read(self):


 @lock
-def _guest_token(session, headers, cache=True):
+def _guest_token(session, headers, cache=True, cw=None):
     global CACHE_GUEST_TOKEN
+    print_ = get_print(cw)
     token = None
     if cache:
         if CACHE_GUEST_TOKEN and time() - CACHE_GUEST_TOKEN[1] < TIMEOUT_GUEST_TOKEN:
@@ -111,6 +112,9 @@ def _guest_token(session, headers, cache=True):
     r = session.post('https://api.twitter.com/1.1/guest/activate.json', headers=headers)
     data = json.loads(r.text)
     token = data['guest_token']
+    print_('token type: {}'.format(type(token)))
+    if isinstance(token, int): #3525
+        token = str(token)
     CACHE_GUEST_TOKEN = token, time()
     return token

@@ -133,7 +137,7 @@ def __init__(self, session, cw=None, cache_guest_token=True):
             print('auth_token:', auth_token)
         else:
             # guest token
-            guest_token = _guest_token(session, session.headers, cache=cache_guest_token)
+            guest_token = _guest_token(session, session.headers, cache=cache_guest_token, cw=cw)
             session.headers["x-guest-token"] = guest_token
             session.cookies.set("gt", guest_token, domain=".twitter.com")
             print('guest_token:', guest_token)
@@ -264,7 +268,7 @@ def _pagination(self, url_api, params=None, entry_tweet="tweet-", entry_cursor="
                 return
             params["cursor"] = cursor
             if params.get("cursor") is None: # nothing
-                print_('no cursor')
+                self.print_('no cursor')
                 break


@@ -374,7 +378,7 @@ def get_imgs_more(username, session, title, types, n=None, format='[%y-%m-%d] id
     imgs = imgs or []
     print_('imgs: {}, types: {}'.format(len(imgs), ', '.join(types)))
 
-    artist, username = get_artist_username(username, session)#
+    artist, username = get_artist_username(username, session, cw)#
 
     # Range
     n = max(n or 0, get_max_range(cw))
@@ -594,21 +598,21 @@ def get(self, _):


 @try_n(4)
-def get_artist_username(url, session):
+def get_artist_username(url, session, cw=None):
     if 'twitter.' not in url:
         username = url.strip('@')
     else:
         id = re.find('/status/([0-9]+)', url)
         if id:
-            tweet = TwitterAPI(session).tweet(id, url)
+            tweet = TwitterAPI(session, cw).tweet(id, url)
             user_id = tweet['globalObjects']['tweets'][id]['user_id_str']
             username = tweet['globalObjects']['users'][user_id]['screen_name']
             print('username fixed:', username)
         else:
             username = re.find('twitter.[^/]+/([^/?]+)', url)
             if not username:
                 raise Exception('no username')
-    data = TwitterAPI(session).user_by_screen_name(username)
+    data = TwitterAPI(session, cw).user_by_screen_name(username)
     artist = data['legacy']['name']
     username = data['legacy']['screen_name']
     return artist, username
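The #3525 fix guards against activate.json returning the guest token as a JSON number; it must be a string before it is set as the x-guest-token header and the gt cookie. A minimal sketch with illustrative payloads:

import json

def extract_guest_token(body):
    token = json.loads(body)['guest_token']
    # 3525: coerce a bare JSON number to str before using it in headers/cookies
    if isinstance(token, int):
        token = str(token)
    return token

assert extract_guest_token('{"guest_token": 1412345678901234567}') == '1412345678901234567'
assert extract_guest_token('{"guest_token": "1412345678901234567"}') == '1412345678901234567'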
14 changes: 9 additions & 5 deletions src/extractor/weibo_downloader.py
@@ -3,7 +3,7 @@
 import ree as re
 from timee import sleep, clock, time
 from constants import clean_url
-from utils import Downloader, urljoin, try_n, Session, get_print, clean_title, Soup, fix_protocol, domain
+from utils import Downloader, urljoin, try_n, Session, get_print, clean_title, Soup, fix_protocol, domain, get_max_range
 import os
 from translator import tr_
 import json
@@ -118,6 +118,8 @@ def get_id(url, cw=None):
 def get_imgs(uid, oid, title, session, cw=None, d=None, parent=None):
     print_ = get_print(cw)
     print_('uid: {}, oid:{}'.format(uid, oid))
 
+    max_pid = get_max_range(cw)
+
     @try_n(4)
     def get_album_imgs(album, page):
@@ -168,21 +170,23 @@ def get_albums(page):
     imgs = []
     for album in albums:
         print('Album:', album.id, album.type)
+        imgs_album = []
         for p in range(1, 101):
             imgs_new = get_album_imgs(album, p)
-            imgs += imgs_new
+            imgs_album += imgs_new
             s = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
             if cw:
                 if not cw.alive:
                     return []
                 cw.setTitle(s)
             else:
                 print(s)
+            if len(imgs_album) >= max_pid:
+                break
             if not imgs_new:
                 break
             sleep(1)
+        imgs += imgs_album
 
     imgs = sorted(imgs, key=lambda img: img.timestamp, reverse=True)
-    return imgs
+    return imgs[:max_pid]
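The paging change collects each album into imgs_album and stops requesting pages once max_pid images have been gathered, instead of walking every album to exhaustion; the combined result is also capped. A simplified sketch (fetch_page is a hypothetical stand-in for get_album_imgs; progress reporting and timestamp sorting are omitted):

def collect_imgs(albums, fetch_page, max_pid):
    imgs = []
    for album in albums:
        imgs_album = []
        for p in range(1, 101):
            imgs_new = fetch_page(album, p)
            imgs_album += imgs_new
            # stop paging this album once the range limit is reached
            if len(imgs_album) >= max_pid:
                break
            # an empty page means the album is exhausted
            if not imgs_new:
                break
        imgs += imgs_album
    # cap the combined result at the user's max range
    return imgs[:max_pid]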


7 changes: 7 additions & 0 deletions src/extractor/youtube_downloader.py
@@ -20,6 +20,7 @@
 from PyQt import QtCore, QtGui
 from translator import tr_
 from m3u8_tools import dash2stream
+from datetime import datetime


def print_streams(streams, cw):
@@ -74,6 +75,11 @@ def get(self, url, force=False):

         streams = yt.streams.all()
         print_streams(streams, cw)
 
+        #3528
+        time = datetime.strptime(yt.info['upload_date'], '%Y%m%d')
+        self.utime = (time-datetime(1970,1,1)).total_seconds()
+        print_('utime: {}'.format(self.utime))
+
         if type == 'video':
             streams[:] = [stream for stream in streams if stream.video_codec is not None]
@@ -328,6 +334,7 @@ class Downloader_youtube(Downloader):
     URLS = ['youtube.co', 'youtu.be']
     lock = True
     display_name = 'YouTube'
+    keep_date = True #3528
 
     def init(self):
         ui_setting = self.ui_setting
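The #3528 change stamps downloads with the video's upload date: the 'YYYYMMDD' string from yt.info is converted to Unix epoch seconds, and keep_date = True tells the downloader to apply it to the saved file. A minimal sketch of the conversion:

from datetime import datetime

def upload_date_to_utime(upload_date):
    # upload_date is a 'YYYYMMDD' string, e.g. '20210711'
    time = datetime.strptime(upload_date, '%Y%m%d')
    # seconds since the Unix epoch, matching the commit's arithmetic
    return (time - datetime(1970, 1, 1)).total_seconds()

assert upload_date_to_utime('20210711') == 1625961600.0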
