Commit dd4ca00

^q^

KurtBestor committed Jun 6, 2021
1 parent 7570075 commit dd4ca00
Showing 17 changed files with 2,025 additions and 181 deletions.
21 changes: 7 additions & 14 deletions src/extractor/bili_downloader.py
@@ -1,8 +1,3 @@
-# uncompyle6 version 3.5.0
-# Python bytecode 2.7 (62211)
-# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
-# Embedded file name: bili_downloader2.pyo
-# Compiled at: 2019-10-07 03:49:44
 import downloader
 from utils import Soup, LazyUrl, Downloader, query_url, get_outdir, get_print, cut_pair, format_filename, clean_title, get_resolution, try_n
 import hashlib, json
@@ -40,12 +35,8 @@ class Video(object):
 
     def __init__(self, url, referer, id, p):
         ext = os.path.splitext(url.split('?')[0])[1]
-        self.filename = (u'{}_{}{}').format(id, p, ext)
-
-        def f(_):
-            return url
-
-        self.url = LazyUrl(referer, f, self)
+        self.filename = (u'{}.part{}{}').format(id, p, ext)
+        self.url = LazyUrl(referer, lambda _: url, self, detect_local=False)
 
 
 # 1804
@@ -76,7 +67,9 @@ class Downloader_bili(Downloader):
     URLS = ['bilibili.com', 'bilibili.tv']
     lock = True
     detect_removed = False
+    detect_local_lazy = False
     display_name = 'bilibili'
+    single = True
 
     def init(self):
         self.url = fix_url(self.url, self.cw)
@@ -108,7 +101,7 @@ def read(self):
         title = format_filename(title, self.id_, '.mp4')[:-4]
         n = int(math.ceil(8.0 / len(videos)))
         self.print_(('n_threads: {}').format(n))
-        self.enableSegment(n_threads=n)
+        self.enableSegment(n_threads=n, overwrite=True)
         self.title = title
 
     def post_processing(self):
@@ -117,8 +110,8 @@ def post_processing(self):
         outdir = get_outdir(self.type)
         out = os.path.join(outdir, self.title + '.mp4')
         ffmpeg.join(cw.names, out, cw)
-        utils.remove(self.dir)
-        self.single = True
+        for file in cw.names:
+            utils.remove(file)
         cw.setNameAt(0, out)
         del cw.imgs[1:]
         cw.dones.add(os.path.realpath(out))
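One note on the read() hunk above: n = int(math.ceil(8.0 / len(videos))) hands roughly eight segment threads out across the video's parts. A quick standalone check of that arithmetic (the part counts below are made up for illustration):

import math

# ceil(8 / n_parts) segment threads per part, as in Downloader_bili.read().
for n_parts in (1, 2, 3, 8):
    n = int(math.ceil(8.0 / n_parts))
    print('{} part(s): {} thread(s) each'.format(n_parts, n))
# 1 part(s): 8 thread(s) each
# 2 part(s): 4 thread(s) each
# 3 part(s): 3 thread(s) each
# 8 part(s): 1 thread(s) each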
80 changes: 53 additions & 27 deletions src/extractor/iwara_downloader.py
@@ -1,41 +1,47 @@
 from __future__ import division, print_function, unicode_literals
 import downloader
-from utils import Soup, urljoin, Downloader, LazyUrl, get_print, clean_url, clean_title
+from utils import Soup, urljoin, Downloader, LazyUrl, get_print, clean_url, clean_title, check_alive, Session, try_n
 import ree as re
 import json
 import os
 from timee import sleep
 from io import BytesIO
+TIMEOUT = 300
 
 
 
 class File(object):
     thumb = None
 
-    def __init__(self, type, url, title, referer, p=0):
+    def __init__(self, type, url, title, referer, p=0, multi_post=False):
         self.type = type
         self.url = LazyUrl(referer, lambda _: url, self)
         ext = os.path.splitext(url.split('?')[0])[1]
         if ext.lower() == '.php':
             ext = '.mp4'
         if type == 'video':
             self.filename = clean_title('{}{}'.format(title, ext))
+        elif type == 'image':
+            name = '{}_p{}'.format(clean_title(title), p) if multi_post else p
+            self.filename = '{}{}'.format(name, ext)
         else:
-            self.filename = '{}{}'.format(p, ext)
+            raise NotImplementedError(type)
         self.title = title
 
 
-class LazyVideo(object):
-    type = 'video'
+class LazyFile(object):
     _url = None
     thumb = None
 
-    def __init__(self, url):
+    def __init__(self, url, type_, session):
         self.url = LazyUrl(url, self.get, self)
+        self.type = {'videos': 'video', 'images': 'image'}.get(type_) or type_
+        self.session = session
 
     def get(self, url):
         if self._url:
             return self._url
-        file = get_files(url)[0]
+        file = get_files(url, self.session, multi_post=True)[0]
         self.title = file.title
         self.thumb = file.thumb
         self.filename = file.filename
Expand All @@ -56,42 +62,61 @@ def fix_url(cls, url):
url = clean_url(url)
return url.split('?')[0]

def init(self):
self.session = Session()
self.session.cookies.update({'show_adult': '1', 'has_js': '1'})
self.setTimeout(TIMEOUT)

def read(self):
file = None
files = None
title = None
if '/users/' in self.url or '/user/' in self.url:
type_ = 'videos'
try:
if self.url.split('/users/')[1].split('/')[1] == 'images':
type_ = 'images'
except:
pass
info = read_channel(self.url, type_, self.cw)
info = read_channel(self.url, type_, self.session, self.cw)
title = info['title']
urls = info['urls']
if type_ == 'videos':
files = [LazyVideo(url) for url in urls]
file = self.process_playlist('[Channel] [{}] {}'.format(type_.capitalize(), info['title']), files)
files = [LazyFile(url, type_, self.session) for url in urls]
file = self.process_playlist('[Channel] [{}] {}'.format(type_.capitalize(), title), files)
elif type_ == 'images':
files = [LazyFile(url, type_, self.session) for url in urls]
title = '[Channel] [{}] {}'.format(type_.capitalize(), title)
else:
raise NotImplementedError('channel images')
else:
files = get_files(self.url, self.cw)
raise NotImplementedError(type_)

if file is None:
if files is None:
files = get_files(self.url, self.session, cw=self.cw)
for file in files:
self.urls.append(file.url)
file = files[0]

if file.type == 'youtube':
return self.Invalid('[iwara] Youtube: {}'.format(self.url))

if file.type == 'img':
if file.type == 'image':
self.single = False
self.title = clean_title(file.title)
self.title = clean_title(title or file.title)

if file.thumb is not None:
self.setIcon(file.thumb)



def read_channel(url, type_, cw=None):
@try_n(4)
def read_html(*args, **kwargs):
kwargs['timeout'] = TIMEOUT
return downloader.read_html(*args, **kwargs)


def read_channel(url, type_, session, cw=None):
print_ = get_print(cw)
html = downloader.read_html(url)
html = read_html(url, session=session)
soup = Soup(html)
if soup.find('div', id='block-mainblocks-user-connect'):
username = re.find(r'''/messages/new\?user=(.+)['"]''', html, err='no username')
@@ -102,13 +127,14 @@ def read_channel(url, type_, cw=None):
     urls = []
     urls_set = set()
     for p in range(50):
+        check_alive(cw)
         url = 'https://ecchi.iwara.tv/users/{}/{}?page={}'.format(username, type_, p)
         print_(url)
-        html = downloader.read_html(url)
+        html = read_html(url, session=session)
        soup = Soup(html)
         if p == 0:
             title = soup.find('h1', class_='page-title').text
-            info['title'] = title.replace("'s videos", '').strip()
+            info['title'] = title.replace("'s videos", '').replace("'s images", '').strip()
 
         view = soup.find('div', class_='view-content')
         if view is None:
@@ -129,9 +155,9 @@ def read_channel(url, type_, cw=None):
     return info
 
 
-def get_files(url, cw=None):
+def get_files(url, session, multi_post=False, cw=None):
     print_ = get_print(cw)
-    html = downloader.read_html(url)
+    html = read_html(url, session=session)
     soup = Soup(html)
     h = soup.find('h1', class_='title')
     content = h.parent.parent.parent
@@ -143,10 +169,10 @@ def get_files(url, cw=None):
     elif video:
         type = 'video'
     else:
-        type = 'img'
+        type = 'image'
     print_(('type: {}').format(type))
     files = []
-    if type == 'img':
+    if type == 'image':
         urls = set()
         for img in content.findAll('img'):
             img = urljoin(url, img.parent.attrs['href'])
@@ -156,7 +182,7 @@ def get_files(url, cw=None):
                 print('duplicate')
                 continue
             urls.add(img)
-            file = File(type, img, title, url, len(files))
+            file = File(type, img, title, url, len(files), multi_post=multi_post)
             files.append(file)
 
     elif type == 'youtube':
@@ -168,7 +194,7 @@ def get_files(url, cw=None):
         print('url_thumb:', url_thumb)
         id = re.find('videos/([0-9a-zA-Z_-]+)', url, err='no video id')
         url_data = urljoin(url, '/api/video/{}'.format(id))
-        s_json = downloader.read_html(url_data, url)
+        s_json = read_html(url_data, url, session=session)
         data = json.loads(s_json)
         video = data[0]
         url_video = urljoin(url, video['uri'])
@@ -178,7 +204,7 @@ def get_files(url, cw=None):
         downloader.download(url_thumb, buffer=file.thumb, referer=url)
         files.append(file)
     else:
-        raise Exception(('type "{}" is not supported').format(type))
+        raise NotImplementedError(type)
     return files
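The new read_html wrapper above retries failed iwara reads via utils.try_n with a 300-second timeout. A minimal sketch of what such a retry decorator typically looks like — the real utils.try_n may differ in signature, sleep behavior, and logging:

import functools
import time

def try_n(n, sleep=1):
    # Hypothetical stand-in for utils.try_n: call the wrapped function
    # up to n times, re-raising the last error if every attempt fails.
    def decorator(f):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            for i in range(n):
                try:
                    return f(*args, **kwargs)
                except Exception:
                    if i == n - 1:
                        raise
                    time.sleep(sleep)
        return wrapped
    return decorator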


16 changes: 10 additions & 6 deletions src/extractor/kakaopage_downloader.py
@@ -5,7 +5,7 @@
 from translator import tr_
 import page_selector
 import json
-UA = downloader.hdr['User-Agent']
+import clf2
 
 
 class Page(object):
@@ -28,13 +28,12 @@ def __init__(self, url, page, p):
 class Downloader_kakaopage(Downloader):
     type = 'kakaopage'
     URLS = ['page.kakao.com/home']
-    MAX_CORE = 8
+    MAX_CORE = 4
     MAX_SPEED = 4.0
     display_name = 'KakaoPage'
 
     def init(self):
         self.session = Session()
-        self.session.headers['User-Agent'] = UA
 
     @classmethod
     def fix_url(cls, url):
@@ -99,9 +98,14 @@ def get_pages(url, session):
     return pages
 
 
+def read_html(url, session):
+    res = clf2.solve(url, session=session)
+    return res['html']
+
+
 @try_n(2)
 def get_imgs_page(page, session):
-    html = downloader.read_html(page.url, session=session)
+    html = read_html(page.url, session=session)
     did = re.find('"did" *: *"(.+?)"', html, err='no did')
     url_api = 'https://api2-page.kakao.com/api/v1/inven/get_download_data/web'
     data = {
@@ -133,7 +137,7 @@ def get_info(url, session, cw=None):
 
     info = {}
 
-    html = downloader.read_html(url, session=session)
+    html = read_html(url, session=session)
     soup = Soup(html)
 
     __NEXT_DATA__ = soup.find('script', id='__NEXT_DATA__')
@@ -142,7 +146,7 @@
     tid = data['props']['initialState']['common']['constant']['tid']
     print_('tid: {}'.format(tid))
     session.cookies['_kptid'] = tid
-    html = downloader.read_html(url, session=session)
+    html = read_html(url, session=session)
     soup = Soup(html)
 
     title = soup.find('h2').text.strip()
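Two related changes in this file: page loads now go through clf2.solve instead of downloader.read_html, presumably to clear the bot checks that plain HTTP reads were tripping, and get_imgs_page then scrapes the did token out of the solved HTML with ree.find. For reference, the equivalent of that lookup with the stdlib re module, run on a fabricated fragment:

import re

html = '... "did" : "abcdef0123456789" ...'  # fabricated sample fragment
m = re.search(r'"did" *: *"(.+?)"', html)    # same pattern as the hunk above
did = m.group(1) if m else None
print(did)  # abcdef0123456789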
2 changes: 1 addition & 1 deletion src/extractor/lhscan_downloader.py
@@ -66,7 +66,7 @@ def soup(self):
 
     @property
     def name(self):
-        title = self.soup.findAll('span', {'itemprop': 'name'})[-1].text.strip()
+        title = self.soup.find('ul', class_='manga-info').find('h3').text
         return clean_title(title)
 
     def read(self):
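The name property now pulls the title from the manga-info list rather than the last itemprop="name" span. Assuming Soup wraps BeautifulSoup as elsewhere in the project, the new lookup behaves like this on a minimal, made-up fragment:

from bs4 import BeautifulSoup

html = '<ul class="manga-info"><li><h3>Sample Manga Title</h3></li></ul>'
soup = BeautifulSoup(html, 'html.parser')
print(soup.find('ul', class_='manga-info').find('h3').text)  # Sample Manga Title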
2 changes: 1 addition & 1 deletion src/extractor/nhentai_com_downloader.py
@@ -10,7 +10,7 @@
 @Downloader.register
 class Downloader_nhentai_com(Downloader):
     type = 'nhentai_com'
-    URLS = ['nhentai.com']
+    URLS = [r'regex:https?://nhentai.com']
     MAX_CORE = 16
     display_name = 'nhentai.com'
 
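Assuming plain URLS entries are substring-matched while regex:-prefixed entries are applied as regular expressions, anchoring the pattern to the URL scheme keeps lookalike addresses from binding to this extractor; roughly:

import re

good = 'https://nhentai.com/en/g/123'
bad = 'https://example.com/nhentai.com/page'

print('nhentai.com' in bad)                            # True: old substring rule misfires
print(bool(re.search(r'https?://nhentai.com', bad)))   # False under the new pattern
print(bool(re.search(r'https?://nhentai.com', good)))  # True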