Skip to content

Commit

Permalink
^q^
Browse files Browse the repository at this point in the history
  • Loading branch information
KurtBestor committed Sep 21, 2020
1 parent 1d5733b commit dd25699
Show file tree
Hide file tree
Showing 26 changed files with 364 additions and 89 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,22 @@
| **Facebook** | <https://facebook.com> |
| **Flickr** | <https://flickr.com> |
| **Gelbooru** | <https://gelbooru.com> |
| **Hanime.tv** | <https://hanime.tv> |
| **hanime.tv** | <https://hanime.tv> |
| **Hentai Foundry** | <https://hentai-foundry.com> |
| **Hitomi.la** | <https://hitomi.la> |
| **Hiyobi.me** | <https://hiyobi.me> |
| **Imgur** | <https://imgur.com> |
| **Instagram** | <https://instagram.com> |
| **爱淘美图** | <http://www.itmtu.com> |
| **iwara.tv** | <https://iwara.tv><br><https://ecchi.iwara.tv> |
| **Iwara** | <https://iwara.tv><br><https://ecchi.iwara.tv> |
| **Jmana** | <https://jmana.net> |
| **カクヨム** | <https://kakuyomu.jp> |
| **LHScan** | <https://loveheaven.net> |
| **Luscious** | <https://luscious.net> |
| **Manamoa** | <https://manamoa.net> |
| **MyReadingManga** | <https://myreadingmanga.info> |
| **Naver Blog** | <https://blog.naver.com> |
| **Naver Post** | <https://post.naver.com> |
| **Naver Webtoon** | <https://comic.naver.com> |
| **nhentai** | <https://nhentai.net> |
| **Niconico** | <http://nicovideo.jp> |
Expand All @@ -71,7 +72,7 @@
| **V LIVE** | <https://vlive.tv> |
| **Weibo** | <https://weibo.com> |
| **World Cosplay** | <https://worldcosplay.net> |
| **XHamster** | <https://xhamster.com> |
| **xHamster** | <https://xhamster.com> |
| **XNXX** | <https://xnxx.com> |
| **XVideos** | <https://xvideos.com> |
| **Yande.re** | <https://yande.re> |
Expand Down
1 change: 1 addition & 0 deletions src/extractor/afreeca_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class Downloader_afreeca(Downloader):
type = 'afreeca'
URLS = ['afreecatv.com']
single = True
display_name = 'AfreecaTV'

def init(self):
self.url = self.url.replace('afreeca_', '')
Expand Down
4 changes: 2 additions & 2 deletions src/extractor/artstation_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ class Image(object):

def __init__(self, post_url, date, url, page):
self.post_url = post_url
self.date = date
self.url = LazyUrl(post_url, lambda _: url, self)
self.page = page
name = post_url.split('/')[(-1)]
Expand All @@ -26,6 +25,7 @@ def __repr__(self):
class Downloader_artstation(Downloader):
type = 'artstation'
URLS = ['artstation.com']
display_name = 'ArtStation'

def init(self):
self.url_main = 'https://www.artstation.com/{}'.format(self.id.replace('artstation_', '').replace('/', '/'))
Expand Down Expand Up @@ -193,7 +193,7 @@ def get_imgs_page(id_art, session, date=None, cw=None):
soup = Soup(html)
url = soup.find('video').find('source').attrs['src']
except Exception as e:
print(e)
pass
if not url:
try:
url = soup.find('link', {'rel': 'canonical'}).attrs['href']
Expand Down
1 change: 1 addition & 0 deletions src/extractor/asiansister_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
class Downloader_asiansister(Downloader):
type = 'asiansister'
URLS = ['asiansister.com']
display_name = 'AsianSister'

@try_n(4)
def init(self):
Expand Down
1 change: 1 addition & 0 deletions src/extractor/asmhentai_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Downloader_asmhentai(Downloader):
type = 'asmhentai'
URLS = ['asmhentai.com']
MAX_CORE = 8
display_name = 'AsmHentai'

def init(self):
pass
Expand Down
1 change: 1 addition & 0 deletions src/extractor/bcy_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class Downloader_bcy(Downloader):
type = 'bcy'
URLS = ['bcy.net/item/detail/', 'bcy.net/u/']
MAX_CORE = 8
display_name = '半次元'

def init(self):
self.url = self.url.replace('bcy_', '')
Expand Down
1 change: 1 addition & 0 deletions src/extractor/bdsmlr_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
class Downloader_bdsmlr(Downloader):
type = 'bdsmlr'
URLS = ['bdsmlr.com']
display_name = 'BDSMlr'

def init(self):
self.url = self.url.replace('bdsmlr_', '')
Expand Down
1 change: 1 addition & 0 deletions src/extractor/bili_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class Downloader_bili(Downloader):
URLS = ['bilibili.com', 'bilibili.tv']
lock = True
detect_removed = False
display_name = 'bilibili'

def init(self):
self.url = fix_url(self.url, self.customWidget)
Expand Down
162 changes: 162 additions & 0 deletions src/extractor/comicwalker_downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#coding:utf8
import downloader
from utils import Soup, LazyUrl, urljoin, try_n, Downloader, get_print, clean_title, get_imgs_already
import ree as re
from itertools import cycle
from io import BytesIO
import json
from timee import sleep
from translator import tr_
import page_selector
import os


# https://static.comic-walker.com/viewer/cw-viewer.min.js
# https://static.comic-walker.com/viewer/cw-viewer.min.js
def decode(s, hash):
    """Decrypt a DRM-protected frame.

    ComicWalker's viewer XORs the image bytes with an 8-byte key derived
    from the first 16 hex characters of the frame's ``drm_hash``
    (see generateKey in cw-viewer.min.js).

    :param s: raw encrypted image bytes
    :param hash: hex DRM hash string for this frame
    :return: decrypted bytes
    """
    # generateKey: the first 16 hex digits form a 64-bit integer.
    key = int(hash[:16], 16)

    # Split the key into its 8 bytes, most-significant byte first.
    # (renamed from `filter` to avoid shadowing the builtin)
    key_bytes = [(key >> (i * 8)) % 256 for i in range(8)][::-1]
    # XOR the payload against the repeating 8-byte key stream.
    return bytes(x ^ y for x, y in zip(s, cycle(key_bytes)))


class Image(object):
    """One page image of an episode.

    The actual download + DRM decode is deferred: LazyUrl invokes the
    loader only when the image is written out.
    """

    def __init__(self, src, hash, p, page):
        def load(_):
            buf = BytesIO()
            downloader.download(src, referer=page.url, buffer=buf)
            raw = buf.read()
            plain = decode(raw, hash)
            # Rewrite the buffer in place with the decrypted bytes.
            buf.seek(0)
            buf.write(plain)
            buf.seek(0)
            return buf

        self.url = LazyUrl(page.url, load, self)
        self.filename = u'{}/{:04}.jpg'.format(page.title, p)


class Page(object):
    """A single episode: its viewer URL and a filesystem-safe title."""

    def __init__(self, url, title):
        self.title = clean_title(title)
        self.url = url


@Downloader.register
class Downloader_comicwalker(Downloader):
    """Downloader for ComicWalker series detail pages (comic-walker.com)."""
    type = 'comicwalker'
    URLS = ['comic-walker.com/contents/detail/', 'comic-walker.jp/contents/detail/']
    MAX_CORE = 4
    display_name = 'ComicWalker'
    _soup = None  # cached parsed detail page; populated lazily by `soup`
    pages = None

    def init(self):
        # Strip the type prefix the GUI prepends to typed URLs.
        # (removed the unused local alias `url` from the original)
        self.url = self.url.replace('comicwalker_', '')

    @property
    def soup(self):
        """Fetch the series detail page once and cache the parsed HTML."""
        if self._soup is None:
            html = downloader.read_html(self.url)
            self._soup = Soup(html)
        return self._soup

    def read(self):
        cw = self.customWidget
        title = get_title(self.soup, cw)

        self.imgs = get_imgs(self.url, self.soup, cw)

        for img in self.imgs:
            # get_imgs may mix Image objects with already-resolved URLs
            # (entries restored via get_imgs_already).
            if isinstance(img, Image):
                self.urls.append(img.url)
            else:
                self.urls.append(img)

        self.title = title


def get_imgs_page(page):
    """Fetch the frame list for one episode and wrap each frame as an Image.

    :param page: Page whose viewer URL carries a ``cid`` query parameter
    :return: list of Image objects in reading order
    """
    cid = re.find('[?&]cid=([a-zA-Z0-9_]+)', page.url)
    # Frame metadata (source URLs + DRM hashes) is served by the
    # niconico seiga API, not by comic-walker.com itself.
    url_api = 'https://ssl.seiga.nicovideo.jp/api/v1/comicwalker/episodes/{}/frames'.format(cid)

    html = downloader.read_html(url_api, referer=page.url)

    meta = json.loads(html)
    data = meta['data']
    imgs = []
    for p, item in enumerate(data['result']):
        src = item['meta']['source_url']
        drm_hash = item['meta']['drm_hash']  # renamed: don't shadow builtin `hash`
        imgs.append(Image(src, drm_hash, p, page))

    return imgs


def get_pages(url, soup=None):
    """Collect episode Pages from the back-number list on a detail page.

    :param url: series detail URL (used to absolutize episode links)
    :param soup: optional pre-parsed page; fetched when omitted
    """
    if soup is None:
        soup = Soup(downloader.read_html(url))

    pages = []
    for box in soup.findAll('div', class_='acBacknumber-item-leftbox'):
        anchor = box.parent.find('a')
        href = urljoin(url, anchor.attrs['href'])
        pages.append(Page(href, anchor.attrs['title']))

    return pages


def get_title(soup, cw=None):
    """Return the cleaned series title from the first non-empty <h1>.

    Raises when the page has no usable heading.
    """
    print_ = get_print(cw)
    title = None
    for h1 in soup.findAll('h1'):
        text = h1.text.strip()
        if text:
            title = text
            break
    if title is None:
        raise Exception('no title')
    title_clean = clean_title(title)
    print_('get_title: "{}"({}) "{}"({})'.format(title, title.encode('utf8'), title_clean, title_clean.encode('utf8')))
    return title_clean


@page_selector.register('comicwalker')
@try_n(4)
def f(url):
    """Page-selector hook: list the episodes of a series detail URL."""
    if '/viewer/' in url:
        # A viewer URL is a single episode; the selector needs the list page.
        raise Exception(tr_(u'목록 주소를 입력해주세요'))
    return get_pages(url)


def get_imgs(url, soup=None, cw=None):
    """Gather images for every selected episode of a series.

    :param url: series detail URL
    :param soup: optional pre-parsed detail page
    :param cw: optional GUI widget for progress/cancellation
    :return: list of Image objects (or cached entries), or None if cancelled
    """
    if soup is None:
        html = downloader.read_html(url)
        soup = Soup(html)  # fix: was Soup(hrml) — NameError on this fallback path

    title = get_title(soup, cw)

    pages = get_pages(url, soup)
    pages = page_selector.filter(pages, cw)

    imgs = []
    for i, page in enumerate(pages):
        # Reuse episodes already downloaded in a previous session.
        imgs_already = get_imgs_already('comicwalker', title, page, cw)
        if imgs_already:
            imgs += imgs_already
            continue

        if cw is not None:
            if not cw.alive:
                # User aborted; stop without a partial result.
                return
            cw.setTitle(u'{} {} / {} ({} / {})'.format(tr_(u'읽는 중...'), title, page.title, i+1, len(pages)))

        imgs += get_imgs_page(page)

    return imgs

1 change: 1 addition & 0 deletions src/extractor/hanime_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class Downloader_hanime(Downloader):
type = 'hanime'
URLS = ['hanime.tv/hentai-videos/', 'hanime.tv/videos/']
single = True
display_name = 'hanime.tv'

def init(self):
if self.url.startswith('hanime_'):
Expand Down
2 changes: 2 additions & 0 deletions src/extractor/hf_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class Downloader_hf(Downloader):
type = 'hf'
URLS = ['hentai-foundry.com']
MAX_CORE = 16
display_name = 'Hentai Foundry'

def init(self):
self.session = enter()

Expand Down
Loading

0 comments on commit dd25699

Please sign in to comment.