main.py

import os
import re
import math
import json
import js2py
import imagehash
import requests
import pillow_avif  # importing registers the AVIF image plugin with Pillow
from PIL import Image
from urllib.request import urlopen, Request
from urllib.parse import urlparse
from urllib.error import HTTPError
from bs4 import BeautifulSoup
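
# The Hitomi class mirrors the image-URL helpers from the site's gg.js: the
# script is fetched and evaluated with js2py so that gg.m(), gg.s() and gg.b
# can be called from Python when rebuilding image URLs.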
class Hitomi:
    def __init__(self):
        self.response = urlopen("https://ltn.hitomi.la/gg.js")
        self.ggjs = self.response.read() #self.ggjs = "'use strict'; gg = {m: function(g){ return '123'; }, s: function(h){ return '456'; }, b: '7'};"
        self.gg = js2py.eval_js(self.ggjs.decode('utf-8'))

    def subdomain_from_url(self, url, base):
        retval = 'b'
        if base:
            retval = base
        b = 16
        r = re.compile(r'\/[0-9a-f]{61}([0-9a-f]{2})([0-9a-f])')
        m = r.search(url)
        if not m:
            return 'a'
        g = int(m[2] + m[1], b)
        if not math.isnan(g):
            # gg.m() may come back from js2py as a float, so coerce before chr()
            retval = chr(97 + int(self.gg.m(g))) + retval
        return retval

    def url_from_url(self, url, base):
        return re.sub(r'\/\/..?\.hitomi\.la\/', '//' + self.subdomain_from_url(url, base) + '.hitomi.la/', url)

    def full_path_from_hash(self, hash):
        return self.gg.b + self.gg.s(hash) + '/' + hash

    def real_full_path_from_hash(self, hash):
        # Python equivalent of the site's hash.replace(/^.*(..)(.)$/, '$2/$1/'+hash)
        return re.sub(r'^.*(..)(.)$', r'\2/\1/' + hash, hash)

    def url_from_hash(self, galleryid, image, dir, ext):
        ext = ext or dir or image['name'].split('.').pop()
        dir = dir or 'images'
        return 'https://a.hitomi.la/' + dir + '/' + self.full_path_from_hash(image['hash']) + '.' + ext

    def url_from_url_from_hash(self, galleryid, image, dir, ext, base):
        if 'tn' == base:
            return self.url_from_url('https://a.hitomi.la/' + dir + '/' + self.real_full_path_from_hash(image['hash']) + '.' + ext, base)
        else:
            return self.url_from_url(self.url_from_hash(galleryid, image, dir, ext), base)
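
# save() downloads a single image into output_path, naming the file after its
# perceptual hash (imagehash.average_hash) plus the original extension, so
# identical images end up with the same filename.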
def save(headers, url):
    img_hash = ''
    if not url:
        print("[{0}] Error: img url not found!".format(iUrl))
        return
    else:
        try:
            response = requests.get(url, headers=headers, stream=True)
            response.raise_for_status()
        except requests.exceptions.RequestException as err:
            print("[{0}] Error: {1}, {2}".format(iUrl, err, url))
            return
        try:
            urlp = urlparse(url)
            img = Image.open(response.raw)
            img_ext = urlp.path.split('.').pop()
            img_hash = str(imagehash.average_hash(img)) + '.' + img_ext
            img.save(os.path.join(output_path, img_hash))
            print("[{0}] Success: {1}".format(iUrl, img_hash))
        except Exception as ex:
            print(url)
            print("[{0}] Error: {1}".format(iUrl, ex))
            return
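
# download() decides how to locate the image from the URL itself:
#   hitomi.la    - expects a reader URL of the form .../<galleryid>.html#<page>;
#                  the page number comes from the fragment and the file list
#                  from the gallery's .js descriptor on ltn.hitomi.la
#   doujins.com  - expects the fragment to match the page's data-link attribute
#   gelbooru.com - takes the <img id="image"> element from the post page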
def download(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0'}
    img_url = ""
    urlp = urlparse(url)
    try:
        response = urlopen(url)
    except HTTPError as err:
        print("[{0}] Error: HTTPError {1}, {2}".format(iUrl, err.code, url))
        return
    html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    if "hitomi.la" in url:
        galleryid = re.search(r'(?:\/)(\d+)(?:\.html)', urlp.path).group(1)
        page = int(urlp.fragment) - 1  # fragment holds the 1-based page number
        try:
            response = urlopen("https://ltn.hitomi.la/galleries/" + galleryid + ".js")
        except HTTPError as err:
            print("[{0}] Error: HTTPError {1}, {2}".format(iUrl, err.code, url))
            return
        html = response.read()
        data = json.loads(html[html.index(b'{'):])  # drop the "var galleryinfo = " prefix
        h = Hitomi()
        headers['Referer'] = urlp.scheme + '://' + urlp.netloc + urlp.path
        img_url = h.url_from_url_from_hash(galleryid, data['files'][page], 'webp' if not data['files'][page]['hasavif'] else 'avif', None, 'a')
    if "doujins.com" in url:
        data_link = '#' + urlp.fragment
        img_url = soup.find_all("img", {"class": "doujin active", "data-link": data_link})[0].get('data-file')
    if "gelbooru.com" in url:
        img_url = soup.find("img", {"id": "image"})['src']
    save(headers, img_url)

def start():
    global iUrl
    iUrl = 1
    urls = []
    with open(list_path) as file:
        for line in file:
            urls.append(line.rstrip())
    print("[i] Found {0} url(s)\n".format(len(urls)))
    if len(urls) >= 1:
        for url in urls:
            download(url)
            iUrl = iUrl + 1

def main():
    global dir_path
    global list_name
    global list_path
    global output_name
    global output_path
    print("[#] Doujin Downloader [#]")
    print("[i] Author\t\t : RizkyBlackHat")
    print("[i] Supported sites\t : hitomi.la, doujins.com, gelbooru.com\n")
    dir_path = os.path.dirname(os.path.realpath(__file__))
    list_name = input("[>] Enter the url list file name : ")
    list_path = os.path.join(dir_path, "list", list_name)
    if not os.path.isfile(list_path):
        print("[!] Input file not found!")
        quit()
    output_name = input("[>] Enter the output directory name : ")
    output_path = os.path.join(dir_path, "output", output_name)
    if not os.path.isdir(output_path):
        try:
            os.makedirs(output_path)
        except OSError:
            print("[!] Can't make output directory!")
            quit()
    start()

if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print()
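
# Usage sketch: put a text file with one URL per line under ./list/, run the
# script, then enter that file name and an output directory name when prompted;
# downloaded images are written under ./output/<name>/.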