#!/usr/bin/python
MAX_ALBUMS_PER_USER = 20
MAX_IMAGES_PER_CONTRIBUTOR = 1000
import cgitb; cgitb.enable() # for debugging
import cgi # for getting query keys/values
from sys import argv, stdout
from os import remove, path, stat, utime, SEEK_END, sep, walk, environ, listdir
from shutil import rmtree
from stat import ST_ATIME, ST_MTIME
from time import strftime
from urllib import unquote
from json import dumps
from sites.site_deviantart import deviantart
from sites.site_flickr import flickr
from sites.site_imagearn import imagearn
from sites.site_imagebam import imagebam
from sites.site_imagefap import imagefap
from sites.site_imgur import imgur
#from sites.site_webstagram import instagram
from sites.site_statigram import instagram
from sites.site_photobucket import photobucket
from sites.site_tumblr import tumblr
from sites.site_twitter import twitter
from sites.site_xhamster import xhamster
from sites.site_getgonewild import getgonewild
from sites.site_anonib import anonib
from sites.site_motherless import motherless
from sites.site_4chan import fourchan
from sites.site_minus import minus
from sites.site_gifyo import gifyo
from sites.site_five00px import five00px
from sites.site_cghub import cghub
from sites.site_chickupload import chickupload
from sites.site_teenplanet import teenplanet
from sites.site_chansluts import chansluts
from sites.site_buttoucher import buttoucher
from sites.site_pichunter import pichunter
from sites.site_soupio import soupio
from sites.site_imgbox import imgbox
from sites.site_reddit import reddit
from sites.site_gallerydump import gallerydump
from sites.site_fapdu import fapdu
from sites.site_fuskator import fuskator
from sites.site_kodiefiles import kodiefiles
from sites.site_pbase import pbase
from sites.site_8muses import eightmuses
from sites.site_setsdb import setsdb
from sites.site_nfsfw import nfsfw
from sites.site_shareimage import shareimage
from sites.site_seenive import seenive
from sites.site_vinebox import vinebox
from sites.site_imgchili import imgchili
from sites.site_fapproved import fapproved
from sites.site_gonewild import gonewild
from sites.site_vidble import vidble
from sites.site_soundcloud import soundcloud
# No longer supported
from sites.site_occ import occ
from sites.site_gonearch import gonearch
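# Note: each sites.site_* module above is assumed to expose one ripper class
# named after the site. get_ripper() below relies on the convention (visible in
# its loop) that constructing a ripper with a URL it does not handle raises a
# blank Exception, so the next candidate can be tried.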
""" Print error in JSON format """
def print_error(text):
print dumps( { 'error' : text } )
"""
Where the magic happens.
Prints JSON response to query.
"""
def main():
# Keys are the query that's passed to the rip script, ex:
# ./rip.cgi?url=http://x.com&start=true&cached=false
# The dict would be { url : http://x.com, start: true, cached: false }
keys = get_keys()
if 'start' in keys and \
'url' in keys:
cached = True # Default to cached
if 'cached' in keys and keys['cached'] == 'false':
cached = False
rip(keys['url'], cached)
elif 'check' in keys and \
'url' in keys:
check(keys['url'])
elif 'recent' in keys:
lines = 10
if 'lines' in keys:
lines = int(keys['lines'])
recent(lines)
elif 'byuser' in keys:
ip = keys['byuser']
if ip == 'me': ip = environ.get('REMOTE_ADDR', '127.0.0.1')
print dumps({ 'albums' : albums_by_ip(ip) })
else:
print_error('invalid request')
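# Illustrative examples of the query protocol main() handles (URLs and values
# below are made up; the response shapes come from rip(), check(), recent(),
# and albums_by_ip() in this file):
#   ?url=http://host/album&start=true               -> rips the album (or reuses cached zip)
#   ?url=http://host/album&start=true&cached=false  -> re-rips, ignoring any cached zip
#   ?url=http://host/album&check=true               -> {"zip": ..., "size": ...} or {"log": ...}
#   ?recent=true&lines=5                             -> {"recent": [{"url": ..., "view_url": ...}]}
#   ?byuser=me                                       -> {"albums": [{"album": ..., "url": ...}]}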
""" Gets ripper, checks for existing rip, rips and zips as needed. """
def rip(url, cached):
url = unquote(url.strip()).replace(' ', '%20').replace('https://', 'http://')
if not passes_pre_rip_check(url): return
try:
# Get domain-specific ripper for URL
ripper = get_ripper(url)
except Exception, e:
print_error(str(e))
return
# Check URL against blacklist
if path.exists('url_blacklist.txt'):
for line in open('url_blacklist.txt', 'r'):
line = line.strip().lower()
if line == '': continue
if line in url.lower() or \
ripper.working_dir.lower().endswith(line):
print_error('cannot rip: URL is blacklisted')
return
# Check if there's already a zip for the album
if ripper.existing_zip_path() != None:
if not cached:
# If user specified the uncached version, remove the zip
remove(ripper.existing_zip_path())
if path.exists(ripper.working_dir):
rmtree(ripper.working_dir)
else:
# Mark the file as recently-accessed (top of FIFO queue)
update_file_modified(ripper.existing_zip_path())
add_recent(url)
response = {}
response['zip'] = ripper.existing_zip_path().replace(' ', '%20').replace('%20', '%2520')
response['size'] = ripper.get_size(ripper.existing_zip_path())
if path.exists(ripper.working_dir):
update_file_modified(ripper.working_dir)
image_count = 0
for root, subdirs, files in walk(ripper.working_dir):
if 'thumbs' in root: continue
for f in files:
if f.endswith('.txt'): continue
image_count += 1
response['album'] = ripper.working_dir.replace('rips/', '').replace('%20', '%2520')
response['url'] = './%s' % ripper.working_dir.replace('rips/', 'rips/#')
response['image_count'] = image_count
print dumps( response )
return
if is_contributor():
ripper.max_images = MAX_IMAGES_PER_CONTRIBUTOR
# Rip it
try:
ripper.download()
ripper.wait_for_threads()
except Exception, e:
print_error('download failed: %s' % str(e))
return
# If ripper fails silently, it will remove the directory of images
if not path.exists(ripper.working_dir):
print_error('unable to download album (empty? 404?)')
return
# Save IP of ripper
f = open('%s%sip.txt' % (ripper.working_dir, sep), 'w')
f.write(environ.get('REMOTE_ADDR', '127.0.0.1'))
f.close()
response = {}
response['image_count'] = ripper.image_count
if ripper.hit_image_limit():
response['limit'] = ripper.max_images
# Create zip flag
f = open('%s%szipping.txt' % (ripper.working_dir, sep), 'w')
f.write('\n')
f.close()
# Zip it
try:
ripper.zip()
except Exception, e:
print_error('zip failed: %s' % str(e))
return
# Delete zip flag
try: remove('%s%szipping.txt' % (ripper.working_dir, sep))
except: pass
# Mark album as completed
f = open('%s%scomplete.txt' % (ripper.working_dir, sep), 'w')
f.write('\n')
f.close()
response['album'] = ripper.working_dir.replace(' ', '%20').replace('%20', '%2520')
response['url'] = './%s' % ripper.working_dir.replace('rips/', 'rips/#')
response['zip'] = ripper.existing_zip_path().replace(' ', '%20').replace('%20', '%2520')
response['size'] = ripper.get_size(ripper.existing_zip_path())
# Add to recently-downloaded list
add_recent(url)
# Print it
print dumps(response)
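# rip() leaves marker files in the album's working directory:
#   ip.txt       - IP address of the requester who started the rip
#   zipping.txt  - present while the album is being zipped
#   complete.txt - written once the zip was created successfully
# (Any cleanup of stale albums based on these markers is assumed to happen
# outside this script.)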
""" Checks if current user is a 'contributor' """
def is_contributor():
if not path.exists('contributors.txt'): return False
cookies = get_cookies()
if not 'rip_contributor_password' in cookies: return False
f = open('contributors.txt', 'r')
contributors = f.read().split('\n')
f.close()
while '' in contributors: contributors.remove('')
return cookies['rip_contributor_password'] in contributors
""" Returns dict of requester's cookies """
def get_cookies():
if not 'HTTP_COOKIE' in environ: return {}
cookies = {}
txt = environ['HTTP_COOKIE']
for line in txt.split(';'):
if not '=' in line: continue
pairs = line.strip().split('=')
cookies[pairs[0]] = pairs[1]
return cookies
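# Illustrative parse: HTTP_COOKIE = 'rip_contributor_password=hunter2; theme=dark'
# yields {'rip_contributor_password': 'hunter2', 'theme': 'dark'}. Note that a
# value containing '=' is cut off at the second '=' by the plain split() above.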
""" Ensures url can be ripped by user """
def passes_pre_rip_check(url):
# Check if site is in unsupported list
if not is_supported(url):
print_error('site is not supported; will not be supported')
return False
# Check if user passed max albums allowed
if not is_contributor():
ip = environ.get('REMOTE_ADDR', '127.0.0.1')
count = 0
for album in albums_by_ip(ip):
if not album['album'].startswith('gonewild_'):
count += 1
if count >= MAX_ALBUMS_PER_USER:
print_error('users are only allowed to rip %d albums at a time' % MAX_ALBUMS_PER_USER)
return False
return True
"""
Checks status of rip. Returns zip/size if finished, otherwise
returns the last log line from the rip.
"""
def check(url):
url = unquote(url).replace(' ', '%20')
try:
ripper = get_ripper(url)
except Exception, e:
print_error(str(e))
return
# Check if there's already a zip for the album
if ripper.existing_zip_path() != None:
response = {}
response['zip'] = ripper.existing_zip_path().replace(' ', '%20').replace('%20', '%2520')
response['size'] = ripper.get_size(ripper.existing_zip_path())
# Return link to zip
if path.exists(ripper.working_dir):
update_file_modified(ripper.working_dir)
image_count = 0
for root, subdirs, files in walk(ripper.working_dir):
if 'thumbs' in root: continue
for f in files:
if f.endswith('.txt'): continue
image_count += 1
response['album'] = ripper.working_dir.replace('rips/', '').replace('%20', '%2520')
response['url'] = './%s' % ripper.working_dir.replace('rips/', 'rips/#')
response['image_count'] = image_count
print dumps( response )
else:
# Print last log line ("status")
lines = ripper.get_log(tail_lines=1)
print dumps( {
'log' : '\\n'.join(lines)
} )
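# Illustrative shapes of check() responses (field values are made up):
#   finished:    {"zip": "...", "size": ..., "album": "...", "url": "...", "image_count": 42}
#   in progress: {"log": "<last line of the ripper's log>"}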
""" Returns an appropriate ripper for a URL, or throws exception """
def get_ripper(url):
sites = [ \
deviantart, \
flickr, \
imagearn, \
imagebam, \
imagefap, \
imgur, \
instagram, \
photobucket, \
tumblr, \
twitter, \
xhamster, \
getgonewild, \
anonib, \
motherless, \
fourchan, \
occ, \
minus, \
gifyo, \
five00px, \
chickupload, \
cghub, \
teenplanet, \
chansluts, \
buttoucher, \
pichunter, \
soupio, \
imgbox, \
reddit, \
gallerydump, \
fapdu, \
fuskator, \
kodiefiles, \
pbase, \
eightmuses, \
setsdb, \
nfsfw, \
shareimage, \
seenive, \
vinebox, \
imgchili, \
fapproved, \
gonewild, \
vidble, \
soundcloud]
for site in sites:
try:
ripper = site(url)
return ripper
except Exception, e:
# Rippers that aren't made for the URL throw blank Exception
error = str(e)
if error == '': continue
# If Exception isn't blank, then it's the right ripper but an error occurred
raise e
raise Exception('Ripper can not rip given URL')
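# Minimal sketch of the ripper interface get_ripper() assumes (hypothetical
# class for documentation only; the real implementations live in sites/):
#
#   class examplesite(object):
#       def __init__(self, url):
#           if 'examplesite.com' not in url:
#               raise Exception('')  # blank message = "not my URL", try next ripper
#           ...
#
#   # Methods/attributes this script uses on a ripper: download(),
#   # wait_for_threads(), zip(), existing_zip_path(), get_size(), get_log(),
#   # hit_image_limit(), working_dir, image_count, max_images.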
""" Updates system 'modified time' for file to current time. """
def update_file_modified(f):
st = stat(f)
from time import time
atime = int(time())
mtime = int(time())
try:
utime(f, (atime, mtime))
except: pass
""" Retrieves key/value pairs from query, puts in dict """
def get_keys():
form = cgi.FieldStorage()
keys = {}
for key in form.keys():
keys[key] = form[key].value
if not 'url' in keys and not 'recent' in keys and len(argv) > 1:
keys['url'] = argv[1]
keys['start'] = 'true'
return keys
"""
Returns recently-downloaded zips
"""
def recent(lines):
recents = []
try:
f = open('recent_rips.lst', 'r')
recents = tail(f, lines=lines)
f.close()
except: pass
result = []
for rec in recents:
d = {}
try: ripper = get_ripper(rec)
except: continue
d['url'] = rec
d['view_url'] = ripper.working_dir.replace('rips/', 'rips/#')
result.append(d)
print dumps( {
'recent' : result
} )
""" Tail a file and get X lines from the end """
def tail(f, lines=1, _buffer=4098):
lines_found = []
block_counter = -1
while len(lines_found) < lines:
try:
f.seek(block_counter * _buffer, SEEK_END)
except IOError, e: # either file is too small, or too many lines requested
f.seek(0)
lines_found = f.readlines()
break
lines_found = f.readlines()
if len(lines_found) > lines:
break
block_counter -= 1
result = [word.strip() for word in lines_found[-lines:]]
result.reverse()
return result
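# Example usage (illustrative): tail(open('recent_rips.lst'), lines=3) returns
# the last three lines with the newest first, seeking backwards in
# _buffer-sized blocks so large files usually are not read in full.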
""" Adds url to list of recently-downloaded albums """
def add_recent(url):
if path.exists('recent_rips.lst'):
already_added = False
f = open('recent_rips.lst', 'r')
if url in tail(f, lines=10): already_added = True
f.close()
if already_added: return
f = open('recent_rips.lst', 'a')
f.write('%s\n' % url)
f.close()
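""" Returns albums under rips/ that were ripped by the given IP address """
# The album's source URL is recovered from its log.txt: the first line is
# assumed to end with the URL (everything after the last space is used).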
def albums_by_ip(ip):
    albums = []
    for thedir in listdir('rips'):
        d = path.join('rips', thedir)
        if not path.isdir(d): continue
        iptxt = path.join(d, 'ip.txt')
        if not path.exists(iptxt): continue
        f = open(iptxt, 'r')
        albumip = f.read().strip()
        f.close()
        if ip == albumip:
            jsonalbum = {}
            jsonalbum['album'] = thedir
            url = ''
            thelog = path.join(d, 'log.txt')
            if path.exists(thelog):
                f = open(thelog, 'r')
                lines = f.read().split('\n')
                f.close()
                if len(lines) > 0:
                    url = lines[0]
                    url = url[url.rfind(' ')+1:]
            jsonalbum['url'] = url
            albums.append(jsonalbum)
    return albums
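""" Checks url against unsupported.txt; returns False if any listed site matches """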
def is_supported(url):
    if not path.exists('unsupported.txt'): return True
    for line in open('unsupported.txt', 'r'):
        line = line.strip()
        if line.lower() in url.lower():
            return False
    return True
""" Entry point. Print leading/trailing characters, executes main() """
if __name__ == '__main__':
print "Content-Type: application/json"
print ""
stdout.flush()
main()
print "\n"