From e6bd7f5fbe650e9fca2d40621fe847a167f80679 Mon Sep 17 00:00:00 2001
From: vintol
Date: Sun, 31 Mar 2019 14:14:37 +0530
Subject: [PATCH] v1.0.0 First Stable Release

---
 CHANGELOG.md |  35 ++++++++
 L-dl.py      | 138 ++++++++++++++++++++++++++++++++
 Listal.py    |   4 +-
 listal-dl.py | 220 ---------------------------------------------------
 4 files changed, 175 insertions(+), 222 deletions(-)
 create mode 100644 CHANGELOG.md
 create mode 100644 L-dl.py
 delete mode 100644 listal-dl.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..15c6fa3
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,35 @@
+# Change Log
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/)
+and this project adheres to [Semantic Versioning](http://semver.org/).
+
+------------
+## [Upcoming Release]
+
+-------
+
+## [1.0.0] - 2016-11-10
+First stable release.
+### Added
+- Can now get images from `image` lists.
+- Support for command-line arguments.
+- Added a progress bar.
+- Download resume capability.
+
+### Changed
+- Instead of entering the name of the person, you now enter the profile URL.
+- The downloader is now a separate script rather than an option.
+- Links are placed in a file in the working directory instead of in a separate directory inside it.
+- No interactive input; command-line arguments are accepted instead.
+
+### Removed
+- Option to download images is removed from the main file.
+
+------
+## [0.1.0] - 2016-04-20
+This is the first public release. **Pre-release**
+
+### Added
+- Retrieves the links of all photos from any `person` profile on `listal.com`.
+- Can also download the photos from the retrieved links using multiple threads.
diff --git a/L-dl.py b/L-dl.py
new file mode 100644
index 0000000..a7c2ff0
--- /dev/null
+++ b/L-dl.py
@@ -0,0 +1,138 @@
+#
+# Listal Downloader
+#
+#
+
+import urllib.request,ssl
+import argparse
+import time
+import queue
+import threading
+import os,sys
+import better_exceptions  # third-party, only for nicer tracebacks
+#
+
+# Disable SSL certificate verification for urllib HTTPS requests.
+ssl._create_default_https_context = ssl._create_unverified_context
+
+def download():
+    # Worker thread: take (filename, url) pairs off the queue and save each image to disk.
+    global broken, failed, ls
+    while not qq.empty():
+        mydata = threading.local()
+        mydata.name, mydata.url = qq.get()
+        mydata.keep_going, mydata.skip, mydata.retry = (True, False, 0)
+        #if mydata.name in ls:continue
+        while mydata.keep_going:
+            try:
+                mydata.html = urllib.request.urlopen(mydata.url,timeout=120)
+                mydata.keep_going = False
+            except urllib.error.HTTPError as HERR:
+                # 404/500 mean the link itself is dead; count it and move on.
+                if HERR.code == 404 or HERR.code == 500:
+                    broken += 1
+                    mydata.keep_going = False
+                    mydata.skip = True
+            except:
+                # Network error; retry a few times before giving up.
+                mydata.retry += 1
+                if mydata.retry > 5:
+                    mydata.keep_going = False
+                    mydata.skip = True
+                    failed.append((mydata.name,mydata.url))
+        if mydata.skip:continue
+        while True:
+            try:
+                mydata.image = mydata.html.read()
+                open(mydata.name,'wb').write(mydata.image)
+                break
+            except:
+                mydata.retry += 1
+                if mydata.retry > 10:break
+
+def mkqueue():
+    global total,ld,ls
+    fhand = open(os.path.join(ld,args.fname),'r')
+    links = []
+    for each in fhand:
+        if each.startswith('#') or len(each) < 10:continue
+        # Filename = zero-padded image id (second-to-last URL segment) + original extension.
+        fname = each.strip().split('/')[-2].zfill(10) + "." + each.strip().split('.')[-1]
+        # Skipping names already present on disk is what makes downloads resumable.
+        if fname not in ls:links.append((fname,each.strip()))
+    for each in sorted(set(links),reverse=True):qq.put(each)
+    total = qq.qsize()
+    print(str(total),"Files queued for download.")
+    fhand.close()
+
+def enqueue():
+    # Put the (filename, url) pairs that failed in the previous pass back on the queue.
+    if qq.qsize() != 0:print("\n WARNING: Queue was not empty.")
+    for name,url in failed:
+        qq.put((name,url))
+
+def init_threads():
+    # Never start more threads than there are queued items.
+    for i in range(args.threads if args.threads < qq.qsize() else qq.qsize()):
+        t = threading.Thread(target=download)
+        threads.append(t)
+        t.start()
+
+def update_progress():
+    progress = 100 - int((100*qq.qsize()) / total)
+    te = time.strftime("%H:%M:%S",time.gmtime(time.time()-started))
+    pbar = "\r {:0>3}% [{:<50}] ({},{}) Time Elapsed : {} ".format(progress, '#'*int((progress/2)), (total-qq.qsize()), total, te)
+    sys.stdout.write(pbar)
+    sys.stdout.flush()
+
+def check_progress():
+    # Currently unused: quits if the completion percentage has not moved for 25 seconds.
+    global t1,p1
+    t2 = time.time()
+    p2 = 100 - int((100*qq.qsize()) / total)
+    if t2-t1 < 25:pass
+    else:
+        if p2 - p1 >0:
+            t1 = t2
+            p1 = p2
+        else:quit()
+#
+
+parser = argparse.ArgumentParser(description='Listal image downloader.')
+parser.add_argument('fname', type=str,
+        help='The file containing the list of links.')
+parser.add_argument('--dir', dest='directory', type = str, default = None, required = False,
+        help='The directory to download files into.')
+parser.add_argument('--threads', dest='threads', type = int, default = 10, required = False,
+        help='Number of threads to use.')
+args = parser.parse_args()
+
+#
+qq = queue.Queue()
+started = time.time()
+threads = []
+links = []
+failed = []
+broken = 0
+internal_error = 0
+ld = os.getcwd()  # the links file is resolved relative to the starting directory
+
+
+if args.directory is not None:
+    if not os.path.exists(args.directory):os.makedirs(args.directory)
+    os.chdir(args.directory)
+ls = os.listdir(os.getcwd())
+#files = os.listdir(os.getcwd())
+
+mkqueue()
+init_threads()
+t1, p1 = 0,0
+while not qq.empty():
+    update_progress()
+    time.sleep(5)
+    #check_progress()
+for t in threads:t.join()
+
+if len(failed) > 0:
+    print("\n INFO : Download failed for {} items. Trying again ...".format(len(failed)))
+    enqueue()
+    failed.clear()
+    init_threads()
+    for t in threads:t.join()
+
+print(" \n ============================ \n Time Taken : {} \n Files Downloaded : {} \n Failed Downloads\
+ : {} \n Broken Links : {} \n ===============================================================".format(\
+    time.strftime("%H:%M:%S",time.gmtime(time.time()-started)), total-len(failed),len(failed),broken))
+
+#
diff --git a/Listal.py b/Listal.py
index a7d9b96..783b6b6 100644
--- a/Listal.py
+++ b/Listal.py
@@ -1,6 +1,6 @@
 # Listal.py
-# 08/11/2016
-#
+# 08/11/2016 - 2017-04-13
+#
 
 import urllib.request, urllib.parse
 import bs4
diff --git a/listal-dl.py b/listal-dl.py
deleted file mode 100644
index c5cc5d2..0000000
--- a/listal-dl.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#
-# Listal-dl
-#
-# v0.21 28/08/2016
-#
-# Available under GNU GPL v3
-#
-# listal-dl Copyright (C) 2016 Tejas Kumar
-#
-# This program comes with ABSOLUTELY NO WARRANTY.
-# This is free software, and you are welcome to redistribute it
-# under certain conditions; see file "LICENSE".
-#
-import urllib.request
-from bs4 import *
-import queue
-import threading
-import os
-import time
-
-##
-
-class Imager (threading.Thread):
-    def __init__(self, threadID, queue, lock, function, store):
-        threading.Thread.__init__(self)
-        self.threadID = threadID
-        self.name = threadID
-        self.queue = queue
-        self.lock = lock
-        self.execution_function = function
-        self.output_store = store
-
-    def run(self):
-        while not self.queue.empty():
-            self.lock.acquire()
-            self.item = self.queue.get()
-            print(self.name,"got item",self.item)
-            self.lock.release()
-            self.execution_function(self)
-            if self.output_store is not None:
-                self.lock.acquire()
-                self.output_store.append(self.output)
-                self.lock.release()
-            self.queue.task_done()
-
-#
-
-def ipages(self):
-    try:
-        self.html = urllib.request.urlopen(self.item,timeout=2)
-    except:
-        while True:
-            try:
-                self.html = urllib.request.urlopen(self.item,timeout=5)
-                if self.html.getcode() == 200:break
-            except:continue
-    try:self.html_data = self.html.read()
-    except:
-        self.lock.acquire()
-        self.queue.put(self.item)
-        self.lock.release()
-        self.output = "\n"
-        return
-    self.soup = BeautifulSoup(self.html_data,"lxml")
-    self.output = []
-    for link in self.soup.find_all('a'):
-        if link.get('href').startswith("http://www.listal.com/viewimage"):
-            self.output.append(link.get('href')+"h")
-#
-
-def limages(self):
-    try:
-        self.html = urllib.request.urlopen(self.item,timeout=2)
-    except:
-        while True:
-            try:
-                self.html = urllib.request.urlopen(self.item,timeout=5)
-                if self.html.getcode() == 200:break
-            except:continue
-    try:self.html_data = self.html.read()
-    except:
-        self.lock.acquire()
-        self.queue.put(self.item)
-        self.lock.release()
-        self.output = "\n"
-        return
-    self.soup = BeautifulSoup(self.html_data,"lxml")
-    self.output = self.soup.find(title=name).get('src')
-
-#
-
-def idownload(self):
-    self.iname = self.item.split()[0]
-    self.link = self.item.split()[1]
-    try:
-        self.html = urllib.request.urlopen(self.link,timeout=10)
-    except:
-        while True:
-            try:
-                self.html = urllib.request.urlopen(self.link,timeout=100)
-                if self.html.getcode() == 200:break
-            except:continue
-    try:self.html_data = self.html.read()
-    except:
-        self.lock.acquire()
-        self.queue.put(self.item)
-        self.lock.release()
-        return
-    while True:
-        try:
-            open(self.iname,'wb').write(self.html_data)
-        except:continue
-        break
-
-#
-
-def pages():
-
-    url_name = name.strip().lower().replace(' ','-')
-    page_start = int(input("Start at Page No. : "))
-    page_end = int(input("End at Page No. : ")) + 1
-    no_threads = int(input("No. of Threads:"))
-
-    for i in range(page_start,page_end):
-        qq.put("http://www.listal.com/"+url_name+"/pictures//"+str(i))
-
-    for n in range(no_threads):
-        t = Imager("thread-{}".format(n),qq,thlock,ipages,output)
-        threads.append(t)
-        t.start()
-
-    qq.join()
-
-    for t in threads:
-        t.join()
-
-    for bulk in output:
-        for link in bulk:
-            links.append(link)
-
-# Now Image Pages to Image Links
-
-    for each in links:qq.put(each)
-    output.clear()
-
-    for n in range(no_threads):
-        t = Imager("thread-{}".format(n),qq,thlock,limages,output)
-        threads.append(t)
-        t.start()
-
-    qq.join()
-
-    for t in threads:
-        t.join()
-
-    fhand = open("Images",'a')
-    for link in output:
-        fhand.write(link+"\n")
-
-#
-
-def images_download():
-
-    links = open("Images",'r').read().split()
-    if len(links) <= 8000:
-        for i in range(len(links)):
-            qq.put("{} {}".format("D"+str(1001+i)+".jpg",links[i]))
-    elif len(links) > 8000:
-        for i in range(8000):
-            qq.put("{} {}".format("D"+str(1001+i)+".jpg",links[i]))
-        for i in range(len(links)-8000):
-            qq.put("{} {}".format("E"+str(1001+i)+".jpg",links[8000+i]))
-
-    for n in range(int(input("No. of Threads:"))):
-        t = Imager("thread-{}".format(n),qq,thlock,idownload,None)
-        threads.append(t)
-        t.start()
-
-    qq.join()
-
-    for t in threads:
-        t.join()
-
-##
-
-print (""" listal-dl Copyright (C) 2016 Tejas Kumar
-
- This program comes with ABSOLUTELY NO WARRANTY.
- This is free software, and you are welcome to redistribute it
- under certain conditions; see file "LICENSE". \n """)
-
-name = input("Name :")
-qq = queue.Queue()
-thlock = threading.Lock()
-threads=[]
-output=[]
-links=[]
-
-dir_name=name.split()[0]+name.split()[1][0]
-dirs = os.listdir(os.getcwd())
-if dir_name in dirs:
-    print(dir_name,"already exists !")
-    os.chdir(dir_name)
-    print("Moving to directory :",os.getcwd())
-else:
-    os.mkdir(dir_name)
-    os.chdir(dir_name)
-    print("Moving to directory :",os.getcwd())
-
-
-choise = input (" 0] Get Image Links \n 1] Download Images \n ===> ")
-time_started = time.time()
-if choise == "0":pages()
-elif choise == "1":images_download()
-else: print("Try Again.")
-
-time_taken = time.time() - time_started
-print("Time Taken = {}:{}:{}".format(str(int(time_taken/3600)).zfill(2),str(int((time_taken%3600)/60)).zfill(2),str(int((time_taken%3600)%60)).zfill(2)))
-
-##
\ No newline at end of file
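
Usage sketch (the file and directory names below are only examples, not part of the patch): once Listal.py has written the collected image links to a text file, the new downloader is driven entirely by command-line arguments, for instance:

    python3 L-dl.py mylinks.txt --dir Photos --threads 10

L-dl.py resolves the links file relative to the directory it was started from, creates --dir if it does not exist and changes into it, skips any files already present there (the resume capability noted in the changelog), and fetches the remaining links with up to --threads worker threads (default 10).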