From e6bd7f5fbe650e9fca2d40621fe847a167f80679 Mon Sep 17 00:00:00 2001
From: vintol
Date: Sun, 31 Mar 2019 14:14:37 +0530
Subject: [PATCH] v1.0.0 First Stable Release

---
 CHANGELOG.md |  35 ++++++++
 L-dl.py      | 138 ++++++++++++++++++++++++++++++++
 Listal.py    |   4 +-
 listal-dl.py | 220 ---------------------------------------------------
 4 files changed, 175 insertions(+), 222 deletions(-)
 create mode 100644 CHANGELOG.md
 create mode 100644 L-dl.py
 delete mode 100644 listal-dl.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..15c6fa3
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,35 @@
+# Change Log
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/)
+and this project adheres to [Semantic Versioning](http://semver.org/).
+
+------------
+## [Upcoming Release]
+
+-------
+
+## [1.0.0] - 2016-11-10
+First stable release.
+### Added
+- Can now get images from `image` lists.
+- Support for command-line arguments.
+- Added a progress bar.
+- Download resume capability.
+
+### Changed
+- Instead of entering the name of the person, you now enter the profile URL.
+- The downloader is now a separate script rather than an option.
+- Links are placed in a file in the working directory instead of in a separate directory inside it.
+- No interactive input; command-line arguments are accepted instead.
+
+### Removed
+- Option to download images is removed from the main file.
+
+------
+## [0.1.0] - 2016-04-20
+This is the first public release. **Pre-release**
+
+### Added
+- Retrieves the links of all photos from any `person` profile on `listal.com`.
+- Can also download the photos from the retrieved links using multiple threads.
diff --git a/L-dl.py b/L-dl.py
new file mode 100644
index 0000000..a7c2ff0
--- /dev/null
+++ b/L-dl.py
@@ -0,0 +1,138 @@
+#
+# Listal Downloader
+#
+#
+
+import urllib.request,ssl
+import argparse
+import time
+import queue
+import threading
+import os,sys
+import better_exceptions  # third-party, only for nicer tracebacks
+#
+
+# Disable SSL certificate verification for urllib HTTPS requests.
+ssl._create_default_https_context = ssl._create_unverified_context
+
+def download():
+    # Worker thread: take (filename, url) pairs off the queue and save each image to disk.
+    global broken, failed, ls
+    while not qq.empty():
+        mydata = threading.local()
+        mydata.name, mydata.url = qq.get()
+        mydata.keep_going, mydata.skip, mydata.retry = (True, False, 0)
+        #if mydata.name in ls:continue
+        while mydata.keep_going:
+            try:
+                mydata.html = urllib.request.urlopen(mydata.url,timeout=120)
+                mydata.keep_going = False
+            except urllib.error.HTTPError as HERR:
+                # 404/500 mean the link itself is dead; count it and move on.
+                if HERR.code == 404 or HERR.code == 500:
+                    broken += 1
+                    mydata.keep_going = False
+                    mydata.skip = True
+            except:
+                # Network error; retry a few times before giving up.
+                mydata.retry += 1
+                if mydata.retry > 5:
+                    mydata.keep_going = False
+                    mydata.skip = True
+                    failed.append((mydata.name,mydata.url))
+        if mydata.skip:continue
+        while True:
+            try:
+                mydata.image = mydata.html.read()
+                open(mydata.name,'wb').write(mydata.image)
+                break
+            except:
+                mydata.retry += 1
+                if mydata.retry > 10:break
+
+def mkqueue():
+    global total,ld,ls
+    fhand = open(os.path.join(ld,args.fname),'r')
+    links = []
+    for each in fhand:
+        if each.startswith('#') or len(each) < 10:continue
+        # Filename = zero-padded image id (second-to-last URL segment) + original extension.
+        fname = each.strip().split('/')[-2].zfill(10) + "." + each.strip().split('.')[-1]
+        # Skipping names already present on disk is what makes downloads resumable.
+        if fname not in ls:links.append((fname,each.strip()))
+    for each in sorted(set(links),reverse=True):qq.put(each)
+    total = qq.qsize()
+    print(str(total),"Files queued for download.")
+    fhand.close()
+
+def enqueue():
+    # Put the (filename, url) pairs that failed in the previous pass back on the queue.
+    if qq.qsize() != 0:print("\n WARNING: Queue was not empty.")
+    for name,url in failed:
+        qq.put((name,url))
+
+def init_threads():
+    # Never start more threads than there are queued items.
+    for i in range(args.threads if args.threads < qq.qsize() else qq.qsize()):
+        t = threading.Thread(target=download)
+        threads.append(t)
+        t.start()
+
+def update_progress():
+    progress = 100 - int((100*qq.qsize()) / total)
+    te = time.strftime("%H:%M:%S",time.gmtime(time.time()-started))
+    pbar = "\r {:0>3}% [{:<50}] ({},{}) Time Elapsed : {} ".format(progress, '#'*int((progress/2)), (total-qq.qsize()), total, te)
+    sys.stdout.write(pbar)
+    sys.stdout.flush()
+
+def check_progress():
+    # Currently unused: quits if the completion percentage has not moved for 25 seconds.
+    global t1,p1
+    t2 = time.time()
+    p2 = 100 - int((100*qq.qsize()) / total)
+    if t2-t1 < 25:pass
+    else:
+        if p2 - p1 >0:
+            t1 = t2
+            p1 = p2
+        else:quit()
+#
+
+parser = argparse.ArgumentParser(description='Listal image downloader.')
+parser.add_argument('fname', type=str,
+        help='The file containing the list of links.')
+parser.add_argument('--dir', dest='directory', type = str, default = None, required = False,
+        help='The directory to download files into.')
+parser.add_argument('--threads', dest='threads', type = int, default = 10, required = False,
+        help='Number of threads to use.')
+args = parser.parse_args()
+
+#
+qq = queue.Queue()
+started = time.time()
+threads = []
+links = []
+failed = []
+broken = 0
+internal_error = 0
+ld = os.getcwd()  # the links file is resolved relative to the starting directory
+
+
+if args.directory is not None:
+    if not os.path.exists(args.directory):os.makedirs(args.directory)
+    os.chdir(args.directory)
+ls = os.listdir(os.getcwd())
+#files = os.listdir(os.getcwd())
+
+mkqueue()
+init_threads()
+t1, p1 = 0,0
+while not qq.empty():
+    update_progress()
+    time.sleep(5)
+    #check_progress()
+for t in threads:t.join()
+
+if len(failed) > 0:
+    print("\n INFO : Download failed for {} items. Trying again ...".format(len(failed)))
+    enqueue()
+    failed.clear()
+    init_threads()
+    for t in threads:t.join()
+
+print(" \n ============================ \n Time Taken : {} \n Files Downloaded : {} \n Failed Downloads\
+ : {} \n Broken Links : {} \n ===============================================================".format(\
+    time.strftime("%H:%M:%S",time.gmtime(time.time()-started)), total-len(failed),len(failed),broken))
+
+#
diff --git a/Listal.py b/Listal.py
index a7d9b96..783b6b6 100644
--- a/Listal.py
+++ b/Listal.py
@@ -1,6 +1,6 @@
 # Listal.py
-# 08/11/2016
-#
+# 08/11/2016 - 2017-04-13
+#
 
 import urllib.request, urllib.parse
 import bs4
diff --git a/listal-dl.py b/listal-dl.py
deleted file mode 100644
index c5cc5d2..0000000
--- a/listal-dl.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#
-# Listal-dl
-#
-# v0.21 28/08/2016
-#
-# Available under GNU GPL v3
-#
-# listal-dl Copyright (C) 2016 Tejas Kumar
-#
-# This program comes with ABSOLUTELY NO WARRANTY.
-# This is free software, and you are welcome to redistribute it
-# under certain conditions; see file "LICENSE".
-#
-import urllib.request
-from bs4 import *
-import queue
-import threading
-import os
-import time
-
-##
-
-class Imager (threading.Thread):
-    def __init__(self, threadID, queue, lock, function, store):
-        threading.Thread.__init__(self)
-        self.threadID = threadID
-        self.name = threadID
-        self.queue = queue
-        self.lock = lock
-        self.execution_function = function
-        self.output_store = store
-
-    def run(self):
-        while not self.queue.empty():
-            self.lock.acquire()
-            self.item = self.queue.get()
-            print(self.name,"got item",self.item)
-            self.lock.release()
-            self.execution_function(self)
-            if self.output_store is not None:
-                self.lock.acquire()
-                self.output_store.append(self.output)
-                self.lock.release()
-            self.queue.task_done()
-
-#
-
-def ipages(self):
-    try:
-        self.html = urllib.request.urlopen(self.item,timeout=2)
-    except:
-        while True:
-            try:
-                self.html = urllib.request.urlopen(self.item,timeout=5)
-                if self.html.getcode() == 200:break
-            except:continue
-    try:self.html_data = self.html.read()
-    except:
-        self.lock.acquire()
-        self.queue.put(self.item)
-        self.lock.release()
-        self.output = "\n"
-        return
-    self.soup = BeautifulSoup(self.html_data,"lxml")
-    self.output = []
-    for link in self.soup.find_all('a'):
-        if link.get('href').startswith("http://www.listal.com/viewimage"):
-            self.output.append(link.get('href')+"h")
-#
-
-def limages(self):
-    try:
-        self.html = urllib.request.urlopen(self.item,timeout=2)
-    except:
-        while True:
-            try:
-                self.html = urllib.request.urlopen(self.item,timeout=5)
-                if self.html.getcode() == 200:break
-            except:continue
-    try:self.html_data = self.html.read()
-    except:
-        self.lock.acquire()
-        self.queue.put(self.item)
-        self.lock.release()
-        self.output = "\n"
-        return
-    self.soup = BeautifulSoup(self.html_data,"lxml")
-    self.output = self.soup.find(title=name).get('src')
-
-#
-
-def idownload(self):
-    self.iname = self.item.split()[0]
-    self.link = self.item.split()[1]
-    try:
-        self.html = urllib.request.urlopen(self.link,timeout=10)
-    except:
-        while True:
-            try:
-                self.html = urllib.request.urlopen(self.link,timeout=100)
-                if self.html.getcode() == 200:break
-            except:continue
-    try:self.html_data = self.html.read()
-    except:
-        self.lock.acquire()
-        self.queue.put(self.item)
-        self.lock.release()
-        return
-    while True:
-        try:
-            open(self.iname,'wb').write(self.html_data)
-        except:continue
-        break
-
-#
-
-def pages():
-
-    url_name = name.strip().lower().replace(' ','-')
-    page_start = int(input("Start at Page No. : "))
-    page_end = int(input("End at Page No. : ")) + 1
-    no_threads = int(input("No. of Threads:"))
-
-    for i in range(page_start,page_end):
-        qq.put("http://www.listal.com/"+url_name+"/pictures//"+str(i))
-
-    for n in range(no_threads):
-        t = Imager("thread-{}".format(n),qq,thlock,ipages,output)
-        threads.append(t)
-        t.start()
-
-    qq.join()
-
-    for t in threads:
-        t.join()
-
-    for bulk in output:
-        for link in bulk:
-            links.append(link)
-
-# Now Image Pages to Image Links
-
-    for each in links:qq.put(each)
-    output.clear()
-
-    for n in range(no_threads):
-        t = Imager("thread-{}".format(n),qq,thlock,limages,output)
-        threads.append(t)
-        t.start()
-
-    qq.join()
-
-    for t in threads:
-        t.join()
-
-    fhand = open("Images",'a')
-    for link in output:
-        fhand.write(link+"\n")
-
-#
-
-def images_download():
-
-    links = open("Images",'r').read().split()
-    if len(links) <= 8000:
-        for i in range(len(links)):
-            qq.put("{} {}".format("D"+str(1001+i)+".jpg",links[i]))
-    elif len(links) > 8000:
-        for i in range(8000):
-            qq.put("{} {}".format("D"+str(1001+i)+".jpg",links[i]))
-        for i in range(len(links)-8000):
-            qq.put("{} {}".format("E"+str(1001+i)+".jpg",links[8000+i]))
-
-    for n in range(int(input("No. of Threads:"))):
-        t = Imager("thread-{}".format(n),qq,thlock,idownload,None)
-        threads.append(t)
-        t.start()
-
-    qq.join()
-
-    for t in threads:
-        t.join()
-
-##
-
-print (""" listal-dl Copyright (C) 2016 Tejas Kumar
-
- This program comes with ABSOLUTELY NO WARRANTY.
- This is free software, and you are welcome to redistribute it
- under certain conditions; see file "LICENSE". \n """)
-
-name = input("Name :")
-qq = queue.Queue()
-thlock = threading.Lock()
-threads=[]
-output=[]
-links=[]
-
-dir_name=name.split()[0]+name.split()[1][0]
-dirs = os.listdir(os.getcwd())
-if dir_name in dirs:
-    print(dir_name,"already exists !")
-    os.chdir(dir_name)
-    print("Moving to directory :",os.getcwd())
-else:
-    os.mkdir(dir_name)
-    os.chdir(dir_name)
-    print("Moving to directory :",os.getcwd())
-
-
-choise = input (" 0] Get Image Links \n 1] Download Images \n ===> ")
-time_started = time.time()
-if choise == "0":pages()
-elif choise == "1":images_download()
-else: print("Try Again.")
-
-time_taken = time.time() - time_started
-print("Time Taken = {}:{}:{}".format(str(int(time_taken/3600)).zfill(2),str(int((time_taken%3600)/60)).zfill(2),str(int((time_taken%3600)%60)).zfill(2)))
-
-##
\ No newline at end of file
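
Usage sketch (the file and directory names below are only examples, not part of the patch): once Listal.py has written the collected image links to a text file, the new downloader is driven entirely by command-line arguments, for instance:

    python3 L-dl.py mylinks.txt --dir Photos --threads 10

L-dl.py resolves the links file relative to the directory it was started from, creates --dir if it does not exist and changes into it, skips any files already present there (the resume capability noted in the changelog), and fetches the remaining links with up to --threads worker threads (default 10).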